import re import random import nltk import numpy as np from typing import List, Dict, Optional import time from collections import Counter import statistics from nltk.tokenize import sent_tokenize, word_tokenize from nltk.corpus import wordnet # Advanced imports with fallbacks def safe_import_with_fallback(module_name, component=None): """Safe import with fallback handling""" try: if component: module = __import__(module_name, fromlist=[component]) return getattr(module, component), True else: return __import__(module_name), True except ImportError: return None, False except Exception: return None, False # Load advanced models print("๐Ÿš€ Loading Universal AI Text Humanizer...") SentenceTransformer, SENTENCE_TRANSFORMERS_AVAILABLE = safe_import_with_fallback('sentence_transformers', 'SentenceTransformer') pipeline, TRANSFORMERS_AVAILABLE = safe_import_with_fallback('transformers', 'pipeline') try: from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.pairwise import cosine_similarity as sklearn_cosine_similarity SKLEARN_AVAILABLE = True except ImportError: SKLEARN_AVAILABLE = False try: import torch TORCH_AVAILABLE = True except ImportError: TORCH_AVAILABLE = False class UniversalAITextHumanizer: """ Universal AI Text Humanizer for All Business Use Cases Based on QuillBot and Walter Writes AI research Simplified interface with only Natural/Conversational modes """ def __init__(self, enable_gpu=True): print("๐ŸŒ Initializing Universal AI Text Humanizer...") print("๐ŸŽฏ Designed for E-commerce, Marketing, SEO & All Business Needs") self.enable_gpu = enable_gpu and TORCH_AVAILABLE # Initialize models and databases self._load_models() self._initialize_universal_patterns() print("โœ… Universal AI Text Humanizer ready for all use cases!") self._print_status() def _load_models(self): """Load AI models with graceful fallbacks""" self.similarity_model = None self.paraphraser = None # Load sentence transformer for quality control if SENTENCE_TRANSFORMERS_AVAILABLE: try: device = 'cuda' if self.enable_gpu and TORCH_AVAILABLE and torch.cuda.is_available() else 'cpu' self.similarity_model = SentenceTransformer('all-MiniLM-L6-v2', device=device) print("โœ… Advanced similarity model loaded") except Exception as e: print(f"โš ๏ธ Similarity model unavailable: {e}") # Load paraphrasing model if TRANSFORMERS_AVAILABLE: try: device = 0 if self.enable_gpu and TORCH_AVAILABLE and torch.cuda.is_available() else -1 self.paraphraser = pipeline( "text2text-generation", model="google/flan-t5-small", device=device, max_length=256 ) print("โœ… AI paraphrasing model loaded") except Exception as e: print(f"โš ๏ธ Paraphrasing model unavailable: {e}") # Fallback similarity using TF-IDF if SKLEARN_AVAILABLE: self.tfidf_vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 2), max_features=5000) else: self.tfidf_vectorizer = None def _initialize_universal_patterns(self): """Initialize patterns based on QuillBot & Walter Writes research""" # Universal word replacements (business-friendly) self.word_replacements = { # Formal business terms -> Natural alternatives "utilize": "use", "demonstrate": "show", "facilitate": "help", "implement": "set up", "consequently": "so", "furthermore": "also", "moreover": "plus", "nevertheless": "but", "subsequently": "then", "accordingly": "therefore", "regarding": "about", "concerning": "about", "approximately": "about", "endeavor": "try", "commence": "start", "terminate": "end", "obtain": "get", "purchase": "buy", "examine": "check", "analyze": "look at", "construct": "build", "establish": "create", "methodology": "method", "systematic": "organized", "comprehensive": "complete", "significant": "important", "substantial": "large", "optimal": "best", "sufficient": "enough", "adequate": "good", "exceptional": "great", "fundamental": "basic", "essential": "key", "crucial": "important", "paramount": "very important", "imperative": "must", "mandatory": "required", "optimization": "improvement", "enhancement": "upgrade", "implementation": "setup", "utilization": "use", "evaluation": "review", "assessment": "check", "validation": "proof", "verification": "confirmation", "consolidation": "combining", "integration": "merging", "transformation": "change", "modification": "change" } # AI-specific phrases to replace (QuillBot research) self.ai_phrase_replacements = { "it is important to note that": "notably", "it should be emphasized that": "importantly", "it is worth mentioning that": "by the way", "it is crucial to understand that": "remember", "from a practical standpoint": "practically", "in terms of implementation": "when implementing", "with respect to the aforementioned": "about this", "as previously mentioned": "as noted", "in light of this": "because of this", "it is imperative to understand": "you should know", "one must consider": "consider", "it is evident that": "clearly", "it can be observed that": "we can see", "upon careful consideration": "after thinking", "in the final analysis": "ultimately" } # Professional contractions (universal appeal) self.contractions = { "do not": "don't", "does not": "doesn't", "did not": "didn't", "will not": "won't", "would not": "wouldn't", "should not": "shouldn't", "could not": "couldn't", "cannot": "can't", "is not": "isn't", "are not": "aren't", "was not": "wasn't", "were not": "weren't", "have not": "haven't", "has not": "hasn't", "had not": "hadn't", "I am": "I'm", "you are": "you're", "he is": "he's", "she is": "she's", "it is": "it's", "we are": "we're", "they are": "they're", "I have": "I've", "you have": "you've", "we have": "we've", "they have": "they've", "I will": "I'll", "you will": "you'll", "we will": "we'll", "they will": "they'll" } # Natural transition words (Walter Writes research) self.natural_transitions = [ "Also", "Plus", "And", "Then", "So", "But", "However", "Still", "Now", "Well", "Actually", "Besides", "Additionally", "What's more", "On top of that", "Beyond that" ] def preserve_structure(self, original: str, processed: str) -> str: """Preserve original text structure (paragraphs, formatting)""" # Split by double newlines (paragraphs) original_paragraphs = re.split(r'\n\s*\n', original) if len(original_paragraphs) <= 1: return processed # Split processed text into sentences processed_sentences = sent_tokenize(processed) # Try to maintain paragraph structure result_paragraphs = [] sentence_idx = 0 for para in original_paragraphs: para_sentences = sent_tokenize(para) para_sentence_count = len(para_sentences) if sentence_idx + para_sentence_count <= len(processed_sentences): para_processed = ' '.join(processed_sentences[sentence_idx:sentence_idx + para_sentence_count]) result_paragraphs.append(para_processed) sentence_idx += para_sentence_count else: # Add remaining sentences to this paragraph remaining = ' '.join(processed_sentences[sentence_idx:]) if remaining: result_paragraphs.append(remaining) break return '\n\n'.join(result_paragraphs) def apply_word_replacements(self, text: str, intensity: float = 0.7) -> str: """Apply universal word replacements""" words = word_tokenize(text) modified_words = [] for word in words: word_clean = word.lower().strip('.,!?;:"') if word_clean in self.word_replacements and random.random() < intensity: replacement = self.word_replacements[word_clean] # Preserve case if word.isupper(): replacement = replacement.upper() elif word.istitle(): replacement = replacement.title() modified_words.append(replacement) else: modified_words.append(word) # Reconstruct with proper spacing result = "" for i, word in enumerate(modified_words): if i > 0 and word not in ".,!?;:\"')": result += " " result += word return result def apply_contractions(self, text: str, style: str, intensity: float = 0.6) -> str: """Apply contractions based on style""" if style == "natural" and intensity < 0.5: intensity *= 0.7 # Less aggressive for natural style for formal, contracted in self.contractions.items(): if random.random() < intensity: pattern = r'\b' + re.escape(formal) + r'\b' text = re.sub(pattern, contracted, text, flags=re.IGNORECASE) return text def replace_ai_phrases(self, text: str, intensity: float = 0.8) -> str: """Replace AI-specific phrases""" for ai_phrase, replacement in self.ai_phrase_replacements.items(): if ai_phrase in text.lower(): if random.random() < intensity: # Preserve case if ai_phrase[0].isupper() or text.find(ai_phrase.title()) != -1: replacement = replacement.capitalize() text = text.replace(ai_phrase, replacement) text = text.replace(ai_phrase.title(), replacement.title()) return text def vary_sentence_structure(self, text: str, style: str, intensity: float = 0.4) -> str: """Add sentence variety based on style""" sentences = sent_tokenize(text) varied_sentences = [] for sentence in sentences: if len(sentence.split()) > 8 and random.random() < intensity: # Add natural transitions occasionally if style == "conversational" and random.random() < 0.3: transition = random.choice(self.natural_transitions) sentence = transition + ", " + sentence.lower() # Split long sentences occasionally (Walter Writes technique) elif len(sentence.split()) > 15 and random.random() < 0.2: words = sentence.split() mid_point = len(words) // 2 # Find a natural break point for i in range(mid_point-2, mid_point+3): if i < len(words) and words[i].lower() in ['and', 'but', 'so', 'because']: first_part = ' '.join(words[:i]) + '.' second_part = ' '.join(words[i+1:]) if second_part: second_part = second_part[0].upper() + second_part[1:] varied_sentences.extend([first_part, second_part]) continue varied_sentences.append(sentence) return ' '.join(varied_sentences) def apply_advanced_paraphrasing(self, text: str, style: str, intensity: float = 0.3) -> str: """Apply AI paraphrasing if available""" if not self.paraphraser or intensity < 0.6: return text sentences = sent_tokenize(text) paraphrased_sentences = [] for sentence in sentences: if len(sentence.split()) > 10 and random.random() < intensity * 0.4: try: # Style-specific prompts if style == "conversational": prompt = f"Make this more conversational and natural: {sentence}" else: prompt = f"Rewrite this naturally: {sentence}" result = self.paraphraser( prompt, max_length=min(150, len(sentence) + 30), min_length=max(10, len(sentence) // 2), temperature=0.7, do_sample=True ) paraphrased = result[0]['generated_text'].replace(prompt, '').strip().strip('"\'') # Quality check if (paraphrased and len(paraphrased) > 5 and len(paraphrased) < len(sentence) * 1.8 and not paraphrased.lower().startswith(('sorry', 'i cannot'))): paraphrased_sentences.append(paraphrased) else: paraphrased_sentences.append(sentence) except Exception: paraphrased_sentences.append(sentence) else: paraphrased_sentences.append(sentence) return ' '.join(paraphrased_sentences) def calculate_similarity(self, text1: str, text2: str) -> float: """Calculate semantic similarity""" if self.similarity_model: try: embeddings1 = self.similarity_model.encode([text1]) embeddings2 = self.similarity_model.encode([text2]) similarity = np.dot(embeddings1[0], embeddings2[0]) / ( np.linalg.norm(embeddings1[0]) * np.linalg.norm(embeddings2[0]) ) return float(similarity) except Exception: pass # Fallback to TF-IDF if self.tfidf_vectorizer and SKLEARN_AVAILABLE: try: tfidf_matrix = self.tfidf_vectorizer.fit_transform([text1, text2]) similarity = sklearn_cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0] return float(similarity) except Exception: pass # Basic word overlap fallback words1 = set(word_tokenize(text1.lower())) words2 = set(word_tokenize(text2.lower())) if not words1 or not words2: return 1.0 if text1 == text2 else 0.0 intersection = words1.intersection(words2) union = words1.union(words2) return len(intersection) / len(union) if union else 1.0 def humanize_text_universal(self, text: str, style: str = "natural", intensity: float = 0.7) -> Dict: """ Universal text humanization for all business use cases Args: text: Input text to humanize style: 'natural' or 'conversational' intensity: Transformation intensity (0.0 to 1.0) Returns: Dictionary with results and metrics """ if not text.strip(): return { "original_text": text, "humanized_text": text, "similarity_score": 1.0, "changes_made": [], "processing_time_ms": 0.0, "style": style, "intensity": intensity, "structure_preserved": True } start_time = time.time() original_text = text humanized_text = text changes_made = [] # Phase 1: Replace AI-specific phrases if intensity > 0.2: before = humanized_text humanized_text = self.replace_ai_phrases(humanized_text, intensity * 0.9) if humanized_text != before: changes_made.append("Removed AI phrases") # Phase 2: Universal word replacements if intensity > 0.3: before = humanized_text humanized_text = self.apply_word_replacements(humanized_text, intensity * 0.8) if humanized_text != before: changes_made.append("Improved word choice") # Phase 3: Add contractions if intensity > 0.4: before = humanized_text humanized_text = self.apply_contractions(humanized_text, style, intensity * 0.7) if humanized_text != before: changes_made.append("Added natural contractions") # Phase 4: Vary sentence structure if intensity > 0.5: before = humanized_text humanized_text = self.vary_sentence_structure(humanized_text, style, intensity * 0.4) if humanized_text != before: changes_made.append("Improved sentence flow") # Phase 5: Advanced paraphrasing (if available and high intensity) if intensity > 0.7 and self.paraphraser: before = humanized_text humanized_text = self.apply_advanced_paraphrasing(humanized_text, style, intensity) if humanized_text != before: changes_made.append("Enhanced with AI paraphrasing") # Phase 6: Preserve structure humanized_text = self.preserve_structure(original_text, humanized_text) # Calculate quality metrics similarity_score = self.calculate_similarity(original_text, humanized_text) processing_time = (time.time() - start_time) * 1000 # Quality control - revert if too different if similarity_score < 0.7: print(f"โš ๏ธ Similarity too low ({similarity_score:.3f}), reverting changes") humanized_text = original_text similarity_score = 1.0 changes_made = ["Reverted - maintained original meaning"] return { "original_text": original_text, "humanized_text": humanized_text, "similarity_score": similarity_score, "changes_made": changes_made, "processing_time_ms": processing_time, "style": style, "intensity": intensity, "structure_preserved": True, "word_count_original": len(original_text.split()), "word_count_humanized": len(humanized_text.split()), "character_count_original": len(original_text), "character_count_humanized": len(humanized_text) } def _print_status(self): """Print current status""" print("\n๐Ÿ“Š UNIVERSAL AI TEXT HUMANIZER STATUS:") print("-" * 45) print(f"๐Ÿง  Advanced Similarity: {'โœ…' if self.similarity_model else 'โŒ'}") print(f"๐Ÿค– AI Paraphrasing: {'โœ…' if self.paraphraser else 'โŒ'}") print(f"๐Ÿ“Š TF-IDF Fallback: {'โœ…' if self.tfidf_vectorizer else 'โŒ'}") print(f"๐Ÿš€ GPU Acceleration: {'โœ…' if self.enable_gpu else 'โŒ'}") print(f"๐ŸŒ Universal Patterns: โœ… LOADED") print(f"๐Ÿ“ Word Replacements: โœ… {len(self.word_replacements)} mappings") print(f"๐Ÿ”ค AI Phrase Detection: โœ… {len(self.ai_phrase_replacements)} patterns") print(f"๐Ÿ’ฌ Contractions: โœ… {len(self.contractions)} patterns") print(f"๐Ÿ—๏ธ Structure Preservation: โœ… ENABLED") # Calculate feature completeness features = [ bool(self.similarity_model), bool(self.paraphraser), bool(self.tfidf_vectorizer), True, # Universal patterns True, # Structure preservation True # Quality control ] completeness = (sum(features) / len(features)) * 100 print(f"๐ŸŽฏ System Completeness: {completeness:.1f}%") if completeness >= 80: print("๐ŸŽ‰ READY FOR ALL BUSINESS USE CASES!") elif completeness >= 60: print("โœ… Core features ready - some advanced features may be limited") else: print("โš ๏ธ Basic mode - install additional dependencies for full features") # Test function if __name__ == "__main__": humanizer = UniversalAITextHumanizer() # Test cases for different business scenarios test_cases = [ { "name": "E-commerce Product Description", "text": "Furthermore, this product demonstrates exceptional quality and utilizes advanced materials to ensure optimal performance. Subsequently, customers will experience significant improvements in their daily activities.", "style": "natural" }, { "name": "Marketing Copy", "text": "Moreover, our comprehensive solution facilitates unprecedented optimization of business processes. Therefore, organizations should implement our platform to obtain optimal results.", "style": "conversational" }, { "name": "SEO Blog Content", "text": "It is important to note that search engine optimization requires systematic approaches. Subsequently, websites must utilize comprehensive strategies to enhance their visibility.", "style": "natural" } ] print(f"\n๐Ÿงช TESTING UNIVERSAL HUMANIZER") print("=" * 40) for i, test_case in enumerate(test_cases, 1): print(f"\n๐Ÿ”ฌ Test {i}: {test_case['name']}") print("-" * 50) print(f"๐Ÿ“ Original: {test_case['text']}") result = humanizer.humanize_text_universal( text=test_case['text'], style=test_case['style'], intensity=0.7 ) print(f"โœจ Humanized: {result['humanized_text']}") print(f"๐Ÿ“Š Similarity: {result['similarity_score']:.3f}") print(f"โšก Processing: {result['processing_time_ms']:.1f}ms") print(f"๐Ÿ”ง Changes: {', '.join(result['changes_made'])}") print(f"\n๐ŸŽ‰ Universal testing completed!") print(f"๐ŸŒ Ready for E-commerce, Marketing, SEO & All Business Use Cases!")