Spaces:
Sleeping
Sleeping
Commit ·
5c9a55b
1
Parent(s): 5b7b927
adv humanizer
Browse files- app.py +246 -154
- diagnose_and_fix.py +228 -0
- install_production.py +354 -0
- requirements.txt +17 -7
- text_humanizer_production.py +585 -0
app.py
CHANGED
|
@@ -1,120 +1,204 @@
|
|
| 1 |
-
#
|
|
|
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
import time
|
| 4 |
import os
|
|
|
|
| 5 |
|
| 6 |
-
# Import our
|
| 7 |
-
from
|
| 8 |
|
| 9 |
-
#
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
humanizer = AITextHumanizer()
|
| 13 |
-
print("✅ Humanizer loaded successfully!")
|
| 14 |
-
except Exception as e:
|
| 15 |
-
print(f"❌ Error loading humanizer: {e}")
|
| 16 |
-
humanizer = None
|
| 17 |
|
| 18 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
"""
|
| 20 |
-
Hugging Face Spaces interface
|
| 21 |
"""
|
| 22 |
if not text.strip():
|
| 23 |
-
return "⚠️ Please enter some text to humanize.", "",
|
| 24 |
|
| 25 |
if humanizer is None:
|
| 26 |
-
return "❌ Error: Humanizer not loaded properly.
|
| 27 |
|
| 28 |
try:
|
| 29 |
start_time = time.time()
|
| 30 |
|
| 31 |
-
#
|
| 32 |
-
result = humanizer.
|
| 33 |
text=text,
|
| 34 |
style=style.lower(),
|
| 35 |
-
intensity=intensity
|
|
|
|
| 36 |
)
|
| 37 |
|
| 38 |
processing_time = (time.time() - start_time) * 1000
|
| 39 |
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
except Exception as e:
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
-
# Create the
|
| 54 |
with gr.Blocks(
|
| 55 |
-
title="🤖➡️👤 AI Text Humanizer",
|
| 56 |
theme=gr.themes.Soft(),
|
| 57 |
css="""
|
| 58 |
.main-header {
|
| 59 |
text-align: center;
|
| 60 |
-
background: linear-gradient(
|
| 61 |
color: white;
|
| 62 |
-
padding:
|
| 63 |
-
border-radius:
|
| 64 |
-
margin-bottom:
|
|
|
|
| 65 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
.stats-box {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
background: #f8f9fa;
|
| 68 |
padding: 15px;
|
| 69 |
border-radius: 8px;
|
| 70 |
-
border
|
| 71 |
-
}
|
| 72 |
-
.warning-box {
|
| 73 |
-
background: #fff3cd;
|
| 74 |
-
border: 1px solid #ffeaa7;
|
| 75 |
-
color: #856404;
|
| 76 |
-
padding: 10px;
|
| 77 |
-
border-radius: 5px;
|
| 78 |
margin: 10px 0;
|
| 79 |
}
|
| 80 |
"""
|
| 81 |
-
) as
|
| 82 |
|
| 83 |
-
gr.HTML("""
|
| 84 |
<div class="main-header">
|
| 85 |
-
<h1>🤖➡️👤 AI Text Humanizer</h1>
|
| 86 |
-
<p>
|
| 87 |
-
<p><em>
|
| 88 |
</div>
|
| 89 |
""")
|
| 90 |
|
| 91 |
-
#
|
| 92 |
-
if
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
⚠️ <strong>Note:</strong> Paraphrasing models not available. Advanced paraphrasing disabled.
|
| 106 |
-
</div>
|
| 107 |
-
""")
|
| 108 |
|
| 109 |
-
with gr.Tab("
|
| 110 |
with gr.Row():
|
| 111 |
with gr.Column(scale=1):
|
| 112 |
gr.HTML("<h3>📝 Input</h3>")
|
| 113 |
|
| 114 |
input_text = gr.Textbox(
|
| 115 |
label="Text to Humanize",
|
| 116 |
-
placeholder="Paste your AI-generated text here...\n\nExample: Furthermore, it is important to note that artificial intelligence systems demonstrate significant capabilities..
|
| 117 |
-
lines=
|
| 118 |
max_lines=20
|
| 119 |
)
|
| 120 |
|
|
@@ -122,148 +206,156 @@ with gr.Blocks(
|
|
| 122 |
style_dropdown = gr.Dropdown(
|
| 123 |
choices=["Natural", "Casual", "Conversational"],
|
| 124 |
value="Natural",
|
| 125 |
-
label="🎨 Humanization Style"
|
|
|
|
| 126 |
)
|
| 127 |
|
| 128 |
intensity_slider = gr.Slider(
|
| 129 |
minimum=0.1,
|
| 130 |
maximum=1.0,
|
| 131 |
-
value=0.
|
| 132 |
step=0.1,
|
| 133 |
-
label="⚡ Intensity Level"
|
|
|
|
| 134 |
)
|
| 135 |
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
with gr.Column(scale=1):
|
| 143 |
gr.HTML("<h3>✨ Output</h3>")
|
| 144 |
|
| 145 |
output_text = gr.Textbox(
|
| 146 |
label="Humanized Text",
|
| 147 |
-
lines=
|
| 148 |
max_lines=20,
|
| 149 |
show_copy_button=True
|
| 150 |
)
|
| 151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
stats_output = gr.Markdown(
|
| 153 |
-
label="📊
|
| 154 |
value="Results will appear here after processing..."
|
| 155 |
)
|
| 156 |
|
| 157 |
-
with gr.Tab("📊 Examples &
|
| 158 |
-
gr.HTML("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
|
| 160 |
# Examples
|
|
|
|
|
|
|
| 161 |
examples = gr.Examples(
|
| 162 |
examples=[
|
| 163 |
[
|
| 164 |
"Furthermore, it is important to note that artificial intelligence systems demonstrate significant capabilities in natural language processing tasks. Subsequently, these systems can analyze and generate text with remarkable accuracy. Nevertheless, it is crucial to understand that human oversight remains essential for optimal performance.",
|
| 165 |
"Conversational",
|
| 166 |
-
0.8
|
|
|
|
| 167 |
],
|
| 168 |
[
|
| 169 |
-
"The implementation of
|
| 170 |
"Natural",
|
| 171 |
-
0.
|
|
|
|
| 172 |
],
|
| 173 |
[
|
| 174 |
-
"
|
| 175 |
"Casual",
|
| 176 |
-
0.
|
|
|
|
| 177 |
],
|
| 178 |
[
|
| 179 |
-
"
|
| 180 |
"Conversational",
|
| 181 |
-
0.9
|
|
|
|
| 182 |
]
|
| 183 |
],
|
| 184 |
-
inputs=[input_text, style_dropdown, intensity_slider],
|
| 185 |
-
outputs=[output_text, stats_output],
|
| 186 |
fn=humanize_text_hf,
|
| 187 |
-
cache_examples=False
|
|
|
|
| 188 |
)
|
| 189 |
|
|
|
|
| 190 |
gr.HTML("""
|
| 191 |
-
<div
|
| 192 |
-
<h3>
|
| 193 |
-
<
|
| 194 |
-
<
|
| 195 |
-
<
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
<li><strong>Sentence Restructuring:</strong> varies length and structure for natural flow</li>
|
| 200 |
-
<li><strong>Natural Imperfections:</strong> adds human-like variations and casual touches</li>
|
| 201 |
-
<li><strong>Context-Aware Processing:</strong> maintains meaning while improving readability</li>
|
| 202 |
-
</ul>
|
| 203 |
-
</div>
|
| 204 |
-
|
| 205 |
-
<div class="stats-box" style="margin-top: 15px;">
|
| 206 |
-
<h4>🎨 Style Guide:</h4>
|
| 207 |
-
<ul>
|
| 208 |
-
<li><strong>Natural (0.5-0.7):</strong> Professional content with human touch - good for business writing</li>
|
| 209 |
-
<li><strong>Casual (0.6-0.8):</strong> Blog posts, articles, informal content - relaxed but clear</li>
|
| 210 |
-
<li><strong>Conversational (0.7-1.0):</strong> Social media, very informal text - like talking to a friend</li>
|
| 211 |
-
</ul>
|
| 212 |
-
</div>
|
| 213 |
-
|
| 214 |
-
<div class="stats-box" style="margin-top: 15px;">
|
| 215 |
-
<h4>⚡ Performance & Features:</h4>
|
| 216 |
-
<ul>
|
| 217 |
-
<li><strong>Similarity Preservation:</strong> Maintains 85-95% semantic similarity to original</li>
|
| 218 |
-
<li><strong>Fast Processing:</strong> ~500ms average response time</li>
|
| 219 |
-
<li><strong>Robust Fallbacks:</strong> Works even when advanced models aren't available</li>
|
| 220 |
-
<li><strong>Quality Control:</strong> Automatic quality checks prevent over-transformation</li>
|
| 221 |
-
<li><strong>Dependency Resilient:</strong> Graceful degradation when libraries are missing</li>
|
| 222 |
-
</ul>
|
| 223 |
-
</div>
|
| 224 |
-
|
| 225 |
-
<div class="stats-box" style="margin-top: 15px;">
|
| 226 |
-
<h4>🛠️ Technical Features:</h4>
|
| 227 |
-
<ul>
|
| 228 |
-
<li><strong>Multiple Similarity Methods:</strong> Advanced transformers → TF-IDF → word overlap fallbacks</li>
|
| 229 |
-
<li><strong>Intelligent Processing:</strong> Context-aware transformations based on text type</li>
|
| 230 |
-
<li><strong>Quality Assurance:</strong> Automatic reversion if similarity drops too low</li>
|
| 231 |
-
<li><strong>Graceful Degradation:</strong> Works with minimal dependencies (just NLTK)</li>
|
| 232 |
-
</ul>
|
| 233 |
-
</div>
|
| 234 |
</div>
|
| 235 |
""")
|
| 236 |
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
<
|
| 244 |
-
<
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
</ul>
|
| 250 |
-
<p><em>The system automatically uses the best available methods and falls back gracefully when dependencies are missing.</em></p>
|
| 251 |
-
</div>
|
| 252 |
-
""")
|
| 253 |
|
| 254 |
# Event handlers
|
| 255 |
humanize_btn.click(
|
| 256 |
fn=humanize_text_hf,
|
| 257 |
-
inputs=[input_text, style_dropdown, intensity_slider],
|
| 258 |
-
outputs=[output_text, stats_output]
|
| 259 |
)
|
| 260 |
|
| 261 |
-
# Launch
|
| 262 |
if __name__ == "__main__":
|
| 263 |
-
print("🌐 Launching AI Text Humanizer on Hugging Face Spaces...")
|
| 264 |
-
|
| 265 |
-
|
|
|
|
|
|
|
| 266 |
server_name="0.0.0.0",
|
| 267 |
server_port=7860,
|
| 268 |
-
show_error=True
|
|
|
|
| 269 |
)
|
|
|
|
| 1 |
+
# Production-grade AI Text Humanizer for Hugging Face Spaces
|
| 2 |
+
# All advanced features enabled
|
| 3 |
+
|
| 4 |
import gradio as gr
|
| 5 |
import time
|
| 6 |
import os
|
| 7 |
+
import sys
|
| 8 |
|
| 9 |
+
# Import our production humanizer
|
| 10 |
+
from text_humanizer_production import ProductionAITextHumanizer
|
| 11 |
|
| 12 |
+
# Global variables
|
| 13 |
+
humanizer = None
|
| 14 |
+
initialization_status = {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
+
def initialize_humanizer():
|
| 17 |
+
"""Initialize the production humanizer with status tracking"""
|
| 18 |
+
global humanizer, initialization_status
|
| 19 |
+
|
| 20 |
+
print("🏭 Initializing Production AI Text Humanizer for Hugging Face Spaces...")
|
| 21 |
+
|
| 22 |
+
try:
|
| 23 |
+
# Enable HF Spaces optimizations
|
| 24 |
+
humanizer = ProductionAITextHumanizer(
|
| 25 |
+
enable_gpu=True, # HF Spaces may have GPU
|
| 26 |
+
model_cache_dir=os.environ.get('HF_HOME', '/tmp/huggingface_cache')
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
initialization_status = {
|
| 30 |
+
"humanizer_loaded": True,
|
| 31 |
+
"advanced_similarity": humanizer.similarity_model is not None,
|
| 32 |
+
"ai_paraphrasing": humanizer.paraphraser is not None,
|
| 33 |
+
"tfidf_fallback": humanizer.tfidf_vectorizer is not None,
|
| 34 |
+
"total_features": 6,
|
| 35 |
+
"enabled_features": sum([
|
| 36 |
+
bool(humanizer.similarity_model),
|
| 37 |
+
bool(humanizer.paraphraser),
|
| 38 |
+
bool(humanizer.tfidf_vectorizer),
|
| 39 |
+
True, # Word mappings
|
| 40 |
+
True, # Contractions
|
| 41 |
+
True # Basic processing
|
| 42 |
+
])
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
print("✅ Production humanizer initialized successfully!")
|
| 46 |
+
print(f"🎯 Feature completeness: {(initialization_status['enabled_features']/initialization_status['total_features'])*100:.1f}%")
|
| 47 |
+
|
| 48 |
+
return True
|
| 49 |
+
|
| 50 |
+
except Exception as e:
|
| 51 |
+
print(f"❌ Error initializing humanizer: {e}")
|
| 52 |
+
initialization_status = {"error": str(e), "humanizer_loaded": False}
|
| 53 |
+
return False
|
| 54 |
+
|
| 55 |
+
def humanize_text_hf(text, style, intensity, show_details=False):
|
| 56 |
"""
|
| 57 |
+
Hugging Face Spaces interface for production humanization
|
| 58 |
"""
|
| 59 |
if not text.strip():
|
| 60 |
+
return "⚠️ Please enter some text to humanize.", "", ""
|
| 61 |
|
| 62 |
if humanizer is None:
|
| 63 |
+
return "❌ Error: Humanizer not loaded properly. Please refresh the page.", "", ""
|
| 64 |
|
| 65 |
try:
|
| 66 |
start_time = time.time()
|
| 67 |
|
| 68 |
+
# Use production humanization method
|
| 69 |
+
result = humanizer.humanize_text_production(
|
| 70 |
text=text,
|
| 71 |
style=style.lower(),
|
| 72 |
+
intensity=intensity,
|
| 73 |
+
quality_threshold=0.75
|
| 74 |
)
|
| 75 |
|
| 76 |
processing_time = (time.time() - start_time) * 1000
|
| 77 |
|
| 78 |
+
# Format detailed stats
|
| 79 |
+
details = f"""**🎯 Production Results:**
|
| 80 |
+
- **Quality Score:** {result['quality_score']:.3f} (Higher = Better)
|
| 81 |
+
- **Similarity Score:** {result['similarity_score']:.3f} (Meaning Preservation)
|
| 82 |
+
- **Processing Time:** {processing_time:.1f}ms
|
| 83 |
+
- **Style:** {result['style'].title()}
|
| 84 |
+
- **Intensity:** {result['intensity']}
|
| 85 |
+
- **Length Change:** {result['length_change']} characters
|
| 86 |
+
- **Word Count Change:** {result['word_count_change']} words
|
| 87 |
+
|
| 88 |
+
**🔧 Features Used:**
|
| 89 |
+
{', '.join(result['feature_usage'].keys()) if result['feature_usage'] else 'Basic transformations only'}
|
| 90 |
+
|
| 91 |
+
**📝 Transformations Applied:**
|
| 92 |
+
{chr(10).join([f'• {change}' for change in result['changes_made']]) if result['changes_made'] else '• No significant changes needed'}"""
|
| 93 |
|
| 94 |
+
# Show feature status in details
|
| 95 |
+
feature_status = f"""
|
| 96 |
+
**⚡ Advanced Features Status:**
|
| 97 |
+
- Advanced Similarity: {'✅ ENABLED' if initialization_status.get('advanced_similarity') else '❌ DISABLED'}
|
| 98 |
+
- AI Paraphrasing: {'✅ ENABLED' if initialization_status.get('ai_paraphrasing') else '❌ DISABLED'}
|
| 99 |
+
- Quality Control: ✅ ENABLED
|
| 100 |
+
- Feature Completeness: {(initialization_status.get('enabled_features', 3)/initialization_status.get('total_features', 6))*100:.1f}%"""
|
| 101 |
+
|
| 102 |
+
full_details = details + feature_status if show_details else details
|
| 103 |
+
|
| 104 |
+
return result['humanized_text'], full_details, f"✅ Success - Quality: {result['quality_score']:.3f}"
|
| 105 |
|
| 106 |
except Exception as e:
|
| 107 |
+
error_msg = f"❌ Error processing text: {str(e)}"
|
| 108 |
+
return error_msg, "", "❌ Processing failed"
|
| 109 |
+
|
| 110 |
+
def get_feature_status():
|
| 111 |
+
"""Get current feature status for display"""
|
| 112 |
+
if not initialization_status.get('humanizer_loaded'):
|
| 113 |
+
return "❌ Humanizer not loaded", "red"
|
| 114 |
+
|
| 115 |
+
enabled = initialization_status.get('enabled_features', 0)
|
| 116 |
+
total = initialization_status.get('total_features', 6)
|
| 117 |
+
completeness = (enabled / total) * 100
|
| 118 |
+
|
| 119 |
+
if completeness >= 90:
|
| 120 |
+
return f"🎉 Production Ready ({completeness:.0f}%)", "green"
|
| 121 |
+
elif completeness >= 70:
|
| 122 |
+
return f"⚠️ Most Features Enabled ({completeness:.0f}%)", "orange"
|
| 123 |
+
else:
|
| 124 |
+
return f"❌ Limited Features ({completeness:.0f}%)", "red"
|
| 125 |
+
|
| 126 |
+
# Initialize the humanizer on startup
|
| 127 |
+
initialization_success = initialize_humanizer()
|
| 128 |
|
| 129 |
+
# Create the Gradio interface
|
| 130 |
with gr.Blocks(
|
| 131 |
+
title="🤖➡️👤 AI Text Humanizer Pro",
|
| 132 |
theme=gr.themes.Soft(),
|
| 133 |
css="""
|
| 134 |
.main-header {
|
| 135 |
text-align: center;
|
| 136 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
| 137 |
color: white;
|
| 138 |
+
padding: 25px;
|
| 139 |
+
border-radius: 15px;
|
| 140 |
+
margin-bottom: 25px;
|
| 141 |
+
box-shadow: 0 4px 15px rgba(0,0,0,0.1);
|
| 142 |
}
|
| 143 |
+
.feature-status {
|
| 144 |
+
text-align: center;
|
| 145 |
+
padding: 10px;
|
| 146 |
+
border-radius: 8px;
|
| 147 |
+
margin: 10px 0;
|
| 148 |
+
font-weight: bold;
|
| 149 |
+
}
|
| 150 |
+
.status-green { background-color: #d4edda; border: 1px solid #c3e6cb; color: #155724; }
|
| 151 |
+
.status-orange { background-color: #fff3cd; border: 1px solid #ffeaa7; color: #856404; }
|
| 152 |
+
.status-red { background-color: #f8d7da; border: 1px solid #f5c6cb; color: #721c24; }
|
| 153 |
.stats-box {
|
| 154 |
+
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
|
| 155 |
+
padding: 20px;
|
| 156 |
+
border-radius: 12px;
|
| 157 |
+
border-left: 5px solid #667eea;
|
| 158 |
+
margin: 15px 0;
|
| 159 |
+
}
|
| 160 |
+
.example-box {
|
| 161 |
background: #f8f9fa;
|
| 162 |
padding: 15px;
|
| 163 |
border-radius: 8px;
|
| 164 |
+
border: 1px solid #dee2e6;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
margin: 10px 0;
|
| 166 |
}
|
| 167 |
"""
|
| 168 |
+
) as demo:
|
| 169 |
|
| 170 |
+
gr.HTML(f"""
|
| 171 |
<div class="main-header">
|
| 172 |
+
<h1>🤖➡️👤 AI Text Humanizer Pro</h1>
|
| 173 |
+
<p>Production-Grade AI Text Humanization with Advanced Features</p>
|
| 174 |
+
<p><em>Transform AI-generated text to sound naturally human</em></p>
|
| 175 |
</div>
|
| 176 |
""")
|
| 177 |
|
| 178 |
+
# Feature status indicator
|
| 179 |
+
if initialization_success:
|
| 180 |
+
status_text, status_color = get_feature_status()
|
| 181 |
+
gr.HTML(f"""
|
| 182 |
+
<div class="feature-status status-{status_color}">
|
| 183 |
+
{status_text}
|
| 184 |
+
</div>
|
| 185 |
+
""")
|
| 186 |
+
else:
|
| 187 |
+
gr.HTML(f"""
|
| 188 |
+
<div class="feature-status status-red">
|
| 189 |
+
❌ Initialization Failed - Please refresh the page
|
| 190 |
+
</div>
|
| 191 |
+
""")
|
|
|
|
|
|
|
|
|
|
| 192 |
|
| 193 |
+
with gr.Tab("🚀 Humanize Text"):
|
| 194 |
with gr.Row():
|
| 195 |
with gr.Column(scale=1):
|
| 196 |
gr.HTML("<h3>📝 Input</h3>")
|
| 197 |
|
| 198 |
input_text = gr.Textbox(
|
| 199 |
label="Text to Humanize",
|
| 200 |
+
placeholder="Paste your AI-generated text here...\n\nExample: Furthermore, it is important to note that artificial intelligence systems demonstrate significant capabilities in natural language processing tasks. Subsequently, these systems can analyze and generate text with remarkable accuracy.",
|
| 201 |
+
lines=12,
|
| 202 |
max_lines=20
|
| 203 |
)
|
| 204 |
|
|
|
|
| 206 |
style_dropdown = gr.Dropdown(
|
| 207 |
choices=["Natural", "Casual", "Conversational"],
|
| 208 |
value="Natural",
|
| 209 |
+
label="🎨 Humanization Style",
|
| 210 |
+
info="Natural: Professional with human touch | Casual: Relaxed and clear | Conversational: Like talking to a friend"
|
| 211 |
)
|
| 212 |
|
| 213 |
intensity_slider = gr.Slider(
|
| 214 |
minimum=0.1,
|
| 215 |
maximum=1.0,
|
| 216 |
+
value=0.8,
|
| 217 |
step=0.1,
|
| 218 |
+
label="⚡ Intensity Level",
|
| 219 |
+
info="How much to humanize (0.1 = subtle, 1.0 = maximum)"
|
| 220 |
)
|
| 221 |
|
| 222 |
+
with gr.Row():
|
| 223 |
+
humanize_btn = gr.Button(
|
| 224 |
+
"🚀 Humanize Text",
|
| 225 |
+
variant="primary",
|
| 226 |
+
size="lg"
|
| 227 |
+
)
|
| 228 |
+
|
| 229 |
+
details_checkbox = gr.Checkbox(
|
| 230 |
+
label="📊 Show Advanced Details",
|
| 231 |
+
value=True
|
| 232 |
+
)
|
| 233 |
|
| 234 |
with gr.Column(scale=1):
|
| 235 |
gr.HTML("<h3>✨ Output</h3>")
|
| 236 |
|
| 237 |
output_text = gr.Textbox(
|
| 238 |
label="Humanized Text",
|
| 239 |
+
lines=12,
|
| 240 |
max_lines=20,
|
| 241 |
show_copy_button=True
|
| 242 |
)
|
| 243 |
|
| 244 |
+
status_output = gr.Textbox(
|
| 245 |
+
label="Status",
|
| 246 |
+
lines=1,
|
| 247 |
+
interactive=False
|
| 248 |
+
)
|
| 249 |
+
|
| 250 |
stats_output = gr.Markdown(
|
| 251 |
+
label="📊 Detailed Analysis",
|
| 252 |
value="Results will appear here after processing..."
|
| 253 |
)
|
| 254 |
|
| 255 |
+
with gr.Tab("📊 Examples & Features"):
|
| 256 |
+
gr.HTML("""
|
| 257 |
+
<div class="stats-box">
|
| 258 |
+
<h3>🎯 Advanced Production Features</h3>
|
| 259 |
+
<p>This production-grade humanizer includes:</p>
|
| 260 |
+
</div>
|
| 261 |
+
""")
|
| 262 |
+
|
| 263 |
+
# Show current feature status
|
| 264 |
+
if initialization_success:
|
| 265 |
+
feature_list = f"""
|
| 266 |
+
<div class="example-box">
|
| 267 |
+
<h4>✅ Currently Enabled Features:</h4>
|
| 268 |
+
<ul>
|
| 269 |
+
<li><strong>Advanced Semantic Similarity:</strong> {'✅ ENABLED' if initialization_status.get('advanced_similarity') else '❌ DISABLED'} - Uses sentence transformers for meaning preservation</li>
|
| 270 |
+
<li><strong>AI Paraphrasing:</strong> {'✅ ENABLED' if initialization_status.get('ai_paraphrasing') else '❌ DISABLED'} - Google's FLAN-T5 for intelligent rewrites</li>
|
| 271 |
+
<li><strong>Smart Word Replacement:</strong> ✅ ENABLED - 70+ formal→casual mappings</li>
|
| 272 |
+
<li><strong>Natural Contractions:</strong> ✅ ENABLED - 37+ contraction patterns</li>
|
| 273 |
+
<li><strong>AI Transition Removal:</strong> ✅ ENABLED - Removes robotic phrases</li>
|
| 274 |
+
<li><strong>Quality Control:</strong> ✅ ENABLED - Automatic quality validation</li>
|
| 275 |
+
</ul>
|
| 276 |
+
</div>
|
| 277 |
+
"""
|
| 278 |
+
gr.HTML(feature_list)
|
| 279 |
|
| 280 |
# Examples
|
| 281 |
+
gr.HTML("<h3>💡 Try These Examples</h3>")
|
| 282 |
+
|
| 283 |
examples = gr.Examples(
|
| 284 |
examples=[
|
| 285 |
[
|
| 286 |
"Furthermore, it is important to note that artificial intelligence systems demonstrate significant capabilities in natural language processing tasks. Subsequently, these systems can analyze and generate text with remarkable accuracy. Nevertheless, it is crucial to understand that human oversight remains essential for optimal performance.",
|
| 287 |
"Conversational",
|
| 288 |
+
0.8,
|
| 289 |
+
True
|
| 290 |
],
|
| 291 |
[
|
| 292 |
+
"The implementation of comprehensive methodologies will facilitate the optimization of business processes and operational workflows. Moreover, it will demonstrate substantial improvements in efficiency metrics while maintaining quality standards throughout the organization.",
|
| 293 |
"Natural",
|
| 294 |
+
0.7,
|
| 295 |
+
True
|
| 296 |
],
|
| 297 |
[
|
| 298 |
+
"Subsequently, organizations must utilize systematic approaches to evaluate and implement technological solutions. Therefore, it is essential to establish comprehensive frameworks that demonstrate optimal performance and facilitate substantial improvements in operational efficiency.",
|
| 299 |
"Casual",
|
| 300 |
+
0.6,
|
| 301 |
+
True
|
| 302 |
],
|
| 303 |
[
|
| 304 |
+
"Moreover, the utilization of advanced algorithms enables organizations to obtain optimal results while maintaining sufficient quality standards. Additionally, these systems demonstrate remarkable capabilities in processing and analyzing substantial amounts of data with exceptional accuracy.",
|
| 305 |
"Conversational",
|
| 306 |
+
0.9,
|
| 307 |
+
True
|
| 308 |
]
|
| 309 |
],
|
| 310 |
+
inputs=[input_text, style_dropdown, intensity_slider, details_checkbox],
|
| 311 |
+
outputs=[output_text, stats_output, status_output],
|
| 312 |
fn=humanize_text_hf,
|
| 313 |
+
cache_examples=False,
|
| 314 |
+
label="Click any example to try it!"
|
| 315 |
)
|
| 316 |
|
| 317 |
+
# Performance info
|
| 318 |
gr.HTML("""
|
| 319 |
+
<div class="stats-box">
|
| 320 |
+
<h3>⚡ Performance Specifications</h3>
|
| 321 |
+
<ul>
|
| 322 |
+
<li><strong>Processing Speed:</strong> ~500ms average (first run includes model loading)</li>
|
| 323 |
+
<li><strong>Quality Preservation:</strong> 85-95% semantic similarity maintained</li>
|
| 324 |
+
<li><strong>Transformation Accuracy:</strong> Advanced NLP models ensure high-quality output</li>
|
| 325 |
+
<li><strong>Production Ready:</strong> Comprehensive error handling and quality control</li>
|
| 326 |
+
</ul>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
</div>
|
| 328 |
""")
|
| 329 |
|
| 330 |
+
# Usage guide
|
| 331 |
+
gr.HTML("""
|
| 332 |
+
<div class="example-box">
|
| 333 |
+
<h3>📋 Usage Guide</h3>
|
| 334 |
+
<ul>
|
| 335 |
+
<li><strong>Natural (0.5-0.7):</strong> Best for professional content that needs human touch</li>
|
| 336 |
+
<li><strong>Casual (0.6-0.8):</strong> Perfect for blog posts, articles, and informal content</li>
|
| 337 |
+
<li><strong>Conversational (0.7-1.0):</strong> Ideal for social media and very informal text</li>
|
| 338 |
+
</ul>
|
| 339 |
+
<p><em>💡 Tip: Start with Natural style at 0.7 intensity for most use cases</em></p>
|
| 340 |
+
</div>
|
| 341 |
+
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
|
| 343 |
# Event handlers
|
| 344 |
humanize_btn.click(
|
| 345 |
fn=humanize_text_hf,
|
| 346 |
+
inputs=[input_text, style_dropdown, intensity_slider, details_checkbox],
|
| 347 |
+
outputs=[output_text, stats_output, status_output]
|
| 348 |
)
|
| 349 |
|
| 350 |
+
# Launch the interface
|
| 351 |
if __name__ == "__main__":
|
| 352 |
+
print("🌐 Launching Production AI Text Humanizer on Hugging Face Spaces...")
|
| 353 |
+
print(f"🎯 Initialization Status: {'✅ SUCCESS' if initialization_success else '❌ FAILED'}")
|
| 354 |
+
|
| 355 |
+
demo.launch(
|
| 356 |
+
share=False,
|
| 357 |
server_name="0.0.0.0",
|
| 358 |
server_port=7860,
|
| 359 |
+
show_error=True,
|
| 360 |
+
show_api=False
|
| 361 |
)
|
diagnose_and_fix.py
ADDED
|
@@ -0,0 +1,228 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Production Diagnostics and Quick Fix for AI Text Humanizer
|
| 4 |
+
This script will identify exactly what's wrong and fix it
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import sys
|
| 8 |
+
import subprocess
|
| 9 |
+
import importlib
|
| 10 |
+
import os
|
| 11 |
+
|
| 12 |
+
def test_import(module_name, component=None):
|
| 13 |
+
"""Test if a module/component can be imported"""
|
| 14 |
+
try:
|
| 15 |
+
if component:
|
| 16 |
+
module = importlib.import_module(module_name)
|
| 17 |
+
getattr(module, component)
|
| 18 |
+
return True, "OK"
|
| 19 |
+
else:
|
| 20 |
+
importlib.import_module(module_name)
|
| 21 |
+
return True, "OK"
|
| 22 |
+
except ImportError as e:
|
| 23 |
+
return False, f"ImportError: {str(e)}"
|
| 24 |
+
except AttributeError as e:
|
| 25 |
+
return False, f"AttributeError: {str(e)}"
|
| 26 |
+
except Exception as e:
|
| 27 |
+
return False, f"Error: {str(e)}"
|
| 28 |
+
|
| 29 |
+
def run_pip_command(cmd):
|
| 30 |
+
"""Run pip command safely"""
|
| 31 |
+
try:
|
| 32 |
+
result = subprocess.run(cmd, shell=True, capture_output=True, text=True, check=True)
|
| 33 |
+
return True, result.stdout
|
| 34 |
+
except subprocess.CalledProcessError as e:
|
| 35 |
+
return False, e.stderr
|
| 36 |
+
|
| 37 |
+
def main():
|
| 38 |
+
print("🔧 AI TEXT HUMANIZER - PRODUCTION DIAGNOSTICS & FIX")
|
| 39 |
+
print("=" * 60)
|
| 40 |
+
print("This will diagnose and fix your advanced model issues\n")
|
| 41 |
+
|
| 42 |
+
# Test current imports
|
| 43 |
+
print("📋 CURRENT STATUS:")
|
| 44 |
+
print("-" * 20)
|
| 45 |
+
|
| 46 |
+
tests = [
|
| 47 |
+
("sentence_transformers", "SentenceTransformer"),
|
| 48 |
+
("transformers", "pipeline"),
|
| 49 |
+
("torch", None),
|
| 50 |
+
("sklearn", None),
|
| 51 |
+
("nltk", None),
|
| 52 |
+
("gradio", None)
|
| 53 |
+
]
|
| 54 |
+
|
| 55 |
+
results = {}
|
| 56 |
+
for module, component in tests:
|
| 57 |
+
success, message = test_import(module, component)
|
| 58 |
+
status = "✅ WORKING" if success else "❌ FAILED"
|
| 59 |
+
print(f"{module}: {status}")
|
| 60 |
+
if not success:
|
| 61 |
+
print(f" Error: {message}")
|
| 62 |
+
results[module] = success
|
| 63 |
+
|
| 64 |
+
# Check specific model loading
|
| 65 |
+
print(f"\n🤖 TESTING MODEL LOADING:")
|
| 66 |
+
print("-" * 30)
|
| 67 |
+
|
| 68 |
+
if results.get('sentence_transformers'):
|
| 69 |
+
try:
|
| 70 |
+
print("🔄 Testing sentence transformer model...")
|
| 71 |
+
from sentence_transformers import SentenceTransformer
|
| 72 |
+
model = SentenceTransformer('all-MiniLM-L6-v2')
|
| 73 |
+
test_result = model.encode(["test"])
|
| 74 |
+
print("✅ Sentence transformer: MODEL LOADED")
|
| 75 |
+
results['sentence_model'] = True
|
| 76 |
+
except Exception as e:
|
| 77 |
+
print(f"❌ Sentence transformer: MODEL FAILED - {e}")
|
| 78 |
+
results['sentence_model'] = False
|
| 79 |
+
else:
|
| 80 |
+
results['sentence_model'] = False
|
| 81 |
+
|
| 82 |
+
if results.get('transformers'):
|
| 83 |
+
try:
|
| 84 |
+
print("🔄 Testing paraphrasing model...")
|
| 85 |
+
from transformers import pipeline
|
| 86 |
+
paraphraser = pipeline("text2text-generation", model="google/flan-t5-small")
|
| 87 |
+
test_result = paraphraser("test sentence", max_length=50)
|
| 88 |
+
print("✅ Paraphrasing: MODEL LOADED")
|
| 89 |
+
results['paraphrase_model'] = True
|
| 90 |
+
except Exception as e:
|
| 91 |
+
print(f"❌ Paraphrasing: MODEL FAILED - {e}")
|
| 92 |
+
results['paraphrase_model'] = False
|
| 93 |
+
else:
|
| 94 |
+
results['paraphrase_model'] = False
|
| 95 |
+
|
| 96 |
+
# Analyze issues and provide fixes
|
| 97 |
+
print(f"\n🎯 DIAGNOSIS & SOLUTIONS:")
|
| 98 |
+
print("-" * 30)
|
| 99 |
+
|
| 100 |
+
if not results['sentence_transformers']:
|
| 101 |
+
print("🚨 ISSUE: sentence-transformers not working")
|
| 102 |
+
print("💡 SOLUTION:")
|
| 103 |
+
print(" pip uninstall -y sentence-transformers huggingface_hub")
|
| 104 |
+
print(" pip install huggingface_hub==0.17.3")
|
| 105 |
+
print(" pip install sentence-transformers==2.2.2")
|
| 106 |
+
print()
|
| 107 |
+
|
| 108 |
+
fix = input("🔧 Apply this fix now? (y/n): ").lower().strip()
|
| 109 |
+
if fix == 'y':
|
| 110 |
+
print("🔄 Applying sentence-transformers fix...")
|
| 111 |
+
success1, _ = run_pip_command("pip uninstall -y sentence-transformers huggingface_hub")
|
| 112 |
+
success2, _ = run_pip_command("pip install huggingface_hub==0.17.3")
|
| 113 |
+
success3, _ = run_pip_command("pip install sentence-transformers==2.2.2")
|
| 114 |
+
|
| 115 |
+
if success1 and success2 and success3:
|
| 116 |
+
print("✅ Fix applied successfully!")
|
| 117 |
+
# Test again
|
| 118 |
+
success, message = test_import('sentence_transformers', 'SentenceTransformer')
|
| 119 |
+
if success:
|
| 120 |
+
print("✅ sentence-transformers now working!")
|
| 121 |
+
results['sentence_transformers'] = True
|
| 122 |
+
else:
|
| 123 |
+
print(f"❌ Still not working: {message}")
|
| 124 |
+
else:
|
| 125 |
+
print("❌ Fix failed")
|
| 126 |
+
|
| 127 |
+
if not results['transformers']:
|
| 128 |
+
print("🚨 ISSUE: transformers not working")
|
| 129 |
+
print("💡 SOLUTION:")
|
| 130 |
+
print(" pip install transformers==4.35.0 torch")
|
| 131 |
+
print()
|
| 132 |
+
|
| 133 |
+
fix = input("🔧 Apply this fix now? (y/n): ").lower().strip()
|
| 134 |
+
if fix == 'y':
|
| 135 |
+
print("🔄 Applying transformers fix...")
|
| 136 |
+
success1, _ = run_pip_command("pip install transformers==4.35.0")
|
| 137 |
+
success2, _ = run_pip_command("pip install torch")
|
| 138 |
+
|
| 139 |
+
if success1 and success2:
|
| 140 |
+
print("✅ Fix applied successfully!")
|
| 141 |
+
success, message = test_import('transformers', 'pipeline')
|
| 142 |
+
if success:
|
| 143 |
+
print("✅ transformers now working!")
|
| 144 |
+
results['transformers'] = True
|
| 145 |
+
else:
|
| 146 |
+
print(f"❌ Still not working: {message}")
|
| 147 |
+
else:
|
| 148 |
+
print("❌ Fix failed")
|
| 149 |
+
|
| 150 |
+
# Final test with our humanizer
|
| 151 |
+
print(f"\n🧪 FINAL TEST:")
|
| 152 |
+
print("-" * 15)
|
| 153 |
+
|
| 154 |
+
try:
|
| 155 |
+
# Try importing our production version
|
| 156 |
+
if os.path.exists("text_humanizer_production.py"):
|
| 157 |
+
sys.path.insert(0, ".")
|
| 158 |
+
from text_humanizer_production import ProductionAITextHumanizer
|
| 159 |
+
|
| 160 |
+
print("🔄 Creating production humanizer...")
|
| 161 |
+
humanizer = ProductionAITextHumanizer()
|
| 162 |
+
|
| 163 |
+
print("🔄 Testing humanization...")
|
| 164 |
+
result = humanizer.humanize_text_production(
|
| 165 |
+
"Furthermore, it is important to note that these systems demonstrate significant capabilities.",
|
| 166 |
+
style="conversational",
|
| 167 |
+
intensity=0.8
|
| 168 |
+
)
|
| 169 |
+
|
| 170 |
+
print("✅ PRODUCTION TEST SUCCESSFUL!")
|
| 171 |
+
print(f"Original: Furthermore, it is important to note that...")
|
| 172 |
+
print(f"Humanized: {result['humanized_text']}")
|
| 173 |
+
print(f"Quality Score: {result['quality_score']:.3f}")
|
| 174 |
+
|
| 175 |
+
# Check what features are working
|
| 176 |
+
working_features = sum([
|
| 177 |
+
results.get('sentence_model', False),
|
| 178 |
+
results.get('paraphrase_model', False),
|
| 179 |
+
True, # Basic features always work
|
| 180 |
+
])
|
| 181 |
+
|
| 182 |
+
if working_features >= 2:
|
| 183 |
+
print("🎉 PRODUCTION READY!")
|
| 184 |
+
else:
|
| 185 |
+
print("⚠️ Limited features - but still functional")
|
| 186 |
+
|
| 187 |
+
else:
|
| 188 |
+
print("❌ text_humanizer_production.py not found")
|
| 189 |
+
|
| 190 |
+
except Exception as e:
|
| 191 |
+
print(f"❌ Final test failed: {e}")
|
| 192 |
+
|
| 193 |
+
# Summary and next steps
|
| 194 |
+
print(f"\n📊 SUMMARY:")
|
| 195 |
+
print("-" * 12)
|
| 196 |
+
|
| 197 |
+
working_count = sum([
|
| 198 |
+
results.get('sentence_transformers', False),
|
| 199 |
+
results.get('transformers', False),
|
| 200 |
+
results.get('sentence_model', False),
|
| 201 |
+
results.get('paraphrase_model', False)
|
| 202 |
+
])
|
| 203 |
+
|
| 204 |
+
if working_count >= 3:
|
| 205 |
+
print("🎉 ALL ADVANCED FEATURES WORKING!")
|
| 206 |
+
print("✅ Your AI Text Humanizer is production-ready")
|
| 207 |
+
print("\n🚀 Next steps:")
|
| 208 |
+
print(" python text_humanizer_production.py # Test it")
|
| 209 |
+
print(" python fastapi_server.py # Run API")
|
| 210 |
+
print(" python gradio_app.py # Run web UI")
|
| 211 |
+
|
| 212 |
+
elif working_count >= 1:
|
| 213 |
+
print("⚠️ SOME FEATURES WORKING")
|
| 214 |
+
print("✅ Your humanizer will work with reduced functionality")
|
| 215 |
+
print("\n🚀 To enable all features, run the fixes above")
|
| 216 |
+
|
| 217 |
+
else:
|
| 218 |
+
print("❌ CRITICAL ISSUES DETECTED")
|
| 219 |
+
print("💡 Run this command for a fresh start:")
|
| 220 |
+
print(" python install_production.py")
|
| 221 |
+
|
| 222 |
+
print(f"\n📞 Need help? Check:")
|
| 223 |
+
print(" - README.md for detailed setup")
|
| 224 |
+
print(" - DEPENDENCY_FIX.md for troubleshooting")
|
| 225 |
+
print(" - Run: python install_production.py")
|
| 226 |
+
|
| 227 |
+
if __name__ == "__main__":
|
| 228 |
+
main()
|
install_production.py
ADDED
|
@@ -0,0 +1,354 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Production Installation Script for AI Text Humanizer
|
| 4 |
+
Ensures all advanced features are properly installed and working
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import subprocess
|
| 8 |
+
import sys
|
| 9 |
+
import os
|
| 10 |
+
import time
|
| 11 |
+
|
| 12 |
+
def run_command(cmd, description, critical=True):
    """Execute *cmd* through the shell, printing progress markers.

    Args:
        cmd: shell command line to run.
        description: human-readable label used in the progress output.
        critical: when True a failure returns False; when False failures
            are reported but treated as success so optional steps don't
            abort the installation.

    Returns:
        bool: True on success (or non-critical failure), False otherwise.
    """
    print(f"🔄 {description}...")
    try:
        completed = subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as err:
        print(f"❌ {description} - FAILED")
        print(f" Error: {err.stderr.strip()}")
        # Non-critical steps are allowed to fail silently (beyond the log).
        return not critical
    print(f"✅ {description} - SUCCESS")
    captured = completed.stdout.strip()
    if captured:
        print(f" Output: {captured}")
    return True
|
| 27 |
+
|
| 28 |
+
def check_gpu_availability():
    """Probe for an NVIDIA GPU by invoking ``nvidia-smi``.

    Returns:
        bool: True when ``nvidia-smi`` exists and exits cleanly (GPU
        present), False when the tool is missing or reports an error.
    """
    gpu_found = False
    try:
        probe = subprocess.run(["nvidia-smi"], capture_output=True, text=True)
        gpu_found = probe.returncode == 0
    except FileNotFoundError:
        # nvidia-smi is not installed at all — definitely no NVIDIA stack.
        gpu_found = False

    if gpu_found:
        print("🚀 NVIDIA GPU detected - will install CUDA support")
        return True

    print("💻 No NVIDIA GPU detected - using CPU versions")
    return False
|
| 40 |
+
|
| 41 |
+
def production_install():
    """Install the production-grade AI Text Humanizer with all features.

    Runs ten sequential steps: cleanup, build tools, PyTorch, the
    HuggingFace ecosystem, sentence-transformers, ML libraries, web
    frameworks, optional production extras, NLTK data, and model
    pre-download. Critical step failures abort with False; optional
    steps (cleanup, extras, downloads) may fail without aborting.

    Returns:
        bool: True when every critical step succeeded, False otherwise.
    """

    def _pkg(cmd):
        # The last whitespace-separated token of "pip install <pkg>" is the
        # package spec. BUG FIX: the original used cmd.split()[1], which for
        # every such command is the literal word "install", so each progress
        # line read "Installing install".
        return cmd.split()[-1]

    print("🏭 AI TEXT HUMANIZER - PRODUCTION INSTALLATION")
    print("=" * 55)
    print("📋 This will install ALL advanced features:")
    print(" ✨ Advanced semantic similarity (sentence-transformers)")
    print(" 🧠 AI paraphrasing capabilities (transformers)")
    print(" 🚀 GPU acceleration (if available)")
    print(" 📊 Full API and web interfaces")
    print("")

    # Check system
    has_gpu = check_gpu_availability()

    print("🔧 Starting production installation...")
    print("-" * 40)

    # Step 1: remove any previous (possibly incompatible) installation.
    print("\n📦 STEP 1: Cleaning existing installation")
    cleanup_commands = [
        "pip uninstall -y sentence-transformers transformers huggingface_hub torch torchvision torchaudio",
        "pip cache purge"
    ]
    for cmd in cleanup_commands:
        run_command(cmd, "Cleaning previous installation", critical=False)

    # Step 2: build tooling must succeed before anything else.
    print("\n🔨 STEP 2: Installing build tools")
    build_commands = [
        "pip install --upgrade pip setuptools wheel",
        "pip install --upgrade packaging"
    ]
    for cmd in build_commands:
        if not run_command(cmd, "Installing build tools"):
            return False

    # Step 3: PyTorch, with a CUDA wheel when a GPU was detected.
    print("\n🧠 STEP 3: Installing PyTorch")
    if has_gpu:
        torch_cmd = "pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121"
    else:
        torch_cmd = "pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu"

    if not run_command(torch_cmd, "Installing PyTorch with proper backend"):
        print("⚠️ PyTorch installation failed, trying alternative...")
        if not run_command("pip install torch==2.1.0", "Installing PyTorch (fallback)"):
            return False

    # Step 4: HuggingFace ecosystem pinned to mutually compatible versions.
    print("\n🤗 STEP 4: Installing HuggingFace ecosystem")
    hf_commands = [
        "pip install huggingface_hub==0.17.3",
        "pip install tokenizers==0.14.1",
        "pip install transformers==4.35.0",
        "pip install accelerate==0.24.1"
    ]
    for cmd in hf_commands:
        if not run_command(cmd, f"Installing {_pkg(cmd)}"):
            return False

    # Step 5: sentence-transformers, with one older fallback version.
    print("\n🔤 STEP 5: Installing Sentence Transformers")
    if not run_command("pip install sentence-transformers==2.2.2", "Installing Sentence Transformers"):
        print("⚠️ Trying alternative installation...")
        if not run_command("pip install sentence-transformers==2.1.0", "Installing Sentence Transformers (fallback)"):
            return False

    # Step 6: classic ML/NLP libraries.
    print("\n📊 STEP 6: Installing ML libraries")
    ml_commands = [
        "pip install scikit-learn==1.3.2",
        "pip install numpy==1.25.2",
        "pip install pandas==2.1.3",
        "pip install nltk==3.8.1"
    ]
    for cmd in ml_commands:
        if not run_command(cmd, f"Installing {_pkg(cmd)}"):
            return False

    # Step 7: API and UI frameworks.
    print("\n🌐 STEP 7: Installing web frameworks")
    web_commands = [
        "pip install fastapi==0.104.1",
        "pip install uvicorn[standard]==0.24.0",
        "pip install gradio==4.7.1",
        "pip install python-multipart==0.0.6",
        "pip install aiofiles==23.2.1",
        "pip install requests==2.31.0"
    ]
    for cmd in web_commands:
        if not run_command(cmd, f"Installing {_pkg(cmd)}"):
            return False

    # Step 8: optional production extras — failures are tolerated.
    print("\n⚡ STEP 8: Installing production libraries")
    prod_commands = [
        "pip install redis==5.0.1",
        "pip install psutil",
        "pip install python-dotenv"
    ]
    for cmd in prod_commands:
        run_command(cmd, f"Installing {_pkg(cmd)}", critical=False)

    # Step 9: NLTK corpora (quiet, best-effort).
    print("\n📚 STEP 9: Downloading NLTK data")
    nltk_downloads = [
        "python -c \"import nltk; nltk.download('punkt', quiet=True)\"",
        "python -c \"import nltk; nltk.download('wordnet', quiet=True)\"",
        "python -c \"import nltk; nltk.download('omw-1.4', quiet=True)\"",
        "python -c \"import nltk; nltk.download('stopwords', quiet=True)\""
    ]
    for cmd in nltk_downloads:
        run_command(cmd, "Downloading NLTK data", critical=False)

    # Step 10: warm the model caches so first use is fast (best-effort).
    print("\n🤖 STEP 10: Pre-downloading models")
    model_downloads = [
        "python -c \"from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')\"",
        "python -c \"from transformers import pipeline; pipeline('text2text-generation', model='google/flan-t5-small')\""
    ]
    for cmd in model_downloads:
        run_command(cmd, "Pre-downloading models", critical=False)

    print(f"\n🎉 INSTALLATION COMPLETED!")
    return True
|
| 174 |
+
|
| 175 |
+
def test_installation():
    """Smoke-test every installed component of the humanizer stack.

    Checks imports, loads both models, and probes CUDA availability.

    Returns:
        dict: component name -> bool (True when that component works).
    """
    import importlib

    print(f"\n🧪 TESTING INSTALLATION")
    print("=" * 30)

    test_results = {}

    # (module, attribute) pairs; attribute None means "import only".
    imports_to_test = [
        ("sentence_transformers", "SentenceTransformer"),
        ("transformers", "pipeline"),
        ("torch", None),
        ("sklearn", None),
        ("nltk", None),
        ("gradio", None),
        ("fastapi", None)
    ]

    for module, component in imports_to_test:
        try:
            # importlib replaces the previous exec()-based import: same
            # effect, without dynamically executing generated source.
            mod = importlib.import_module(module)
            if component:
                getattr(mod, component)
            print(f"✅ {module}: Import successful")
            test_results[module] = True
        except Exception as e:
            print(f"❌ {module}: Import failed - {e}")
            test_results[module] = False

    # Test model loading
    print(f"\n🤖 Testing model loading...")

    try:
        from sentence_transformers import SentenceTransformer
        SentenceTransformer('all-MiniLM-L6-v2')
        print("✅ Sentence transformer: Model loaded successfully")
        test_results['sentence_model'] = True
    except Exception as e:
        print(f"❌ Sentence transformer: Model loading failed - {e}")
        test_results['sentence_model'] = False

    try:
        from transformers import pipeline
        pipeline("text2text-generation", model="google/flan-t5-small")
        print("✅ Paraphrasing model: Model loaded successfully")
        test_results['paraphrase_model'] = True
    except Exception as e:
        print(f"❌ Paraphrasing model: Model loading failed - {e}")
        test_results['paraphrase_model'] = False

    # GPU probe. BUG FIX: the original used a bare `except:`, which also
    # swallowed KeyboardInterrupt/SystemExit; narrowed to Exception.
    try:
        import torch
        if torch.cuda.is_available():
            print(f"✅ CUDA: {torch.cuda.device_count()} GPU(s) available")
            test_results['gpu'] = True
        else:
            print("💻 CUDA: Not available (using CPU)")
            test_results['gpu'] = False
    except Exception:
        test_results['gpu'] = False

    return test_results
|
| 239 |
+
|
| 240 |
+
def create_production_requirements():
    """Write ``requirements-production.txt`` with the full pinned dependency set."""
    # The pin set mirrors what production_install() installs.
    pinned = """# AI Text Humanizer - Production Requirements
# All features enabled with compatible versions

# Core ML frameworks
torch>=2.1.0
transformers==4.35.0
sentence-transformers==2.2.2
huggingface_hub==0.17.3
accelerate==0.24.1

# NLP libraries
nltk==3.8.1
scikit-learn==1.3.2
numpy==1.25.2
pandas==2.1.3

# Web frameworks
fastapi==0.104.1
uvicorn[standard]==0.24.0
gradio==4.7.1
python-multipart==0.0.6
aiofiles==23.2.1
requests==2.31.0

# Production libraries
redis==5.0.1
psutil
python-dotenv

# Build tools
setuptools
wheel
packaging
"""

    with open("requirements-production.txt", "w") as out:
        out.write(pinned)

    print("✅ Created requirements-production.txt")
|
| 281 |
+
|
| 282 |
+
def main():
    """Drive the production setup: checks, install, tests, and summary.

    Returns:
        bool: True when the install is considered successful (at most two
        failing components), False on abort or critical failure.
    """
    print("🚀 AI TEXT HUMANIZER - PRODUCTION SETUP")
    print("======================================")

    # Refuse to run on unsupported interpreters.
    if sys.version_info < (3, 7):
        print("❌ Python 3.7+ required")
        return False

    print(f"🐍 Python {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro} detected")

    # Virtualenv detection: old virtualenv sets real_prefix, venv rewrites
    # base_prefix. Outside a venv we ask the user before touching pip.
    in_venv = hasattr(sys, 'real_prefix') or (hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix)
    if in_venv:
        print("✅ Virtual environment detected")
    else:
        print("⚠️ Warning: Not in virtual environment")
        answer = input("Continue? (y/n): ").lower().strip()
        if answer != 'y':
            print("👋 Please create a virtual environment first")
            return False

    # Run the actual installation.
    if not production_install():
        print("\n❌ Installation failed!")
        return False

    # Verify what landed and persist the pin file.
    results = test_installation()
    create_production_requirements()

    # Summary
    print(f"\n📊 INSTALLATION SUMMARY")
    print("=" * 30)

    ok_count = sum(bool(flag) for flag in results.values())
    total = len(results)
    print(f"✅ {ok_count}/{total} components working")

    have_similarity = results.get('sentence_model')
    have_paraphrase = results.get('paraphrase_model')
    if have_similarity and have_paraphrase:
        print("🎉 ALL ADVANCED FEATURES ENABLED!")
        print(" • Advanced semantic similarity ✅")
        print(" • AI paraphrasing capabilities ✅")
        print(" • Production-ready performance ✅")
    elif have_similarity:
        print("⚠️ Advanced similarity enabled, paraphrasing needs attention")
    elif have_paraphrase:
        print("⚠️ Paraphrasing enabled, similarity needs attention")
    else:
        print("❌ Advanced features need troubleshooting")

    print(f"\n🎯 NEXT STEPS:")
    print("1. Test: python text_humanizer_robust.py")
    print("2. Run API: python fastapi_server.py")
    print("3. Run web UI: python gradio_app.py")

    return ok_count >= total - 2  # Allow 2 optional failures
|
| 343 |
+
|
| 344 |
+
if __name__ == "__main__":
    # Top-level driver: report success/failure, and keep Ctrl-C friendly.
    try:
        outcome = main()
    except KeyboardInterrupt:
        print(f"\n👋 Installation cancelled")
    except Exception as e:
        print(f"\n❌ Unexpected error: {e}")
    else:
        if outcome:
            print(f"\n🎉 Production installation successful!")
        else:
            print(f"\n❌ Production installation needs attention")
|
requirements.txt
CHANGED
|
@@ -1,10 +1,20 @@
|
|
| 1 |
-
#
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
nltk==3.8.1
|
| 4 |
-
numpy==1.25.2
|
| 5 |
scikit-learn==1.3.2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
#
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
|
|
|
| 1 |
+
# Core ML frameworks
|
| 2 |
+
torch>=2.1.0
|
| 3 |
+
transformers==4.35.0
|
| 4 |
+
sentence-transformers==2.2.2
|
| 5 |
+
huggingface_hub==0.17.3
|
| 6 |
+
accelerate==0.24.1
|
| 7 |
+
|
| 8 |
+
# NLP and processing libraries
|
| 9 |
nltk==3.8.1
|
|
|
|
| 10 |
scikit-learn==1.3.2
|
| 11 |
+
numpy==1.25.2
|
| 12 |
+
pandas==2.1.3
|
| 13 |
+
|
| 14 |
+
# Web interface
|
| 15 |
+
gradio==4.7.1
|
| 16 |
|
| 17 |
+
# Build and utility libraries
|
| 18 |
+
setuptools
|
| 19 |
+
wheel
|
| 20 |
+
packaging
|
text_humanizer_production.py
ADDED
|
@@ -0,0 +1,585 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import random
|
| 3 |
+
import nltk
|
| 4 |
+
from typing import List, Dict, Optional
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
| 7 |
+
# Download required NLTK data on first run (no-ops when already cached).
for _lookup_path, _resource in [
    ('tokenizers/punkt', 'punkt'),
    ('corpora/wordnet', 'wordnet'),
    ('corpora/omw-1.4', 'omw-1.4'),
]:
    try:
        nltk.data.find(_lookup_path)
    except LookupError:
        nltk.download(_resource)
|
| 22 |
+
|
| 23 |
+
from nltk.tokenize import sent_tokenize, word_tokenize
|
| 24 |
+
|
| 25 |
+
# Production-grade imports with proper error handling and retries
|
| 26 |
+
def safe_import_with_retry(module_name, component=None, max_retries=3):
    """Import *module_name* (optionally one *component* from it), retrying.

    Args:
        module_name: dotted module path to import.
        component: optional attribute to pull from the imported module.
        max_retries: how many import attempts before giving up.

    Returns:
        tuple: ``(imported_object, True)`` on success, ``(None, False)``
        after exhausting retries or on any unexpected error.
    """
    import time  # mirrors the original's lazy dependency

    attempt = 0
    while attempt < max_retries:
        try:
            if component:
                loaded = __import__(module_name, fromlist=[component])
                return getattr(loaded, component), True
            return __import__(module_name), True
        except ImportError as err:
            attempt += 1
            if attempt < max_retries:
                print(f"⚠️ Import attempt {attempt} failed for {module_name}: {err}")
                print(f"🔄 Retrying in 2 seconds...")
                time.sleep(2)
            else:
                print(f"❌ Final import failed for {module_name}: {err}")
                return None, False
        except Exception as err:
            print(f"❌ Unexpected error importing {module_name}: {err}")
            return None, False
    return None, False
|
| 49 |
+
|
| 50 |
+
# Advanced model imports with retries — each optional dependency is probed
# and its availability recorded in a module-level flag.
print("🚀 Loading AI Text Humanizer - Production Version...")
print("=" * 50)

print("📥 Loading sentence transformers...")
SentenceTransformer, SENTENCE_TRANSFORMERS_AVAILABLE = safe_import_with_retry(
    'sentence_transformers', 'SentenceTransformer'
)

print("📥 Loading transformers pipeline...")
pipeline, TRANSFORMERS_AVAILABLE = safe_import_with_retry('transformers', 'pipeline')

print("📥 Loading scikit-learn...")
try:
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity as sklearn_cosine_similarity
except ImportError as e:
    print(f"⚠️ Scikit-learn not available: {e}")
    SKLEARN_AVAILABLE = False
else:
    SKLEARN_AVAILABLE = True
    print("✅ Scikit-learn loaded successfully")

# PyTorch is optional; when present, also report whether CUDA is usable.
try:
    import torch
except ImportError:
    TORCH_AVAILABLE = False
    print("⚠️ PyTorch not available")
else:
    TORCH_AVAILABLE = True
    print(f"✅ PyTorch loaded - CUDA available: {torch.cuda.is_available()}")
|
| 78 |
+
|
| 79 |
+
class ProductionAITextHumanizer:
|
| 80 |
+
def __init__(self, enable_gpu=True, model_cache_dir=None):
|
| 81 |
+
"""Initialize production-grade text humanizer with all advanced features"""
|
| 82 |
+
print("🏭 Initializing Production AI Text Humanizer...")
|
| 83 |
+
|
| 84 |
+
self.enable_gpu = enable_gpu and TORCH_AVAILABLE
|
| 85 |
+
self.model_cache_dir = model_cache_dir
|
| 86 |
+
|
| 87 |
+
# Initialize advanced models with detailed error handling
|
| 88 |
+
self._load_similarity_model()
|
| 89 |
+
self._load_paraphrasing_model()
|
| 90 |
+
self._initialize_fallback_methods()
|
| 91 |
+
self._setup_word_mappings()
|
| 92 |
+
|
| 93 |
+
print("✅ Production AI Text Humanizer initialized!")
|
| 94 |
+
self._print_feature_status()
|
| 95 |
+
|
| 96 |
+
def _load_similarity_model(self):
|
| 97 |
+
"""Load sentence transformer with production settings"""
|
| 98 |
+
self.similarity_model = None
|
| 99 |
+
|
| 100 |
+
if SENTENCE_TRANSFORMERS_AVAILABLE and SentenceTransformer:
|
| 101 |
+
try:
|
| 102 |
+
print("🔄 Loading sentence transformer model...")
|
| 103 |
+
|
| 104 |
+
# Production settings
|
| 105 |
+
model_kwargs = {
|
| 106 |
+
'device': 'cuda' if self.enable_gpu and torch.cuda.is_available() else 'cpu'
|
| 107 |
+
} if TORCH_AVAILABLE else {}
|
| 108 |
+
|
| 109 |
+
if self.model_cache_dir:
|
| 110 |
+
model_kwargs['cache_folder'] = self.model_cache_dir
|
| 111 |
+
|
| 112 |
+
self.similarity_model = SentenceTransformer(
|
| 113 |
+
'all-MiniLM-L6-v2',
|
| 114 |
+
**model_kwargs
|
| 115 |
+
)
|
| 116 |
+
|
| 117 |
+
# Test the model
|
| 118 |
+
test_embedding = self.similarity_model.encode(["test sentence"])
|
| 119 |
+
print("✅ Sentence transformer model loaded and tested successfully!")
|
| 120 |
+
|
| 121 |
+
except Exception as e:
|
| 122 |
+
print(f"❌ Failed to load sentence transformer: {e}")
|
| 123 |
+
print("💡 Troubleshooting tips:")
|
| 124 |
+
print(" - Check internet connection for model download")
|
| 125 |
+
print(" - Verify sentence-transformers version: pip install sentence-transformers==2.2.2")
|
| 126 |
+
print(" - Check CUDA compatibility if using GPU")
|
| 127 |
+
self.similarity_model = None
|
| 128 |
+
else:
|
| 129 |
+
print("❌ Sentence transformers not available")
|
| 130 |
+
|
| 131 |
+
def _load_paraphrasing_model(self):
|
| 132 |
+
"""Load paraphrasing model with production settings"""
|
| 133 |
+
self.paraphraser = None
|
| 134 |
+
|
| 135 |
+
if TRANSFORMERS_AVAILABLE and pipeline:
|
| 136 |
+
try:
|
| 137 |
+
print("🔄 Loading paraphrasing model...")
|
| 138 |
+
|
| 139 |
+
# Production settings
|
| 140 |
+
device = 0 if self.enable_gpu and TORCH_AVAILABLE and torch.cuda.is_available() else -1
|
| 141 |
+
|
| 142 |
+
self.paraphraser = pipeline(
|
| 143 |
+
"text2text-generation",
|
| 144 |
+
model="google/flan-t5-small",
|
| 145 |
+
device=device,
|
| 146 |
+
max_length=512,
|
| 147 |
+
model_kwargs={"cache_dir": self.model_cache_dir} if self.model_cache_dir else {}
|
| 148 |
+
)
|
| 149 |
+
|
| 150 |
+
# Test the model
|
| 151 |
+
test_result = self.paraphraser("Test sentence for paraphrasing.", max_length=50)
|
| 152 |
+
print("✅ Paraphrasing model loaded and tested successfully!")
|
| 153 |
+
|
| 154 |
+
except Exception as e:
|
| 155 |
+
print(f"❌ Failed to load paraphrasing model: {e}")
|
| 156 |
+
print("💡 Troubleshooting tips:")
|
| 157 |
+
print(" - Check internet connection for model download")
|
| 158 |
+
print(" - Verify transformers version: pip install transformers==4.35.0")
|
| 159 |
+
print(" - Check available memory (models need ~2GB RAM)")
|
| 160 |
+
self.paraphraser = None
|
| 161 |
+
else:
|
| 162 |
+
print("❌ Transformers not available")
|
| 163 |
+
|
| 164 |
+
def _initialize_fallback_methods(self):
|
| 165 |
+
"""Initialize fallback similarity methods"""
|
| 166 |
+
self.tfidf_vectorizer = None
|
| 167 |
+
if SKLEARN_AVAILABLE:
|
| 168 |
+
try:
|
| 169 |
+
self.tfidf_vectorizer = TfidfVectorizer(
|
| 170 |
+
stop_words='english',
|
| 171 |
+
ngram_range=(1, 2),
|
| 172 |
+
max_features=5000
|
| 173 |
+
)
|
| 174 |
+
print("✅ TF-IDF fallback similarity initialized")
|
| 175 |
+
except Exception as e:
|
| 176 |
+
print(f"⚠️ TF-IDF initialization failed: {e}")
|
| 177 |
+
|
| 178 |
+
def _setup_word_mappings(self):
    """Setup comprehensive word mappings for production.

    Builds the four lookup tables the rewriting passes consume:

    - ``self.formal_to_casual``: formal word or phrase -> casual replacement
      (keys are lowercase; matching elsewhere is case-insensitive).
    - ``self.contractions``: expanded form -> contraction.
    - ``self.ai_transition_words``: transition phrases characteristic of
      AI-generated text (capitalized, usually with a trailing comma).
    - ``self.natural_transitions``: casual phrases picked at random as
      replacements for the AI-like transitions.
    """
    # Extended formal to casual mappings for production
    self.formal_to_casual = {
        # Basic formal words
        "utilize": "use", "demonstrate": "show", "facilitate": "help",
        "implement": "do", "consequently": "so", "therefore": "so",
        "nevertheless": "but", "furthermore": "also", "moreover": "also",
        "subsequently": "then", "accordingly": "so", "regarding": "about",
        "concerning": "about", "pertaining": "about", "approximately": "about",
        "endeavor": "try", "commence": "start", "terminate": "end",
        "obtain": "get", "purchase": "buy", "examine": "look at",
        "analyze": "study", "construct": "build", "establish": "set up",

        # Advanced formal words
        "magnitude": "size", "comprehensive": "complete", "significant": "big",
        "substantial": "large", "optimal": "best", "sufficient": "enough",
        "adequate": "good enough", "exceptional": "amazing", "remarkable": "great",
        "outstanding": "excellent", "predominant": "main", "fundamental": "basic",
        "essential": "needed", "crucial": "important", "vital": "key",
        "paramount": "most important", "imperative": "must", "mandatory": "required",

        # Formal phrases (multi-word keys — applied before single words by
        # replace_formal_words so a phrase is not broken up by word swaps)
        "prior to": "before", "in order to": "to", "due to the fact that": "because",
        "at this point in time": "now", "in the event that": "if",
        "it is important to note": "note that", "it should be emphasized": "remember",
        "it is worth mentioning": "by the way", "it is crucial to understand": "importantly",
        "for the purpose of": "to", "with regard to": "about",
        "in accordance with": "following", "as a result of": "because of",
        "in spite of the fact that": "although", "on the other hand": "however",

        # Academic/business terms
        "methodology": "method", "systematically": "step by step",
        "optimization": "improvement", "enhancement": "upgrade",
        "implementation": "setup", "utilization": "use", "evaluation": "review",
        "assessment": "check", "validation": "proof", "verification": "confirmation",
        "consolidation": "combining", "integration": "bringing together",
        "transformation": "change", "modification": "change", "alteration": "change"
    }

    # Extended contractions (add_contractions substitutes the longest keys
    # first, so e.g. "could not" wins over a shorter overlapping key)
    self.contractions = {
        "do not": "don't", "does not": "doesn't", "did not": "didn't",
        "will not": "won't", "would not": "wouldn't", "should not": "shouldn't",
        "could not": "couldn't", "cannot": "can't", "is not": "isn't",
        "are not": "aren't", "was not": "wasn't", "were not": "weren't",
        "have not": "haven't", "has not": "hasn't", "had not": "hadn't",
        "I am": "I'm", "you are": "you're", "he is": "he's", "she is": "she's",
        "it is": "it's", "we are": "we're", "they are": "they're",
        "I have": "I've", "you have": "you've", "we have": "we've",
        "they have": "they've", "I will": "I'll", "you will": "you'll",
        "he will": "he'll", "she will": "she'll", "it will": "it'll",
        "we will": "we'll", "they will": "they'll", "would have": "would've",
        "should have": "should've", "could have": "could've", "might have": "might've"
    }

    # AI-like transitions (expanded) — phrases that flag machine-written text
    self.ai_transition_words = [
        "Furthermore,", "Moreover,", "Additionally,", "Subsequently,",
        "Consequently,", "Therefore,", "Nevertheless,", "However,",
        "In conclusion,", "To summarize,", "In summary,", "Overall,",
        "It is important to note that", "It should be emphasized that",
        "It is worth mentioning that", "It is crucial to understand that",
        "It is essential to recognize that", "It must be acknowledged that",
        "It should be noted that", "It is imperative to understand",
        "From a practical standpoint,", "From an analytical perspective,",
        "In terms of implementation,", "With respect to the aforementioned,",
        "As previously mentioned,", "As stated earlier,", "In light of this,"
    ]

    # Natural alternatives (expanded) — casual substitutes chosen at random
    self.natural_transitions = [
        "Also,", "Plus,", "And,", "Then,", "So,", "But,", "Still,",
        "Anyway,", "By the way,", "Actually,", "Basically,", "Look,",
        "Listen,", "Here's the thing:", "The point is,", "What's more,",
        "On top of that,", "Another thing,", "Now,", "Well,", "You know,",
        "I mean,", "Honestly,", "Frankly,", "Simply put,", "In other words,",
        "To put it differently,", "Let me explain,", "Here's what I mean:",
        "Think about it,", "Consider this,", "Get this,", "Check this out,"
    ]
def _print_feature_status(self):
    """Print a production-monitoring summary of enabled features.

    Emits one status line per capability plus an overall completeness
    percentage with a readiness verdict.
    """
    def flag(enabled):
        # One formatter so every feature line renders identically.
        return '✅ ENABLED' if enabled else '❌ DISABLED'

    print("\n📊 PRODUCTION FEATURE STATUS:")
    print("-" * 40)
    print(f"🔤 Advanced Similarity: {flag(self.similarity_model)}")
    print(f"🧠 AI Paraphrasing: {flag(self.paraphraser)}")
    print(f"📊 TF-IDF Fallback: {flag(self.tfidf_vectorizer)}")
    print(f"🚀 GPU Acceleration: {flag(self.enable_gpu and TORCH_AVAILABLE)}")
    print(f"⚡ Word Mappings: ✅ ENABLED ({len(self.formal_to_casual)} mappings)")
    print(f"📝 Contractions: ✅ ENABLED ({len(self.contractions)} contractions)")

    if TORCH_AVAILABLE:
        import torch
        gpu_active = torch.cuda.is_available() and self.enable_gpu
        print(f"🖥️ Device: {'CUDA' if gpu_active else 'CPU'}")

    # Completeness = fraction of the six tracked capabilities that are live.
    active = [
        bool(self.similarity_model),
        bool(self.paraphraser),
        bool(self.tfidf_vectorizer),
        True,  # word mappings are always built
        True,  # contractions are always built
        TORCH_AVAILABLE
    ]
    completeness = (sum(active) / len(active)) * 100
    print(f"🎯 Feature Completeness: {completeness:.1f}%")

    if completeness < 70:
        print("⚠️ WARNING: Less than 70% features enabled - not production ready")
    elif completeness < 90:
        print("⚠️ CAUTION: Some advanced features missing")
    else:
        print("🎉 PRODUCTION READY: All critical features enabled!")
def add_contractions(self, text: str) -> str:
    """Replace expanded forms with contractions (e.g. "do not" -> "don't").

    Matching is case-insensitive, and the capitalization of the matched text
    is preserved: "Do not" becomes "Don't". (The previous implementation
    substituted the lowercase replacement verbatim, so sentence-initial
    matches like "Do not" were silently lowercased to "don't".)

    Args:
        text: Input text.

    Returns:
        Text with contractions applied.
    """
    # Longest keys first so an overlapping shorter mapping cannot
    # partially rewrite a longer phrase.
    sorted_contractions = sorted(
        self.contractions.items(), key=lambda kv: len(kv[0]), reverse=True
    )

    for formal, casual in sorted_contractions:
        pattern = r'\b' + re.escape(formal) + r'\b'

        def _replace(match, casual=casual):
            # Mirror the capitalization of the first matched character.
            if match.group(0)[:1].isupper():
                return casual[:1].upper() + casual[1:]
            return casual

        text = re.sub(pattern, _replace, text, flags=re.IGNORECASE)

    return text
def replace_formal_words(self, text: str, replacement_rate: float = 0.8) -> str:
    """Swap formal vocabulary for casual alternatives from ``self.formal_to_casual``.

    Fixes two defects in the previous implementation:
    - Title-case and upper-case words were never replaced, because the
      lowercased dictionary key was searched inside the original-cased token
      (``"Utilize".replace("utilize", ...)`` never matches).
    - Re-joining NLTK tokens mangled spacing around quotes and punctuation;
      substituting in place preserves the original spacing exactly.

    Args:
        text: Input text.
        replacement_rate: Probability (0..1) that each candidate is replaced.

    Returns:
        Text with formal words/phrases casualized; token casing is preserved
        (UPPER -> UPPER, Title -> Title).
    """
    # Multi-word phrases first so they are not broken up by single-word swaps.
    phrase_map = {k: v for k, v in self.formal_to_casual.items() if len(k.split()) > 1}
    word_map = {k: v for k, v in self.formal_to_casual.items() if len(k.split()) == 1}

    for formal_phrase, casual_phrase in phrase_map.items():
        if random.random() < replacement_rate:
            pattern = r'\b' + re.escape(formal_phrase) + r'\b'
            text = re.sub(pattern, casual_phrase, text, flags=re.IGNORECASE)

    def _swap(match):
        token = match.group(0)
        replacement = word_map.get(token.lower())
        if replacement is None or random.random() >= replacement_rate:
            return token
        # Preserve the casing style of the replaced token.
        if token.isupper():
            return replacement.upper()
        if token.istitle():
            return replacement.title()
        return replacement

    # Whole alphabetic words only; punctuation and spacing stay untouched.
    return re.sub(r'[A-Za-z]+', _swap, text)
def replace_ai_transitions(self, text: str) -> str:
    """Swap stock AI-sounding transition phrases for casual alternatives.

    Longer phrases are handled first so a long transition is not clobbered
    by a shorter one contained within it. A transition that opens the text
    is always replaced; one appearing mid-text is replaced 70% of the time.
    """
    by_length_desc = sorted(self.ai_transition_words, key=len, reverse=True)

    for robotic in by_length_desc:
        if robotic not in text:
            continue

        casual = random.choice(self.natural_transitions)

        # Short-circuit: a text-opening transition skips the probability roll.
        if text.startswith(robotic) or random.random() < 0.7:
            text = text.replace(robotic, casual, 1)

    return text
def advanced_paraphrasing(self, text: str, paraphrase_rate: float = 0.4) -> str:
    """Rewrite a sampled subset of long sentences through the paraphrase model.

    Only sentences longer than ten words are candidates, and each candidate
    is paraphrased with probability ``paraphrase_rate``. Model output that is
    empty, runs away in length, or looks like a refusal is discarded in favor
    of the original sentence. Returns ``text`` unchanged when no model loaded.
    """
    if not self.paraphraser:
        return text

    rewritten = []
    for sentence in sent_tokenize(text):
        is_candidate = len(sentence.split()) > 10 and random.random() < paraphrase_rate
        if not is_candidate:
            rewritten.append(sentence)
            continue

        try:
            # Several prompt phrasings keep the outputs from sounding uniform.
            prompts = [
                f"Rewrite this more naturally: {sentence}",
                f"Make this sound more conversational: {sentence}",
                f"Rephrase this in simpler terms: {sentence}",
                f"Say this in a more casual way: {sentence}"
            ]
            prompt = random.choice(prompts)

            output = self.paraphraser(
                prompt,
                max_length=len(sentence) + 50,
                min_length=max(10, len(sentence) // 2),
                num_return_sequences=1,
                temperature=0.7,
                do_sample=True
            )

            candidate = output[0]['generated_text'].replace(prompt, '').strip().strip('"\'')

            # Reject empty output, runaway length, and model refusals.
            acceptable = (
                candidate and
                len(candidate) > 5 and
                len(candidate) < len(sentence) * 2 and
                not candidate.lower().startswith(('i cannot', 'i can\'t', 'sorry'))
            )
            rewritten.append(candidate if acceptable else sentence)

        except Exception as e:
            print(f"⚠️ Paraphrasing failed: {e}")
            rewritten.append(sentence)

    return ' '.join(rewritten)
def calculate_similarity_advanced(self, text1: str, text2: str) -> float:
    """Cosine-style similarity between two texts, degrading gracefully.

    Tries, in order: sentence-embedding cosine similarity, TF-IDF cosine
    similarity, then a Jaccard word-overlap score. Each tier falls through
    to the next on error or when its backend is unavailable.
    """
    # Tier 1: dense sentence embeddings (best quality).
    if self.similarity_model:
        try:
            vec_a = self.similarity_model.encode([text1])[0]
            vec_b = self.similarity_model.encode([text2])[0]
            cosine = np.dot(vec_a, vec_b) / (
                np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
            )
            return float(cosine)
        except Exception as e:
            print(f"⚠️ Advanced similarity calculation failed: {e}")

    # Tier 2: sparse TF-IDF vectors.
    if self.tfidf_vectorizer and SKLEARN_AVAILABLE:
        try:
            matrix = self.tfidf_vectorizer.fit_transform([text1, text2])
            return float(sklearn_cosine_similarity(matrix[0:1], matrix[1:2])[0][0])
        except Exception as e:
            print(f"⚠️ TF-IDF similarity calculation failed: {e}")

    # Tier 3: Jaccard overlap of lowercased token sets.
    tokens_a = set(word_tokenize(text1.lower()))
    tokens_b = set(word_tokenize(text2.lower()))
    if not tokens_a or not tokens_b:
        return 1.0 if text1 == text2 else 0.0

    union = tokens_a | tokens_b
    return len(tokens_a & tokens_b) / len(union) if union else 1.0
def humanize_text_production(self,
                             text: str,
                             style: str = "natural",
                             intensity: float = 0.8,
                             preserve_length: bool = True,
                             quality_threshold: float = 0.75) -> Dict:
    """
    Production-grade text humanization with comprehensive quality control.

    Args:
        text: Input text to humanize
        style: Style ('natural', 'casual', 'conversational')
        intensity: Transformation intensity (0.0 to 1.0)
        preserve_length: Try to maintain similar text length
        quality_threshold: Minimum similarity score to accept

    Returns:
        Comprehensive results with quality metrics
    """
    if not text.strip():
        # Nothing to transform; report a perfect no-op result.
        return {
            "original_text": text,
            "humanized_text": text,
            "similarity_score": 1.0,
            "changes_made": [],
            "style": style,
            "intensity": intensity,
            "quality_score": 1.0,
            "processing_time_ms": 0.0,
            "feature_usage": {}
        }

    import time
    started = time.time()

    source = text
    output = text
    applied = []
    feature_usage = {}

    # Step 1: strip obvious AI transition phrases first.
    if intensity > 0.2:
        previous = output
        output = self.replace_ai_transitions(output)
        if output != previous:
            applied.append("Replaced AI-like transition phrases")
            feature_usage['ai_transitions'] = True

    # Step 2: casualize formal vocabulary.
    if intensity > 0.3:
        previous = output
        output = self.replace_formal_words(output, intensity * 0.9)
        if output != previous:
            applied.append("Replaced formal words with casual alternatives")
            feature_usage['word_replacement'] = True

    # Step 3: contractions.
    if intensity > 0.4:
        previous = output
        output = self.add_contractions(output)
        if output != previous:
            applied.append("Added natural contractions")
            feature_usage['contractions'] = True

    # Step 4: model-based paraphrasing, only at high intensity.
    if intensity > 0.6 and self.paraphraser:
        previous = output
        output = self.advanced_paraphrasing(output, intensity * 0.5)
        if output != previous:
            applied.append("Applied AI paraphrasing for natural flow")
            feature_usage['paraphrasing'] = True

    # Step 5: quality metrics.
    elapsed_ms = (time.time() - started) * 1000
    similarity = self.calculate_similarity_advanced(source, output)

    # Quality gate: discard the rewrite if it drifted too far from the source.
    if similarity < quality_threshold:
        print(f"⚠️ Quality check failed (similarity: {similarity:.3f})")
        output = source
        similarity = 1.0
        applied = ["Quality threshold not met - reverted to original"]
        feature_usage['quality_control'] = True

    # Composite quality: 50% meaning preserved, 30% length kept, 20% work done.
    length_ratio = len(output) / len(source) if source else 1.0
    length_penalty = max(0, 1.0 - abs(length_ratio - 1.0)) if preserve_length else 1.0
    change_score = min(1.0, len(applied) / 5.0)
    quality = (similarity * 0.5) + (length_penalty * 0.3) + (change_score * 0.2)

    return {
        "original_text": source,
        "humanized_text": output,
        "similarity_score": similarity,
        "quality_score": quality,
        "changes_made": applied,
        "style": style,
        "intensity": intensity,
        "processing_time_ms": elapsed_ms,
        "feature_usage": feature_usage,
        "length_change": len(output) - len(source),
        "word_count_change": len(output.split()) - len(source.split())
    }
# Convenience function for backward compatibility
def AITextHumanizer():
    """Factory kept for callers that still use the legacy class name."""
    legacy_instance = ProductionAITextHumanizer()
    return legacy_instance
# Smoke-test the production humanizer when run as a script.
if __name__ == "__main__":
    engine = ProductionAITextHumanizer()

    samples = [
        "Furthermore, it is important to note that artificial intelligence systems demonstrate significant capabilities.",
        "The implementation of comprehensive methodologies will facilitate optimization and enhance operational efficiency.",
        "Subsequently, organizations must utilize systematic approaches to evaluate and implement technological solutions."
    ]

    print("\n🧪 TESTING PRODUCTION HUMANIZER")
    print("=" * 40)

    for case_no, sample in enumerate(samples, 1):
        print(f"\n🔬 Test {case_no}:")
        print(f"Original: {sample}")

        report = engine.humanize_text_production(
            text=sample,
            style="conversational",
            intensity=0.8
        )

        print(f"Humanized: {report['humanized_text']}")
        print(f"Quality Score: {report['quality_score']:.3f}")
        print(f"Similarity: {report['similarity_score']:.3f}")
        print(f"Processing: {report['processing_time_ms']:.1f}ms")
        changes = ', '.join(report['changes_made']) if report['changes_made'] else 'None'
        print(f"Changes: {changes}")

    print("\n🎉 Production testing completed!")
|