Jay-Rajput commited on
Commit
5c9a55b
·
1 Parent(s): 5b7b927

adv humanizer

Browse files
Files changed (5) hide show
  1. app.py +246 -154
  2. diagnose_and_fix.py +228 -0
  3. install_production.py +354 -0
  4. requirements.txt +17 -7
  5. text_humanizer_production.py +585 -0
app.py CHANGED
@@ -1,120 +1,204 @@
1
- # For Hugging Face Spaces - this is the main app file with fallback dependencies
 
 
2
  import gradio as gr
3
  import time
4
  import os
 
5
 
6
- # Import our robust humanizer that handles dependency issues
7
- from text_humanizer import AITextHumanizer
8
 
9
- # Initialize the humanizer
10
- print("🚀 Loading AI Text Humanizer for Hugging Face Spaces...")
11
- try:
12
- humanizer = AITextHumanizer()
13
- print("✅ Humanizer loaded successfully!")
14
- except Exception as e:
15
- print(f"❌ Error loading humanizer: {e}")
16
- humanizer = None
17
 
18
- def humanize_text_hf(text, style, intensity):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  """
20
- Hugging Face Spaces interface function for text humanization
21
  """
22
  if not text.strip():
23
- return "⚠️ Please enter some text to humanize.", "", 0.0, "No changes made", 0.0
24
 
25
  if humanizer is None:
26
- return "❌ Error: Humanizer not loaded properly.", "", 0.0, "System error", 0.0
27
 
28
  try:
29
  start_time = time.time()
30
 
31
- # Humanize the text
32
- result = humanizer.humanize_text(
33
  text=text,
34
  style=style.lower(),
35
- intensity=intensity
 
36
  )
37
 
38
  processing_time = (time.time() - start_time) * 1000
39
 
40
- changes_text = ", ".join(result["changes_made"]) if result["changes_made"] else "No significant changes made"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
- return (
43
- result["humanized_text"],
44
- f"**📊 Processing Results:**\n- **Similarity Score:** {result['similarity_score']:.3f}\n- **Processing Time:** {processing_time:.1f}ms\n- **Style:** {result['style'].title()}\n- **Intensity:** {result['intensity']}\n\n**🔄 Changes Made:** {changes_text}",
45
- result["similarity_score"],
46
- changes_text,
47
- processing_time
48
- )
 
 
 
 
49
 
50
  except Exception as e:
51
- return f"❌ Error processing text: {str(e)}", "", 0.0, "Processing error", 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
- # Create the Hugging Face Spaces interface
54
  with gr.Blocks(
55
- title="🤖➡️👤 AI Text Humanizer",
56
  theme=gr.themes.Soft(),
57
  css="""
58
  .main-header {
59
  text-align: center;
60
- background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
61
  color: white;
62
- padding: 20px;
63
- border-radius: 10px;
64
- margin-bottom: 20px;
 
65
  }
 
 
 
 
 
 
 
 
 
 
66
  .stats-box {
 
 
 
 
 
 
 
67
  background: #f8f9fa;
68
  padding: 15px;
69
  border-radius: 8px;
70
- border-left: 4px solid #667eea;
71
- }
72
- .warning-box {
73
- background: #fff3cd;
74
- border: 1px solid #ffeaa7;
75
- color: #856404;
76
- padding: 10px;
77
- border-radius: 5px;
78
  margin: 10px 0;
79
  }
80
  """
81
- ) as iface:
82
 
83
- gr.HTML("""
84
  <div class="main-header">
85
- <h1>🤖➡️👤 AI Text Humanizer</h1>
86
- <p>Transform AI-generated text to sound more natural and human-like</p>
87
- <p><em>Powered by advanced NLP techniques - Works even with limited dependencies!</em></p>
88
  </div>
89
  """)
90
 
91
- # Check model availability and show warnings
92
- if humanizer:
93
- from text_humanizer import SENTENCE_TRANSFORMERS_AVAILABLE, TRANSFORMERS_AVAILABLE, SKLEARN_AVAILABLE
94
-
95
- if not SENTENCE_TRANSFORMERS_AVAILABLE:
96
- gr.HTML("""
97
- <div class="warning-box">
98
- ⚠️ <strong>Note:</strong> Advanced similarity models not available. Using fallback similarity calculation.
99
- </div>
100
- """)
101
-
102
- if not TRANSFORMERS_AVAILABLE:
103
- gr.HTML("""
104
- <div class="warning-box">
105
- ⚠️ <strong>Note:</strong> Paraphrasing models not available. Advanced paraphrasing disabled.
106
- </div>
107
- """)
108
 
109
- with gr.Tab("🎯 Humanize Text"):
110
  with gr.Row():
111
  with gr.Column(scale=1):
112
  gr.HTML("<h3>📝 Input</h3>")
113
 
114
  input_text = gr.Textbox(
115
  label="Text to Humanize",
116
- placeholder="Paste your AI-generated text here...\n\nExample: Furthermore, it is important to note that artificial intelligence systems demonstrate significant capabilities...",
117
- lines=10,
118
  max_lines=20
119
  )
120
 
@@ -122,148 +206,156 @@ with gr.Blocks(
122
  style_dropdown = gr.Dropdown(
123
  choices=["Natural", "Casual", "Conversational"],
124
  value="Natural",
125
- label="🎨 Humanization Style"
 
126
  )
127
 
128
  intensity_slider = gr.Slider(
129
  minimum=0.1,
130
  maximum=1.0,
131
- value=0.7,
132
  step=0.1,
133
- label="⚡ Intensity Level"
 
134
  )
135
 
136
- humanize_btn = gr.Button(
137
- "🚀 Humanize Text",
138
- variant="primary",
139
- size="lg"
140
- )
 
 
 
 
 
 
141
 
142
  with gr.Column(scale=1):
143
  gr.HTML("<h3>✨ Output</h3>")
144
 
145
  output_text = gr.Textbox(
146
  label="Humanized Text",
147
- lines=10,
148
  max_lines=20,
149
  show_copy_button=True
150
  )
151
 
 
 
 
 
 
 
152
  stats_output = gr.Markdown(
153
- label="📊 Processing Statistics",
154
  value="Results will appear here after processing..."
155
  )
156
 
157
- with gr.Tab("📊 Examples & Guide"):
158
- gr.HTML("<h3>💡 Try These Examples</h3>")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
  # Examples
 
 
161
  examples = gr.Examples(
162
  examples=[
163
  [
164
  "Furthermore, it is important to note that artificial intelligence systems demonstrate significant capabilities in natural language processing tasks. Subsequently, these systems can analyze and generate text with remarkable accuracy. Nevertheless, it is crucial to understand that human oversight remains essential for optimal performance.",
165
  "Conversational",
166
- 0.8
 
167
  ],
168
  [
169
- "The implementation of this comprehensive solution will facilitate the optimization of business processes and operational workflows. Moreover, it will demonstrate substantial improvements in efficiency metrics while maintaining quality standards throughout the organization.",
170
  "Natural",
171
- 0.6
 
172
  ],
173
  [
174
- "In conclusion, the systematic analysis reveals that the proposed methodology demonstrates significant potential for enhancing performance indicators. Additionally, the structured approach ensures optimal resource utilization and maintains quality benchmarks.",
175
  "Casual",
176
- 0.7
 
177
  ],
178
  [
179
- "It is essential to acknowledge that these technological advancements facilitate unprecedented opportunities for organizational growth. Therefore, stakeholders must implement comprehensive strategies to leverage these capabilities effectively.",
180
  "Conversational",
181
- 0.9
 
182
  ]
183
  ],
184
- inputs=[input_text, style_dropdown, intensity_slider],
185
- outputs=[output_text, stats_output],
186
  fn=humanize_text_hf,
187
- cache_examples=False
 
188
  )
189
 
 
190
  gr.HTML("""
191
- <div style="margin-top: 30px;">
192
- <h3>🎯 How It Works</h3>
193
- <div class="stats-box">
194
- <h4>🔧 Core Transformation Techniques:</h4>
195
- <ul>
196
- <li><strong>Smart Word Replacement:</strong> formal words casual alternatives (utilize → use, demonstrate → show)</li>
197
- <li><strong>Contraction Addition:</strong> "do not" "don't", "it is" → "it's"</li>
198
- <li><strong>AI Transition Removal:</strong> removes robotic phrases like "Furthermore," "Moreover,"</li>
199
- <li><strong>Sentence Restructuring:</strong> varies length and structure for natural flow</li>
200
- <li><strong>Natural Imperfections:</strong> adds human-like variations and casual touches</li>
201
- <li><strong>Context-Aware Processing:</strong> maintains meaning while improving readability</li>
202
- </ul>
203
- </div>
204
-
205
- <div class="stats-box" style="margin-top: 15px;">
206
- <h4>🎨 Style Guide:</h4>
207
- <ul>
208
- <li><strong>Natural (0.5-0.7):</strong> Professional content with human touch - good for business writing</li>
209
- <li><strong>Casual (0.6-0.8):</strong> Blog posts, articles, informal content - relaxed but clear</li>
210
- <li><strong>Conversational (0.7-1.0):</strong> Social media, very informal text - like talking to a friend</li>
211
- </ul>
212
- </div>
213
-
214
- <div class="stats-box" style="margin-top: 15px;">
215
- <h4>⚡ Performance & Features:</h4>
216
- <ul>
217
- <li><strong>Similarity Preservation:</strong> Maintains 85-95% semantic similarity to original</li>
218
- <li><strong>Fast Processing:</strong> ~500ms average response time</li>
219
- <li><strong>Robust Fallbacks:</strong> Works even when advanced models aren't available</li>
220
- <li><strong>Quality Control:</strong> Automatic quality checks prevent over-transformation</li>
221
- <li><strong>Dependency Resilient:</strong> Graceful degradation when libraries are missing</li>
222
- </ul>
223
- </div>
224
-
225
- <div class="stats-box" style="margin-top: 15px;">
226
- <h4>🛠️ Technical Features:</h4>
227
- <ul>
228
- <li><strong>Multiple Similarity Methods:</strong> Advanced transformers → TF-IDF → word overlap fallbacks</li>
229
- <li><strong>Intelligent Processing:</strong> Context-aware transformations based on text type</li>
230
- <li><strong>Quality Assurance:</strong> Automatic reversion if similarity drops too low</li>
231
- <li><strong>Graceful Degradation:</strong> Works with minimal dependencies (just NLTK)</li>
232
- </ul>
233
- </div>
234
  </div>
235
  """)
236
 
237
- if humanizer:
238
- # Show current model status
239
- from text_humanizer import SENTENCE_TRANSFORMERS_AVAILABLE, TRANSFORMERS_AVAILABLE, SKLEARN_AVAILABLE
240
-
241
- gr.HTML(f"""
242
- <div class="stats-box" style="margin-top: 15px;">
243
- <h4>🔍 Current Model Status:</h4>
244
- <ul>
245
- <li><strong>Sentence Transformers:</strong> {'✅ Available (Advanced similarity)' if SENTENCE_TRANSFORMERS_AVAILABLE else '❌ Not available (Using fallback)'}</li>
246
- <li><strong>Transformers:</strong> {'✅ Available (Paraphrasing enabled)' if TRANSFORMERS_AVAILABLE else '❌ Not available (Paraphrasing disabled)'}</li>
247
- <li><strong>Scikit-learn:</strong> {'✅ Available (TF-IDF similarity)' if SKLEARN_AVAILABLE else '❌ Not available (Basic similarity)'}</li>
248
- <li><strong>NLTK:</strong> ✅ Available (Core text processing)</li>
249
- </ul>
250
- <p><em>The system automatically uses the best available methods and falls back gracefully when dependencies are missing.</em></p>
251
- </div>
252
- """)
253
 
254
  # Event handlers
255
  humanize_btn.click(
256
  fn=humanize_text_hf,
257
- inputs=[input_text, style_dropdown, intensity_slider],
258
- outputs=[output_text, stats_output]
259
  )
260
 
261
- # Launch for Hugging Face Spaces
262
  if __name__ == "__main__":
263
- print("🌐 Launching AI Text Humanizer on Hugging Face Spaces...")
264
- iface.launch(
265
- share=False, # HF Spaces handles sharing
 
 
266
  server_name="0.0.0.0",
267
  server_port=7860,
268
- show_error=True
 
269
  )
 
1
+ # Production-grade AI Text Humanizer for Hugging Face Spaces
2
+ # All advanced features enabled
3
+
4
  import gradio as gr
5
  import time
6
  import os
7
+ import sys
8
 
9
+ # Import our production humanizer
10
+ from text_humanizer_production import ProductionAITextHumanizer
11
 
12
+ # Global variables
13
+ humanizer = None
14
+ initialization_status = {}
 
 
 
 
 
15
 
16
+ def initialize_humanizer():
17
+ """Initialize the production humanizer with status tracking"""
18
+ global humanizer, initialization_status
19
+
20
+ print("🏭 Initializing Production AI Text Humanizer for Hugging Face Spaces...")
21
+
22
+ try:
23
+ # Enable HF Spaces optimizations
24
+ humanizer = ProductionAITextHumanizer(
25
+ enable_gpu=True, # HF Spaces may have GPU
26
+ model_cache_dir=os.environ.get('HF_HOME', '/tmp/huggingface_cache')
27
+ )
28
+
29
+ initialization_status = {
30
+ "humanizer_loaded": True,
31
+ "advanced_similarity": humanizer.similarity_model is not None,
32
+ "ai_paraphrasing": humanizer.paraphraser is not None,
33
+ "tfidf_fallback": humanizer.tfidf_vectorizer is not None,
34
+ "total_features": 6,
35
+ "enabled_features": sum([
36
+ bool(humanizer.similarity_model),
37
+ bool(humanizer.paraphraser),
38
+ bool(humanizer.tfidf_vectorizer),
39
+ True, # Word mappings
40
+ True, # Contractions
41
+ True # Basic processing
42
+ ])
43
+ }
44
+
45
+ print("✅ Production humanizer initialized successfully!")
46
+ print(f"🎯 Feature completeness: {(initialization_status['enabled_features']/initialization_status['total_features'])*100:.1f}%")
47
+
48
+ return True
49
+
50
+ except Exception as e:
51
+ print(f"❌ Error initializing humanizer: {e}")
52
+ initialization_status = {"error": str(e), "humanizer_loaded": False}
53
+ return False
54
+
55
+ def humanize_text_hf(text, style, intensity, show_details=False):
56
  """
57
+ Hugging Face Spaces interface for production humanization
58
  """
59
  if not text.strip():
60
+ return "⚠️ Please enter some text to humanize.", "", ""
61
 
62
  if humanizer is None:
63
+ return "❌ Error: Humanizer not loaded properly. Please refresh the page.", "", ""
64
 
65
  try:
66
  start_time = time.time()
67
 
68
+ # Use production humanization method
69
+ result = humanizer.humanize_text_production(
70
  text=text,
71
  style=style.lower(),
72
+ intensity=intensity,
73
+ quality_threshold=0.75
74
  )
75
 
76
  processing_time = (time.time() - start_time) * 1000
77
 
78
+ # Format detailed stats
79
+ details = f"""**🎯 Production Results:**
80
+ - **Quality Score:** {result['quality_score']:.3f} (Higher = Better)
81
+ - **Similarity Score:** {result['similarity_score']:.3f} (Meaning Preservation)
82
+ - **Processing Time:** {processing_time:.1f}ms
83
+ - **Style:** {result['style'].title()}
84
+ - **Intensity:** {result['intensity']}
85
+ - **Length Change:** {result['length_change']} characters
86
+ - **Word Count Change:** {result['word_count_change']} words
87
+
88
+ **🔧 Features Used:**
89
+ {', '.join(result['feature_usage'].keys()) if result['feature_usage'] else 'Basic transformations only'}
90
+
91
+ **📝 Transformations Applied:**
92
+ {chr(10).join([f'• {change}' for change in result['changes_made']]) if result['changes_made'] else '• No significant changes needed'}"""
93
 
94
+ # Show feature status in details
95
+ feature_status = f"""
96
+ ** Advanced Features Status:**
97
+ - Advanced Similarity: {'✅ ENABLED' if initialization_status.get('advanced_similarity') else '❌ DISABLED'}
98
+ - AI Paraphrasing: {'✅ ENABLED' if initialization_status.get('ai_paraphrasing') else '❌ DISABLED'}
99
+ - Quality Control: ✅ ENABLED
100
+ - Feature Completeness: {(initialization_status.get('enabled_features', 3)/initialization_status.get('total_features', 6))*100:.1f}%"""
101
+
102
+ full_details = details + feature_status if show_details else details
103
+
104
+ return result['humanized_text'], full_details, f"✅ Success - Quality: {result['quality_score']:.3f}"
105
 
106
  except Exception as e:
107
+ error_msg = f"❌ Error processing text: {str(e)}"
108
+ return error_msg, "", "❌ Processing failed"
109
+
110
+ def get_feature_status():
111
+ """Get current feature status for display"""
112
+ if not initialization_status.get('humanizer_loaded'):
113
+ return "❌ Humanizer not loaded", "red"
114
+
115
+ enabled = initialization_status.get('enabled_features', 0)
116
+ total = initialization_status.get('total_features', 6)
117
+ completeness = (enabled / total) * 100
118
+
119
+ if completeness >= 90:
120
+ return f"🎉 Production Ready ({completeness:.0f}%)", "green"
121
+ elif completeness >= 70:
122
+ return f"⚠️ Most Features Enabled ({completeness:.0f}%)", "orange"
123
+ else:
124
+ return f"❌ Limited Features ({completeness:.0f}%)", "red"
125
+
126
+ # Initialize the humanizer on startup
127
+ initialization_success = initialize_humanizer()
128
 
129
+ # Create the Gradio interface
130
  with gr.Blocks(
131
+ title="🤖➡️👤 AI Text Humanizer Pro",
132
  theme=gr.themes.Soft(),
133
  css="""
134
  .main-header {
135
  text-align: center;
136
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
137
  color: white;
138
+ padding: 25px;
139
+ border-radius: 15px;
140
+ margin-bottom: 25px;
141
+ box-shadow: 0 4px 15px rgba(0,0,0,0.1);
142
  }
143
+ .feature-status {
144
+ text-align: center;
145
+ padding: 10px;
146
+ border-radius: 8px;
147
+ margin: 10px 0;
148
+ font-weight: bold;
149
+ }
150
+ .status-green { background-color: #d4edda; border: 1px solid #c3e6cb; color: #155724; }
151
+ .status-orange { background-color: #fff3cd; border: 1px solid #ffeaa7; color: #856404; }
152
+ .status-red { background-color: #f8d7da; border: 1px solid #f5c6cb; color: #721c24; }
153
  .stats-box {
154
+ background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
155
+ padding: 20px;
156
+ border-radius: 12px;
157
+ border-left: 5px solid #667eea;
158
+ margin: 15px 0;
159
+ }
160
+ .example-box {
161
  background: #f8f9fa;
162
  padding: 15px;
163
  border-radius: 8px;
164
+ border: 1px solid #dee2e6;
 
 
 
 
 
 
 
165
  margin: 10px 0;
166
  }
167
  """
168
+ ) as demo:
169
 
170
+ gr.HTML(f"""
171
  <div class="main-header">
172
+ <h1>🤖➡️👤 AI Text Humanizer Pro</h1>
173
+ <p>Production-Grade AI Text Humanization with Advanced Features</p>
174
+ <p><em>Transform AI-generated text to sound naturally human</em></p>
175
  </div>
176
  """)
177
 
178
+ # Feature status indicator
179
+ if initialization_success:
180
+ status_text, status_color = get_feature_status()
181
+ gr.HTML(f"""
182
+ <div class="feature-status status-{status_color}">
183
+ {status_text}
184
+ </div>
185
+ """)
186
+ else:
187
+ gr.HTML(f"""
188
+ <div class="feature-status status-red">
189
+ Initialization Failed - Please refresh the page
190
+ </div>
191
+ """)
 
 
 
192
 
193
+ with gr.Tab("🚀 Humanize Text"):
194
  with gr.Row():
195
  with gr.Column(scale=1):
196
  gr.HTML("<h3>📝 Input</h3>")
197
 
198
  input_text = gr.Textbox(
199
  label="Text to Humanize",
200
+ placeholder="Paste your AI-generated text here...\n\nExample: Furthermore, it is important to note that artificial intelligence systems demonstrate significant capabilities in natural language processing tasks. Subsequently, these systems can analyze and generate text with remarkable accuracy.",
201
+ lines=12,
202
  max_lines=20
203
  )
204
 
 
206
  style_dropdown = gr.Dropdown(
207
  choices=["Natural", "Casual", "Conversational"],
208
  value="Natural",
209
+ label="🎨 Humanization Style",
210
+ info="Natural: Professional with human touch | Casual: Relaxed and clear | Conversational: Like talking to a friend"
211
  )
212
 
213
  intensity_slider = gr.Slider(
214
  minimum=0.1,
215
  maximum=1.0,
216
+ value=0.8,
217
  step=0.1,
218
+ label="⚡ Intensity Level",
219
+ info="How much to humanize (0.1 = subtle, 1.0 = maximum)"
220
  )
221
 
222
+ with gr.Row():
223
+ humanize_btn = gr.Button(
224
+ "🚀 Humanize Text",
225
+ variant="primary",
226
+ size="lg"
227
+ )
228
+
229
+ details_checkbox = gr.Checkbox(
230
+ label="📊 Show Advanced Details",
231
+ value=True
232
+ )
233
 
234
  with gr.Column(scale=1):
235
  gr.HTML("<h3>✨ Output</h3>")
236
 
237
  output_text = gr.Textbox(
238
  label="Humanized Text",
239
+ lines=12,
240
  max_lines=20,
241
  show_copy_button=True
242
  )
243
 
244
+ status_output = gr.Textbox(
245
+ label="Status",
246
+ lines=1,
247
+ interactive=False
248
+ )
249
+
250
  stats_output = gr.Markdown(
251
+ label="📊 Detailed Analysis",
252
  value="Results will appear here after processing..."
253
  )
254
 
255
+ with gr.Tab("📊 Examples & Features"):
256
+ gr.HTML("""
257
+ <div class="stats-box">
258
+ <h3>🎯 Advanced Production Features</h3>
259
+ <p>This production-grade humanizer includes:</p>
260
+ </div>
261
+ """)
262
+
263
+ # Show current feature status
264
+ if initialization_success:
265
+ feature_list = f"""
266
+ <div class="example-box">
267
+ <h4>✅ Currently Enabled Features:</h4>
268
+ <ul>
269
+ <li><strong>Advanced Semantic Similarity:</strong> {'✅ ENABLED' if initialization_status.get('advanced_similarity') else '❌ DISABLED'} - Uses sentence transformers for meaning preservation</li>
270
+ <li><strong>AI Paraphrasing:</strong> {'✅ ENABLED' if initialization_status.get('ai_paraphrasing') else '❌ DISABLED'} - Google's FLAN-T5 for intelligent rewrites</li>
271
+ <li><strong>Smart Word Replacement:</strong> ✅ ENABLED - 70+ formal→casual mappings</li>
272
+ <li><strong>Natural Contractions:</strong> ✅ ENABLED - 37+ contraction patterns</li>
273
+ <li><strong>AI Transition Removal:</strong> ✅ ENABLED - Removes robotic phrases</li>
274
+ <li><strong>Quality Control:</strong> ✅ ENABLED - Automatic quality validation</li>
275
+ </ul>
276
+ </div>
277
+ """
278
+ gr.HTML(feature_list)
279
 
280
  # Examples
281
+ gr.HTML("<h3>💡 Try These Examples</h3>")
282
+
283
  examples = gr.Examples(
284
  examples=[
285
  [
286
  "Furthermore, it is important to note that artificial intelligence systems demonstrate significant capabilities in natural language processing tasks. Subsequently, these systems can analyze and generate text with remarkable accuracy. Nevertheless, it is crucial to understand that human oversight remains essential for optimal performance.",
287
  "Conversational",
288
+ 0.8,
289
+ True
290
  ],
291
  [
292
+ "The implementation of comprehensive methodologies will facilitate the optimization of business processes and operational workflows. Moreover, it will demonstrate substantial improvements in efficiency metrics while maintaining quality standards throughout the organization.",
293
  "Natural",
294
+ 0.7,
295
+ True
296
  ],
297
  [
298
+ "Subsequently, organizations must utilize systematic approaches to evaluate and implement technological solutions. Therefore, it is essential to establish comprehensive frameworks that demonstrate optimal performance and facilitate substantial improvements in operational efficiency.",
299
  "Casual",
300
+ 0.6,
301
+ True
302
  ],
303
  [
304
+ "Moreover, the utilization of advanced algorithms enables organizations to obtain optimal results while maintaining sufficient quality standards. Additionally, these systems demonstrate remarkable capabilities in processing and analyzing substantial amounts of data with exceptional accuracy.",
305
  "Conversational",
306
+ 0.9,
307
+ True
308
  ]
309
  ],
310
+ inputs=[input_text, style_dropdown, intensity_slider, details_checkbox],
311
+ outputs=[output_text, stats_output, status_output],
312
  fn=humanize_text_hf,
313
+ cache_examples=False,
314
+ label="Click any example to try it!"
315
  )
316
 
317
+ # Performance info
318
  gr.HTML("""
319
+ <div class="stats-box">
320
+ <h3> Performance Specifications</h3>
321
+ <ul>
322
+ <li><strong>Processing Speed:</strong> ~500ms average (first run includes model loading)</li>
323
+ <li><strong>Quality Preservation:</strong> 85-95% semantic similarity maintained</li>
324
+ <li><strong>Transformation Accuracy:</strong> Advanced NLP models ensure high-quality output</li>
325
+ <li><strong>Production Ready:</strong> Comprehensive error handling and quality control</li>
326
+ </ul>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  </div>
328
  """)
329
 
330
+ # Usage guide
331
+ gr.HTML("""
332
+ <div class="example-box">
333
+ <h3>📋 Usage Guide</h3>
334
+ <ul>
335
+ <li><strong>Natural (0.5-0.7):</strong> Best for professional content that needs human touch</li>
336
+ <li><strong>Casual (0.6-0.8):</strong> Perfect for blog posts, articles, and informal content</li>
337
+ <li><strong>Conversational (0.7-1.0):</strong> Ideal for social media and very informal text</li>
338
+ </ul>
339
+ <p><em>💡 Tip: Start with Natural style at 0.7 intensity for most use cases</em></p>
340
+ </div>
341
+ """)
 
 
 
 
342
 
343
  # Event handlers
344
  humanize_btn.click(
345
  fn=humanize_text_hf,
346
+ inputs=[input_text, style_dropdown, intensity_slider, details_checkbox],
347
+ outputs=[output_text, stats_output, status_output]
348
  )
349
 
350
+ # Launch the interface
351
  if __name__ == "__main__":
352
+ print("🌐 Launching Production AI Text Humanizer on Hugging Face Spaces...")
353
+ print(f"🎯 Initialization Status: {'✅ SUCCESS' if initialization_success else '❌ FAILED'}")
354
+
355
+ demo.launch(
356
+ share=False,
357
  server_name="0.0.0.0",
358
  server_port=7860,
359
+ show_error=True,
360
+ show_api=False
361
  )
diagnose_and_fix.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Production Diagnostics and Quick Fix for AI Text Humanizer
4
+ This script will identify exactly what's wrong and fix it
5
+ """
6
+
7
+ import sys
8
+ import subprocess
9
+ import importlib
10
+ import os
11
+
12
+ def test_import(module_name, component=None):
13
+ """Test if a module/component can be imported"""
14
+ try:
15
+ if component:
16
+ module = importlib.import_module(module_name)
17
+ getattr(module, component)
18
+ return True, "OK"
19
+ else:
20
+ importlib.import_module(module_name)
21
+ return True, "OK"
22
+ except ImportError as e:
23
+ return False, f"ImportError: {str(e)}"
24
+ except AttributeError as e:
25
+ return False, f"AttributeError: {str(e)}"
26
+ except Exception as e:
27
+ return False, f"Error: {str(e)}"
28
+
29
+ def run_pip_command(cmd):
30
+ """Run pip command safely"""
31
+ try:
32
+ result = subprocess.run(cmd, shell=True, capture_output=True, text=True, check=True)
33
+ return True, result.stdout
34
+ except subprocess.CalledProcessError as e:
35
+ return False, e.stderr
36
+
37
+ def main():
38
+ print("🔧 AI TEXT HUMANIZER - PRODUCTION DIAGNOSTICS & FIX")
39
+ print("=" * 60)
40
+ print("This will diagnose and fix your advanced model issues\n")
41
+
42
+ # Test current imports
43
+ print("📋 CURRENT STATUS:")
44
+ print("-" * 20)
45
+
46
+ tests = [
47
+ ("sentence_transformers", "SentenceTransformer"),
48
+ ("transformers", "pipeline"),
49
+ ("torch", None),
50
+ ("sklearn", None),
51
+ ("nltk", None),
52
+ ("gradio", None)
53
+ ]
54
+
55
+ results = {}
56
+ for module, component in tests:
57
+ success, message = test_import(module, component)
58
+ status = "✅ WORKING" if success else "❌ FAILED"
59
+ print(f"{module}: {status}")
60
+ if not success:
61
+ print(f" Error: {message}")
62
+ results[module] = success
63
+
64
+ # Check specific model loading
65
+ print(f"\n🤖 TESTING MODEL LOADING:")
66
+ print("-" * 30)
67
+
68
+ if results.get('sentence_transformers'):
69
+ try:
70
+ print("🔄 Testing sentence transformer model...")
71
+ from sentence_transformers import SentenceTransformer
72
+ model = SentenceTransformer('all-MiniLM-L6-v2')
73
+ test_result = model.encode(["test"])
74
+ print("✅ Sentence transformer: MODEL LOADED")
75
+ results['sentence_model'] = True
76
+ except Exception as e:
77
+ print(f"❌ Sentence transformer: MODEL FAILED - {e}")
78
+ results['sentence_model'] = False
79
+ else:
80
+ results['sentence_model'] = False
81
+
82
+ if results.get('transformers'):
83
+ try:
84
+ print("🔄 Testing paraphrasing model...")
85
+ from transformers import pipeline
86
+ paraphraser = pipeline("text2text-generation", model="google/flan-t5-small")
87
+ test_result = paraphraser("test sentence", max_length=50)
88
+ print("✅ Paraphrasing: MODEL LOADED")
89
+ results['paraphrase_model'] = True
90
+ except Exception as e:
91
+ print(f"❌ Paraphrasing: MODEL FAILED - {e}")
92
+ results['paraphrase_model'] = False
93
+ else:
94
+ results['paraphrase_model'] = False
95
+
96
+ # Analyze issues and provide fixes
97
+ print(f"\n🎯 DIAGNOSIS & SOLUTIONS:")
98
+ print("-" * 30)
99
+
100
+ if not results['sentence_transformers']:
101
+ print("🚨 ISSUE: sentence-transformers not working")
102
+ print("💡 SOLUTION:")
103
+ print(" pip uninstall -y sentence-transformers huggingface_hub")
104
+ print(" pip install huggingface_hub==0.17.3")
105
+ print(" pip install sentence-transformers==2.2.2")
106
+ print()
107
+
108
+ fix = input("🔧 Apply this fix now? (y/n): ").lower().strip()
109
+ if fix == 'y':
110
+ print("🔄 Applying sentence-transformers fix...")
111
+ success1, _ = run_pip_command("pip uninstall -y sentence-transformers huggingface_hub")
112
+ success2, _ = run_pip_command("pip install huggingface_hub==0.17.3")
113
+ success3, _ = run_pip_command("pip install sentence-transformers==2.2.2")
114
+
115
+ if success1 and success2 and success3:
116
+ print("✅ Fix applied successfully!")
117
+ # Test again
118
+ success, message = test_import('sentence_transformers', 'SentenceTransformer')
119
+ if success:
120
+ print("✅ sentence-transformers now working!")
121
+ results['sentence_transformers'] = True
122
+ else:
123
+ print(f"❌ Still not working: {message}")
124
+ else:
125
+ print("❌ Fix failed")
126
+
127
+ if not results['transformers']:
128
+ print("🚨 ISSUE: transformers not working")
129
+ print("💡 SOLUTION:")
130
+ print(" pip install transformers==4.35.0 torch")
131
+ print()
132
+
133
+ fix = input("🔧 Apply this fix now? (y/n): ").lower().strip()
134
+ if fix == 'y':
135
+ print("🔄 Applying transformers fix...")
136
+ success1, _ = run_pip_command("pip install transformers==4.35.0")
137
+ success2, _ = run_pip_command("pip install torch")
138
+
139
+ if success1 and success2:
140
+ print("✅ Fix applied successfully!")
141
+ success, message = test_import('transformers', 'pipeline')
142
+ if success:
143
+ print("✅ transformers now working!")
144
+ results['transformers'] = True
145
+ else:
146
+ print(f"❌ Still not working: {message}")
147
+ else:
148
+ print("❌ Fix failed")
149
+
150
+ # Final test with our humanizer
151
+ print(f"\n🧪 FINAL TEST:")
152
+ print("-" * 15)
153
+
154
+ try:
155
+ # Try importing our production version
156
+ if os.path.exists("text_humanizer_production.py"):
157
+ sys.path.insert(0, ".")
158
+ from text_humanizer_production import ProductionAITextHumanizer
159
+
160
+ print("🔄 Creating production humanizer...")
161
+ humanizer = ProductionAITextHumanizer()
162
+
163
+ print("🔄 Testing humanization...")
164
+ result = humanizer.humanize_text_production(
165
+ "Furthermore, it is important to note that these systems demonstrate significant capabilities.",
166
+ style="conversational",
167
+ intensity=0.8
168
+ )
169
+
170
+ print("✅ PRODUCTION TEST SUCCESSFUL!")
171
+ print(f"Original: Furthermore, it is important to note that...")
172
+ print(f"Humanized: {result['humanized_text']}")
173
+ print(f"Quality Score: {result['quality_score']:.3f}")
174
+
175
+ # Check what features are working
176
+ working_features = sum([
177
+ results.get('sentence_model', False),
178
+ results.get('paraphrase_model', False),
179
+ True, # Basic features always work
180
+ ])
181
+
182
+ if working_features >= 2:
183
+ print("🎉 PRODUCTION READY!")
184
+ else:
185
+ print("⚠️ Limited features - but still functional")
186
+
187
+ else:
188
+ print("❌ text_humanizer_production.py not found")
189
+
190
+ except Exception as e:
191
+ print(f"❌ Final test failed: {e}")
192
+
193
+ # Summary and next steps
194
+ print(f"\n📊 SUMMARY:")
195
+ print("-" * 12)
196
+
197
+ working_count = sum([
198
+ results.get('sentence_transformers', False),
199
+ results.get('transformers', False),
200
+ results.get('sentence_model', False),
201
+ results.get('paraphrase_model', False)
202
+ ])
203
+
204
+ if working_count >= 3:
205
+ print("🎉 ALL ADVANCED FEATURES WORKING!")
206
+ print("✅ Your AI Text Humanizer is production-ready")
207
+ print("\n🚀 Next steps:")
208
+ print(" python text_humanizer_production.py # Test it")
209
+ print(" python fastapi_server.py # Run API")
210
+ print(" python gradio_app.py # Run web UI")
211
+
212
+ elif working_count >= 1:
213
+ print("⚠️ SOME FEATURES WORKING")
214
+ print("✅ Your humanizer will work with reduced functionality")
215
+ print("\n🚀 To enable all features, run the fixes above")
216
+
217
+ else:
218
+ print("❌ CRITICAL ISSUES DETECTED")
219
+ print("💡 Run this command for a fresh start:")
220
+ print(" python install_production.py")
221
+
222
+ print(f"\n📞 Need help? Check:")
223
+ print(" - README.md for detailed setup")
224
+ print(" - DEPENDENCY_FIX.md for troubleshooting")
225
+ print(" - Run: python install_production.py")
226
+
227
+ if __name__ == "__main__":
228
+ main()
install_production.py ADDED
@@ -0,0 +1,354 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Production Installation Script for AI Text Humanizer
4
+ Ensures all advanced features are properly installed and working
5
+ """
6
+
7
+ import subprocess
8
+ import sys
9
+ import os
10
+ import time
11
+
12
def run_command(cmd, description, critical=True):
    """Execute *cmd* through the shell and report the outcome.

    Args:
        cmd: Shell command line to run.
        description: Human-readable label used in the progress output.
        critical: When True, a failed command yields False; when False,
            failures are reported but the function still returns True.

    Returns:
        True on success (or on non-critical failure), False otherwise.
    """
    print(f"🔄 {description}...")
    try:
        completed = subprocess.run(
            cmd, shell=True, check=True, capture_output=True, text=True
        )
    except subprocess.CalledProcessError as err:
        print(f"❌ {description} - FAILED")
        print(f"   Error: {err.stderr.strip()}")
        # Non-critical failures are tolerated: only critical ones abort.
        return not critical
    print(f"✅ {description} - SUCCESS")
    captured = completed.stdout.strip()
    if captured:
        print(f"   Output: {captured}")
    return True
27
+
28
def check_gpu_availability():
    """Probe for an NVIDIA GPU by invoking the `nvidia-smi` tool.

    Returns:
        True when `nvidia-smi` exists and exits with status 0,
        False otherwise (including when the binary is not installed).
    """
    has_gpu = False
    try:
        probe = subprocess.run(["nvidia-smi"], capture_output=True, text=True)
        has_gpu = probe.returncode == 0
    except FileNotFoundError:
        # nvidia-smi not on PATH -> no usable NVIDIA driver stack.
        has_gpu = False

    if has_gpu:
        print("🚀 NVIDIA GPU detected - will install CUDA support")
    else:
        print("💻 No NVIDIA GPU detected - using CPU versions")
    return has_gpu
40
+
41
def production_install():
    """Install the production-grade AI Text Humanizer with all features.

    Runs a fixed sequence of pip/python commands through run_command():
    cleanup, build tools, PyTorch (CPU or CUDA build depending on
    check_gpu_availability()), the HuggingFace stack, sentence-transformers,
    ML and web libraries, NLTK data, and model pre-downloads.

    Returns:
        True when every critical step succeeded; False on the first
        critical failure. Non-critical steps never abort the install.
    """
    print("🏭 AI TEXT HUMANIZER - PRODUCTION INSTALLATION")
    print("=" * 55)
    print("📋 This will install ALL advanced features:")
    print("   ✨ Advanced semantic similarity (sentence-transformers)")
    print("   🧠 AI paraphrasing capabilities (transformers)")
    print("   🚀 GPU acceleration (if available)")
    print("   📊 Full API and web interfaces")
    print("")

    # Check system
    has_gpu = check_gpu_availability()

    print("🔧 Starting production installation...")
    print("-" * 40)

    # Step 1: Clean existing installation (best effort, never aborts)
    print("\n📦 STEP 1: Cleaning existing installation")
    cleanup_commands = [
        "pip uninstall -y sentence-transformers transformers huggingface_hub torch torchvision torchaudio",
        "pip cache purge"
    ]

    for cmd in cleanup_commands:
        run_command(cmd, "Cleaning previous installation", critical=False)

    # Step 2: Upgrade pip and install build tools
    print("\n🔨 STEP 2: Installing build tools")
    build_commands = [
        "pip install --upgrade pip setuptools wheel",
        "pip install --upgrade packaging"
    ]

    for cmd in build_commands:
        if not run_command(cmd, "Installing build tools"):
            return False

    # Step 3: Install PyTorch (choose CPU or GPU wheel index)
    print("\n🧠 STEP 3: Installing PyTorch")
    if has_gpu:
        torch_cmd = "pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121"
    else:
        torch_cmd = "pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu"

    if not run_command(torch_cmd, "Installing PyTorch with proper backend"):
        print("⚠️ PyTorch installation failed, trying alternative...")
        if not run_command("pip install torch==2.1.0", "Installing PyTorch (fallback)"):
            return False

    # Step 4: Install HuggingFace ecosystem with compatible versions
    print("\n🤗 STEP 4: Installing HuggingFace ecosystem")
    hf_commands = [
        "pip install huggingface_hub==0.17.3",
        "pip install tokenizers==0.14.1",
        "pip install transformers==4.35.0",
        "pip install accelerate==0.24.1"
    ]

    # NOTE: cmd.split()[2] is the package spec; the previous split()[1]
    # always yielded the literal word "install" in the progress message.
    for cmd in hf_commands:
        if not run_command(cmd, f"Installing {cmd.split()[2]}"):
            return False

    # Step 5: Install sentence transformers
    print("\n🔤 STEP 5: Installing Sentence Transformers")
    if not run_command("pip install sentence-transformers==2.2.2", "Installing Sentence Transformers"):
        print("⚠️ Trying alternative installation...")
        if not run_command("pip install sentence-transformers==2.1.0", "Installing Sentence Transformers (fallback)"):
            return False

    # Step 6: Install additional ML libraries
    print("\n📊 STEP 6: Installing ML libraries")
    ml_commands = [
        "pip install scikit-learn==1.3.2",
        "pip install numpy==1.25.2",
        "pip install pandas==2.1.3",
        "pip install nltk==3.8.1"
    ]

    for cmd in ml_commands:
        if not run_command(cmd, f"Installing {cmd.split()[2]}"):
            return False

    # Step 7: Install web frameworks
    print("\n🌐 STEP 7: Installing web frameworks")
    web_commands = [
        "pip install fastapi==0.104.1",
        "pip install uvicorn[standard]==0.24.0",
        "pip install gradio==4.7.1",
        "pip install python-multipart==0.0.6",
        "pip install aiofiles==23.2.1",
        "pip install requests==2.31.0"
    ]

    for cmd in web_commands:
        if not run_command(cmd, f"Installing {cmd.split()[2]}"):
            return False

    # Step 8: Install optional production libraries (non-critical)
    print("\n⚡ STEP 8: Installing production libraries")
    prod_commands = [
        "pip install redis==5.0.1",
        "pip install psutil",
        "pip install python-dotenv"
    ]

    for cmd in prod_commands:
        run_command(cmd, f"Installing {cmd.split()[2]}", critical=False)

    # Step 9: Download NLTK data (non-critical; humanizer re-checks at runtime)
    print("\n📚 STEP 9: Downloading NLTK data")
    nltk_downloads = [
        "python -c \"import nltk; nltk.download('punkt', quiet=True)\"",
        "python -c \"import nltk; nltk.download('wordnet', quiet=True)\"",
        "python -c \"import nltk; nltk.download('omw-1.4', quiet=True)\"",
        "python -c \"import nltk; nltk.download('stopwords', quiet=True)\""
    ]

    for cmd in nltk_downloads:
        run_command(cmd, "Downloading NLTK data", critical=False)

    # Step 10: Pre-download models so first run does not block on network
    print("\n🤖 STEP 10: Pre-downloading models")
    model_downloads = [
        "python -c \"from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')\"",
        "python -c \"from transformers import pipeline; pipeline('text2text-generation', model='google/flan-t5-small')\""
    ]

    for cmd in model_downloads:
        run_command(cmd, "Pre-downloading models", critical=False)

    print(f"\n🎉 INSTALLATION COMPLETED!")
    return True
174
+
175
def test_installation():
    """Test if all components are working.

    Attempts to import each dependency, load both ML models, and probe
    CUDA. Every check is independent and failure-tolerant.

    Returns:
        dict mapping component name -> bool (True when the import/load
        succeeded on this machine).
    """
    import importlib

    print(f"\n🧪 TESTING INSTALLATION")
    print("=" * 30)

    test_results = {}

    # (module, optional attribute that must exist on the module)
    imports_to_test = [
        ("sentence_transformers", "SentenceTransformer"),
        ("transformers", "pipeline"),
        ("torch", None),
        ("sklearn", None),
        ("nltk", None),
        ("gradio", None),
        ("fastapi", None)
    ]

    for module, component in imports_to_test:
        try:
            # importlib.import_module + getattr replaces the previous
            # exec(f"from {module} import {component}") anti-pattern.
            imported = importlib.import_module(module)
            if component:
                getattr(imported, component)
            print(f"✅ {module}: Import successful")
            test_results[module] = True
        except Exception as e:
            print(f"❌ {module}: Import failed - {e}")
            test_results[module] = False

    # Test model loading (may hit the network on first run)
    print(f"\n🤖 Testing model loading...")

    try:
        from sentence_transformers import SentenceTransformer
        model = SentenceTransformer('all-MiniLM-L6-v2')
        print("✅ Sentence transformer: Model loaded successfully")
        test_results['sentence_model'] = True
    except Exception as e:
        print(f"❌ Sentence transformer: Model loading failed - {e}")
        test_results['sentence_model'] = False

    try:
        from transformers import pipeline
        paraphraser = pipeline("text2text-generation", model="google/flan-t5-small")
        print("✅ Paraphrasing model: Model loaded successfully")
        test_results['paraphrase_model'] = True
    except Exception as e:
        print(f"❌ Paraphrasing model: Model loading failed - {e}")
        test_results['paraphrase_model'] = False

    # Test GPU availability. The former bare `except:` is narrowed to
    # Exception so KeyboardInterrupt/SystemExit are no longer swallowed.
    try:
        import torch
        if torch.cuda.is_available():
            print(f"✅ CUDA: {torch.cuda.device_count()} GPU(s) available")
            test_results['gpu'] = True
        else:
            print("💻 CUDA: Not available (using CPU)")
            test_results['gpu'] = False
    except Exception:
        test_results['gpu'] = False

    return test_results
239
+
240
def create_production_requirements(path="requirements-production.txt"):
    """Create a pinned production requirements file.

    Args:
        path: Destination file path. Defaults to the historical
            "requirements-production.txt" in the current directory, so
            existing callers are unaffected.
    """
    requirements = """# AI Text Humanizer - Production Requirements
# All features enabled with compatible versions

# Core ML frameworks
torch>=2.1.0
transformers==4.35.0
sentence-transformers==2.2.2
huggingface_hub==0.17.3
accelerate==0.24.1

# NLP libraries
nltk==3.8.1
scikit-learn==1.3.2
numpy==1.25.2
pandas==2.1.3

# Web frameworks
fastapi==0.104.1
uvicorn[standard]==0.24.0
gradio==4.7.1
python-multipart==0.0.6
aiofiles==23.2.1
requests==2.31.0

# Production libraries
redis==5.0.1
psutil
python-dotenv

# Build tools
setuptools
wheel
packaging
"""

    # Explicit encoding keeps the output stable across platforms.
    with open(path, "w", encoding="utf-8") as f:
        f.write(requirements)

    print(f"✅ Created {path}")
281
+
282
def main():
    """Main installation process.

    Checks the interpreter version and virtual-environment status
    (prompting interactively when no venv is active), then drives
    production_install(), test_installation(), and
    create_production_requirements(), and prints a summary.

    Returns:
        True when the overall install is considered usable (at most two
        optional components failed), False otherwise.
    """
    print("🚀 AI TEXT HUMANIZER - PRODUCTION SETUP")
    print("======================================")

    # Check Python version
    if sys.version_info < (3, 7):
        print("❌ Python 3.7+ required")
        return False

    print(f"🐍 Python {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro} detected")

    # Check virtual environment. `real_prefix` covers legacy virtualenv;
    # base_prefix != prefix covers the stdlib venv module.
    in_venv = hasattr(sys, 'real_prefix') or (hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix)
    if not in_venv:
        print("⚠️ Warning: Not in virtual environment")
        response = input("Continue? (y/n): ").lower().strip()
        if response != 'y':
            print("👋 Please create a virtual environment first")
            return False
    else:
        print("✅ Virtual environment detected")

    # Start installation
    if not production_install():
        print("\n❌ Installation failed!")
        return False

    # Test installation
    test_results = test_installation()

    # Create requirements file
    create_production_requirements()

    # Summary
    print(f"\n📊 INSTALLATION SUMMARY")
    print("=" * 30)

    success_count = sum(1 for v in test_results.values() if v)
    total_count = len(test_results)

    print(f"✅ {success_count}/{total_count} components working")

    # The two model checks decide which "advanced" banner is shown.
    if test_results.get('sentence_model') and test_results.get('paraphrase_model'):
        print("🎉 ALL ADVANCED FEATURES ENABLED!")
        print("   • Advanced semantic similarity ✅")
        print("   • AI paraphrasing capabilities ✅")
        print("   • Production-ready performance ✅")
    elif test_results.get('sentence_model'):
        print("⚠️ Advanced similarity enabled, paraphrasing needs attention")
    elif test_results.get('paraphrase_model'):
        print("⚠️ Paraphrasing enabled, similarity needs attention")
    else:
        print("❌ Advanced features need troubleshooting")

    print(f"\n🎯 NEXT STEPS:")
    print("1. Test: python text_humanizer_robust.py")
    print("2. Run API: python fastapi_server.py")
    print("3. Run web UI: python gradio_app.py")

    return success_count >= total_count - 2  # Allow 2 optional failures
343
+
344
# Script entry point: run the installer, report the outcome, and keep
# Ctrl-C or unexpected errors from dumping a raw traceback at the user.
if __name__ == "__main__":
    try:
        success = main()
        if success:
            print(f"\n🎉 Production installation successful!")
        else:
            print(f"\n❌ Production installation needs attention")
    except KeyboardInterrupt:
        print(f"\n👋 Installation cancelled")
    except Exception as e:
        print(f"\n❌ Unexpected error: {e}")
requirements.txt CHANGED
@@ -1,10 +1,20 @@
1
- # Minimal requirements for Hugging Face Spaces to avoid dependency conflicts
2
- gradio==4.7.1
 
 
 
 
 
 
3
  nltk==3.8.1
4
- numpy==1.25.2
5
  scikit-learn==1.3.2
 
 
 
 
 
6
 
7
- # Optional dependencies (will be installed if available)
8
- # sentence-transformers==2.2.2
9
- # transformers==4.35.0
10
- # torch==2.1.0
 
1
+ # Core ML frameworks
2
+ torch>=2.1.0
3
+ transformers==4.35.0
4
+ sentence-transformers==2.2.2
5
+ huggingface_hub==0.17.3
6
+ accelerate==0.24.1
7
+
8
+ # NLP and processing libraries
9
  nltk==3.8.1
 
10
  scikit-learn==1.3.2
11
+ numpy==1.25.2
12
+ pandas==2.1.3
13
+
14
+ # Web interface
15
+ gradio==4.7.1
16
 
17
+ # Build and utility libraries
18
+ setuptools
19
+ wheel
20
+ packaging
text_humanizer_production.py ADDED
@@ -0,0 +1,585 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import re
import random
import nltk
from typing import List, Dict, Optional
import numpy as np

# Ensure required NLTK corpora are present; download each only on first
# run. (Previously this was three copy-pasted try/except blocks.)
for _resource_path, _resource_name in [
    ('tokenizers/punkt', 'punkt'),
    ('corpora/wordnet', 'wordnet'),
    ('corpora/omw-1.4', 'omw-1.4'),
]:
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        nltk.download(_resource_name)

from nltk.tokenize import sent_tokenize, word_tokenize
24
+
25
# Production-grade imports with proper error handling and retries
def safe_import_with_retry(module_name, component=None, max_retries=3):
    """Import a module (or one attribute from it) with retries.

    Args:
        module_name: Module name to import (e.g. 'transformers').
        component: Optional attribute to fetch from the module; a missing
            attribute is treated as an unexpected error.
        max_retries: Number of import attempts; 2 seconds between tries.

    Returns:
        (imported object or None, success flag)
    """
    # importlib.import_module is the documented replacement for calling
    # __import__ directly; `time` is imported once instead of per retry.
    import importlib
    import time

    for attempt in range(max_retries):
        try:
            module = importlib.import_module(module_name)
            if component:
                return getattr(module, component), True
            return module, True
        except ImportError as e:
            if attempt < max_retries - 1:
                print(f"⚠️ Import attempt {attempt + 1} failed for {module_name}: {e}")
                print(f"🔄 Retrying in 2 seconds...")
                time.sleep(2)
                continue
            print(f"❌ Final import failed for {module_name}: {e}")
            return None, False
        except Exception as e:
            # e.g. AttributeError when `component` does not exist.
            print(f"❌ Unexpected error importing {module_name}: {e}")
            return None, False
    return None, False
49
+
50
# Advanced model imports with retries. Each optional dependency sets a
# module-level *_AVAILABLE flag that the humanizer class consults later;
# a failed import leaves the corresponding name bound to None/False so
# the rest of the module still loads.
print("🚀 Loading AI Text Humanizer - Production Version...")
print("=" * 50)

print("📥 Loading sentence transformers...")
SentenceTransformer, SENTENCE_TRANSFORMERS_AVAILABLE = safe_import_with_retry('sentence_transformers', 'SentenceTransformer')

print("📥 Loading transformers pipeline...")
pipeline, TRANSFORMERS_AVAILABLE = safe_import_with_retry('transformers', 'pipeline')

print("📥 Loading scikit-learn...")
try:
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity as sklearn_cosine_similarity
    SKLEARN_AVAILABLE = True
    print("✅ Scikit-learn loaded successfully")
except ImportError as e:
    print(f"⚠️ Scikit-learn not available: {e}")
    SKLEARN_AVAILABLE = False

# Additional production imports: torch is only needed for GPU selection.
try:
    import torch
    TORCH_AVAILABLE = True
    print(f"✅ PyTorch loaded - CUDA available: {torch.cuda.is_available()}")
except ImportError:
    TORCH_AVAILABLE = False
    print("⚠️ PyTorch not available")
78
+
79
+ class ProductionAITextHumanizer:
80
    def __init__(self, enable_gpu=True, model_cache_dir=None):
        """Initialize production-grade text humanizer with all advanced features.

        Args:
            enable_gpu: Request GPU inference; only honored when PyTorch
                imported successfully at module load (TORCH_AVAILABLE).
            model_cache_dir: Optional directory passed to the model
                loaders as the download/cache location.
        """
        print("🏭 Initializing Production AI Text Humanizer...")

        # GPU use is opt-in AND conditional on torch being importable.
        self.enable_gpu = enable_gpu and TORCH_AVAILABLE
        self.model_cache_dir = model_cache_dir

        # Initialize advanced models with detailed error handling; each
        # loader degrades gracefully (leaves its attribute set to None).
        self._load_similarity_model()
        self._load_paraphrasing_model()
        self._initialize_fallback_methods()
        self._setup_word_mappings()

        print("✅ Production AI Text Humanizer initialized!")
        self._print_feature_status()
95
+
96
    def _load_similarity_model(self):
        """Load the sentence-transformer used for semantic similarity.

        Sets self.similarity_model to a SentenceTransformer instance on
        success, or None on any failure. First use may download the
        model, so network access is presumably required — TODO confirm.
        """
        self.similarity_model = None

        if SENTENCE_TRANSFORMERS_AVAILABLE and SentenceTransformer:
            try:
                print("🔄 Loading sentence transformer model...")

                # Production settings: pick CUDA only when both requested
                # and actually available; otherwise fall back to CPU.
                model_kwargs = {
                    'device': 'cuda' if self.enable_gpu and torch.cuda.is_available() else 'cpu'
                } if TORCH_AVAILABLE else {}

                if self.model_cache_dir:
                    model_kwargs['cache_folder'] = self.model_cache_dir

                self.similarity_model = SentenceTransformer(
                    'all-MiniLM-L6-v2',
                    **model_kwargs
                )

                # Smoke-test the model with a trivial encode call.
                test_embedding = self.similarity_model.encode(["test sentence"])
                print("✅ Sentence transformer model loaded and tested successfully!")

            except Exception as e:
                print(f"❌ Failed to load sentence transformer: {e}")
                print("💡 Troubleshooting tips:")
                print("   - Check internet connection for model download")
                print("   - Verify sentence-transformers version: pip install sentence-transformers==2.2.2")
                print("   - Check CUDA compatibility if using GPU")
                self.similarity_model = None
        else:
            print("❌ Sentence transformers not available")
130
+
131
    def _load_paraphrasing_model(self):
        """Load the seq2seq pipeline used for AI paraphrasing.

        Sets self.paraphraser to a transformers text2text pipeline on
        success, or None on any failure.
        """
        self.paraphraser = None

        if TRANSFORMERS_AVAILABLE and pipeline:
            try:
                print("🔄 Loading paraphrasing model...")

                # Production settings: transformers' pipeline device
                # convention is 0 = first GPU, -1 = CPU.
                device = 0 if self.enable_gpu and TORCH_AVAILABLE and torch.cuda.is_available() else -1

                self.paraphraser = pipeline(
                    "text2text-generation",
                    model="google/flan-t5-small",
                    device=device,
                    max_length=512,
                    model_kwargs={"cache_dir": self.model_cache_dir} if self.model_cache_dir else {}
                )

                # Smoke-test the model with a short generation call.
                test_result = self.paraphraser("Test sentence for paraphrasing.", max_length=50)
                print("✅ Paraphrasing model loaded and tested successfully!")

            except Exception as e:
                print(f"❌ Failed to load paraphrasing model: {e}")
                print("💡 Troubleshooting tips:")
                print("   - Check internet connection for model download")
                print("   - Verify transformers version: pip install transformers==4.35.0")
                print("   - Check available memory (models need ~2GB RAM)")
                self.paraphraser = None
        else:
            print("❌ Transformers not available")
163
+
164
    def _initialize_fallback_methods(self):
        """Initialize the TF-IDF fallback used when the sentence
        transformer is unavailable; leaves self.tfidf_vectorizer as None
        when scikit-learn is missing or setup fails."""
        self.tfidf_vectorizer = None
        if SKLEARN_AVAILABLE:
            try:
                # Unigrams + bigrams, capped vocabulary, English stop
                # words removed — a cheap approximation of similarity.
                self.tfidf_vectorizer = TfidfVectorizer(
                    stop_words='english',
                    ngram_range=(1, 2),
                    max_features=5000
                )
                print("✅ TF-IDF fallback similarity initialized")
            except Exception as e:
                print(f"⚠️ TF-IDF initialization failed: {e}")
177
+
178
    def _setup_word_mappings(self):
        """Build the static lookup tables used by the rewriting passes.

        Populates:
            self.formal_to_casual: formal word/phrase -> casual equivalent
            self.contractions: multi-word form -> contraction
            self.ai_transition_words: stock AI-sounding transitions
            self.natural_transitions: human-sounding replacements
        """
        # Extended formal to casual mappings for production
        self.formal_to_casual = {
            # Basic formal words
            "utilize": "use", "demonstrate": "show", "facilitate": "help",
            "implement": "do", "consequently": "so", "therefore": "so",
            "nevertheless": "but", "furthermore": "also", "moreover": "also",
            "subsequently": "then", "accordingly": "so", "regarding": "about",
            "concerning": "about", "pertaining": "about", "approximately": "about",
            "endeavor": "try", "commence": "start", "terminate": "end",
            "obtain": "get", "purchase": "buy", "examine": "look at",
            "analyze": "study", "construct": "build", "establish": "set up",

            # Advanced formal words
            "magnitude": "size", "comprehensive": "complete", "significant": "big",
            "substantial": "large", "optimal": "best", "sufficient": "enough",
            "adequate": "good enough", "exceptional": "amazing", "remarkable": "great",
            "outstanding": "excellent", "predominant": "main", "fundamental": "basic",
            "essential": "needed", "crucial": "important", "vital": "key",
            "paramount": "most important", "imperative": "must", "mandatory": "required",

            # Formal phrases (multi-word keys are applied before single words)
            "prior to": "before", "in order to": "to", "due to the fact that": "because",
            "at this point in time": "now", "in the event that": "if",
            "it is important to note": "note that", "it should be emphasized": "remember",
            "it is worth mentioning": "by the way", "it is crucial to understand": "importantly",
            "for the purpose of": "to", "with regard to": "about",
            "in accordance with": "following", "as a result of": "because of",
            "in spite of the fact that": "although", "on the other hand": "however",

            # Academic/business terms
            "methodology": "method", "systematically": "step by step",
            "optimization": "improvement", "enhancement": "upgrade",
            "implementation": "setup", "utilization": "use", "evaluation": "review",
            "assessment": "check", "validation": "proof", "verification": "confirmation",
            "consolidation": "combining", "integration": "bringing together",
            "transformation": "change", "modification": "change", "alteration": "change"
        }

        # Extended contractions (longest keys substituted first elsewhere)
        self.contractions = {
            "do not": "don't", "does not": "doesn't", "did not": "didn't",
            "will not": "won't", "would not": "wouldn't", "should not": "shouldn't",
            "could not": "couldn't", "cannot": "can't", "is not": "isn't",
            "are not": "aren't", "was not": "wasn't", "were not": "weren't",
            "have not": "haven't", "has not": "hasn't", "had not": "hadn't",
            "I am": "I'm", "you are": "you're", "he is": "he's", "she is": "she's",
            "it is": "it's", "we are": "we're", "they are": "they're",
            "I have": "I've", "you have": "you've", "we have": "we've",
            "they have": "they've", "I will": "I'll", "you will": "you'll",
            "he will": "he'll", "she will": "she'll", "it will": "it'll",
            "we will": "we'll", "they will": "they'll", "would have": "would've",
            "should have": "should've", "could have": "could've", "might have": "might've"
        }

        # AI-like transitions (expanded) — phrases typical of model output
        self.ai_transition_words = [
            "Furthermore,", "Moreover,", "Additionally,", "Subsequently,",
            "Consequently,", "Therefore,", "Nevertheless,", "However,",
            "In conclusion,", "To summarize,", "In summary,", "Overall,",
            "It is important to note that", "It should be emphasized that",
            "It is worth mentioning that", "It is crucial to understand that",
            "It is essential to recognize that", "It must be acknowledged that",
            "It should be noted that", "It is imperative to understand",
            "From a practical standpoint,", "From an analytical perspective,",
            "In terms of implementation,", "With respect to the aforementioned,",
            "As previously mentioned,", "As stated earlier,", "In light of this,"
        ]

        # Natural alternatives (expanded) — randomly chosen replacements
        self.natural_transitions = [
            "Also,", "Plus,", "And,", "Then,", "So,", "But,", "Still,",
            "Anyway,", "By the way,", "Actually,", "Basically,", "Look,",
            "Listen,", "Here's the thing:", "The point is,", "What's more,",
            "On top of that,", "Another thing,", "Now,", "Well,", "You know,",
            "I mean,", "Honestly,", "Frankly,", "Simply put,", "In other words,",
            "To put it differently,", "Let me explain,", "Here's what I mean:",
            "Think about it,", "Consider this,", "Get this,", "Check this out,"
        ]
258
+
259
    def _print_feature_status(self):
        """Print a feature-availability report for production monitoring.

        Purely informational: reads the attributes populated by the
        loader methods and prints an enabled/disabled line per feature
        plus an overall completeness percentage.
        """
        print("\n📊 PRODUCTION FEATURE STATUS:")
        print("-" * 40)
        print(f"🔤 Advanced Similarity: {'✅ ENABLED' if self.similarity_model else '❌ DISABLED'}")
        print(f"🧠 AI Paraphrasing: {'✅ ENABLED' if self.paraphraser else '❌ DISABLED'}")
        print(f"📊 TF-IDF Fallback: {'✅ ENABLED' if self.tfidf_vectorizer else '❌ DISABLED'}")
        print(f"🚀 GPU Acceleration: {'✅ ENABLED' if self.enable_gpu and TORCH_AVAILABLE else '❌ DISABLED'}")
        print(f"⚡ Word Mappings: ✅ ENABLED ({len(self.formal_to_casual)} mappings)")
        print(f"📝 Contractions: ✅ ENABLED ({len(self.contractions)} contractions)")

        if TORCH_AVAILABLE:
            import torch
            print(f"🖥️ Device: {'CUDA' if torch.cuda.is_available() and self.enable_gpu else 'CPU'}")

        # Calculate feature completeness: 6 tracked features, two of
        # which (word mappings, contractions) are always available.
        total_features = 6
        enabled_features = sum([
            bool(self.similarity_model),
            bool(self.paraphraser),
            bool(self.tfidf_vectorizer),
            True,  # Word mappings always work
            True,  # Contractions always work
            TORCH_AVAILABLE
        ])

        completeness = (enabled_features / total_features) * 100
        print(f"🎯 Feature Completeness: {completeness:.1f}%")

        if completeness < 70:
            print("⚠️ WARNING: Less than 70% features enabled - not production ready")
        elif completeness < 90:
            print("⚠️ CAUTION: Some advanced features missing")
        else:
            print("🎉 PRODUCTION READY: All critical features enabled!")
294
+
295
+ def add_contractions(self, text: str) -> str:
296
+ """Add contractions with improved pattern matching"""
297
+ # Sort by length (longest first) to avoid partial replacements
298
+ sorted_contractions = sorted(self.contractions.items(), key=lambda x: len(x[0]), reverse=True)
299
+
300
+ for formal, casual in sorted_contractions:
301
+ # Use word boundaries to avoid partial matches
302
+ pattern = r'\b' + re.escape(formal) + r'\b'
303
+ text = re.sub(pattern, casual, text, flags=re.IGNORECASE)
304
+
305
+ return text
306
+
307
+ def replace_formal_words(self, text: str, replacement_rate: float = 0.8) -> str:
308
+ """Enhanced formal word replacement with context awareness"""
309
+ # Handle phrases first (longer matches)
310
+ phrase_replacements = {k: v for k, v in self.formal_to_casual.items() if len(k.split()) > 1}
311
+ word_replacements = {k: v for k, v in self.formal_to_casual.items() if len(k.split()) == 1}
312
+
313
+ # Replace phrases first
314
+ for formal_phrase, casual_phrase in phrase_replacements.items():
315
+ if random.random() < replacement_rate:
316
+ pattern = r'\b' + re.escape(formal_phrase) + r'\b'
317
+ text = re.sub(pattern, casual_phrase, text, flags=re.IGNORECASE)
318
+
319
+ # Then replace individual words
320
+ words = word_tokenize(text)
321
+ for i, word in enumerate(words):
322
+ word_clean = word.lower().strip('.,!?;:"')
323
+ if word_clean in word_replacements and random.random() < replacement_rate:
324
+ replacement = word_replacements[word_clean]
325
+ # Preserve case
326
+ if word.isupper():
327
+ words[i] = word.replace(word_clean, replacement.upper())
328
+ elif word.istitle():
329
+ words[i] = word.replace(word_clean, replacement.title())
330
+ else:
331
+ words[i] = word.replace(word_clean, replacement)
332
+
333
+ # Reconstruct with proper spacing
334
+ result = ""
335
+ for i, word in enumerate(words):
336
+ if i > 0 and word not in ".,!?;:\"')":
337
+ result += " "
338
+ result += word
339
+
340
+ return result
341
+
342
    def replace_ai_transitions(self, text: str) -> str:
        """Swap stock AI-sounding transitions for a random natural one.

        Each known transition is replaced at most once (first occurrence).
        A transition at the very start of the text is always replaced; a
        mid-text occurrence is replaced with 70% probability. Output is
        therefore random-dependent.
        """
        # Sort by length to handle longer phrases first
        sorted_transitions = sorted(self.ai_transition_words, key=len, reverse=True)

        for ai_transition in sorted_transitions:
            if ai_transition in text:
                # Choose appropriate natural replacement based on context
                natural_replacement = random.choice(self.natural_transitions)

                # Adjust replacement based on sentence position
                if text.startswith(ai_transition):
                    # Beginning of text: always rewrite
                    text = text.replace(ai_transition, natural_replacement, 1)
                else:
                    # Middle of text - be more selective
                    if random.random() < 0.7:  # 70% chance to replace
                        text = text.replace(ai_transition, natural_replacement, 1)

        return text
362
+
363
    def advanced_paraphrasing(self, text: str, paraphrase_rate: float = 0.4) -> str:
        """Paraphrase a random subset of long sentences via the LLM pipeline.

        No-op when the paraphrasing model is unavailable. Only sentences
        longer than 10 words are candidates; each is rewritten with
        probability *paraphrase_rate*. Generated text that fails the
        quality checks (too short, too long, refusal-style prefix) is
        discarded in favor of the original sentence.
        """
        if not self.paraphraser:
            return text

        sentences = sent_tokenize(text)
        paraphrased_sentences = []

        for sentence in sentences:
            # Only paraphrase longer, more complex sentences
            if len(sentence.split()) > 10 and random.random() < paraphrase_rate:
                try:
                    # Multiple paraphrasing strategies; one is picked at random
                    prompts = [
                        f"Rewrite this more naturally: {sentence}",
                        f"Make this sound more conversational: {sentence}",
                        f"Rephrase this in simpler terms: {sentence}",
                        f"Say this in a more casual way: {sentence}"
                    ]

                    prompt = random.choice(prompts)

                    result = self.paraphraser(
                        prompt,
                        max_length=len(sentence) + 50,
                        min_length=max(10, len(sentence) // 2),
                        num_return_sequences=1,
                        temperature=0.7,
                        do_sample=True
                    )

                    # Strip any echoed prompt and wrapping quotes from the output
                    paraphrased = result[0]['generated_text']
                    paraphrased = paraphrased.replace(prompt, '').strip().strip('"\'')

                    # Quality checks: non-empty, sane length, no refusal prefix
                    if (paraphrased and
                        len(paraphrased) > 5 and
                        len(paraphrased) < len(sentence) * 2 and
                        not paraphrased.lower().startswith(('i cannot', 'i can\'t', 'sorry'))):
                        paraphrased_sentences.append(paraphrased)
                    else:
                        paraphrased_sentences.append(sentence)

                except Exception as e:
                    # Best-effort: keep the original sentence on any failure
                    print(f"⚠️ Paraphrasing failed: {e}")
                    paraphrased_sentences.append(sentence)
            else:
                paraphrased_sentences.append(sentence)

        return ' '.join(paraphrased_sentences)
413
+
414
    def calculate_similarity_advanced(self, text1: str, text2: str) -> float:
        """Compute similarity between two texts with graceful degradation.

        Tries, in order: sentence-transformer cosine similarity, TF-IDF
        cosine similarity, then plain token-set Jaccard overlap. Returns
        a float (cosine scores may be slightly outside [0, 1] due to
        floating point; Jaccard is always within [0, 1]).
        """
        if self.similarity_model:
            try:
                # Cosine similarity of the two sentence embeddings
                embeddings1 = self.similarity_model.encode([text1])
                embeddings2 = self.similarity_model.encode([text2])
                similarity = np.dot(embeddings1[0], embeddings2[0]) / (
                    np.linalg.norm(embeddings1[0]) * np.linalg.norm(embeddings2[0])
                )
                return float(similarity)
            except Exception as e:
                print(f"⚠️ Advanced similarity calculation failed: {e}")

        # Fallback to TF-IDF cosine similarity
        if self.tfidf_vectorizer and SKLEARN_AVAILABLE:
            try:
                tfidf_matrix = self.tfidf_vectorizer.fit_transform([text1, text2])
                similarity = sklearn_cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
                return float(similarity)
            except Exception as e:
                print(f"⚠️ TF-IDF similarity calculation failed: {e}")

        # Basic fallback: Jaccard overlap of lowercase token sets
        words1 = set(word_tokenize(text1.lower()))
        words2 = set(word_tokenize(text2.lower()))
        if not words1 or not words2:
            # Both empty/untokenizable: identical strings count as 1.0
            return 1.0 if text1 == text2 else 0.0

        intersection = words1.intersection(words2)
        union = words1.union(words2)
        return len(intersection) / len(union) if union else 1.0
445
+
446
+ def humanize_text_production(self,
447
+ text: str,
448
+ style: str = "natural",
449
+ intensity: float = 0.8,
450
+ preserve_length: bool = True,
451
+ quality_threshold: float = 0.75) -> Dict:
452
+ """
453
+ Production-grade text humanization with comprehensive quality control
454
+
455
+ Args:
456
+ text: Input text to humanize
457
+ style: Style ('natural', 'casual', 'conversational')
458
+ intensity: Transformation intensity (0.0 to 1.0)
459
+ preserve_length: Try to maintain similar text length
460
+ quality_threshold: Minimum similarity score to accept
461
+
462
+ Returns:
463
+ Comprehensive results with quality metrics
464
+ """
465
+ if not text.strip():
466
+ return {
467
+ "original_text": text,
468
+ "humanized_text": text,
469
+ "similarity_score": 1.0,
470
+ "changes_made": [],
471
+ "style": style,
472
+ "intensity": intensity,
473
+ "quality_score": 1.0,
474
+ "processing_time_ms": 0.0,
475
+ "feature_usage": {}
476
+ }
477
+
478
+ import time
479
+ start_time = time.time()
480
+
481
+ changes_made = []
482
+ humanized_text = text
483
+ original_text = text
484
+ feature_usage = {}
485
+
486
+ # Step 1: AI transition replacement (early to catch obvious AI patterns)
487
+ if intensity > 0.2:
488
+ before = humanized_text
489
+ humanized_text = self.replace_ai_transitions(humanized_text)
490
+ if humanized_text != before:
491
+ changes_made.append("Replaced AI-like transition phrases")
492
+ feature_usage['ai_transitions'] = True
493
+
494
+ # Step 2: Formal word replacement
495
+ if intensity > 0.3:
496
+ before = humanized_text
497
+ humanized_text = self.replace_formal_words(humanized_text, intensity * 0.9)
498
+ if humanized_text != before:
499
+ changes_made.append("Replaced formal words with casual alternatives")
500
+ feature_usage['word_replacement'] = True
501
+
502
+ # Step 3: Add contractions
503
+ if intensity > 0.4:
504
+ before = humanized_text
505
+ humanized_text = self.add_contractions(humanized_text)
506
+ if humanized_text != before:
507
+ changes_made.append("Added natural contractions")
508
+ feature_usage['contractions'] = True
509
+
510
+ # Step 4: Advanced paraphrasing (if available)
511
+ if intensity > 0.6 and self.paraphraser:
512
+ before = humanized_text
513
+ humanized_text = self.advanced_paraphrasing(humanized_text, intensity * 0.5)
514
+ if humanized_text != before:
515
+ changes_made.append("Applied AI paraphrasing for natural flow")
516
+ feature_usage['paraphrasing'] = True
517
+
518
+ # Step 5: Calculate quality metrics
519
+ processing_time = (time.time() - start_time) * 1000
520
+ similarity_score = self.calculate_similarity_advanced(original_text, humanized_text)
521
+
522
+ # Quality control - revert if similarity too low
523
+ if similarity_score < quality_threshold:
524
+ print(f"⚠️ Quality check failed (similarity: {similarity_score:.3f})")
525
+ humanized_text = original_text
526
+ similarity_score = 1.0
527
+ changes_made = ["Quality threshold not met - reverted to original"]
528
+ feature_usage['quality_control'] = True
529
+
530
+ # Calculate comprehensive quality score
531
+ length_ratio = len(humanized_text) / len(original_text) if original_text else 1.0
532
+ length_penalty = max(0, 1.0 - abs(length_ratio - 1.0)) if preserve_length else 1.0
533
+ change_score = min(1.0, len(changes_made) / 5.0) # Reward meaningful changes
534
+
535
+ quality_score = (similarity_score * 0.5) + (length_penalty * 0.3) + (change_score * 0.2)
536
+
537
+ return {
538
+ "original_text": original_text,
539
+ "humanized_text": humanized_text,
540
+ "similarity_score": similarity_score,
541
+ "quality_score": quality_score,
542
+ "changes_made": changes_made,
543
+ "style": style,
544
+ "intensity": intensity,
545
+ "processing_time_ms": processing_time,
546
+ "feature_usage": feature_usage,
547
+ "length_change": len(humanized_text) - len(original_text),
548
+ "word_count_change": len(humanized_text.split()) - len(original_text.split())
549
+ }
550
+
551
# Convenience function for backward compatibility
def AITextHumanizer():
    """Legacy factory: returns a new ProductionAITextHumanizer instance."""
    humanizer = ProductionAITextHumanizer()
    return humanizer
556
# Test the production version
if __name__ == "__main__":
    humanizer = ProductionAITextHumanizer()

    # Representative AI-sounding inputs exercising formal vocabulary,
    # stiff transitions, and long sentences.
    test_texts = [
        "Furthermore, it is important to note that artificial intelligence systems demonstrate significant capabilities.",
        "The implementation of comprehensive methodologies will facilitate optimization and enhance operational efficiency.",
        "Subsequently, organizations must utilize systematic approaches to evaluate and implement technological solutions.",
    ]

    print(f"\n🧪 TESTING PRODUCTION HUMANIZER")
    print("=" * 40)

    for idx, sample in enumerate(test_texts, start=1):
        print(f"\n🔬 Test {idx}:")
        print(f"Original: {sample}")

        outcome = humanizer.humanize_text_production(
            text=sample,
            style="conversational",
            intensity=0.8,
        )

        changed = ', '.join(outcome['changes_made']) if outcome['changes_made'] else 'None'
        print(f"Humanized: {outcome['humanized_text']}")
        print(f"Quality Score: {outcome['quality_score']:.3f}")
        print(f"Similarity: {outcome['similarity_score']:.3f}")
        print(f"Processing: {outcome['processing_time_ms']:.1f}ms")
        print(f"Changes: {changed}")

    print(f"\n🎉 Production testing completed!")