Commit f43f7c7
1 Parent(s): 8cd6bf6

universal humanizer

- app.py +44 -0
- universal_humanizer.py +0 -16
app.py
CHANGED
@@ -4,6 +4,50 @@
 import gradio as gr
 import time
 import os
+import nltk
+
+def ensure_nltk_resources():
+    """Ensure minimal NLTK data for tokenizing and lemmatization."""
+    resources = {
+        'punkt': 'tokenizers/punkt',
+        'wordnet': 'corpora/wordnet',
+        'omw-1.4': 'corpora/omw-1.4'
+    }
+    for name, path in resources.items():
+        try:
+            nltk.data.find(path)
+            print(f"✅ Resource already present: {name}")
+        except LookupError:
+            print(f"📥 Downloading {name} …")
+            try:
+                nltk.download(name, quiet=True)
+                print(f"✅ Downloaded {name}")
+            except Exception as e:
+                print(f"❌ Failed to download {name}: {e}")
+
+def test_nltk_setup():
+    """Test basic tokenization & lemmatization to verify setup."""
+    from nltk.tokenize import word_tokenize, sent_tokenize
+    from nltk.stem import WordNetLemmatizer
+
+    text = "This is a test. Testing tokenization and lemmatization."
+    # Test sentence splitting
+    sentences = sent_tokenize(text)
+    print(f"Sentence tokenize works: {len(sentences)} sentences: {sentences}")
+    # Test word tokenization
+    words = word_tokenize(text)
+    print(f"Word tokenize works: {len(words)} words: {words}")
+    # Test lemmatization
+    lemmatizer = WordNetLemmatizer()
+    lem = [lemmatizer.lemmatize(w) for w in words]
+    print(f"Lemmatization works: {lem}")
+
+# Run once at app startup, before the humanizer is imported.
+print("🔄 Ensuring NLTK minimal resources …")
+ensure_nltk_resources()
+print("🧪 Testing NLTK setup …")
+test_nltk_setup()
+
 
 # Import our universal humanizer
 from universal_humanizer import UniversalAITextHumanizer
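Aside, not part of the commit: NLTK 3.8.2 and later split the Punkt sentence model into a separate punkt_tab resource, so sent_tokenize can still raise LookupError on those versions even after punkt is installed. A minimal sketch of the same check-then-download pattern extended to cover that case, assuming such an NLTK version is deployed:

import nltk

# Hypothetical extension of the resources mapping above; the
# 'punkt_tab' entry is an assumption about newer NLTK releases.
resources = {
    'punkt': 'tokenizers/punkt',
    'punkt_tab': 'tokenizers/punkt_tab',
    'wordnet': 'corpora/wordnet',
    'omw-1.4': 'corpora/omw-1.4',
}
for name, path in resources.items():
    try:
        nltk.data.find(path)             # already installed?
    except LookupError:
        nltk.download(name, quiet=True)  # fetch the missing resource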
universal_humanizer.py
CHANGED
@@ -7,22 +7,6 @@ import time
 from collections import Counter
 import statistics
 
-# Download required NLTK data
-def ensure_nltk_data():
-    try:
-        nltk.data.find('tokenizers/punkt')
-    except LookupError:
-        nltk.download('punkt', quiet=True)
-    try:
-        nltk.data.find('corpora/wordnet')
-    except LookupError:
-        nltk.download('wordnet', quiet=True)
-    try:
-        nltk.data.find('corpora/omw-1.4')
-    except LookupError:
-        nltk.download('omw-1.4', quiet=True)
-
-ensure_nltk_data()
 from nltk.tokenize import sent_tokenize, word_tokenize
 from nltk.corpus import wordnet
 
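With the duplicate bootstrap removed, universal_humanizer.py now assumes the NLTK data is already on disk: the imports themselves succeed, but the first sent_tokenize or word_tokenize call raises LookupError if app.py's ensure_nltk_resources() has not run first. A minimal sketch of that ordering assumption; the fallback download is illustrative, not in the commit:

from nltk.tokenize import sent_tokenize

try:
    print(sent_tokenize("Bootstrap first. Then tokenize."))
except LookupError:
    # Reached only when ensure_nltk_resources() has not run yet.
    import nltk
    nltk.download('punkt', quiet=True)
    print(sent_tokenize("Bootstrap first. Then tokenize."))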