Jay-Rajput committed on
Commit
f43f7c7
·
1 Parent(s): 8cd6bf6

universal humanizer

Files changed (2)
  1. app.py +44 -0
  2. universal_humanizer.py +0 -16
app.py CHANGED
@@ -4,6 +4,50 @@
 import gradio as gr
 import time
 import os
+import nltk
+
+def ensure_nltk_resources():
+    """Ensure minimal NLTK data for tokenizing and lemmatization."""
+    resources = {
+        'punkt': 'tokenizers/punkt',
+        'wordnet': 'corpora/wordnet',
+        'omw-1.4': 'corpora/omw-1.4'
+    }
+    for name, path in resources.items():
+        try:
+            nltk.data.find(path)
+            print(f"✅ Resource already present: {name}")
+        except LookupError:
+            print(f"🔄 Downloading {name} …")
+            try:
+                nltk.download(name, quiet=True)
+                print(f"✅ Downloaded {name}")
+            except Exception as e:
+                print(f"❌ Failed to download {name}: {e}")
+
+def test_nltk_setup():
+    """Test basic tokenization & lemmatization to verify setup."""
+    from nltk.tokenize import word_tokenize, sent_tokenize
+    from nltk.stem import WordNetLemmatizer
+
+    text = "This is a test. Testing tokenization and lemmatization."
+    # Test sentence splitting
+    sentences = sent_tokenize(text)
+    print(f"Sentence tokenize works: {len(sentences)} sentences: {sentences}")
+    # Test word tokenization
+    words = word_tokenize(text)
+    print(f"Word tokenize works: {len(words)} words: {words}")
+    # Test lemmatization
+    lemmatizer = WordNetLemmatizer()
+    lem = [lemmatizer.lemmatize(w) for w in words]
+    print(f"Lemmatization works: {lem}")
+
+# Run once in the startup part of the app
+print("🚀 Ensuring NLTK minimal resources …")
+ensure_nltk_resources()
+print("🔧 Testing NLTK setup …")
+test_nltk_setup()
+
 
 # Import our universal humanizer
 from universal_humanizer import UniversalAITextHumanizer
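
The block added above runs once when app.py is imported: each resource is checked with nltk.data.find and downloaded only if missing, then tokenization and lemmatization are smoke-tested before the humanizer loads. For context, a minimal sketch of what presumably follows in app.py; only the class import is confirmed by this diff, so the zero-argument constructor and the humanize method name are assumptions:

# Hypothetical usage sketch: only the import below is shown in this commit.
from universal_humanizer import UniversalAITextHumanizer

humanizer = UniversalAITextHumanizer()        # assumed constructor signature
output = humanizer.humanize("Some AI text.")  # assumed method name
print(output)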
universal_humanizer.py CHANGED
@@ -7,22 +7,6 @@ import time
 from collections import Counter
 import statistics
 
-# Download required NLTK data
-def ensure_nltk_data():
-    try:
-        nltk.data.find('tokenizers/punkt')
-    except LookupError:
-        nltk.download('punkt', quiet=True)
-    try:
-        nltk.data.find('corpora/wordnet')
-    except LookupError:
-        nltk.download('wordnet', quiet=True)
-    try:
-        nltk.data.find('corpora/omw-1.4')
-    except LookupError:
-        nltk.download('omw-1.4', quiet=True)
-
-ensure_nltk_data()
 from nltk.tokenize import sent_tokenize, word_tokenize
 from nltk.corpus import wordnet
 
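
With ensure_nltk_data() removed, universal_humanizer.py no longer downloads anything at import time and relies on app.py having provisioned punkt, wordnet, and omw-1.4 first. If that startup step were skipped, the failure would surface lazily at the first tokenize call rather than at import. A minimal sketch of that failure mode, based on standard NLTK behavior rather than repo-specific code:

# Sketch of the failure the startup check guards against (assumes 'punkt'
# was never downloaded; importing sent_tokenize itself still succeeds).
from nltk.tokenize import sent_tokenize

try:
    sent_tokenize("One sentence. Two sentences.")
except LookupError as exc:
    # NLTK raises LookupError on first use if the model data is missing
    print(f"Missing NLTK resource: {exc}")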