# -*- coding: utf-8 -*-
"""Academic Text Humanizer - Hugging Face Spaces Deployment"""

# Step 1: Import Libraries
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, set_seed
import hashlib
import re
import gradio as gr
import os
from huggingface_hub import login

# Step 2: Login to Hugging Face
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(token=hf_token)

# Step 3: Load Model and Tokenizer
print("Loading model and tokenizer...")
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
print("Model loaded successfully!")

# Step 4: Regional Accent Dictionaries

# USA Academic English Preferences
USA_ACADEMIC_STYLE = {
    'spelling': {
        # British → American spelling
        'analyse': 'analyze',
        # Note: 'analyses' is also the plural of 'analysis' in American English,
        # so this word-level mapping can misfire on that noun.
        'analyses': 'analyzes',
        'analysing': 'analyzing',
        'analysed': 'analyzed',
        'behaviour': 'behavior',
        'behaviours': 'behaviors',
        'behavioural': 'behavioral',
        'centre': 'center',
        'centres': 'centers',
        'centred': 'centered',
        'colour': 'color',
        'colours': 'colors',
        'coloured': 'colored',
        'defence': 'defense',
        'favour': 'favor',
        'favours': 'favors',
        'favoured': 'favored',
        'favourite': 'favorite',
        'honour': 'honor',
        'honours': 'honors',
        'honoured': 'honored',
        'labour': 'labor',
        'labours': 'labors',
        'laboured': 'labored',
        'licence': 'license',
        'organise': 'organize',
        'organises': 'organizes',
        'organised': 'organized',
        'organising': 'organizing',
        'organisation': 'organization',
        'organisations': 'organizations',
        'realise': 'realize',
        'realises': 'realizes',
        'realised': 'realized',
        'realising': 'realizing',
        'recognise': 'recognize',
        'recognises': 'recognizes',
        'recognised': 'recognized',
        'recognising': 'recognizing',
        'programme': 'program',
        'programmes': 'programs',
        'theatre': 'theater',
        'theatres': 'theaters',
        'travelled': 'traveled',
        'travelling': 'traveling',
        'traveller': 'traveler',
        'modelled': 'modeled',
        'modelling': 'modeling',
        'cancelled': 'canceled',
        'cancelling': 'canceling',
        'counsellor': 'counselor',
        'counselling': 'counseling',
        'jewellery': 'jewelry',
        'fulfil': 'fulfill',
        'fulfilment': 'fulfillment',
        'skilful': 'skillful',
        'grey': 'gray',
        'practise': 'practice',
        'practising': 'practicing',
        'practised': 'practiced',
        'emphasise': 'emphasize',
        'emphasises': 'emphasizes',
        'emphasised': 'emphasized',
        'emphasising': 'emphasizing',
        'summarise': 'summarize',
        'summarises': 'summarizes',
        'summarised': 'summarized',
        'summarising': 'summarizing',
        'categorise': 'categorize',
        'categorises': 'categorizes',
        'categorised': 'categorized',
        'categorising': 'categorizing',
        'characterise': 'characterize',
        'characterises': 'characterizes',
        'characterised': 'characterized',
        'characterising': 'characterizing',
        'criticise': 'criticize',
        'criticises': 'criticizes',
        'criticised': 'criticized',
        'criticising': 'criticizing',
        'finalise': 'finalize',
        'finalises': 'finalizes',
        'finalised': 'finalized',
        'finalising': 'finalizing',
        'generalise': 'generalize',
        'generalises': 'generalizes',
        'generalised': 'generalized',
        'generalising': 'generalizing',
        'hypothesise': 'hypothesize',
        'hypothesises': 'hypothesizes',
        'hypothesised': 'hypothesized',
        'hypothesising': 'hypothesizing',
        'maximise': 'maximize',
        'maximises': 'maximizes',
        'maximised': 'maximized',
        'maximising': 'maximizing',
        'minimise': 'minimize',
        'minimises': 'minimizes',
        'minimised': 'minimized',
        'minimising': 'minimizing',
        'normalise': 'normalize',
        'normalises': 'normalizes',
        'normalised': 'normalized',
        'normalising': 'normalizing',
        'optimise': 'optimize',
        'optimises': 'optimizes',
        'optimised': 'optimized',
        'optimising': 'optimizing',
        'standardise': 'standardize',
        'standardises': 'standardizes',
        'standardised': 'standardized',
        'standardising': 'standardizing',
        'utilise': 'utilize',
        'utilises': 'utilizes',
        'utilised': 'utilized',
        'utilising': 'utilizing',
        'visualise': 'visualize',
        'visualises': 'visualizes',
        'visualised': 'visualized',
        'visualising': 'visualizing',
        'apologise': 'apologize',
        'apologises': 'apologizes',
        'apologised': 'apologized',
        'apologising': 'apologizing',
        'capitalise': 'capitalize',
        'capitalises': 'capitalizes',
        'capitalised': 'capitalized',
        'capitalising': 'capitalizing',
        'globalise': 'globalize',
        'globalises': 'globalizes',
        'globalised': 'globalized',
        'globalising': 'globalizing',
        'industrialise': 'industrialize',
        'industrialises': 'industrializes',
        'industrialised': 'industrialized',
        'industrialising': 'industrializing',
        'materialise': 'materialize',
        'materialises': 'materializes',
        'materialised': 'materialized',
        'materialising': 'materializing',
        'mobilise': 'mobilize',
        'mobilises': 'mobilizes',
        'mobilised': 'mobilized',
        'mobilising': 'mobilizing',
        'modernise': 'modernize',
        'modernises': 'modernizes',
        'modernised': 'modernized',
        'modernising': 'modernizing',
        'privatise': 'privatize',
        'privatises': 'privatizes',
        'privatised': 'privatized',
        'privatising': 'privatizing',
        'rationalise': 'rationalize',
        'rationalises': 'rationalizes',
        'rationalised': 'rationalized',
        'rationalising': 'rationalizing',
        'revolutionise': 'revolutionize',
        'revolutionises': 'revolutionizes',
        'revolutionised': 'revolutionized',
        'revolutionising': 'revolutionizing',
        'socialise': 'socialize',
        'socialises': 'socializes',
        'socialised': 'socialized',
        'socialising': 'socializing',
        'specialise': 'specialize',
        'specialises': 'specializes',
        'specialised': 'specialized',
        'specialising': 'specializing',
        'stabilise': 'stabilize',
        'stabilises': 'stabilizes',
        'stabilised': 'stabilized',
        'stabilising': 'stabilizing',
        'symbolise': 'symbolize',
        'symbolises': 'symbolizes',
        'symbolised': 'symbolized',
        'symbolising': 'symbolizing',
        'synthesise': 'synthesize',
        'synthesises': 'synthesizes',
        'synthesised': 'synthesized',
        'synthesising': 'synthesizing',
        'theorise': 'theorize',
        'theorises': 'theorizes',
        'theorised': 'theorized',
        'theorising': 'theorizing',
        'urbanise': 'urbanize',
        'urbanises': 'urbanizes',
        'urbanised': 'urbanized',
        'urbanising': 'urbanizing',
    },
    'phrases': {
        'at the weekend': 'on the weekend',
        'in hospital': 'in the hospital',
        'in future': 'in the future',
        'at university': 'at the university',
        'different to': 'different from',
        'different than': 'different from',
        'write to': 'write',
        'Monday to Friday': 'Monday through Friday',
    },
    'punctuation': {
        'quotation_style': 'double',
        'period_with_quotes': 'inside',
    },
    'vocabulary': {
        'whilst': 'while',
        'amongst': 'among',
        'towards': 'toward',
        'afterwards': 'afterward',
        'forwards': 'forward',
        'backwards': 'backward',
        'upwards': 'upward',
        'downwards': 'downward',
        'learnt': 'learned',
        'burnt': 'burned',
        'dreamt': 'dreamed',
        'spelt': 'spelled',
        'spoilt': 'spoiled',
    }
}

# UK Academic English Preferences
UK_ACADEMIC_STYLE = {
    'spelling': {
        # American → British spelling
        # Note: noun/verb pairs such as license/licence and practice/practise are
        # converted blindly, which can misfire where British English keeps the other form.
        'analyze': 'analyse',
        'analyzes': 'analyses',
        'analyzing': 'analysing',
        'analyzed': 'analysed',
        'behavior': 'behaviour',
        'behaviors': 'behaviours',
        'behavioral': 'behavioural',
        'center': 'centre',
        'centers': 'centres',
        'centered': 'centred',
        'color': 'colour',
        'colors': 'colours',
        'colored': 'coloured',
        'defense': 'defence',
        'favor': 'favour',
        'favors': 'favours',
        'favored': 'favoured',
        'favorite': 'favourite',
        'honor': 'honour',
        'honors': 'honours',
        'honored': 'honoured',
        'labor': 'labour',
        'labors': 'labours',
        'labored': 'laboured',
        'license': 'licence',
        'organize': 'organise',
        'organizes': 'organises',
        'organized': 'organised',
        'organizing': 'organising',
        'organization': 'organisation',
        'organizations': 'organisations',
        'realize': 'realise',
        'realizes': 'realises',
        'realized': 'realised',
        'realizing': 'realising',
        'recognize': 'recognise',
        'recognizes': 'recognises',
        'recognized': 'recognised',
        'recognizing': 'recognising',
        'program': 'programme',
        'programs': 'programmes',
        'theater': 'theatre',
        'theaters': 'theatres',
        'traveled': 'travelled',
        'traveling': 'travelling',
        'traveler': 'traveller',
        'modeled': 'modelled',
        'modeling': 'modelling',
        'canceled': 'cancelled',
        'canceling': 'cancelling',
        'counselor': 'counsellor',
        'counseling': 'counselling',
        'jewelry': 'jewellery',
        'fulfill': 'fulfil',
        'fulfillment': 'fulfilment',
        'skillful': 'skilful',
        'gray': 'grey',
        'practice': 'practise',
        'practicing': 'practising',
        'practiced': 'practised',
        'emphasize': 'emphasise',
        'emphasizes': 'emphasises',
        'emphasized': 'emphasised',
        'emphasizing': 'emphasising',
        'summarize': 'summarise',
        'summarizes': 'summarises',
        'summarized': 'summarised',
        'summarizing': 'summarising',
        'categorize': 'categorise',
        'categorizes': 'categorises',
        'categorized': 'categorised',
        'categorizing': 'categorising',
        'characterize': 'characterise',
        'characterizes': 'characterises',
        'characterized': 'characterised',
        'characterizing': 'characterising',
        'criticize': 'criticise',
        'criticizes': 'criticises',
        'criticized': 'criticised',
        'criticizing': 'criticising',
        'finalize': 'finalise',
        'finalizes': 'finalises',
        'finalized': 'finalised',
        'finalizing': 'finalising',
        'generalize': 'generalise',
        'generalizes': 'generalises',
        'generalized': 'generalised',
        'generalizing': 'generalising',
        'hypothesize': 'hypothesise',
        'hypothesizes': 'hypothesises',
        'hypothesized': 'hypothesised',
        'hypothesizing': 'hypothesising',
        'maximize': 'maximise',
        'maximizes': 'maximises',
        'maximized': 'maximised',
        'maximizing': 'maximising',
        'minimize': 'minimise',
        'minimizes': 'minimises',
        'minimized': 'minimised',
        'minimizing': 'minimising',
        'normalize': 'normalise',
        'normalizes': 'normalises',
        'normalized': 'normalised',
        'normalizing': 'normalising',
        'optimize': 'optimise',
        'optimizes': 'optimises',
        'optimized': 'optimised',
        'optimizing': 'optimising',
        'standardize': 'standardise',
        'standardizes': 'standardises',
        'standardized': 'standardised',
        'standardizing': 'standardising',
        'utilize': 'utilise',
        'utilizes': 'utilises',
        'utilized': 'utilised',
        'utilizing': 'utilising',
        'visualize': 'visualise',
        'visualizes': 'visualises',
        'visualized': 'visualised',
        'visualizing': 'visualising',
        'apologize': 'apologise',
        'apologizes': 'apologises',
        'apologized': 'apologised',
        'apologizing': 'apologising',
        'capitalize': 'capitalise',
        'capitalizes': 'capitalises',
        'capitalized': 'capitalised',
        'capitalizing': 'capitalising',
        'globalize': 'globalise',
        'globalizes': 'globalises',
        'globalized': 'globalised',
        'globalizing': 'globalising',
        'industrialize': 'industrialise',
        'industrializes': 'industrialises',
        'industrialized': 'industrialised',
        'industrializing': 'industrialising',
        'materialize': 'materialise',
        'materializes': 'materialises',
        'materialized': 'materialised',
        'materializing': 'materialising',
        'mobilize': 'mobilise',
        'mobilizes': 'mobilises',
        'mobilized': 'mobilised',
        'mobilizing': 'mobilising',
        'modernize': 'modernise',
        'modernizes': 'modernises',
        'modernized': 'modernised',
        'modernizing': 'modernising',
        'privatize': 'privatise',
        'privatizes': 'privatises',
        'privatized': 'privatised',
        'privatizing': 'privatising',
        'rationalize': 'rationalise',
        'rationalizes': 'rationalises',
        'rationalized': 'rationalised',
        'rationalizing': 'rationalising',
        'revolutionize': 'revolutionise',
        'revolutionizes': 'revolutionises',
        'revolutionized': 'revolutionised',
        'revolutionizing': 'revolutionising',
        'socialize': 'socialise',
        'socializes': 'socialises',
        'socialized': 'socialised',
        'socializing': 'socialising',
        'specialize': 'specialise',
        'specializes': 'specialises',
        'specialized': 'specialised',
        'specializing': 'specialising',
        'stabilize': 'stabilise',
        'stabilizes': 'stabilises',
        'stabilized': 'stabilised',
        'stabilizing': 'stabilising',
        'symbolize': 'symbolise',
        'symbolizes': 'symbolises',
        'symbolized': 'symbolised',
        'symbolizing': 'symbolising',
        'synthesize': 'synthesise',
        'synthesizes': 'synthesises',
        'synthesized': 'synthesised',
        'synthesizing': 'synthesising',
        'theorize': 'theorise',
        'theorizes': 'theorises',
        'theorized': 'theorised',
        'theorizing': 'theorising',
        'urbanize': 'urbanise',
        'urbanizes': 'urbanises',
        'urbanized': 'urbanised',
        'urbanizing': 'urbanising',
    },
    'phrases': {
        'on the weekend': 'at the weekend',
        'in the hospital': 'in hospital',
        'in the future': 'in future',
        'at the university': 'at university',
        'different from': 'different to',
        'Monday through Friday': 'Monday to Friday',
    },
    'punctuation': {
        'quotation_style': 'single',
        'period_with_quotes': 'outside',
    },
    'vocabulary': {
        'while': 'whilst',
        'among': 'amongst',
        'toward': 'towards',
        'afterward': 'afterwards',
        'forward': 'forwards',
        'backward': 'backwards',
        'upward': 'upwards',
        'downward': 'downwards',
        'learned': 'learnt',
        'burned': 'burnt',
        'dreamed': 'dreamt',
        'spelled': 'spelt',
        'spoiled': 'spoilt',
    }
}

# Step 5: ULTRA-DEEP ACADEMIC ENGINE
def get_ultra_deep_academic_engine(accent='usa'):
    """
    Returns ultra-deep academic humanization engine for <5% AI detection.
    """
    accent_instruction = ""
    if accent.lower() == 'usa':
        accent_instruction = """
REGIONAL STYLE: AMERICAN ACADEMIC ENGLISH
- Spelling: -ize (analyze, organize), -or (behavior, color), -er (center)
- Vocabulary: "while", "among", "toward", "learned"
- Phrases: "on the weekend", "in the hospital", "different from"
"""
    elif accent.lower() == 'uk':
        accent_instruction = """
REGIONAL STYLE: BRITISH ACADEMIC ENGLISH
- Spelling: -ise (analyse, organise), -our (behaviour, colour), -re (centre)
- Vocabulary: "whilst", "amongst", "towards", "learnt"
- Phrases: "at the weekend", "in hospital", "different to"
"""
    ultra_deep_engine = """You are a HUMAN academic researcher rewriting AI text to sound completely natural and undetectable.
🚨 CRITICAL: Your output MUST score <5% on AI detectors. This requires EXTREME transformation.
═══════════════════════════════════════════════════════════════════════════════
PHASE 1: DESTROY ALL AI VOCABULARY (ZERO TOLERANCE)
═══════════════════════════════════════════════════════════════════════════════
FORBIDDEN WORDS - NEVER USE THESE:
❌ pivotal, paramount, crucial, vital, essential, fundamental, cornerstone, foundational
❌ multifaceted, nuanced, intricate, complex, comprehensive, holistic
❌ delve, leverage, utilize, facilitate, enhance, foster, cultivate, empower
❌ robust, rigorous, substantial, significant, considerable, notable
❌ myriad, plethora, array, spectrum, gamut, breadth
❌ endeavor, notion, realm, facet, aspect, dimension
❌ paradigm, framework, landscape, trajectory, dynamic
❌ optimal, viable, integral, inherent, quintessential, ubiquitous
❌ underscores, showcases, exemplifies, encapsulates, illuminates, elucidates
❌ necessitates, encompasses, embodies, perpetuates, mitigates, exacerbates
❌ bolsters, galvanizes, transcends, permeates, resonates
❌ navigating, harnessing, spearheading, pioneering, orchestrating
❌ attribute, capacity, capability, proficiency, competency
❌ milieu, domain, sphere, arena, context
❌ inculcates, instills, engenders, fortifies, bolsters
❌ practitioners, individuals (overused), persons
❌ steadfast, unwavering, persistent, diligent
❌ flourishes, blossoms, dissipating
❌ constraint, empowerment, deliberatively
❌ myriad opportunities, enduring triumphs, latent potential
❌ amidst, amongst, brimming, waning, fleeting, momentary
❌ surmount, succumbing, propelling, fortifying
❌ signifies, equips, dictate, destiny, groundwork
REPLACE WITH NATURAL ACADEMIC LANGUAGE:
✅ important, key, central, basic, core, main
✅ complex → complicated, detailed, involved
✅ examine, study, investigate, look at, explore
✅ use, apply, employ, work with
✅ improve, strengthen, increase, develop, build
✅ many, several, numerous, various, multiple
✅ idea, concept, theory, principle, view
✅ field, area, subject, topic, domain
✅ shows, demonstrates, indicates, reveals, suggests
✅ requires, needs, demands, calls for
✅ includes, contains, involves, covers
✅ people, researchers, scholars, students, workers
✅ consistent, regular, steady, constant
✅ grows, develops, increases, expands
✅ limit, restriction, control
✅ power, ability, capacity, means
✅ chances, possibilities, options
✅ lasting, long-term, sustained, continued
═══════════════════════════════════════════════════════════════════════════════
PHASE 2: SENTENCE STRUCTURE CHAOS (BREAK AI PATTERNS) **FOLLOW THIS STRICTLY**
═══════════════════════════════════════════════════════════════════════════════
AI WRITES: Uniform 20-25 word sentences with perfect rhythm
YOU WRITE: Chaotic variation - 7, 29, 11, 34, 9, 26, 13, 31 words
SENTENCE LENGTH TARGETS:
- Ultra-short (6-10 words): "Discipline matters. It shapes who we become."
- Short (11-15 words): "This quality affects success in school, sports, and work."
- Medium (16-22 words): "Research from Stanford University (2019) shows that disciplined students perform better academically."
- Long (23-30 words): "When people maintain discipline, they develop habits like regular study sessions and healthy eating patterns that support their goals over time."
- Very long (31-38 words): "The connection between discipline and achievement has been documented extensively in psychological literature, with studies indicating that self-control predicts outcomes more reliably than intelligence measures alone (Duckworth, 2016)."
CRITICAL: Mix these randomly. Never use same length twice in a row.
═══════════════════════════════════════════════════════════════════════════════
PHASE 3: OPENING VARIATION (NEVER REPEAT PATTERNS)
═══════════════════════════════════════════════════════════════════════════════
AI PATTERN: Starts every sentence with subject or "This/These/Such"
HUMAN PATTERN: Wild variation
OPENING TYPES (Rotate constantly):
1. Subject-first: "Discipline shapes behavior."
2. Prepositional: "In academic settings, discipline predicts success."
3. Adverb: "However, discipline requires consistent effort."
4. Participial: "Building on previous research, this study examines..."
5. Dependent clause: "When students practice discipline, they perform better."
6. Transitional: "Research shows discipline matters."
7. Infinitive: "To understand discipline, we must examine its components."
8. Gerund: "Maintaining discipline requires daily effort."
9. Direct object: "Success in school demands discipline."
10. Question: "What makes discipline effective?"
RULE: Never use same opening type in consecutive sentences.
═══════════════════════════════════════════════════════════════════════════════
PHASE 4: PASSIVE VOICE STRATEGY (10-20% ONLY)
═══════════════════════════════════════════════════════════════════════════════
AI OVERUSES PASSIVE: 60-70% passive voice
HUMANS USE: 10-20% passive voice
WHEN TO USE PASSIVE:
✅ Methods: "Data were collected from 200 participants."
✅ Results: "Significant differences were observed between groups."
✅ Objectivity: "The hypothesis was tested using regression analysis."
WHEN TO USE ACTIVE:
✅ Agency: "Researchers conducted three experiments."
✅ Clarity: "This study examines the role of discipline."
✅ Engagement: "Students who practice discipline achieve better grades."
═══════════════════════════════════════════════════════════════════════════════
PHASE 5: NATURAL ACADEMIC IMPERFECTIONS
═══════════════════════════════════════════════════════════════════════════════
HUMANS AREN'T PERFECT. Add these natural elements:
1. STRATEGIC HEDGING (3-5 per 500 words, NOT every sentence):
✅ "appears to", "suggests that", "may indicate", "tends to"
✅ "Research suggests discipline matters" (not "might possibly perhaps indicate")
2. VARIED TRANSITIONS (NOT formulaic):
❌ AVOID: Moreover, Furthermore, Additionally, In addition (AI overuses these)
✅ USE: However, Nevertheless, In contrast, Similarly, Research shows, Studies indicate
✅ USE: Building on this, Extending this analysis, This finding suggests
3. CITATION INTEGRATION (Specific, varied):
❌ "Research shows" (vague, AI-like)
✅ "According to Smith (2019)", "Duckworth et al. (2016) found", "Recent studies demonstrate"
4. PUNCTUATION VARIETY:
- Use semicolons sparingly (2-3 per 500 words)
- Em dashes for emphasis — like this — occasionally
- Parenthetical asides (as noted earlier) for natural flow
- Colons for lists: first, second, third
5. PARAGRAPH BREAKS:
- Vary paragraph length: 3 sentences, 5 sentences, 2 sentences, 6 sentences
- NOT uniform 4-sentence paragraphs (AI pattern)
═══════════════════════════════════════════════════════════════════════════════
PHASE 6: ELIMINATE AI DRAMA & INTENSITY
═══════════════════════════════════════════════════════════════════════════════
AI LOVES DRAMA:
❌ "profound implications", "groundbreaking", "revolutionary", "unprecedented"
❌ "deeply", "highly", "extremely", "incredibly", "remarkably", "exceptionally"
❌ "brimming with", "trapped in cycles", "fleeting inclinations"
ACADEMIC TONE:
✅ "important implications", "new", "significant", "recent"
✅ "considerably", "substantially", "notably", "particularly"
✅ "full of", "stuck in patterns", "temporary desires"
═══════════════════════════════════════════════════════════════════════════════
PHASE 7: SPECIFIC EXAMPLES & CONCRETE LANGUAGE
═══════════════════════════════════════════════════════════════════════════════
AI STAYS ABSTRACT:
❌ "various domains", "multiple contexts", "diverse settings"
HUMANS USE SPECIFICS:
✅ "in schools, workplaces, and sports teams"
✅ "students, employees, and athletes"
✅ "studying regularly, eating healthy, arriving on time"
═══════════════════════════════════════════════════════════════════════════════
PHASE 8: BREAK PERFECT PARALLELISM
═══════════════════════════════════════════════════════════════════════════════
AI WRITES: "analyzing data, examining results, and evaluating outcomes"
(Perfect parallel structure - all gerunds)
HUMAN WRITES: "data analysis, examination of results, and outcome evaluation"
(Mixed structures - noun, noun phrase, noun)
OR: "analyzing data, result examination, and how we evaluate outcomes"
(Gerund, noun, clause - deliberately imperfect)
═══════════════════════════════════════════════════════════════════════════════
FINAL CHECKLIST FOR <5% AI DETECTION:
═══════════════════════════════════════════════════════════════════════════════
✓ ZERO forbidden AI vocabulary (check every word)
✓ Sentence lengths: 7, 29, 11, 34, 9, 26, 13, 31, 18, 27 (chaotic)
✓ Opening types: Never repeat consecutively
✓ Passive voice: 10-20% (not 60%+)
✓ Hedging: 3-5 instances per 500 words (strategic, not excessive)
✓ Transitions: Varied, not formulaic (avoid Moreover/Furthermore/Additionally)
✓ Citations: Specific with years (Author, Year)
✓ No AI drama words (profound, groundbreaking, deeply, highly)
✓ Concrete examples (not abstract "various domains")
✓ Imperfect parallelism (mixed grammatical structures)
✓ Natural punctuation variety (semicolons, em dashes, parentheses)
✓ Paragraph length variation (not uniform)
═══════════════════════════════════════════════════════════════════════════════
EXAMPLE TRANSFORMATION:
═══════════════════════════════════════════════════════════════════════════════
❌ AI TEXT (39% AI):
"Discipline is a pivotal attribute for personal development, shaping both conduct and identity, influencing success, habits formation, interpersonal relations, and an individual's overall sense of purpose."
✅ HUMAN TEXT (<5% AI):
"Discipline shapes personal growth. It affects how people behave and who they become, influencing success in school and work, habit formation, relationships with others, and sense of purpose in life."
NOTICE THE DIFFERENCES:
- Removed: pivotal, attribute, conduct, interpersonal, individual's, overall
- Added: Concrete examples (school, work)
- Broke into 2 sentences (7 words, then 23 words)
- Used simple language (shapes, affects, people, relationships)
- Maintained academic tone WITHOUT AI vocabulary
"""
    return ultra_deep_engine + accent_instruction + """
═══════════════════════════════════════════════════════════════════════════════
NOW REWRITE THE TEXT BELOW:
═══════════════════════════════════════════════════════════════════════════════
REQUIREMENTS:
1. Maintain formal academic tone
2. Use ZERO forbidden AI vocabulary
3. Apply extreme sentence variation
4. Include specific examples
5. Break perfect parallelism
6. Use 10-20% passive voice
7. Add natural imperfections
8. Target <5% AI detection
OUTPUT ONLY THE REWRITTEN TEXT. NO EXPLANATIONS."""

# Step 6: Apply Regional Accent
def apply_accent(text, accent='usa'):
    """
    Applies regional spelling and vocabulary conventions.
    """
    if accent.lower() == 'usa':
        style = USA_ACADEMIC_STYLE
    elif accent.lower() == 'uk':
        style = UK_ACADEMIC_STYLE
    else:
        return text
    # Apply spelling changes
    for original, replacement in style['spelling'].items():
        pattern = r'\b' + re.escape(original) + r'\b'
        text = re.sub(pattern, replacement, text)
        if original[0].islower():
            cap_original = original.capitalize()
            cap_replacement = replacement.capitalize()
            pattern_cap = r'\b' + re.escape(cap_original) + r'\b'
            text = re.sub(pattern_cap, cap_replacement, text)
    # Apply vocabulary changes (case-insensitive, preserving a sentence-initial capital)
    for original, replacement in style['vocabulary'].items():
        pattern = r'\b' + re.escape(original) + r'\b'
        text = re.sub(
            pattern,
            lambda m, rep=replacement: rep.capitalize() if m.group(0)[0].isupper() else rep,
            text,
            flags=re.IGNORECASE
        )
    # Apply phrase changes
    for original, replacement in style['phrases'].items():
        text = text.replace(original, replacement)
        text = text.replace(original.capitalize(), replacement.capitalize())
    return text
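
# Illustrative example of apply_accent (the expected output follows directly from the
# USA_ACADEMIC_STYLE dictionaries above; shown as a comment so nothing runs at import):
#
#     >>> apply_accent("The colour of behaviour was analysed at the weekend.", "usa")
#     'The color of behavior was analyzed on the weekend.'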

# Step 7: Create Deterministic Configuration
def create_ultra_humanized_config(input_text, accent, tokenizer):
    """
    Creates config optimized for <5% AI detection.
    """
    combined = f"{input_text}_{accent}"
    text_hash = hashlib.md5(combined.encode()).hexdigest()
    seed = int(text_hash[:8], 16) % (2**32)
    set_seed(seed)
    gen_config = GenerationConfig(
        max_new_tokens=4096,
        temperature=1.20,
        top_p=0.95,
        top_k=80,
        do_sample=True,
        repetition_penalty=1.25,
        no_repeat_ngram_size=5,
        # Note: the seed is applied globally via set_seed() above;
        # GenerationConfig itself has no seed parameter.
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id
    )
    return gen_config, seed
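
# Determinism check (illustrative): the seed is derived from an MD5 hash of the
# (input_text, accent) pair, so identical inputs always reproduce the same seed and
# sampling behavior:
#
#     >>> _, seed_a = create_ultra_humanized_config("Sample text", "usa", tokenizer)
#     >>> _, seed_b = create_ultra_humanized_config("Sample text", "usa", tokenizer)
#     >>> seed_a == seed_b
#     True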

# Step 8: Humanize with Ultra-Deep Transformations
def humanize_academic_text(ai_text, accent='usa'):
    """
    Ultra-deep humanization for <5% AI detection.
    """
    academic_engine = get_ultra_deep_academic_engine(accent)
    gen_config, seed = create_ultra_humanized_config(ai_text, accent, tokenizer)
    print(f"Processing with {accent.upper()} accent (seed: {seed})")
    print("Using ultra-deep humanization for <5% AI detection...")
    prompt = f"""<s>[INST] {academic_engine}
--- INPUT TEXT ---
{ai_text}
--- END INPUT ---
### REWRITTEN TEXT ({accent.upper()} ENGLISH):
[/INST]"""
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=4096)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    print("Generating ultra-humanized academic text...")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            generation_config=gen_config
        )
    full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    if "[/INST]" in full_output:
        humanized = full_output.split("[/INST]")[-1].strip()
    else:
        humanized = full_output.strip()
    humanized = apply_accent(humanized, accent)
    humanized = deep_cleanup(humanized)
    return humanized
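
# Illustrative usage (hypothetical input text; requires the model loaded above, so
# shown as a comment rather than executable code):
#
#     >>> humanize_academic_text("The utilisation of this framework is pivotal.", accent="usa")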

def deep_cleanup(text):
    """
    Removes ALL AI vocabulary with SIMPLE human replacements.
    """
    forbidden_replacements = {
        'pivotal': 'important',
        'paramount': 'important',
        'crucial': 'important',
        'vital': 'key',
        'essential': 'needed',
        'fundamental': 'basic',
        'cornerstone': 'foundation',
        'foundational': 'basic',
        'multifaceted': 'complex',
        'nuanced': 'detailed',
        'intricate': 'complicated',
        'comprehensive': 'complete',
        'holistic': 'whole',
        'delve': 'examine',
        'leverage': 'use',
        'utilize': 'use',
        'utilise': 'use',
        'facilitate': 'help',
        'enhance': 'improve',
        'foster': 'support',
        'cultivate': 'develop',
        'empower': 'enable',
        'robust': 'strong',
        'rigorous': 'thorough',
        'substantial': 'large',
        'significant': 'important',
        'considerable': 'large',
        'notable': 'important',
        'myriad': 'many',
        'plethora': 'many',
        'array': 'range',
        'spectrum': 'range',
        'gamut': 'range',
        'breadth': 'range',
        'endeavor': 'effort',
        'endeavour': 'effort',
        'notion': 'idea',
        'realm': 'area',
        'facet': 'aspect',
        'dimension': 'part',
        'paradigm': 'model',
        'framework': 'structure',
        'landscape': 'field',
        'trajectory': 'path',
        'dynamic': 'changing',
        'optimal': 'best',
        'viable': 'workable',
        'integral': 'key',
        'inherent': 'natural',
        'quintessential': 'typical',
        'ubiquitous': 'common',
        'underscores': 'shows',
        'showcases': 'displays',
        'exemplifies': 'shows',
        'encapsulates': 'captures',
        'illuminates': 'reveals',
        'elucidates': 'explains',
        'necessitates': 'requires',
        'encompasses': 'includes',
        'embodies': 'represents',
        'perpetuates': 'continues',
        'mitigates': 'reduces',
        'exacerbates': 'worsens',
        'bolsters': 'supports',
        'galvanizes': 'energizes',
        'transcends': 'goes beyond',
        'permeates': 'spreads through',
        'resonates': 'connects',
        'navigating': 'dealing with',
        'harnessing': 'using',
        'spearheading': 'leading',
        'pioneering': 'starting',
        'orchestrating': 'organizing',
        'attribute': 'quality',
        'capacity': 'ability',
        'capability': 'ability',
        'proficiency': 'skill',
        'competency': 'skill',
        'milieu': 'environment',
        'domain': 'field',
        'sphere': 'area',
        'arena': 'field',
        'context': 'setting',
        'inculcates': 'teaches',
        'instills': 'creates',
        'engenders': 'creates',
        'fortifies': 'strengthens',
        'practitioners': 'professionals',
        'individuals': 'people',
        'steadfast': 'steady',
        'unwavering': 'constant',
        'persistent': 'continuing',
        'diligent': 'careful',
        'flourishes': 'grows',
        'blossoms': 'develops',
        'dissipating': 'fading',
        'constraint': 'limit',
        'empowerment': 'power',
        'deliberatively': 'deliberately',
        'amidst': 'among',
        'amongst': 'among',
        'brimming': 'full',
        'waning': 'decreasing',
        'fleeting': 'brief',
        'momentary': 'brief',
        'surmount': 'overcome',
        'succumbing': 'giving in',
        'propelling': 'pushing',
        'signifies': 'means',
        'equips': 'prepares',
        'dictate': 'control',
        'destiny': 'future',
        'groundwork': 'foundation',
        'immense': 'large',
        'significance': 'importance',
        'unconscious': 'automatic',
        'procedures': 'processes',
        'cognitive': 'mental',
        'strain': 'effort',
        'beneficial': 'good',
        'customs': 'habits',
        'detrimental': 'harmful',
        'insidiously': 'quietly',
        'cumulative': 'combined',
        'devotion': 'commitment',
        'preservation': 'keeping',
        'correlation': 'connection',
        'anticipatibility': 'predictability',
        'consciously': 'deliberately',
        'engineering': 'designing',
        'progressive': 'steady',
        'evolution': 'progress',
        'profound': 'important',
        'groundbreaking': 'new',
        'revolutionary': 'new',
        'unprecedented': 'new',
        'transformative': 'changing',
        'deeply': 'very',
        'highly': 'very',
        'extremely': 'very',
        'incredibly': 'very',
        'remarkably': 'notably',
        'exceptionally': 'notably',
        'extraordinarily': 'notably',
        'moreover': 'also',
        'furthermore': 'also',
        'additionally': 'also',
        'thus': 'so',
        'hence': 'so',
        'thereby': 'by doing this',
        'wherein': 'where',
        'whereby': 'by which',
    }
    for forbidden, replacement in forbidden_replacements.items():
        pattern = r'\b' + re.escape(forbidden) + r'\b'
        # Case-insensitive replacement that keeps a leading capital intact.
        text = re.sub(
            pattern,
            lambda m, rep=replacement: rep.capitalize() if m.group(0)[0].isupper() else rep,
            text,
            flags=re.IGNORECASE
        )
    return text
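
# Illustrative example of deep_cleanup (the expected output follows from the
# replacement table above):
#
#     >>> deep_cleanup("This pivotal framework underscores the significance of discipline.")
#     'This important structure shows the importance of discipline.'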

# Step 9: Post-Processing
def polish_academic_text(text):
    """
    Final polish for academic text.
    """
    contractions = {
        "don't": "do not", "doesn't": "does not", "didn't": "did not",
        "can't": "cannot", "couldn't": "could not", "wouldn't": "would not",
        "shouldn't": "should not", "won't": "will not", "isn't": "is not",
        "aren't": "are not", "wasn't": "was not", "weren't": "were not",
        "haven't": "have not", "hasn't": "has not", "hadn't": "had not",
        "it's": "it is", "that's": "that is", "there's": "there is",
    }
    for contraction, full_form in contractions.items():
        text = text.replace(contraction, full_form)
        text = text.replace(contraction.capitalize(), full_form.capitalize())
    return text
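
# Illustrative example of polish_academic_text (contractions expanded per the table above):
#
#     >>> polish_academic_text("It's clear that students don't fail when routines hold.")
#     'It is clear that students do not fail when routines hold.'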

# ═══════════════════════════════════════════════════════════════════════════════
# GRADIO WEB INTERFACE
# ═══════════════════════════════════════════════════════════════════════════════
def convert_text_gradio(ai_text):
    """Gradio wrapper function"""
    if not ai_text.strip():
        return "⚠️ Please enter some text to convert!"
    try:
        accent = "usa"
        humanized = humanize_academic_text(ai_text, accent)
        humanized = polish_academic_text(humanized)
        return humanized
    except Exception as e:
        import traceback
        return f"❌ Error: {str(e)}\n\n{traceback.format_exc()}"

# Custom CSS
custom_css = """
.button-row {
    justify-content: center !important;
}
.gradio-container {
    max-width: 1200px !important;
    margin: auto !important;
}
"""

# Create Gradio Interface (Compatible with all Gradio versions)
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML("""
    <div style='background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                padding: 40px 20px;
                border-radius: 15px;
                margin-bottom: 30px;
                box-shadow: 0 10px 30px rgba(0,0,0,0.2);'>
        <h1 style='color: white;
                   font-size: 42px;
                   margin-bottom: 15px;
                   text-align: center;
                   text-shadow: 2px 2px 4px rgba(0,0,0,0.3);'>
            🎓 Academic Text Humanizer
        </h1>
        <p style='color: #f0f0f0;
                  font-size: 18px;
                  text-align: center;
                  margin: 0;
                  font-weight: 300;'>
            Transform AI-Generated Text into Authentic Academic Writing
        </p>
    </div>
    """)
    input_text = gr.Textbox(
        label="📝 AI-Generated Text",
        lines=10,
        placeholder="Paste your AI-generated text here..."
    )
    with gr.Row():
        clear_btn = gr.Button("🗑️ Clear")
        submit_btn = gr.Button("✨ Humanize Text")
    output_text = gr.Textbox(
        label="✨ Humanized Academic Text",
        lines=12
    )
    submit_btn.click(
        fn=convert_text_gradio,
        inputs=input_text,
        outputs=output_text
    )
    clear_btn.click(
        fn=lambda: ("", ""),
        inputs=None,
        outputs=[input_text, output_text]
    )

# Launch the app
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860
    )