| """ |
| Comprehensive testing suite for rmtariq/multilingual-emotion-classifier |
| This script provides various testing capabilities for the emotion classification model. |
| |
| Usage: |
| python test_model.py --test-type [quick|comprehensive|interactive|benchmark] |
| |
| Author: rmtariq |
| Repository: https://huggingface.co/rmtariq/multilingual-emotion-classifier |
| """ |
|
|
import argparse
import time

import torch
from transformers import pipeline
|
|
class EmotionModelTester:
    """Comprehensive testing suite for the multilingual emotion classifier.

    Wraps a Hugging Face ``text-classification`` pipeline and exposes four
    modes: :meth:`quick_test`, :meth:`comprehensive_test`,
    :meth:`interactive_test`, and :meth:`benchmark_test`.
    """

    def __init__(self, model_name: str = "rmtariq/multilingual-emotion-classifier"):
        """Create the tester and eagerly load the model.

        Args:
            model_name: Hugging Face model id or local path.
        """
        self.model_name = model_name
        self.classifier = None  # populated by load_model()
        self.load_model()

    def load_model(self) -> None:
        """Load the emotion classification pipeline, preferring GPU.

        Raises:
            Exception: re-raises whatever ``pipeline(...)`` raised so the
                caller aborts instead of continuing with no model.
        """
        print(f"🔥 Loading model: {self.model_name}")
        try:
            use_gpu = torch.cuda.is_available()
            self.classifier = pipeline(
                "text-classification",
                model=self.model_name,
                device=0 if use_gpu else -1,  # 0 = first CUDA device, -1 = CPU
            )
            print(f"✅ Model loaded successfully on {'GPU' if use_gpu else 'CPU'}")
        except Exception as e:
            print(f"❌ Error loading model: {e}")
            raise

    def _predict(self, text: str):
        """Return ``(label, score)`` for *text*, with the label lower-cased."""
        top = self.classifier(text)[0]
        return top["label"].lower(), top["score"]

    @staticmethod
    def _preview(text: str, width: int) -> str:
        """Shorten *text* to *width* chars, appending '...' only when truncated."""
        return text if len(text) <= width else text[:width] + "..."

    def quick_test(self) -> float:
        """Quick smoke test over essential English and Malay examples.

        Returns:
            Fraction of cases whose predicted label matched the expected one.
        """
        print("\n🚀 QUICK TEST")
        print("=" * 50)

        # (text, expected label, language flag)
        test_cases = [
            # English
            ("I am so happy today!", "happy", "🇬🇧"),
            ("This makes me really angry!", "anger", "🇬🇧"),
            ("I love you so much!", "love", "🇬🇧"),
            ("I'm scared of spiders", "fear", "🇬🇧"),
            ("This news makes me sad", "sadness", "🇬🇧"),
            ("What a surprise!", "surprise", "🇬🇧"),
            # Malay
            ("Saya sangat gembira!", "happy", "🇲🇾"),
            ("Aku marah dengan keadaan ini", "anger", "🇲🇾"),
            ("Aku sayang kamu", "love", "🇲🇾"),
            ("Saya takut dengan ini", "fear", "🇲🇾"),
            # Previously problematic "terbaik/baik" phrasings
            ("Ini adalah hari jadi terbaik", "happy", "🇲🇾"),
            ("Terbaik!", "happy", "🇲🇾"),
            ("Ini adalah hari yang baik", "happy", "🇲🇾"),
        ]

        correct = 0
        total = len(test_cases)

        for i, (text, expected, flag) in enumerate(test_cases, 1):
            predicted, confidence = self._predict(text)

            is_correct = predicted == expected
            if is_correct:
                correct += 1

            status = "✅" if is_correct else "❌"
            print(f"{i:2d}. {status} {flag} '{self._preview(text, 40)}'")
            print(f"    → {predicted} ({confidence:.1%}) [Expected: {expected}]")

        accuracy = correct / total
        print(f"\n📊 Quick Test Results: {accuracy:.1%} ({correct}/{total})")

        if accuracy >= 0.9:
            print("🎉 EXCELLENT! Model performing at high level!")
        elif accuracy >= 0.8:
            print("👍 GOOD! Model performing well!")
        else:
            print("⚠️ NEEDS ATTENTION. Some issues detected.")

        return accuracy

    def comprehensive_test(self) -> float:
        """Comprehensive test across categorized English/Malay cases.

        Returns:
            Overall fraction of correct predictions across all categories.
        """
        print("\n🔬 COMPREHENSIVE TEST")
        print("=" * 50)

        test_categories = {
            "English Basic": [
                ("I feel fantastic today!", "happy"),
                ("I'm furious about this!", "anger"),
                ("I adore this place!", "love"),
                ("I'm terrified of heights", "fear"),
                ("I'm heartbroken", "sadness"),
                ("I can't believe it!", "surprise"),
            ],
            "Malay Basic": [
                ("Gembira sangat hari ini", "happy"),
                ("Marah betul dengan dia", "anger"),
                ("Sayang sangat kat kamu", "love"),
                ("Takut gila dengan benda tu", "fear"),
                ("Sedih betul dengar berita", "sadness"),
                ("Terkejut dengan kejadian", "surprise"),
            ],
            "Malay Fixed Issues": [
                ("Ini adalah hari jadi terbaik", "happy"),
                ("Hari jadi terbaik saya", "happy"),
                ("Terbaik!", "happy"),
                ("Hari yang baik", "happy"),
                ("Pengalaman terbaik", "happy"),
                ("Masa terbaik", "happy"),
            ],
            "Edge Cases": [
                ("Happy birthday!", "happy"),
                ("Best day ever!", "happy"),
                ("Good news!", "happy"),
                ("Selamat hari jadi", "happy"),
                ("Berita baik", "happy"),
                ("Hasil terbaik", "happy"),
            ],
        }

        overall_correct = 0
        overall_total = 0

        for category, cases in test_categories.items():
            print(f"\n📝 {category}:")
            print("-" * 30)

            category_correct = 0
            for text, expected in cases:
                predicted, confidence = self._predict(text)

                is_correct = predicted == expected
                if is_correct:
                    category_correct += 1
                    overall_correct += 1
                overall_total += 1

                status = "✅" if is_correct else "❌"
                print(f"  {status} '{self._preview(text, 35)}' → {predicted} ({confidence:.1%})")

            category_accuracy = category_correct / len(cases)
            print(f"  📊 {category} Accuracy: {category_accuracy:.1%}")

        overall_accuracy = overall_correct / overall_total
        print("\n🏆 COMPREHENSIVE TEST RESULTS:")
        print(f"✅ Overall Accuracy: {overall_accuracy:.1%} ({overall_correct}/{overall_total})")

        return overall_accuracy

    def interactive_test(self) -> None:
        """Interactive REPL: classify user-entered text until 'quit'/Ctrl-C."""
        print("\n🎮 INTERACTIVE TEST MODE")
        print("=" * 50)
        print("Enter text to classify emotions (type 'quit' to exit)")
        print("Supported emotions: anger, fear, happy, love, sadness, surprise")
        print()

        emotion_emojis = {
            "anger": "😠", "fear": "😨", "happy": "😊",
            "love": "❤️", "sadness": "😢", "surprise": "😲",
        }

        while True:
            try:
                text = input("💬 Your text: ").strip()

                if text.lower() in ("quit", "exit", "q"):
                    print("👋 Goodbye!")
                    break
                if not text:
                    continue

                predicted, confidence = self._predict(text)

                emoji = emotion_emojis.get(predicted, "🤔")
                confidence_level = (
                    "💪 High" if confidence > 0.9
                    else "👍 Good" if confidence > 0.7
                    else "⚠️ Low"
                )

                print(f"🎭 Result: {emoji} {predicted}")
                print(f"📊 Confidence: {confidence:.1%}")
                print(f"💪 {confidence_level} confidence!")
                print()

            except KeyboardInterrupt:
                print("\n👋 Goodbye!")
                break
            except Exception as e:
                print(f"❌ Error: {e}")

    def benchmark_test(self) -> float:
        """Throughput benchmark: 100 single-text predictions.

        Returns:
            Predictions per second achieved.
        """
        print("\n⚡ BENCHMARK TEST")
        print("=" * 50)

        # 10 short texts repeated 10x = 100 predictions
        benchmark_texts = [
            "I am so happy today!",
            "This makes me angry!",
            "I love this!",
            "I'm scared!",
            "This is sad news",
            "What a surprise!",
            "Saya gembira!",
            "Aku marah!",
            "Sayang betul!",
            "Takut sangat!",
        ] * 10

        print(f"🏃 Running {len(benchmark_texts)} predictions...")

        start_time = time.time()
        for text in benchmark_texts:
            _ = self.classifier(text)
        total_time = time.time() - start_time

        avg_time = total_time / len(benchmark_texts)
        predictions_per_second = len(benchmark_texts) / total_time

        print("📊 BENCHMARK RESULTS:")
        print(f"⏱️ Total time: {total_time:.2f} seconds")
        print(f"⚡ Average per prediction: {avg_time * 1000:.1f} ms")
        print(f"🚀 Predictions per second: {predictions_per_second:.1f}")

        if predictions_per_second > 10:
            print("🎉 EXCELLENT! Very fast performance!")
        elif predictions_per_second > 5:
            print("👍 GOOD! Acceptable performance!")
        else:
            print("⚠️ SLOW. Consider optimization.")

        return predictions_per_second
|
|
def main() -> int:
    """Command-line entry point for the testing suite.

    Parses ``--test-type`` and ``--model``, builds an
    :class:`EmotionModelTester`, and runs the selected test mode.

    Returns:
        0 on success, 1 if model loading or any test raised.
    """
    parser = argparse.ArgumentParser(description="Test the multilingual emotion classifier")
    parser.add_argument(
        "--test-type",
        choices=["quick", "comprehensive", "interactive", "benchmark", "all"],
        default="quick",
        help="Type of test to run",
    )
    parser.add_argument(
        "--model",
        default="rmtariq/multilingual-emotion-classifier",
        help="Model name or path",
    )
    args = parser.parse_args()

    print("🎭 MULTILINGUAL EMOTION CLASSIFIER TESTING SUITE")
    print("=" * 60)
    print(f"Model: {args.model}")
    print(f"Test Type: {args.test_type}")

    try:
        tester = EmotionModelTester(args.model)

        if args.test_type == "all":
            # "all" runs the non-interactive tests first, interactive last
            # so the batch results are printed before the REPL blocks.
            print("🚀 Running all tests...")
            tester.quick_test()
            tester.comprehensive_test()
            tester.benchmark_test()
            print("\n🎮 Starting interactive mode...")
            tester.interactive_test()
        else:
            # Dispatch table instead of an if/elif chain; argparse's
            # `choices` guarantees the key exists.
            dispatch = {
                "quick": tester.quick_test,
                "comprehensive": tester.comprehensive_test,
                "interactive": tester.interactive_test,
                "benchmark": tester.benchmark_test,
            }
            dispatch[args.test_type]()

    except Exception as e:
        print(f"❌ Testing failed: {e}")
        return 1

    return 0
|
|
| if __name__ == "__main__": |
| exit(main()) |
|
|