"""
Test all available models on the same set of images, with per-image latency measurements
"""
import glob
import os
import sys
import time

if __name__ == '__main__':
    # Available models - all 5 IQA-based detectors
    models = ['contrique', 'hyperiqa', 'tres', 'arniqa', 'reiqa']
    # To run a subset, override the list here, e.g.:
    # models = ['reiqa']

    """
      ---
  Summary Table - All Models

  | Model     | Image 1 (11.14.39 PM) | Image 2 (5.50.50
  PM)  | Verdict                     |
  |-----------|-----------------------|--------------------
  ---|-----------------------------|
  | CONTRIQUE | 0.7931 (AI - 58.6%)   | 0.6332 (AI - 26.6%)
     | ✓ Both AI-Generated         |
  | HYPERIQA  | 0.7602 (AI - 52.0%)   | 0.8179 (AI - 63.6%)
     | ✓ Both AI-Generated         |
  | TRES      | ❌ Failed              | ❌ Failed
       | Model incompatible with CPU |
  | REIQA     | 0.3500 (Real - 30.0%) | 0.2416 (Real -
  51.7%) | ✗ Both Real                 |
  | ARNIQA    | 0.7133 (AI - 42.7%)   | 0.9605 (AI - 92.1%)
     | ✓ Both AI-Generated         |

  ---
    """

    # Test images directory
    test_images_dir = "new_images_to_test"

    # Get all images from the directory
    image_extensions = ['*.jpg', '*.jpeg', '*.png', '*.JPG', '*.JPEG', '*.PNG']
    test_images = []
    for ext in image_extensions:
        test_images.extend(os.path.abspath(p) for p in glob.glob(os.path.join(test_images_dir, ext)))
    # De-duplicate (case-insensitive filesystems match a file under both *.jpg and
    # *.JPG) and sort for a stable processing order
    test_images = sorted(set(test_images))

    if not test_images:
        print(f"Error: No images found in {test_images_dir}/")
        sys.exit(1)

    print(f"Found {len(test_images)} image(s) in {test_images_dir}/")
    print("=" * 80)

    # Import libraries once
    sys.path.insert(0, '.')
    from yaml import safe_load
    from functions.loss_optimizers_metrics import *
    from functions.run_on_images_fn import run_on_images
    import functions.utils as utils
    import functions.networks as networks
    import defaults
    import warnings
    warnings.filterwarnings("ignore")
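    # NOTE: functions.* and defaults are project-local modules; the sys.path.insert
    # above assumes the script is launched from the repository root.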

    all_results = {}
    latency_stats = {}

    # Test each model
    for model_idx, model_name in enumerate(models, 1):
        print(f"\n{'='*80}")
        print(f"[{model_idx}/{len(models)}] Testing model: {model_name.upper()}")
        print("="*80)

        # Start timing model loading
        model_load_start = time.time()

        try:
            config_path = f"configs/{model_name}.yaml"
            with open(config_path, "r") as f:
                config = safe_load(f)

            # Override settings
            config["dataset"]["dataset_type"] = "GenImage"
            config["checkpoints"]["resume_dirname"] = "GenImage/extensive/MarginContrastiveLoss_CrossEntropy"
            config["checkpoints"]["resume_filename"] = "best_model.ckpt"
            config["checkpoints"]["checkpoint_dirname"] = "extensive/MarginContrastiveLoss_CrossEntropy"
            config["checkpoints"]["checkpoint_filename"] = "best_model.ckpt"

            # Training settings (for testing)
            config["train_settings"]["train"] = False
            config["train_loss_fn"]["name"] = "CrossEntropy"
            config["val_loss_fn"]["name"] = "CrossEntropy"

            # Model setup - use CPU (MPS has compatibility issues)
            device = "cpu"
            feature_extractor = networks.get_model(model_name=model_name, device=device)

            # Classifier
            config["classifier"]["hidden_layers"] = [1024]
            classifier = networks.Classifier_Arch2(
                input_dim=config["classifier"]["input_dim"],
                hidden_layers=config["classifier"]["hidden_layers"]
            )
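            # Classifier_Arch2 is assumed to be an MLP head (a single 1024-unit hidden
            # layer here) over the frozen IQA features; input_dim is model-specific and
            # read from each model's YAML config.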

            # Preprocessing settings
            preprocess_settings = {
                "model_name": model_name,
                "selected_transforms_name": "test",
                "probability": -1,
                "gaussian_blur_range": None,
                "jpeg_compression_qfs": None,
                "input_image_dimensions": (224, 224),
                "resize": None
            }
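            # Assumption: the "test" transform set with probability=-1 disables the
            # random blur/JPEG augmentations, leaving a deterministic 224x224 input.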

            model_load_time = time.time() - model_load_start
            print(f"✓ {model_name.upper()} model loaded successfully (Load time: {model_load_time:.3f}s)\n")

            results = []
            inference_times = []

            # Test each image with this model
            for idx, test_image in enumerate(test_images, 1):
                image_name = os.path.basename(test_image)
                print(f"  [{idx}/{len(test_images)}] Testing: {image_name}")

                # Test images
                test_real_images_paths = [test_image]
                test_fake_images_paths = []

                try:
                    # Start timing inference
                    inference_start = time.time()

                    test_set_metrics, best_threshold, y_pred, y_true = run_on_images(
                        feature_extractor=feature_extractor,
                        classifier=classifier,
                        config=config,
                        test_real_images_paths=test_real_images_paths,
                        test_fake_images_paths=test_fake_images_paths,
                        preprocess_settings=preprocess_settings,
                        best_threshold=0.5,
                        verbose=False
                    )
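                    # run_on_images is assumed to return per-image scores in y_pred;
                    # metrics, threshold, and y_true are not meaningful for a single
                    # unlabeled image, so only y_pred is consumed below.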

                    inference_time = time.time() - inference_start
                    inference_times.append(inference_time)

                    score = y_pred[0] if len(y_pred) > 0 else None
                    # score > 0.5 means AI-generated; confidence maps the distance from
                    # the 0.5 threshold onto 0-100% (0.5 -> 0%, 0.0 or 1.0 -> 100%)
                    prediction = ("AI-Generated" if score > 0.5 else "Real") if score is not None else "Unknown"
                    confidence = abs(score - 0.5) * 200 if score is not None else 0

                    results.append({
                        'image': image_name,
                        'score': score,
                        'prediction': prediction,
                        'confidence': confidence,
                        'inference_time': inference_time
                    })

                    print(f"    ✓ Score: {score:.4f}{prediction} ({confidence:.1f}% confidence) | Time: {inference_time:.3f}s")

                except Exception as e:
                    print(f"    ✗ Error: {e}")
                    results.append({
                        'image': image_name,
                        'score': None,
                        'prediction': 'Error',
                        'confidence': 0,
                        'inference_time': None
                    })

            all_results[model_name] = results

            # Store latency statistics
            if inference_times:
                latency_stats[model_name] = {
                    'model_load_time': model_load_time,
                    'avg_inference_time': sum(inference_times) / len(inference_times),
                    'min_inference_time': min(inference_times),
                    'max_inference_time': max(inference_times),
                    'total_inference_time': sum(inference_times),
                    'num_images': len(inference_times)
                }

        except Exception as e:
            print(f"✗ Failed to load {model_name.upper()} model: {e}")
            all_results[model_name] = None

    # Final Summary
    print("\n" + "="*80)
    print("FINAL SUMMARY - ALL MODELS")
    print("="*80)

    for model_name, results in all_results.items():
        if results is None:
            print(f"\n{model_name.upper()}: Failed to load")
            continue

        print(f"\n{model_name.upper()}:")
        print("-"*80)
        print(f"{'Image':<50} {'Score':<10} {'Prediction':<15} {'Confidence':<12}")
        print("-"*80)

        for r in results:
            score_str = f"{r['score']:.4f}" if r['score'] is not None else "N/A"
            conf_str = f"{r['confidence']:.1f}%" if r['score'] is not None else "N/A"
            img_name = r['image'][:47] + "..." if len(r['image']) > 50 else r['image']
            print(f"{img_name:<50} {score_str:<10} {r['prediction']:<15} {conf_str:<12}")

        # Statistics
        valid_predictions = [r for r in results if r['score'] is not None]
        if valid_predictions:
            avg_score = sum(r['score'] for r in valid_predictions) / len(valid_predictions)
            ai_count = sum(1 for r in valid_predictions if r['score'] > 0.5)
            real_count = len(valid_predictions) - ai_count
            avg_confidence = sum(r['confidence'] for r in valid_predictions) / len(valid_predictions)

            print("-"*80)
            print(f"Average Score: {avg_score:.4f} | AI: {ai_count} | Real: {real_count} | Avg Confidence: {avg_confidence:.1f}%")

    # Latency Summary
    print("\n" + "="*80)
    print("LATENCY PERFORMANCE COMPARISON")
    print("="*80)

    if latency_stats:
        print(f"\n{'Model':<15} {'Load Time':<12} {'Avg Inference':<15} {'Min':<10} {'Max':<10} {'Total':<12}")
        print("-"*80)

        for model_name, stats in latency_stats.items():
            print(f"{model_name.upper():<15} "
                  f"{stats['model_load_time']:<12.3f} "
                  f"{stats['avg_inference_time']:<15.3f} "
                  f"{stats['min_inference_time']:<10.3f} "
                  f"{stats['max_inference_time']:<10.3f} "
                  f"{stats['total_inference_time']:<12.3f}")

        print("\n" + "-"*80)
        print("Timing units: seconds (s)")
        print("Load Time: Time to load model and classifier")
        print("Avg Inference: Average time per image inference")
        print("Min/Max: Fastest/slowest single image inference")
        print("Total: Total time for all images")

        # Find fastest model
        fastest_model = min(latency_stats.items(), key=lambda x: x[1]['avg_inference_time'])
        slowest_model = max(latency_stats.items(), key=lambda x: x[1]['avg_inference_time'])

        print("\n" + "-"*80)
        print(f"⚡ Fastest Model: {fastest_model[0].upper()} ({fastest_model[1]['avg_inference_time']:.3f}s per image)")
        print(f"🐌 Slowest Model: {slowest_model[0].upper()} ({slowest_model[1]['avg_inference_time']:.3f}s per image)")

        speedup = slowest_model[1]['avg_inference_time'] / fastest_model[1]['avg_inference_time']
        print(f"📊 Speed Difference: {speedup:.2f}x faster")

    print("\n" + "="*80)