detectNanoBananaImage2

Running on Zero

File size: 11,130 Bytes

2cda712

# Importing Libraries
import numpy as np
import pandas as pd
import math
import scipy
from PIL import Image
import random

import torch
import torchvision
from torchvision import transforms

import os,sys,warnings
warnings.filterwarnings("ignore")
from tqdm import tqdm
import pathlib
import functions.utils as utils
import defaults


# Saving Images Paths for UnivFD dataset
def save_all_images_path_UnivFD(
	imgs_dir:str,
	status:str,
	save_path:str,
	replace:bool
):
	"""
	Scans the UnivFD image folders and saves every image path, grouped by source and label.

	For each source in `defaults.All_UnivFD_Sources[status]` the directory
	`<imgs_dir>/<status>/<source>` is listed. Two layouts are handled:
		* flat: the source directory itself contains "0_real" and "1_fake";
		* nested: every entry of the source directory is treated as a category
		  directory whose entries are the label directories.
	The result is a dictionary `{source: {"fake": [...], "real": [...]}}` whose
	paths are relative to `defaults.main_dataset_dir`; it is written with `np.save`.

	Args:
		imgs_dir (str): Root directory of the UnivFD images.
		status (str): Key into `defaults.All_UnivFD_Sources` and name of the split sub-directory.
		save_path (str): Output file passed to `np.save`.
		replace (bool): Rebuild the file even if `save_path` already exists.

	Raises:
		AssertionError: If a label directory is neither "0_real" nor "1_fake".

	NOTE: `np.save` appends ".npy" when `save_path` lacks that extension, in which
	case the existence check below never sees the file that was written.
	"""
	# An existing file is kept untouched unless replacement was requested
	if os.path.exists(save_path) and not replace:
		return

	# Label directory name -> key used in the saved dictionary
	label_names = {"0_real": "real", "1_fake": "fake"}

	# Dataset
	dataset_images_paths = {}

	# For each UnivFD image-source for the given status
	for source in tqdm(defaults.All_UnivFD_Sources[status]):
		source_paths = {"fake": [], "real": []}
		dataset_images_paths[source] = source_paths

		# Images Source Directory
		source_images_dir = os.path.join(imgs_dir, status, source)

		for label_parent_dir, label_dirs in _univfd_label_parents(source_images_dir):
			for label in label_dirs:
				# Explicit raise instead of `assert False`: asserts are removed under
				# `python -O`, which would let an unknown directory be filed under
				# whatever label was seen last.
				if label not in label_names:
					raise AssertionError("Unknown Label encountered.")

				label_dir = os.path.join(label_parent_dir, label)
				source_paths[label_names[label]].extend(
					os.path.relpath(os.path.join(label_dir, fname), defaults.main_dataset_dir)
					for fname in os.listdir(label_dir)
				)

	# Saving (create the output directory first so a long scan is not lost to a missing folder)
	save_dir = os.path.dirname(save_path)
	if save_dir:
		os.makedirs(save_dir, exist_ok=True)
	np.save(save_path, dataset_images_paths)


def _univfd_label_parents(source_images_dir):
	"""Yields (parent_dir, entries) pairs where `entries` are the label directories of one UnivFD source."""
	source_entries = os.listdir(source_images_dir)

	if "0_real" in source_entries and "1_fake" in source_entries:
		# Flat layout: labels sit directly inside the source directory
		yield source_images_dir, source_entries
	else:
		# Nested layout: one category directory per entry, labels one level below
		for category in source_entries:
			category_dir = os.path.join(source_images_dir, category)
			yield category_dir, os.listdir(category_dir)


# Saving Images Paths for GenImage dataset
def save_all_images_path_GenImage(
	imgs_dir:str,
	status:str,
	save_path:str,
	replace:bool
):
	"""
	Scans the GenImage image folders and saves every image path, grouped by source and label.

	For each source in `defaults.All_GenImage_Sources[status]` the directory
	`<imgs_dir>/<source>/<status>` is listed. "nature" is stored as "real" and "ai"
	as "fake". For status "train" and source "sdv4" the directories
	"ai_reconstructed_inpainting" and "nature_reconstructed_inpainting" are also
	accepted and stored as "fake". The result is a dictionary
	`{source: {"fake": [...], "real": [...]}}` whose paths are relative to
	`defaults.main_dataset_dir`; it is written with `np.save`.

	Args:
		imgs_dir (str): Root directory of the GenImage images.
		status (str): Key into `defaults.All_GenImage_Sources` and name of the split sub-directory.
		save_path (str): Output file passed to `np.save`.
		replace (bool): Rebuild the file even if `save_path` already exists.

	Raises:
		AssertionError: If a label directory is not one of the accepted names.

	NOTE: `np.save` appends ".npy" when `save_path` lacks that extension, in which
	case the existence check below never sees the file that was written.
	"""
	# An existing file is kept untouched unless replacement was requested
	if os.path.exists(save_path) and not replace:
		return

	reconstructed_labels = ("ai_reconstructed_inpainting", "nature_reconstructed_inpainting")

	# Dataset
	dataset_images_paths = {}

	# For each GenImage image-source for the given status
	for source in tqdm(defaults.All_GenImage_Sources[status]):
		source_paths = {"fake": [], "real": []}
		dataset_images_paths[source] = source_paths

		# Images Source Directory
		source_images_dir = os.path.join(imgs_dir, source, status)

		# For each label
		for label in os.listdir(source_images_dir):
			if label == "nature":
				img_label = "real"
			elif label == "ai":
				img_label = "fake"
			elif label in reconstructed_labels and status == "train" and source == "sdv4":
				print ("Encountered label:{} for status:{} and source:{}".format(label, status, source))
				img_label = "fake"
			else:
				# Explicit raise instead of `assert False`: asserts are removed under
				# `python -O`, which would let an unknown directory be filed under
				# whatever label was seen last.
				raise AssertionError("Unknown Label encountered.")

			label_dir = os.path.join(source_images_dir, label)
			source_paths[img_label].extend(
				os.path.relpath(os.path.join(label_dir, fname), defaults.main_dataset_dir)
				for fname in os.listdir(label_dir)
			)

	# Saving (create the output directory first so a long scan is not lost to a missing folder)
	save_dir = os.path.dirname(save_path)
	if save_dir:
		os.makedirs(save_dir, exist_ok=True)
	np.save(save_path, dataset_images_paths)


# Saving Images Paths for DRCT dataset
def save_all_images_path_DRCT(
	imgs_dir:str,
	status:str,
	save_path:str,
	replace:bool
):
	"""
	Scans the DRCT image folders and saves every image path, grouped by source and label.

	Real images are read from `<imgs_dir>/real_images/<status>2017` and are the same
	for every source. Fake images of a source are read from
	`<imgs_dir>/fake_images/<source>/<status>2017`; when `status` is "train" the
	matching "fake_reconstructed_images" and "real_reconstructed_images" folders are
	added to the fake list as well. The result is a dictionary
	`{source: {"fake": [...], "real": [...]}}` whose paths are relative to
	`defaults.main_dataset_dir`; it is written with `np.save`.

	Args:
		imgs_dir (str): Root directory of the DRCT images.
		status (str): Key into `defaults.All_DRCT_Sources`; also forms the "<status>2017" folder name.
		save_path (str): Output file passed to `np.save`.
		replace (bool): Rebuild the file even if `save_path` already exists.

	NOTE: `np.save` appends ".npy" when `save_path` lacks that extension, in which
	case the existence check below never sees the file that was written.
	"""
	# An existing file is kept untouched unless replacement was requested
	if os.path.exists(save_path) and not replace:
		return

	split_dir = "{}2017".format(status)
	real_images_dir = os.path.join(imgs_dir, "real_images", split_dir)

	# Training Dataset: Fake Images, Fake Reconstructed Images and Real Reconstructed Images
	# Validation Dataset: Fake Images only
	if status == "train":
		fake_subsets = ["fake_images", "fake_reconstructed_images", "real_reconstructed_images"]
	else:
		fake_subsets = ["fake_images"]

	# Dataset
	dataset_images_paths = {}

	# The real-image folder does not depend on the source, so it is listed only once.
	# It is filled on the first source so that no directory is touched when there are no sources.
	real_images_paths = None

	# For each image-source
	for source in tqdm(defaults.All_DRCT_Sources[status]):
		# Real Images Paths
		if real_images_paths is None:
			real_images_paths = _drct_relative_image_paths(real_images_dir)

		# Fake Images Paths
		fake_images_paths = []
		for subset in fake_subsets:
			fake_images_dir = os.path.join(imgs_dir, subset, source, split_dir)
			fake_images_paths.extend(_drct_relative_image_paths(fake_images_dir))

		# Every source gets its own copy of the real list so the saved lists stay independent
		dataset_images_paths[source] = {
			"fake": fake_images_paths,
			"real": list(real_images_paths),
		}

	# Saving (create the output directory first so a long scan is not lost to a missing folder)
	save_dir = os.path.dirname(save_path)
	if save_dir:
		os.makedirs(save_dir, exist_ok=True)
	np.save(save_path, dataset_images_paths)


def _drct_relative_image_paths(images_dir):
	"""Lists `images_dir` and returns each entry's path relative to `defaults.main_dataset_dir`."""
	return [
		os.path.relpath(os.path.join(images_dir, fname), defaults.main_dataset_dir)
		for fname in os.listdir(images_dir)
	]


# Saving all paths of image dataset
def save_all_images_paths(
	imgs_dir:str,
	dataset_type:str,
	status:str,
	save_path:str,
	replace:bool
):
	"""
	Validates the arguments and forwards them to the saver of the requested dataset.
	Args:
		imgs_dir (str): Directory of images. Must exist.
		dataset_type (str): Type of Dataset. Options: ["UnivFD", "GenImage", "DRCT"]
		status (str): ["train", "val"]
		save_path (str): Path to save .npy file.
		replace (bool): Replace File if True.
	"""
	# Assertions
	assert dataset_type in ("UnivFD", "GenImage", "DRCT"), "Invalid dataset"
	assert os.path.exists(imgs_dir), f"Image directory {imgs_dir} is not found."
	assert status in ("train", "val"), "Invalid status"

	# Only the selected saver is looked up; every saver takes the same keyword arguments
	if dataset_type == "UnivFD":
		saver = save_all_images_path_UnivFD
	elif dataset_type == "GenImage":
		saver = save_all_images_path_GenImage
	else:
		saver = save_all_images_path_DRCT

	saver(
		imgs_dir=imgs_dir,
		status=status,
		save_path=save_path,
		replace=replace
	)


# Get Images Paths
def get_image_paths(
	dataset_type:str,
	status:str,
	image_sources:"list[str] | str",
	label:str,
):
	"""
	Get the stored image paths of the given sources and label.

	The paths come from the info file
	`<defaults.main_dataset_dir>/Info/<dataset_type>_<status>_image_Paths.npy`.
	If that file does not exist it is first created with `save_all_images_paths`
	from `<defaults.main_dataset_dir>/<dataset_type>/dataset`.

	Args:
		dataset_type (str): Type of Dataset. Options: ["UnivFD", "GenImage", "DRCT"]
		status (str): ["train", "val"]
		image_sources (list | str): Image-Sources to consider for dataset. A single
			source name is treated as a one-element list.
		label (str): ["real", "fake"]

	Returns:
		list: Paths as stored in the info file, sorted within each source and
		concatenated in the order of `image_sources`.

	Raises:
		AssertionError: If `dataset_type`, `status` or `label` is not an accepted value.
		KeyError: If a requested source is missing from the info file.
	"""
	# Assertions
	assert dataset_type in ["UnivFD", "GenImage", "DRCT"], "Invalid dataset"
	assert status in ["train", "val"], "Invalid status"
	assert label in ["real", "fake"], "Invalid label"

	# Iterating a bare string would look up every character as a source name
	if isinstance(image_sources, str):
		image_sources = [image_sources]

	# Loading Paths
	img_dir = os.path.join(defaults.main_dataset_dir, dataset_type, "dataset")
	info_path = os.path.join(defaults.main_dataset_dir, "Info", "{}_{}_image_Paths.npy".format(dataset_type, status))

	# Saving Info File
	if not os.path.exists(info_path):
		print ("Saving Info File")

		save_all_images_paths(
			imgs_dir=img_dir,
			dataset_type=dataset_type,
			status=status,
			save_path=info_path,
			replace=False
		)

	# Loading Path Info
	# The file holds a pickled dictionary inside a 0-d object array; `[()]` unwraps it.
	# NOTE(review): allow_pickle=True executes pickle data - only load info files written by this project.
	Path_Info = np.load(info_path, allow_pickle=True)[()]

	# Dataset
	dataset_images_paths = []
	# For each image-source
	for source in image_sources:
		dataset_images_paths.extend(sorted(Path_Info[source][label]))

	return dataset_images_paths



# Dataset Paths
def dataset_img_paths(
	dataset_type:str,
	status:str
):
	"""
	Returns real_image_paths and fake_image_paths based on arguments.

	The image sources come from the dataset's `utils.get_<dataset_type>_options()`
	call, which returns a (train sources, test sources) pair; the train sources are
	used when `status` is "train" and the test sources otherwise.

	Args:
		dataset_type (str): Type of Dataset. Options: ["UnivFD", "GenImage", "DRCT"]
		status (str): ["train", "val"]

	Returns:
		tuple: (real_images_paths, fake_images_paths), each as returned by `get_image_paths`.

	Raises:
		AssertionError: If `dataset_type` or `status` is not an accepted value.
	"""
	# Assertions
	assert dataset_type in ["UnivFD", "GenImage", "DRCT"], "Invalid dataset"
	assert status in ["train", "val"], "Invalid status"

	# Only the options getter differs between datasets
	if dataset_type == "DRCT":
		get_options = utils.get_DRCT_options
	elif dataset_type == "GenImage":
		get_options = utils.get_GenImage_options
	elif dataset_type == "UnivFD":
		get_options = utils.get_UnivFD_options
	else:
		# Explicit raise so the guard still holds when asserts are stripped by `python -O`
		raise AssertionError("Unknown dataset_type: {}".format(dataset_type))

	train_image_sources, test_image_sources = get_options()
	image_sources = train_image_sources if status == "train" else test_image_sources

	# Real paths are fetched first, then fake paths
	real_images_paths, fake_images_paths = (
		get_image_paths(
			dataset_type=dataset_type,
			status=status,
			image_sources=image_sources,
			label=label
		)
		for label in ("real", "fake")
	)

	return real_images_paths, fake_images_paths