import csv
from typing import List

import cv2
import matplotlib
import numpy as np
import torch
from PIL import Image
from torchvision.transforms import InterpolationMode
from torchvision.transforms.functional import resize

def numpy_to_pil(images: np.ndarray) -> List[Image.Image]:
    r"""
    Convert a numpy image or a batch of images to a PIL image.

    Args:
        images (`np.ndarray`):
            The image array to convert to PIL format.

    Returns:
        `List[PIL.Image.Image]`:
            A list of PIL images.
    """
    if images.ndim == 3:
        images = images[None, ...]
    images = (images * 255).round().astype("uint8")
    if images.shape[-1] == 1:
        # special case for grayscale (single channel) images
        pil_images = [Image.fromarray(image.squeeze(), mode="L") for image in images]
    else:
        pil_images = [Image.fromarray(image) for image in images]
    return pil_images
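
# Illustrative sketch (not part of the original pipeline): `numpy_to_pil`
# expects float inputs in the 0-1 range, since it multiplies by 255.
def _demo_numpy_to_pil():
    batch = np.random.rand(2, 64, 64, 3)  # two RGB images, values in [0, 1]
    pil_images = numpy_to_pil(batch)
    print(len(pil_images), pil_images[0].size)  # 2 (64, 64)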

def resize_output(image, target_size):
    """
    Resize an output image to the target size.

    Args:
        image: Image in PIL.Image, numpy.ndarray or torch.Tensor format
        target_size: tuple, target size (H, W)

    Returns:
        Resized image in the original format
    """
    if isinstance(image, list):
        return [resize_output(img, target_size) for img in image]
    if isinstance(image, Image.Image):
        # PIL expects (W, H), so reverse the (H, W) target size
        return image.resize(target_size[::-1], Image.BILINEAR)
    elif isinstance(image, np.ndarray):
        # Handle a batched numpy array with shape (B, H, W, 3)
        if image.ndim == 4:
            # cv2.resize also expects (W, H)
            resized = np.stack([cv2.resize(img, target_size[::-1]) for img in image])
            return resized
        else:
            return cv2.resize(image, target_size[::-1])
    elif isinstance(image, torch.Tensor):
        # Handle a batched tensor with shape (B, 3, H, W)
        if image.dim() == 4:
            return torch.nn.functional.interpolate(
                image,
                size=target_size,
                mode='bilinear',
                align_corners=False,
            )
        else:
            return torch.nn.functional.interpolate(
                image.unsqueeze(0),
                size=target_size,
                mode='bilinear',
                align_corners=False,
            ).squeeze(0)
    else:
        raise ValueError(f"Unsupported image format: {type(image)}")

# `resize_image` is identical to `resize_output`; expose it as an alias
# so both names keep working.
resize_image = resize_output
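
# Illustrative sketch: the same (H, W) target works across all supported formats.
def _demo_resize_image():
    target = (128, 256)  # (H, W)
    pil_out = resize_image(Image.new("RGB", (64, 64)), target)
    np_out = resize_image(np.zeros((64, 64, 3), dtype=np.uint8), target)
    pt_out = resize_image(torch.zeros(1, 3, 64, 64), target)
    # (256, 128) (128, 256, 3) torch.Size([1, 3, 128, 256])
    print(pil_out.size, np_out.shape, pt_out.shape)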

def resize_image_first(image_tensor, process_res=None):
    """
    Optionally downscale a (B, C, H, W) tensor so its longer edge is at most
    `process_res`, then snap both sides to multiples of 16.
    """
    if process_res:
        max_edge = max(image_tensor.shape[2], image_tensor.shape[3])
        if max_edge > process_res:
            scale = process_res / max_edge
            new_height = int(image_tensor.shape[2] * scale)
            new_width = int(image_tensor.shape[3] * scale)
            image_tensor = resize_image(image_tensor, (new_height, new_width))
    image_tensor = resize_to_multiple_of_16(image_tensor)
    return image_tensor
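
# Illustrative sketch: a 1x3x720x1280 frame with process_res=512 is first
# scaled so its longer edge is 512, then snapped to multiples of 16.
def _demo_resize_image_first():
    frame = torch.zeros(1, 3, 720, 1280)
    out = resize_image_first(frame, process_res=512)
    print(out.shape)  # torch.Size([1, 3, 288, 512])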

def smooth_image(image, method='gaussian', kernel_size=31, sigma=15.0, bilateral_d=9, bilateral_color=75, bilateral_space=75):
    """
    Apply one of several smoothing methods to remove grid artifacts from an image.

    Args:
        image: Image in PIL.Image, numpy.ndarray or torch.Tensor format
        method: smoothing method, one of 'gaussian' (Gaussian blur),
            'bilateral' (bilateral filter), 'median' (median filter),
            'guided' (guided filter), or 'strong' (aggressive smoothing
            combining several filters)
        kernel_size: kernel size for the Gaussian and median filters,
            default 31; must be odd
        sigma: standard deviation of the Gaussian filter, default 15.0
        bilateral_d: diameter of the bilateral filter, default 9
        bilateral_color: color-space sigma of the bilateral filter, default 75
        bilateral_space: coordinate-space sigma of the bilateral filter, default 75

    Returns:
        The smoothed image, in the same format as the input
    """
    if isinstance(image, list):
        return [smooth_image(img, method, kernel_size, sigma, bilateral_d, bilateral_color, bilateral_space) for img in image]

    # Make sure kernel_size is odd
    if kernel_size % 2 == 0:
        kernel_size += 1

    # Convert the input to a numpy array for processing
    is_pil = isinstance(image, Image.Image)
    is_tensor = isinstance(image, torch.Tensor)
    if is_pil:
        img_array = np.array(image)
    elif is_tensor:
        device = image.device
        if image.dim() == 4:  # (B, C, H, W)
            img_array = image.permute(0, 2, 3, 1).cpu().numpy()  # (B, H, W, C)
        else:  # (C, H, W)
            img_array = image.permute(1, 2, 0).cpu().numpy()  # (H, W, C)
    else:
        img_array = image

    # Remember the original dtype so the result can be converted back
    original_dtype = img_array.dtype

    def to_uint8(img):
        # Convert to uint8, scaling 0-1 float images up to 0-255
        if img.dtype == np.uint8:
            return img
        if img.max() <= 1.0:
            return (img * 255).astype(np.uint8)
        return img.astype(np.uint8)

    def restore_dtype(smoothed):
        # Convert a uint8 result back to the original dtype and value range
        if original_dtype == np.uint8:
            return smoothed
        if original_dtype in (np.float32, np.float64) and img_array.max() <= 1.0:
            return smoothed.astype(original_dtype) / 255.0
        return smoothed.astype(original_dtype)

    # Apply the selected smoothing method
    if method == 'gaussian':
        # Plain Gaussian blur, suitable for light smoothing
        if img_array.ndim == 4:
            smoothed = np.stack([cv2.GaussianBlur(img, (kernel_size, kernel_size), sigma) for img in img_array])
        else:
            smoothed = cv2.GaussianBlur(img_array, (kernel_size, kernel_size), sigma)
    elif method == 'bilateral':
        # Bilateral filter: smooths flat regions while preserving edges;
        # applied on uint8 data, then converted back
        if img_array.ndim == 4:
            smoothed = np.stack([cv2.bilateralFilter(to_uint8(img), bilateral_d, bilateral_color, bilateral_space) for img in img_array])
        else:
            smoothed = cv2.bilateralFilter(to_uint8(img_array), bilateral_d, bilateral_color, bilateral_space)
        smoothed = restore_dtype(smoothed)
    elif method == 'median':
        # Median filter: very effective against salt-and-pepper noise and
        # small grid cells; cv2.medianBlur requires uint8 input here
        if img_array.ndim == 4:
            smoothed = np.stack([cv2.medianBlur(to_uint8(img), kernel_size) for img in img_array])
        else:
            smoothed = cv2.medianBlur(to_uint8(img_array), kernel_size)
        smoothed = restore_dtype(smoothed)
    elif method == 'guided':
        # Guided filter: smooths regions while preserving edges
        # (needs opencv-contrib-python for cv2.ximgproc)
        if img_array.ndim == 4:
            smoothed = np.stack([cv2.ximgproc.guidedFilter(
                guide=img, src=img, radius=kernel_size // 2, eps=1e-6) for img in img_array])
        else:
            smoothed = cv2.ximgproc.guidedFilter(
                guide=img_array, src=img_array, radius=kernel_size // 2, eps=1e-6)
    elif method == 'strong':
        # Aggressive smoothing: median filter first to remove sharp noise,
        # then bilateral filtering to preserve edges, then a final Gaussian blur
        if img_array.ndim == 4:
            temp = np.stack([cv2.medianBlur(to_uint8(img), min(15, kernel_size)) for img in img_array])
            temp = np.stack([cv2.bilateralFilter(img, bilateral_d, bilateral_color, bilateral_space) for img in temp])
            smoothed = np.stack([cv2.GaussianBlur(img, (kernel_size, kernel_size), sigma) for img in temp])
        else:
            temp = cv2.medianBlur(to_uint8(img_array), min(15, kernel_size))
            temp = cv2.bilateralFilter(temp, bilateral_d, bilateral_color, bilateral_space)
            smoothed = cv2.GaussianBlur(temp, (kernel_size, kernel_size), sigma)
        smoothed = restore_dtype(smoothed)
    else:
        raise ValueError(f"Unsupported smoothing method: {method}; choose 'gaussian', 'bilateral', 'median', 'guided' or 'strong'")

    # Convert the result back to the original input format
    if is_pil:
        # Float results in the 0-1 range must be scaled back to 0-255 first
        if smoothed.dtype in (np.float32, np.float64) and smoothed.max() <= 1.0:
            smoothed = (smoothed * 255).astype(np.uint8)
        return Image.fromarray(smoothed.astype(np.uint8))
    elif is_tensor:
        if image.dim() == 4:
            return torch.from_numpy(smoothed).permute(0, 3, 1, 2).to(device)
        else:
            return torch.from_numpy(smoothed).permute(2, 0, 1).to(device)
    else:
        return smoothed
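
# Illustrative sketch: the 'strong' method chains median, bilateral and
# Gaussian filters; the result keeps the input's format and dtype.
def _demo_smooth_image():
    noisy = Image.fromarray(np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8))
    out = smooth_image(noisy, method='strong')
    print(out.size, out.mode)  # (64, 64) RGB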

def resize_to_multiple_of_16(image_tensor):
    """
    Resize an image tensor so both sides are multiples of 16, scaling by the
    shorter side while approximately maintaining the aspect ratio.

    Args:
        image_tensor: Input tensor of shape (B, C, H, W)

    Returns:
        Resized tensor whose height and width are multiples of 16
    """
    # Scale so the shorter side lands on its closest (floor) multiple of 16
    h, w = image_tensor.shape[2], image_tensor.shape[3]
    min_side = min(h, w)
    scale = (min_side // 16) * 16 / min_side
    # Calculate new height and width
    new_h = int(h * scale)
    new_w = int(w * scale)
    # Ensure both height and width are multiples of 16
    new_h = (new_h // 16) * 16
    new_w = (new_w // 16) * 16
    # Resize the image
    resized_tensor = torch.nn.functional.interpolate(
        image_tensor,
        size=(new_h, new_w),
        mode='bilinear',
        align_corners=False
    )
    return resized_tensor
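
# Illustrative sketch: a 1x3x250x500 tensor scales by 240/250, then both
# sides are floored to multiples of 16.
def _demo_resize_to_multiple_of_16():
    out = resize_to_multiple_of_16(torch.zeros(1, 3, 250, 500))
    print(out.shape)  # torch.Size([1, 3, 240, 480])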

def load_color_list(csv_path):
    """Load (R, G, B) tuples from the last three columns of a CSV file,
    skipping the header row and prepending black as the background color."""
    color_list = []
    with open(csv_path, newline='') as file:
        reader = csv.reader(file)
        next(reader)  # skip the header row
        for row in reader:
            last_three = tuple(map(int, row[-3:]))
            color_list.append(last_three)
    color_list = [(0, 0, 0)] + color_list
    return color_list
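
# Illustrative sketch with a hypothetical palette CSV (the filename and column
# layout are assumptions, not part of the original code): each row's last
# three columns are read as (R, G, B), and black is prepended at index 0.
def _demo_load_color_list(csv_path="palette.csv"):
    # Assumed layout:  name,r,g,b
    #                  wall,120,120,120
    colors = load_color_list(csv_path)
    print(colors[0])  # (0, 0, 0) -- the prepended background color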

def conver_rgb_to_semantic_map(image: Image.Image, color_list: List):
    # Convert PIL Image to numpy array
    image_array = np.array(image)
    # Initialize an empty array for the indexed image
    indexed_image = np.zeros((image_array.shape[0], image_array.shape[1]), dtype=int)
    # Loop through each pixel in the image
    for i in range(image_array.shape[0]):
        for j in range(image_array.shape[1]):
            # Get the color of the current pixel
            pixel_color = tuple(image_array[i, j][:3])  # Exclude the alpha channel if present
            # Find the closest color in the color list by Euclidean distance
            distances = np.sqrt(np.sum((np.array(color_list) - np.array(pixel_color)) ** 2, axis=1))
            closest_color_index = np.argmin(distances)
            # Set the index in the indexed image
            indexed_image[i, j] = closest_color_index
    # Shift indices down by one so the black background prepended by
    # load_color_list maps to -1
    indexed_image = indexed_image - 1
    return indexed_image
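
# The per-pixel Python loop above costs O(H * W * K) interpreter iterations
# for K palette colors. A minimal vectorized sketch of the same nearest-color
# lookup (an alternative, not the original implementation):
def _semantic_map_vectorized(image: Image.Image, color_list: List):
    pixels = np.array(image)[:, :, :3].astype(np.int64)  # (H, W, 3), alpha dropped
    palette = np.array(color_list, dtype=np.int64)       # (K, 3)
    # Squared Euclidean distance from every pixel to every palette color;
    # argmin over squared distances equals argmin over distances
    dists = ((pixels[:, :, None, :] - palette[None, None, :, :]) ** 2).sum(-1)
    return dists.argmin(axis=-1) - 1  # same -1 shift as above: background -> -1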

def concatenate_images(*image_lists):
    """Concatenate one or more lists of PIL images into a grid, one row per list."""
    # Ensure at least one image list is provided
    if not image_lists or not image_lists[0]:
        raise ValueError("At least one non-empty image list must be provided")
    # Determine the maximum width of any single row and the total height
    max_width = 0
    total_height = 0
    row_widths = []
    row_heights = []
    # Compute dimensions for each row
    for image_list in image_lists:
        if image_list:  # Ensure the list is not empty
            width = sum(img.width for img in image_list)
            height = max(img.height for img in image_list)
        else:
            width = height = 0  # empty rows take up no space
        max_width = max(max_width, width)
        total_height += height
        row_widths.append(width)
        row_heights.append(height)
    # Create a new image to concatenate everything into
    new_image = Image.new('RGB', (max_width, total_height))
    # Paste each row of images
    y_offset = 0
    for i, image_list in enumerate(image_lists):
        x_offset = 0
        for img in image_list:
            new_image.paste(img, (x_offset, y_offset))
            x_offset += img.width
        y_offset += row_heights[i]  # Move the offset down to the next row
    return new_image
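
# Illustrative sketch: each positional list becomes one row of the grid.
def _demo_concatenate_images():
    row1 = [Image.new("RGB", (64, 64), "red"), Image.new("RGB", (64, 64), "blue")]
    row2 = [Image.new("RGB", (96, 48), "green")]
    grid = concatenate_images(row1, row2)
    print(grid.size)  # (128, 112): widest row x summed row heights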

def colorize_depth_map(depth, mask=None, reverse_color=False):
    cm = matplotlib.colormaps["Spectral"]
    # Normalize to [0, 1]
    depth = (depth - depth.min()) / (depth.max() - depth.min())
    # Colorize
    if reverse_color:
        # Invert the depth values before applying the colormap
        img_colored_np = cm(1 - depth, bytes=False)[:, :, 0:3]
    else:
        img_colored_np = cm(depth, bytes=False)[:, :, 0:3]  # (H, W, 3)
    depth_colored = (img_colored_np * 255).astype(np.uint8)
    if mask is not None:
        # `mask` is expected to be a boolean torch tensor of shape (H, W)
        masked_image = np.zeros_like(depth_colored)
        masked_image[mask.numpy()] = depth_colored[mask.numpy()]
        depth_colored_img = Image.fromarray(masked_image)
    else:
        depth_colored_img = Image.fromarray(depth_colored)
    return depth_colored_img
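
# Illustrative sketch: depth is min-max normalized, then mapped through the
# "Spectral" colormap; an optional boolean torch mask zeroes out pixels.
def _demo_colorize_depth_map():
    depth = np.linspace(0.0, 1.0, 64 * 64).reshape(64, 64)
    colored = colorize_depth_map(depth, mask=torch.ones(64, 64, dtype=torch.bool))
    print(colored.size)  # (64, 64)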

def resize_max_res(
    img: torch.Tensor,
    max_edge_resolution: int,
    resample_method: InterpolationMode = InterpolationMode.BILINEAR,
) -> torch.Tensor:
    """
    Resize an image to limit the maximum edge length while keeping the aspect ratio.

    Args:
        img (`torch.Tensor`):
            Image tensor to be resized. Expected shape: [B, C, H, W]
        max_edge_resolution (`int`):
            Maximum edge length (pixels).
        resample_method (`InterpolationMode`):
            Resampling method used to resize images.

    Returns:
        `torch.Tensor`: Resized image.
    """
    assert 4 == img.dim(), f"Invalid input shape {img.shape}"

    original_height, original_width = img.shape[-2:]
    downscale_factor = min(
        max_edge_resolution / original_width, max_edge_resolution / original_height
    )

    new_width = int(original_width * downscale_factor)
    new_height = int(original_height * downscale_factor)

    resized_img = resize(img, (new_height, new_width), resample_method, antialias=True)
    return resized_img
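
# Illustrative sketch: the longer edge is capped at max_edge_resolution while
# the aspect ratio is preserved.
def _demo_resize_max_res():
    out = resize_max_res(torch.rand(1, 3, 480, 640), max_edge_resolution=320)
    print(out.shape)  # torch.Size([1, 3, 240, 320])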

def get_tv_resample_method(method_str: str) -> InterpolationMode:
    resample_method_dict = {
        "bilinear": InterpolationMode.BILINEAR,
        "bicubic": InterpolationMode.BICUBIC,
        "nearest": InterpolationMode.NEAREST_EXACT,
        "nearest-exact": InterpolationMode.NEAREST_EXACT,
    }
    resample_method = resample_method_dict.get(method_str, None)
    if resample_method is None:
        # Report the unknown name, not the (None) lookup result
        raise ValueError(f"Unknown resampling method: {method_str}")
    else:
        return resample_method
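
# Illustrative sketch: note that "nearest" deliberately maps to NEAREST_EXACT,
# which matches PIL's nearest-neighbor behavior more closely than
# torchvision's legacy NEAREST mode.
def _demo_get_tv_resample_method():
    mode = get_tv_resample_method("bilinear")
    print(mode)  # InterpolationMode.BILINEAR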