import yaml import cv2 import numpy as np from pathlib import Path class ImageProcessor: def __init__(self, yaml_path): with open(yaml_path, 'r') as f: self.config = yaml.safe_load(f) self.images_info = [] self.reference_size = None self._load_images() def _load_images(self): for img_config in self.config['images']: img = cv2.imread(img_config['path']) if img is None: raise ValueError(f"Cannot load image: {img_config['path']}") info = { 'image': img, 'duration': float(img_config.get('duration', 1.0)), 'translation': img_config.get('translation', [0, 0]), 'scale': float(img_config.get('scale', 1.0)) } self.images_info.append(info) if self.reference_size is None: self.reference_size = (img.shape[1], img.shape[0]) def _translate_image(self, img, translation): """Perform only translation""" height, width = img.shape[:2] # Calculate translation amount (pixels) tx = int(width * translation[0] / 100) ty = int(height * translation[1] / 100) # Create translation matrix M = np.float32([[1, 0, tx], [0, 1, ty]]) # Apply translation while maintaining original dimensions translated = cv2.warpAffine(img, M, (width, height)) return translated def _crop_black_borders(self, img): """Crop out black borders from the image""" # Convert to grayscale gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # Threshold to identify non-black areas _, thresh = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY) # Find bounding box of non-black pixels coords = cv2.findNonZero(thresh) if coords is None: return img x, y, w, h = cv2.boundingRect(coords) # Crop the image to the bounding box return img[y:y+h, x:x+w] def _scale_image(self, img, scale, target_size): """Scale the image""" if scale <= 1: return cv2.resize(img, target_size) # First scale up height, width = img.shape[:2] scaled_width = int(width * scale) scaled_height = int(height * scale) scaled = cv2.resize(img, (scaled_width, scaled_height)) # Center-crop to target dimensions start_x = (scaled_width - target_size[0]) // 2 start_y = (scaled_height - target_size[1]) // 2 cropped = scaled[start_y:start_y+target_size[1], start_x:start_x+target_size[0]] return cropped def _transform_image(self, img, translation, scale): """Apply transformations in sequence: translation → cropping → scaling""" original_size = (img.shape[1], img.shape[0]) # 1. Translation translated = self._translate_image(img, translation) # 2. Black border cropping cropped = self._crop_black_borders(translated) # 3. Scale back to original dimensions transformed = self._scale_image(cropped, scale, original_size) return transformed def create_video(self, output_path, fps=25): fourcc = cv2.VideoWriter_fourcc(*'mp4v') out = cv2.VideoWriter(output_path, fourcc, fps, self.reference_size) try: for info in self.images_info: # Transform image transformed = self._transform_image( info['image'], info['translation'], info['scale'] ) # Resize to reference dimensions if needed if transformed.shape[:2] != (self.reference_size[1], self.reference_size[0]): transformed = cv2.resize(transformed, self.reference_size) # Write video frames n_frames = int(info['duration'] * fps) for _ in range(n_frames): out.write(transformed) finally: out.release() # Enhance video quality self._improve_video_quality(output_path) def _improve_video_quality(self, video_path): import subprocess temp_path = video_path + '.temp.mp4' cmd = [ 'ffmpeg', '-i', video_path, '-c:v', 'libx264', '-preset', 'slow', '-crf', '18', '-y', temp_path ] subprocess.run(cmd) import os os.replace(temp_path, video_path) def main(): processor = ImageProcessor('tools/i2v_config.yaml') processor.create_video('convertd_video.mp4', fps=25) if __name__ == '__main__': main()