GAIA_benchmark_agent

Sleeping

GAIA_benchmark_agent / src /tools /youtube_tools.py

gabriel-melki

Modify package structure

860424e 3 months ago

3.69 kB


	import os
	import subprocess
	from yt_dlp import YoutubeDL

	from smolagents.tools import tool

	# Use FFmpeg to extract frames from the video
	def extract_frames_with_ffmpeg(video_path: str, num_frames: int) -> list[str]:
	    """Extract evenly spaced still frames from a video using FFmpeg.

	    The video duration is read with ``ffprobe``. ``num_frames`` timestamps are
	    then taken at ``duration * i / (num_frames + 1)`` for ``i = 1..num_frames``,
	    so the sampled positions lie strictly inside the video. Each frame is
	    written as a JPEG into the current working directory under the name
	    ``<video basename>_frame_<NNN>.jpg``; an existing file with that name is
	    overwritten (``-y``).

	    Args:
	        video_path: Path to the video file to sample.
	        num_frames: Number of frames to extract. Values below 1 yield an
	            empty list.

	    Returns:
	        Paths of the frames that were written, in chronological order. An
	        empty list is returned when ffprobe/ffmpeg fails or the metadata
	        cannot be parsed; the error is printed rather than raised.

	    Raises:
	        FileNotFoundError: If ``video_path`` does not exist.
	    """
	    import json  # local import: json is not among the module-level imports

	    if not os.path.exists(video_path):
	        raise FileNotFoundError(f"Video file not found: {video_path}")

	    # Nothing to extract: skip launching ffprobe altogether.
	    if num_frames < 1:
	        return []

	    # Ask ffprobe for the container metadata as JSON to read the duration.
	    duration_cmd = [
	        'ffprobe', '-v', 'quiet', '-print_format', 'json',
	        '-show_format', video_path,
	    ]
	    frame_prefix = os.path.splitext(os.path.basename(video_path))[0]

	    try:
	        result = subprocess.run(duration_cmd, capture_output=True, text=True, check=True)
	        duration = float(json.loads(result.stdout)['format']['duration'])

	        extracted_files = []
	        for index in range(1, num_frames + 1):
	            time_pos = duration * index / (num_frames + 1)
	            output_filename = f"{frame_prefix}_frame_{index:03d}.jpg"

	            # '-ss' is given BEFORE '-i' so ffmpeg seeks in the input instead
	            # of decoding and discarding everything up to the timestamp,
	            # which would be repeated from the start for every frame.
	            ffmpeg_cmd = [
	                'ffmpeg', '-ss', str(time_pos), '-i', video_path,
	                '-vframes', '1', '-q:v', '2', '-y', output_filename,
	            ]
	            subprocess.run(ffmpeg_cmd, capture_output=True, check=True)

	            # Only report frames that really exist on disk, in case ffmpeg
	            # exited successfully without producing an image.
	            if os.path.exists(output_filename):
	                extracted_files.append(output_filename)

	        return extracted_files

	    except subprocess.CalledProcessError as e:
	        print(f"Error running FFmpeg: {e}")
	        return []
	    except Exception as e:
	        # Deliberate best-effort boundary: missing binaries, malformed JSON or
	        # a missing duration field all degrade to "no frames".
	        print(f"Error: {e}")
	        return []


	@tool
	def download_youtube_url_audio(url: str) -> str:
	    """
	    Download a YouTube video using the url, extract the audio and return the path to the downloaded file.

	    Args:
	        url (str): The URL of the YouTube video to download.

	    Returns:
	        str: The path to the downloaded audio file.
	    """
	    # NOTE(review): the docstring above is kept verbatim on purpose; the
	    # smolagents @tool decorator presumably builds the tool description from
	    # it, which would make it runtime text. Confirm against smolagents docs.

	    # Post-processing step: convert the downloaded audio stream to MP3.
	    mp3_postprocessor = {
	        'key': 'FFmpegExtractAudio',
	        'preferredcodec': 'mp3',
	        'preferredquality': '192',
	    }
	    options = {
	        'format': 'bestaudio/best',
	        'postprocessors': [mp3_postprocessor],
	        # Keep yt-dlp silent so tool output is not polluted.
	        'quiet': True,
	        'no_verbose_header': True,
	        'no_warnings': True,
	    }

	    with YoutubeDL(options) as downloader:
	        info = downloader.extract_info(url)

	    # The path is read from the first entry of 'requested_downloads'.
	    first_download = info['requested_downloads'][0]
	    return first_download['filepath']


	@tool
	def download_youtube_url_images(url: str, num_images: int = 3) -> str:
	    """
	    Download a YouTube video using the url, extract the frames and return the path to the downloaded files.

	    Args:
	        url (str): The URL of the YouTube video to download.
	        num_images (int): The number of images to download. The images are extracted from the video at regular intervals.

	    Returns:
	        str: The different paths to the downloaded frames, separated by newlines.
	    """
	    # NOTE(review): the docstring above is kept verbatim on purpose; the
	    # smolagents @tool decorator presumably builds the tool description from
	    # it, which would make it runtime text. Confirm against smolagents docs.

	    # Step 1: fetch the video itself.
	    options = {
	        'format': 'best[height<=720]',  # Download video in reasonable quality
	        'outtmpl': '%(title)s.%(ext)s',  # Save with title as filename
	        'quiet': True,
	        'no_verbose_header': True,
	        'no_warnings': True,
	    }

	    with YoutubeDL(options) as downloader:
	        info = downloader.extract_info(url, download=True)
	        # Resolve the output template to the filename used for this video.
	        video_filepath = downloader.prepare_filename(info)

	    # Step 2: sample frames, but only if the download actually produced a file.
	    if not os.path.exists(video_filepath):
	        return ""

	    frame_paths = extract_frames_with_ffmpeg(video_filepath, num_images)
	    return "\n".join(frame_paths)