GAIA_benchmark_agent / src /tools /youtube_tools.py
gabriel-melki
Modify package structure
860424e
import os
import subprocess
from yt_dlp import YoutubeDL
from smolagents.tools import tool
# Use FFmpeg to extract frames from the video
def extract_frames_with_ffmpeg(video_path: str, num_frames: int) -> list[str]:
    """Extract evenly spaced still frames from a video using FFmpeg.

    The duration is read with ``ffprobe``; frame ``i`` (1-based) is then taken
    at ``duration * i / (num_frames + 1)``, so the sampled instants lie strictly
    between the start and the end of the video. Each frame is written as a JPEG
    named ``<video basename>_frame_<NNN>.jpg`` (relative path, so it lands in
    the current working directory), overwriting any existing file of that name.

    Args:
        video_path: Path to an existing video file.
        num_frames: Number of frames to extract. Zero or a negative value
            returns an empty list without invoking ffprobe/ffmpeg.

    Returns:
        The output file names in chronological order, or an empty list when
        ffprobe/ffmpeg fails or the duration cannot be read (the error is
        printed, not raised).

    Raises:
        FileNotFoundError: If ``video_path`` does not exist.
    """
    import json

    if not os.path.exists(video_path):
        raise FileNotFoundError(f"Video file not found: {video_path}")

    # Nothing to extract: skip the ffprobe call entirely.
    if num_frames <= 0:
        return []

    # Get video duration using ffprobe
    duration_cmd = [
        'ffprobe', '-v', 'quiet', '-print_format', 'json',
        '-show_format', video_path
    ]
    try:
        result = subprocess.run(duration_cmd, capture_output=True, text=True, check=True)
        metadata = json.loads(result.stdout)
        duration = float(metadata['format']['duration'])

        # The output name prefix does not depend on the frame, compute it once.
        stem = os.path.splitext(os.path.basename(video_path))[0]

        extracted_files = []
        for index in range(1, num_frames + 1):
            time_pos = duration * index / (num_frames + 1)
            output_filename = f"{stem}_frame_{index:03d}.jpg"
            # '-ss' is given BEFORE '-i' so ffmpeg seeks in the input instead of
            # decoding everything from the start of the file up to time_pos.
            ffmpeg_cmd = [
                'ffmpeg', '-ss', str(time_pos), '-i', video_path,
                '-frames:v', '1', '-q:v', '2', '-y', output_filename
            ]
            subprocess.run(ffmpeg_cmd, capture_output=True, check=True)
            extracted_files.append(output_filename)
        return extracted_files
    except subprocess.CalledProcessError as e:
        print(f"Error running FFmpeg: {e}")
        return []
    except Exception as e:
        # Best-effort by design: missing binaries, malformed ffprobe output,
        # etc. are reported and turned into an empty result.
        print(f"Error: {e}")
        return []
@tool
def download_youtube_url_audio(url: str) -> str:
    """
    Download a YouTube video using the url, extract the audio and return the path to the downloaded file.
    Args:
        url (str): The URL of the YouTube video to download.
    Returns:
        str: The path to the downloaded audio file.
    """
    # NOTE: the docstring above is read by the @tool decorator, so its wording
    # is kept as-is; implementation notes live in comments instead.

    # Post-processor that asks FFmpeg to extract the audio track as MP3
    # with preferred quality '192'.
    mp3_postprocessor = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }
    downloader_options = {
        'format': 'bestaudio/best',
        'postprocessors': [mp3_postprocessor],
        # Keep yt-dlp silent.
        'quiet': True,
        'no_verbose_header': True,
        'no_warnings': True,
    }

    with YoutubeDL(downloader_options) as downloader:
        video_info = downloader.extract_info(url)

    # extract_info returns a metadata dict; the path is read from the first
    # entry of its 'requested_downloads' list.
    first_download = video_info['requested_downloads'][0]
    return first_download['filepath']
@tool
def download_youtube_url_images(url: str, num_images: int = 3) -> str:
    """
    Download a YouTube video using the url, extract the frames and return the path to the downloaded files.
    Args:
        url (str): The URL of the YouTube video to download.
        num_images (int): The number of images to download. The images are extracted from the video at regular intervals.
    Returns:
        str: The different paths to the downloaded frames, separated by newlines.
    """
    # NOTE: the docstring above is read by the @tool decorator, so its wording
    # is kept as-is; implementation notes live in comments instead.

    # Step 1: fetch the video itself.
    downloader_options = {
        'format': 'best[height<=720]',  # cap the height at 720 to keep the download reasonable
        'outtmpl': '%(title)s.%(ext)s',  # file is named after the video title
        'quiet': True,
        'no_verbose_header': True,
        'no_warnings': True,
    }
    with YoutubeDL(downloader_options) as downloader:
        video_info = downloader.extract_info(url, download=True)
        video_filepath = downloader.prepare_filename(video_info)

    # Step 2: without a file at the expected path there is nothing to sample.
    if not os.path.exists(video_filepath):
        return ""

    # Step 3: sample the frames and report their paths, one per line.
    frame_paths = extract_frames_with_ffmpeg(video_filepath, num_images)
    return "\n".join(frame_paths)