# gabriel-melki
# Modify package structure
# 860424e
import json
import csv
import openpyxl
import whisper
import os
import requests
from smolagents.tools import tool
# Base URL of the remote service; _download_file requests files that are
# missing locally from "<DEFAULT_API_URL>/files/<id>".
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
def _download_file(file_name: str, timeout: float = 30.0) -> None:
    """Download ``file_name`` from the remote service if it is not on disk yet.

    The remote resource is addressed by the stem of the local file name (its
    base name without directory and without the final extension) and is
    requested from ``{DEFAULT_API_URL}/files/<stem>``.

    Args:
        file_name: Local path to check and, if missing, to write the
            downloaded bytes to.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        None. If the server answers with a non-success status, nothing is
        written, so the caller sees a missing file instead of an error page
        stored under ``file_name``.

    Raises:
        requests.RequestException: If the request itself fails (connection
            error, timeout, ...).
    """
    if os.path.exists(file_name):
        return

    # Use only the base name's stem: splitting on "." and indexing [-2] broke
    # on names without an extension and leaked directory parts into the URL.
    remote_id = os.path.splitext(os.path.basename(file_name))[0]
    url = f"{DEFAULT_API_URL}/files/{remote_id}"

    response = requests.get(url, timeout=timeout)
    if not response.ok:
        # Do not persist an error body: it would be mistaken for the real file
        # and, because the path would then exist, never be downloaded again.
        return

    with open(file_name, "wb") as f:
        f.write(response.content)
@tool
def read_file_as_text(file_name: str) -> str:
    """
    Opens a file and returns its content as readable text.
    Supports 'txt', 'py', 'json', 'csv', 'xlsx', and 'mp3' (for mp3, it transcribes speech to text).
    The file extension is matched case-insensitively. Unsupported types and read errors are reported as a message in the returned text.
    Args:
        file_name (str): The path or name of the file.
    Returns:
        str: The content of the file as text, or transcribed speech if 'mp3'.
    """
    # Fetch the file first if it is not available locally. This stays outside
    # the try block so that download failures propagate exactly as before.
    _download_file(file_name)

    # Keep the raw extension for the "not supported" message, but dispatch on
    # a lower-cased copy so "DATA.CSV" is handled like "data.csv".
    extension = file_name.split(".")[-1]
    file_type = extension.lower()

    try:
        if file_type in {"txt", "py"}:
            with open(file_name, "r", encoding="utf-8") as f:
                return f.read()

        if file_type == "json":
            # Round-trip through the parser to validate and pretty-print.
            with open(file_name, "r", encoding="utf-8") as f:
                return json.dumps(json.load(f), indent=2)

        if file_type == "csv":
            # newline="" is required by the csv module so that newlines
            # embedded in quoted fields are parsed correctly.
            with open(file_name, "r", encoding="utf-8", newline="") as f:
                return "\n".join(", ".join(row) for row in csv.reader(f))

        if file_type == "xlsx":
            # data_only=True yields cached cell values instead of formulas.
            wb = openpyxl.load_workbook(file_name, data_only=True)
            sheet = wb.active  # only the active worksheet is rendered
            return "\n".join(
                ", ".join("" if cell is None else str(cell) for cell in row)
                for row in sheet.iter_rows(values_only=True)
            )

        if file_type == "mp3":
            model = whisper.load_model("base")
            return model.transcribe(file_name)["text"]

        return f"File type '{extension}' not supported."
    except FileNotFoundError:
        return f"File '{file_name}' not found."
    except Exception as e:
        # Tool boundary: report any reader/parser failure as text instead of
        # raising.
        return f"Error opening file '{file_name}': {str(e)}"