Commit 175e92c by gauravlochab
chore: change the system from loading to adding the csv for solving the rate limiter error

generate_csv_for_space.py:
#!/usr/bin/env python3
"""
CSV Generation Script for Hugging Face Space Deployment

This script fetches data from the API, applies preprocessing, and saves CSV files
that can be uploaded to your Hugging Face Space to avoid rate limiting issues.

Usage:
    python generate_csv_for_space.py

Output files:
    - optimus_apr_values.csv
    - optimus_apr_statistics.csv
    - optimus_roi_values.csv
"""
import logging
import sys
import os
from datetime import datetime

# Add the current directory to the path so we can import our modules
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# Import our existing functions
from app import fetch_apr_data_from_db, save_to_csv, save_roi_to_csv
from initial_value_fixer import fix_apr_and_roi
from load_from_csv import check_csv_data_availability, get_data_freshness_info
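
# Behavior assumed of the imported helpers, based on how they are used below:
#   - fetch_apr_data_from_db() -> (df_apr, df_roi) as pandas DataFrames
#   - save_to_csv(df) / save_roi_to_csv(df) -> path of the CSV they wrote (falsy on failure)
#   - fix_apr_and_roi(df) -> processed DataFrame with corrected APR/ROI values
#   - check_csv_data_availability() -> {'apr': {...}, 'roi': {...}} with 'available',
#     'file', 'records', 'size_mb', 'modified' (or 'error') keys
#   - get_data_freshness_info() -> per-type dicts with 'hours_old' and 'is_fresh'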

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler("csv_generation.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)


def main():
    """Main function to generate CSV files for Hugging Face Space deployment"""
    print("=" * 60)
    print("CSV Generation for Hugging Face Space Deployment")
    print("=" * 60)

    # Check if CSV files already exist
    print("\n1. Checking existing CSV files...")
    csv_info = check_csv_data_availability()
    for data_type, info in csv_info.items():
        if info['available']:
            print(f" ✓ {data_type.upper()}: {info['file']} ({info['records']} records, {info['size_mb']:.2f} MB)")
            print(f" Last modified: {info['modified']}")
        else:
            print(f" ✗ {data_type.upper()}: {info['error']}")

    # Check data freshness
    print("\n2. Checking data freshness...")
    freshness_info = get_data_freshness_info()
    for data_type, info in freshness_info.items():
        if data_type != 'error':
            hours_old = info['hours_old']
            is_fresh = info['is_fresh']
            status = "FRESH" if is_fresh else "STALE"
            print(f" {data_type.upper()}: {hours_old:.1f} hours old ({status})")
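
    # get_data_freshness_info() is assumed to treat data younger than roughly
    # 24 hours as fresh, matching option [2] below.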

    # Ask user if they want to proceed
    print("\n3. Data generation options:")
    print(" [1] Generate fresh data from API (recommended)")
    print(" [2] Skip if CSV files are fresh (< 24 hours old)")
    print(" [3] Exit without generating")

    choice = input("\nEnter your choice (1-3): ").strip()

    if choice == "3":
        print("Exiting without generating CSV files.")
        return
    elif choice == "2":
        # Check if all files are fresh
        all_fresh = True
        for data_type, info in freshness_info.items():
            if data_type != 'error' and not info.get('is_fresh', False):
                all_fresh = False
                break
        if all_fresh and csv_info['apr']['available'] and csv_info['roi']['available']:
            print("All CSV files are fresh. No need to regenerate.")
            return
        else:
            print("Some CSV files are missing or stale. Proceeding with generation...")

    # Generate fresh data
    print("\n4. Fetching data from API...")
    try:
        df_apr, df_roi = fetch_apr_data_from_db()
        if df_apr.empty and df_roi.empty:
            print(" ✗ No data fetched from API. Check your connection and API status.")
            return
        print(f" ✓ Fetched {len(df_apr)} APR records and {len(df_roi)} ROI records")
    except Exception as e:
        print(f" ✗ Error fetching data: {e}")
        logger.exception("Error fetching data from API")
        return

    # Apply preprocessing
    print("\n5. Applying preprocessing...")
    try:
        if not df_apr.empty:
            df_apr_processed = fix_apr_and_roi(df_apr)
            print(f" ✓ Processed APR data: {len(df_apr_processed)} records")
        else:
            df_apr_processed = df_apr
            print(" ! No APR data to process")

        if not df_roi.empty:
            df_roi_processed = df_roi  # ROI data is already processed in fetch function
            print(f" ✓ ROI data ready: {len(df_roi_processed)} records")
        else:
            df_roi_processed = df_roi
            print(" ! No ROI data to process")
    except Exception as e:
        print(f" ✗ Error during preprocessing: {e}")
        logger.exception("Error during preprocessing")
        return

    # Save CSV files
    print("\n6. Saving CSV files...")
    csv_files_created = []
    try:
        # Save APR data
        if not df_apr_processed.empty:
            apr_csv = save_to_csv(df_apr_processed)
            if apr_csv:
                csv_files_created.append(apr_csv)
                print(f" ✓ Saved APR data: {apr_csv}")
                # Also save statistics
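                # save_to_csv() is assumed to also write optimus_apr_statistics.csv as a
                # side effect, so it is only added to the upload list if it exists on disk.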
                stats_csv = "optimus_apr_statistics.csv"
                if os.path.exists(stats_csv):
                    csv_files_created.append(stats_csv)
                    print(f" ✓ Saved APR statistics: {stats_csv}")

        # Save ROI data
        if not df_roi_processed.empty:
            roi_csv = save_roi_to_csv(df_roi_processed)
            if roi_csv:
                csv_files_created.append(roi_csv)
                print(f" ✓ Saved ROI data: {roi_csv}")

        if not csv_files_created:
            print(" ✗ No CSV files were created")
            return
    except Exception as e:
        print(f" ✗ Error saving CSV files: {e}")
        logger.exception("Error saving CSV files")
        return

    # Summary
    print("\n" + "=" * 60)
    print("CSV GENERATION COMPLETE")
    print("=" * 60)
    print(f"\nGenerated {len(csv_files_created)} CSV files:")
    for csv_file in csv_files_created:
        if os.path.exists(csv_file):
            size_mb = os.path.getsize(csv_file) / (1024 * 1024)
            print(f" • {csv_file} ({size_mb:.2f} MB)")

    print(f"\nGeneration completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("\nNext steps for Hugging Face Space deployment:")
    print("1. Upload these CSV files to your Hugging Face Space repository")
    print("2. Ensure your Space app.py imports and uses load_from_csv functions")
    print("3. The app will prioritize CSV data over API calls, avoiding rate limits")
    print("4. Re-run this script periodically to update the CSV files with fresh data")
    print("\nDeployment tips:")
    print("• Add these CSV files to your Space's file list")
    print("• Consider setting up a scheduled job to update CSV files regularly")
    print("• Monitor your Space logs to ensure CSV loading works correctly")
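

# Illustrative only and never called by this script: a minimal sketch of the CSV-first
# pattern the Space's app.py is expected to follow (see "Next steps" above). The real
# loader functions live in load_from_csv and are not shown here; pandas.read_csv on the
# generated file stands in for them, and the fallback reuses fetch_apr_data_from_db().
def _example_load_apr_csv_first():
    import pandas as pd  # assumed available, since the fetch functions return DataFrames

    if os.path.exists("optimus_apr_values.csv"):
        # Prefer the pre-generated CSV so the Space never hits the API rate limiter.
        return pd.read_csv("optimus_apr_values.csv")
    # Fall back to a live API call only when no CSV has been uploaded yet.
    df_apr, _df_roi = fetch_apr_data_from_db()
    return df_apr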


if __name__ == "__main__":
    main()