Commit 175e92c by gauravlochab
chore: change the system from loading to adding the csv for solving the rate limiter error

generate_csv_for_space.py:
#!/usr/bin/env python3
"""
CSV Generation Script for Hugging Face Space Deployment

This script fetches data from the API, applies preprocessing, and saves CSV files
that can be uploaded to your Hugging Face Space to avoid rate limiting issues.

Usage:
    python generate_csv_for_space.py

Output files:
    - optimus_apr_values.csv
    - optimus_apr_statistics.csv
    - optimus_roi_values.csv
"""
import logging
import sys
import os
from datetime import datetime

# Add the current directory to the path so we can import our modules
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# Import our existing functions
from app import fetch_apr_data_from_db, save_to_csv, save_roi_to_csv
from initial_value_fixer import fix_apr_and_roi
from load_from_csv import check_csv_data_availability, get_data_freshness_info
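
# Behavior assumed of the imported helpers, based on how they are used below:
#   - fetch_apr_data_from_db() -> (df_apr, df_roi) as pandas DataFrames
#   - save_to_csv(df) / save_roi_to_csv(df) -> path of the CSV they wrote (falsy on failure)
#   - fix_apr_and_roi(df) -> processed DataFrame with corrected APR/ROI values
#   - check_csv_data_availability() -> {'apr': {...}, 'roi': {...}} with 'available',
#     'file', 'records', 'size_mb', 'modified' (or 'error') keys
#   - get_data_freshness_info() -> per-type dicts with 'hours_old' and 'is_fresh'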

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler("csv_generation.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)


def main():
    """Main function to generate CSV files for Hugging Face Space deployment"""
    print("=" * 60)
    print("CSV Generation for Hugging Face Space Deployment")
    print("=" * 60)

    # Check if CSV files already exist
    print("\n1. Checking existing CSV files...")
    csv_info = check_csv_data_availability()
    for data_type, info in csv_info.items():
        if info['available']:
            print(f" ✓ {data_type.upper()}: {info['file']} ({info['records']} records, {info['size_mb']:.2f} MB)")
            print(f" Last modified: {info['modified']}")
        else:
            print(f" ✗ {data_type.upper()}: {info['error']}")

    # Check data freshness
    print("\n2. Checking data freshness...")
    freshness_info = get_data_freshness_info()
    for data_type, info in freshness_info.items():
        if data_type != 'error':
            hours_old = info['hours_old']
            is_fresh = info['is_fresh']
            status = "FRESH" if is_fresh else "STALE"
            print(f" {data_type.upper()}: {hours_old:.1f} hours old ({status})")
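
    # get_data_freshness_info() is assumed to treat data younger than roughly
    # 24 hours as fresh, matching option [2] below.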

    # Ask user if they want to proceed
    print("\n3. Data generation options:")
    print(" [1] Generate fresh data from API (recommended)")
    print(" [2] Skip if CSV files are fresh (< 24 hours old)")
    print(" [3] Exit without generating")

    choice = input("\nEnter your choice (1-3): ").strip()

    if choice == "3":
        print("Exiting without generating CSV files.")
        return
    elif choice == "2":
        # Check if all files are fresh
        all_fresh = True
        for data_type, info in freshness_info.items():
            if data_type != 'error' and not info.get('is_fresh', False):
                all_fresh = False
                break
        if all_fresh and csv_info['apr']['available'] and csv_info['roi']['available']:
            print("All CSV files are fresh. No need to regenerate.")
            return
        else:
            print("Some CSV files are missing or stale. Proceeding with generation...")

    # Generate fresh data
    print("\n4. Fetching data from API...")
    try:
        df_apr, df_roi = fetch_apr_data_from_db()
        if df_apr.empty and df_roi.empty:
            print(" ✗ No data fetched from API. Check your connection and API status.")
            return
        print(f" ✓ Fetched {len(df_apr)} APR records and {len(df_roi)} ROI records")
    except Exception as e:
        print(f" ✗ Error fetching data: {e}")
        logger.exception("Error fetching data from API")
        return

    # Apply preprocessing
    print("\n5. Applying preprocessing...")
    try:
        if not df_apr.empty:
            df_apr_processed = fix_apr_and_roi(df_apr)
            print(f" ✓ Processed APR data: {len(df_apr_processed)} records")
        else:
            df_apr_processed = df_apr
            print(" ! No APR data to process")

        if not df_roi.empty:
            df_roi_processed = df_roi  # ROI data is already processed in fetch function
            print(f" ✓ ROI data ready: {len(df_roi_processed)} records")
        else:
            df_roi_processed = df_roi
            print(" ! No ROI data to process")
    except Exception as e:
        print(f" ✗ Error during preprocessing: {e}")
        logger.exception("Error during preprocessing")
        return

    # Save CSV files
    print("\n6. Saving CSV files...")
    csv_files_created = []
    try:
        # Save APR data
        if not df_apr_processed.empty:
            apr_csv = save_to_csv(df_apr_processed)
            if apr_csv:
                csv_files_created.append(apr_csv)
                print(f" ✓ Saved APR data: {apr_csv}")
                # Also save statistics
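                # save_to_csv() is assumed to also write optimus_apr_statistics.csv as a
                # side effect, so it is only added to the upload list if it exists on disk.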
                stats_csv = "optimus_apr_statistics.csv"
                if os.path.exists(stats_csv):
                    csv_files_created.append(stats_csv)
                    print(f" ✓ Saved APR statistics: {stats_csv}")

        # Save ROI data
        if not df_roi_processed.empty:
            roi_csv = save_roi_to_csv(df_roi_processed)
            if roi_csv:
                csv_files_created.append(roi_csv)
                print(f" ✓ Saved ROI data: {roi_csv}")

        if not csv_files_created:
            print(" ✗ No CSV files were created")
            return
    except Exception as e:
        print(f" ✗ Error saving CSV files: {e}")
        logger.exception("Error saving CSV files")
        return

    # Summary
    print("\n" + "=" * 60)
    print("CSV GENERATION COMPLETE")
    print("=" * 60)
    print(f"\nGenerated {len(csv_files_created)} CSV files:")
    for csv_file in csv_files_created:
        if os.path.exists(csv_file):
            size_mb = os.path.getsize(csv_file) / (1024 * 1024)
            print(f" • {csv_file} ({size_mb:.2f} MB)")

    print(f"\nGeneration completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("\nNext steps for Hugging Face Space deployment:")
    print("1. Upload these CSV files to your Hugging Face Space repository")
    print("2. Ensure your Space app.py imports and uses load_from_csv functions")
    print("3. The app will prioritize CSV data over API calls, avoiding rate limits")
    print("4. Re-run this script periodically to update the CSV files with fresh data")
    print("\nDeployment tips:")
    print("• Add these CSV files to your Space's file list")
    print("• Consider setting up a scheduled job to update CSV files regularly")
    print("• Monitor your Space logs to ensure CSV loading works correctly")
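

# Illustrative only and never called by this script: a minimal sketch of the CSV-first
# pattern the Space's app.py is expected to follow (see "Next steps" above). The real
# loader functions live in load_from_csv and are not shown here; pandas.read_csv on the
# generated file stands in for them, and the fallback reuses fetch_apr_data_from_db().
def _example_load_apr_csv_first():
    import pandas as pd  # assumed available, since the fetch functions return DataFrames

    if os.path.exists("optimus_apr_values.csv"):
        # Prefer the pre-generated CSV so the Space never hits the API rate limiter.
        return pd.read_csv("optimus_apr_values.csv")
    # Fall back to a live API call only when no CSV has been uploaded yet.
    df_apr, _df_roi = fetch_apr_data_from_db()
    return df_apr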


if __name__ == "__main__":
    main()