Spaces:
Sleeping
Sleeping
| import requests | |
| import pandas as pd | |
| import gradio as gr | |
| import plotly.graph_objects as go | |
| import plotly.express as px | |
| from plotly.subplots import make_subplots | |
| from datetime import datetime, timedelta | |
| import json | |
| # Commenting out blockchain-related imports that cause loading issues | |
| # from web3 import Web3 | |
| import os | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import matplotlib.dates as mdates | |
| import random | |
| import logging | |
| from typing import List, Dict, Any, Optional | |
| # Comment out the import for now and replace with dummy functions | |
| # from app_trans_new import create_transcation_visualizations,create_active_agents_visualizations | |
| # APR visualization functions integrated directly | |
| from fetch_and_preprocess_data import generate_continuous_random_data | |
| from initial_value_fixer import fix_apr_and_roi | |
| from load_from_csv import ( | |
| load_apr_data_from_csv, | |
| load_roi_data_from_csv, | |
| load_statistics_from_csv, | |
| check_csv_data_availability, | |
| get_data_freshness_info | |
| ) | |
# Silence logging on HF Spaces to prevent Content-Length errors.
logging.basicConfig(
    level=logging.CRITICAL,  # Only critical records pass the root logger
    format="%(message)s",  # Minimal format
    handlers=[],  # No handlers are attached, so nothing is written out
    force=True,
)
logger = logging.getLogger(__name__)
logger.setLevel(logging.CRITICAL)

# Raise the threshold of every third-party logger this app touches as well.
for _library_name in (
    "urllib3",
    "httpx",
    "matplotlib",
    "plotly",
    "pandas",
    "requests",
    "gradio",
):
    logging.getLogger(_library_name).setLevel(logging.CRITICAL)
del _library_name
# Global variables to store the data for reuse.
# fetch_apr_data_from_db() assigns all four of these via `global` statements.
global_df = None
global_roi_df = None
global_dummy_apr_df = None  # Store dummy APR data separately
global_dummy_roi_df = None  # Store dummy ROI data separately

# Configuration
# Base URL that the get_* API helpers below prepend to their request paths.
API_BASE_URL = "https://afmdb.autonolas.tech"
logger.info(f"Using API endpoint: {API_BASE_URL}")
def get_agent_type_by_name(type_name: str) -> Optional[Dict[str, Any]]:
    """Look up an agent type by its name.

    Args:
        type_name: Name of the agent type; interpolated into the request path.

    Returns:
        The decoded JSON body of the response, or ``None`` when the API
        answers 404 or the request/decoding fails for any other reason.
    """
    url = f"{API_BASE_URL}/api/agent-types/name/{type_name}"
    logger.debug(f"Calling API: {url}")
    try:
        # Without a timeout, requests waits forever on an unresponsive server.
        response = requests.get(url, timeout=30)
        logger.debug(f"Response status: {response.status_code}")
        if response.status_code == 404:
            logger.error(f"Agent type '{type_name}' not found")
            return None
        response.raise_for_status()
        result = response.json()
        logger.debug(f"Agent type response: {result}")
        return result
    except Exception as e:
        # Best-effort lookup: callers treat None as "not available".
        logger.error(f"Error in get_agent_type_by_name: {e}")
        return None
def get_attribute_definition_by_name(attr_name: str) -> Optional[Dict[str, Any]]:
    """Look up an attribute definition by its name.

    Args:
        attr_name: Name of the attribute; interpolated into the request path.

    Returns:
        The decoded JSON body of the response, or ``None`` when the API
        answers 404 or the request/decoding fails for any other reason.
    """
    url = f"{API_BASE_URL}/api/attributes/name/{attr_name}"
    logger.debug(f"Calling API: {url}")
    try:
        # Without a timeout, requests waits forever on an unresponsive server.
        response = requests.get(url, timeout=30)
        logger.debug(f"Response status: {response.status_code}")
        if response.status_code == 404:
            logger.error(f"Attribute definition '{attr_name}' not found")
            return None
        response.raise_for_status()
        result = response.json()
        logger.debug(f"Attribute definition response: {result}")
        return result
    except Exception as e:
        # Best-effort lookup: callers treat None as "not available".
        logger.error(f"Error in get_attribute_definition_by_name: {e}")
        return None
def get_agents_by_type(type_id: int) -> List[Dict[str, Any]]:
    """Fetch all agents registered under one agent type.

    Args:
        type_id: Identifier of the agent type; interpolated into the request path.

    Returns:
        The decoded JSON body of the response, or an empty list when the API
        answers 404 or the request/decoding fails for any other reason.
    """
    url = f"{API_BASE_URL}/api/agent-types/{type_id}/agents/"
    logger.debug(f"Calling API: {url}")
    try:
        # Without a timeout, requests waits forever on an unresponsive server.
        response = requests.get(url, timeout=30)
        logger.debug(f"Response status: {response.status_code}")
        if response.status_code == 404:
            logger.error(f"No agents found for type ID {type_id}")
            return []
        response.raise_for_status()
        result = response.json()
        logger.debug(f"Agents count: {len(result)}")
        logger.debug(f"First few agents: {result[:2] if result else []}")
        return result
    except Exception as e:
        # Best-effort lookup: callers treat an empty list as "no agents".
        logger.error(f"Error in get_agents_by_type: {e}")
        return []
def get_attribute_values_by_type_and_attr(agents: List[Dict[str, Any]], attr_def_id: int) -> List[Dict[str, Any]]:
    """Collect the values of one attribute definition across the given agents.

    One request is made per agent to ``/api/agents/{agent_id}/attributes/``
    (with ``limit=1000``); the response is filtered to the entries whose
    ``attr_def_id`` equals the requested one.

    Args:
        agents: Agent records; each must carry an ``agent_id`` key.
        attr_def_id: Attribute definition ID to keep.

    Returns:
        The matching attribute entries of all agents, in request order. Agents
        whose request returns 404 or fails are skipped.
    """
    all_attributes = []
    logger.debug(f"Getting attributes for {len(agents)} agents with attr_def_id: {attr_def_id}")

    for agent in agents:
        agent_id = agent["agent_id"]
        url = f"{API_BASE_URL}/api/agents/{agent_id}/attributes/"
        logger.debug(f"Calling API for agent {agent_id}: {url}")
        try:
            # Bound the wait per agent so one stalled request cannot block the
            # whole collection loop indefinitely.
            response = requests.get(url, params={"limit": 1000}, timeout=30)
            if response.status_code == 404:
                logger.error(f"No attributes found for agent ID {agent_id}")
                continue
            response.raise_for_status()
            agent_attrs = response.json()
            logger.debug(f"Agent {agent_id} has {len(agent_attrs)} attributes")

            # Keep only the entries for the requested attribute definition.
            filtered_attrs = [attr for attr in agent_attrs if attr.get("attr_def_id") == attr_def_id]
            logger.debug(f"Agent {agent_id} has {len(filtered_attrs)} APR attributes")
            if filtered_attrs:
                logger.debug(f"Sample attribute for agent {agent_id}: {filtered_attrs[0]}")
            all_attributes.extend(filtered_attrs)
        except requests.exceptions.RequestException as e:
            logger.error(f"Error fetching attributes for agent ID {agent_id}: {e}")

    logger.info(f"Total APR attributes found across all agents: {len(all_attributes)}")
    return all_attributes
def get_agent_name(agent_id: int, agents: List[Dict[str, Any]]) -> str:
    """Return the ``agent_name`` of the first agent whose ``agent_id`` matches.

    Falls back to ``"Unknown"`` when no entry in ``agents`` matches.
    """
    matching_names = (
        entry["agent_name"] for entry in agents if entry["agent_id"] == agent_id
    )
    return next(matching_names, "Unknown")
def extract_apr_value(attr: Dict[str, Any]) -> Dict[str, Any]:
    """Extract APR, adjusted APR, ROI, address and timestamp from an attribute.

    Args:
        attr: Attribute record with a ``json_value`` entry (a dict or a JSON
            string) and, optionally, an ``agent_id``.

    Returns:
        A copy of the decoded JSON payload with these keys set/overridden:
        ``apr``, ``adjusted_apr``, ``roi`` (``calculation_metrics.f_i_ratio``,
        dropped when outside [-10, 10]), ``timestamp`` (converted with
        ``datetime.fromtimestamp``), ``agent_id``, ``is_dummy`` (always False)
        and ``address``. When the payload is missing or malformed, a record
        with the same keys and ``None`` values is returned instead of raising.
    """
    try:
        agent_id = attr.get("agent_id", "unknown")
        logger.debug(f"Extracting APR value for agent {agent_id}")

        raw_value = attr["json_value"]
        if raw_value is None:
            logger.debug(f"Agent {agent_id}: json_value is None")
            # Carries the same keys (including "address") as every other path.
            return {"apr": None, "adjusted_apr": None, "roi": None, "timestamp": None, "agent_id": agent_id, "is_dummy": False, "address": None}

        if isinstance(raw_value, str):
            logger.debug(f"Agent {agent_id}: json_value is string, parsing")
            json_data = json.loads(raw_value)
        else:
            json_data = raw_value

        apr = json_data.get("apr")
        adjusted_apr = json_data.get("adjusted_apr")
        timestamp = json_data.get("timestamp")

        # Either nesting level may be present but null; `or {}` keeps the
        # chained lookup from failing on None.
        snapshot = json_data.get("portfolio_snapshot") or {}
        portfolio = snapshot.get("portfolio") or {}
        address = portfolio.get("address")

        # ROI is the f_i_ratio inside calculation_metrics, when present.
        roi = None
        metrics = json_data.get("calculation_metrics")
        if metrics is not None:
            roi = metrics.get("f_i_ratio")
            # Exclude ROI values outside the -10 to 10 range.
            if roi is not None and (roi < -10 or roi > 10):
                roi = None

        logger.debug(f"Agent {agent_id}: Raw APR value: {apr}, adjusted APR value: {adjusted_apr}, ROI value: {roi}, timestamp: {timestamp}")

        timestamp_dt = None
        if timestamp:
            timestamp_dt = datetime.fromtimestamp(timestamp)

        result = json_data.copy()  # Keep the original payload alongside the extracted fields
        result.update({
            "apr": apr,
            "adjusted_apr": adjusted_apr,
            "roi": roi,
            "timestamp": timestamp_dt,
            "agent_id": agent_id,
            "is_dummy": False,
            "address": address,
        })
        logger.debug(f"Agent {agent_id}: Extracted result: {result}")
        return result
    except (KeyError, TypeError, AttributeError, ValueError, OverflowError, OSError) as e:
        # ValueError covers json.JSONDecodeError; AttributeError covers payloads
        # that are not mappings; ValueError/OverflowError/OSError cover
        # out-of-range timestamps. One bad record must not abort the caller.
        logger.error(f"Error parsing JSON value: {e} for agent_id: {attr.get('agent_id')}")
        logger.error(f"Problematic json_value: {attr.get('json_value')}")
        return {"apr": None, "adjusted_apr": None, "roi": None, "timestamp": None, "agent_id": attr.get('agent_id'), "is_dummy": False, "address": None}
def _reset_fetch_globals():
    """Replace both cached frames with empty ones and return them as a pair."""
    global global_df, global_roi_df
    global_df = pd.DataFrame([])
    global_roi_df = pd.DataFrame([])
    return global_df, global_roi_df


def _split_metric_records(apr_attributes, optimus_agents):
    """Turn raw attribute entries into separate APR and ROI record lists."""
    apr_data_list = []
    roi_data_list = []
    for attr in apr_attributes:
        data = extract_apr_value(attr)
        if data["timestamp"] is None:
            continue

        agent_name = get_agent_name(attr["agent_id"], optimus_agents)
        data["agent_name"] = agent_name
        data["is_dummy"] = False  # everything coming from the API is real data

        if data["apr"] is not None:
            # Keep all APR values (including negative ones) except 0 and -100.
            if data["apr"] != 0 and data["apr"] != -100:
                apr_entry = data.copy()
                apr_entry["metric_type"] = "APR"
                logger.debug(f"Agent {agent_name} ({attr['agent_id']}): APR value: {data['apr']}")
                apr_data_list.append(apr_entry)
            else:
                logger.debug(f"Skipping APR value for agent {agent_name} ({attr['agent_id']}): {data['apr']} (zero or -100)")

        if data["roi"] is not None:
            roi_entry = {
                "roi": data["roi"],
                "timestamp": data["timestamp"],
                "agent_id": data["agent_id"],
                "agent_name": agent_name,
                "is_dummy": False,
                "metric_type": "ROI",
            }
            logger.debug(f"Agent {agent_name} ({attr['agent_id']}): ROI value: {data['roi']}")
            roi_data_list.append(roi_entry)
    return apr_data_list, roi_data_list


def _log_adjusted_apr_coverage(apr_data_list, all_with_adjusted):
    """Log overall and per-agent adjusted_apr coverage and gaps (logging only)."""
    first_adjusted = min(all_with_adjusted, key=lambda x: x['timestamp'])
    logger.info(f"First adjusted_apr ever: {first_adjusted['timestamp']} (Agent: {first_adjusted['agent_id']})")
    last_adjusted = max(all_with_adjusted, key=lambda x: x['timestamp'])
    logger.info(f"Last adjusted_apr ever: {last_adjusted['timestamp']} (Agent: {last_adjusted['agent_id']})")

    adjusted_ratio = len(all_with_adjusted) / len(apr_data_list) * 100
    logger.info(f"Overall adjusted_apr coverage: {adjusted_ratio:.2f}% ({len(all_with_adjusted)}/{len(apr_data_list)} records)")

    # Group once so the per-agent passes below do not rescan the full list.
    records_by_agent = {}
    for record in apr_data_list:
        records_by_agent.setdefault(record['agent_id'], []).append(record)
    adjusted_counts = {
        agent_id: sum(1 for r in records if r['adjusted_apr'] is not None)
        for agent_id, records in records_by_agent.items()
    }

    # Only agents with at least some adjusted data are reported here.
    for agent_id, records in records_by_agent.items():
        adjusted = adjusted_counts[agent_id]
        if adjusted > 0:
            coverage = (adjusted / len(records)) * 100
            logger.info(f"Agent {agent_id}: {coverage:.2f}% adjusted coverage ({adjusted}/{len(records)} records)")

    # Walk each agent's records in time order looking for missing values
    # after the first adjusted one.
    agents_with_gaps = 0
    for agent_id, records in records_by_agent.items():
        has_adjusted = False
        gap_count = 0
        streak_length = 0
        for record in sorted(records, key=lambda x: x['timestamp']):
            if record['adjusted_apr'] is not None:
                if not has_adjusted:
                    has_adjusted = True
                    logger.info(f"Agent {agent_id}: First adjusted APR at {record['timestamp']}")
                streak_length += 1
            elif has_adjusted:
                # We had adjusted data but now it's missing.
                gap_count += 1
                if streak_length > 0:
                    logger.warning(f"Agent {agent_id}: Gap in adjusted APR data after {streak_length} consecutive records")
                streak_length = 0
        if gap_count > 0:
            agents_with_gaps += 1
            logger.warning(f"Agent {agent_id}: Found {gap_count} gaps in adjusted APR data")
        elif has_adjusted:
            logger.info(f"Agent {agent_id}: Continuous adjusted APR data with no gaps")

    # The summary reuses the per-agent result above, so both always agree.
    agents_with_data = sum(1 for count in adjusted_counts.values() if count > 0)
    logger.info(f"ADJUSTED APR SUMMARY: {agents_with_data}/{len(records_by_agent)} agents have adjusted APR data")
    if agents_with_gaps > 0:
        logger.warning(f"ATTENTION: {agents_with_gaps} agents have gaps in their adjusted APR data")
        logger.warning("These gaps may cause discontinuities in the adjusted APR graph")
    else:
        logger.info("No gaps detected in adjusted APR data - graph should be continuous")


def _log_adjusted_apr_diagnostics(apr_data_list):
    """Log adjusted_apr availability around the May 10th, 2025 cutoff (logging only)."""
    may_10_2025 = datetime(2025, 5, 10)
    after_may_10 = [d for d in apr_data_list if d['timestamp'] >= may_10_2025]
    with_adjusted_after_may_10 = [d for d in after_may_10 if d['adjusted_apr'] is not None]
    logger.info(f"Data points after May 10th, 2025: {len(after_may_10)}")
    logger.info(f"Data points with adjusted_apr after May 10th, 2025: {len(with_adjusted_after_may_10)}")

    if with_adjusted_after_may_10:
        first_adjusted_after = min(with_adjusted_after_may_10, key=lambda x: x['timestamp'])
        logger.info(f"First adjusted_apr after May 10th: {first_adjusted_after['timestamp']} (Agent: {first_adjusted_after['agent_id']})")

    all_with_adjusted = [d for d in apr_data_list if d['adjusted_apr'] is not None]
    if all_with_adjusted:
        _log_adjusted_apr_coverage(apr_data_list, all_with_adjusted)

    if not with_adjusted_after_may_10 and after_may_10:
        logger.warning("No adjusted_apr values found after May 10th, 2025 despite having APR data")
        agents_after_may_10 = set(d['agent_id'] for d in after_may_10)
        logger.info(f"Agents with data after May 10th: {agents_after_may_10}")

        before_may_10 = [d for d in apr_data_list if d['timestamp'] < may_10_2025]
        agents_with_adjusted_before = {d['agent_id'] for d in before_may_10 if d['adjusted_apr'] is not None}

        # No record after the cutoff has adjusted_apr in this branch, so the
        # intersection is exactly "had it before, lacks it after".
        missing_adjusted = agents_with_adjusted_before.intersection(agents_after_may_10)
        if missing_adjusted:
            logger.warning(f"Agents that had adjusted_apr before May 10th but not after: {missing_adjusted}")
            for agent_id in missing_adjusted:
                agent_data = [d for d in before_may_10 if d['agent_id'] == agent_id and d['adjusted_apr'] is not None]
                if agent_data:
                    last_entry = max(agent_data, key=lambda d: d['timestamp'])
                    logger.info(f"Agent {agent_id}: Last adjusted_apr on {last_entry['timestamp']} with value {last_entry['adjusted_apr']}")
                agent_after = [d for d in after_may_10 if d['agent_id'] == agent_id]
                if agent_after:
                    first_after = min(agent_after, key=lambda d: d['timestamp'])
                    logger.info(f"Agent {agent_id}: First entry after cutoff on {first_after['timestamp']} missing adjusted_apr")
                    if 'adjusted_apr_key' in first_after:
                        logger.info(f"Agent {agent_id}: Key used for adjusted_apr: {first_after['adjusted_apr_key']}")
    elif with_adjusted_after_may_10:
        logger.info("Found adjusted_apr values after May 10th, 2025")
        agent_counts = {}
        for item in with_adjusted_after_may_10:
            agent_counts[item['agent_id']] = agent_counts.get(item['agent_id'], 0) + 1
        logger.info(f"Agents with adjusted_apr after May 10th: {agent_counts}")

        keys_used = {item.get('adjusted_apr_key') for item in with_adjusted_after_may_10 if 'adjusted_apr_key' in item}
        if keys_used:
            logger.info(f"Keys used for adjusted_apr after May 10th: {keys_used}")


def _merge_dummy_data(real_df, dummy_df, metric_type):
    """Create or extend the cached dummy rows for one metric and append them.

    Args:
        real_df: Non-empty frame of real records with a ``timestamp`` column.
        dummy_df: Previously generated dummy rows, or ``None`` on first use.
        metric_type: ``"APR"`` or ``"ROI"``; used to filter generated rows and
            in log messages.

    Returns:
        ``(combined_df, dummy_df)``: the real rows followed by the dummy rows,
        and the (possibly extended) dummy rows to cache for the next call.
    """
    # NOTE(review): generate_continuous_random_data is assumed to return a
    # DataFrame with a 'metric_type' column when non-empty - confirm.
    if dummy_df is None:
        logger.info(f"Generating initial dummy {metric_type} data...")
        dummy_df = generate_continuous_random_data(real_df)
        if not dummy_df.empty:
            dummy_df = dummy_df[dummy_df['metric_type'] == metric_type]
            logger.info(f"Generated {len(dummy_df)} initial dummy {metric_type} data points")
    else:
        latest_real_timestamp = real_df['timestamp'].max()
        latest_dummy_timestamp = dummy_df['timestamp'].max() if not dummy_df.empty else None
        if latest_dummy_timestamp is None or latest_real_timestamp > latest_dummy_timestamp:
            logger.info(f"Generating additional dummy {metric_type} data for new timestamps...")
            # Only real rows newer than the cached dummy rows need new dummies.
            if latest_dummy_timestamp is None:
                temp_df = real_df
            else:
                temp_df = real_df[real_df['timestamp'] > latest_dummy_timestamp]
            new_dummy_data = generate_continuous_random_data(temp_df)
            if not new_dummy_data.empty:
                new_dummy_data = new_dummy_data[new_dummy_data['metric_type'] == metric_type]
                logger.info(f"Generated {len(new_dummy_data)} additional dummy {metric_type} data points")
                dummy_df = pd.concat([dummy_df, new_dummy_data], ignore_index=True)
        else:
            logger.info(f"No new timestamps in real data, using existing dummy {metric_type} data")

    if not dummy_df.empty:
        dummy_count = len(dummy_df)
        real_df = pd.concat([real_df, dummy_df], ignore_index=True)
        logger.info(f"Added {dummy_count} dummy {metric_type} data points to the dataset")
    return real_df, dummy_df


def _log_apr_frame_summary(df):
    """Log row counts and APR / adjusted APR statistics of the final frame."""
    logger.info(f"Created DataFrame with {len(df)} rows (including dummy data)")
    logger.info(f"DataFrame columns: {df.columns.tolist()}")
    if df.empty:
        # An empty frame has no 'apr' column; indexing it would raise KeyError
        # and make the caller throw away otherwise valid ROI data.
        return
    logger.info(f"APR statistics: min={df['apr'].min()}, max={df['apr'].max()}, mean={df['apr'].mean()}")

    if 'adjusted_apr' in df.columns and df['adjusted_apr'].notna().any():
        logger.info(f"Adjusted APR statistics: min={df['adjusted_apr'].min()}, max={df['adjusted_apr'].max()}, mean={df['adjusted_apr'].mean()}")
        logger.info(f"Number of records with adjusted_apr: {df['adjusted_apr'].notna().sum()} out of {len(df)}")

        valid_rows = df[df['adjusted_apr'].notna()]
        if not valid_rows.empty:
            diff = valid_rows['apr'] - valid_rows['adjusted_apr']
            avg_diff = diff.mean()
            max_diff = diff.max()
            min_diff = diff.min()
            logger.info(f"APR vs. adjusted APR difference: avg={avg_diff:.2f}, min={min_diff:.2f}, max={max_diff:.2f}")

    logger.info("All values are APR type (excluding zero and -100 values)")
    logger.info(f"Agents count: {df['agent_name'].value_counts().to_dict()}")


def fetch_apr_data_from_db():
    """Fetch APR and ROI data for all "Optimus" agents through the API.

    Resolves the "Optimus" agent type and the "APR" attribute definition,
    downloads the attribute values of every Optimus agent, splits them into
    APR and ROI records, appends generated dummy rows, and stores the results
    in the module globals ``global_df`` / ``global_roi_df`` (dummy rows are
    cached in ``global_dummy_apr_df`` / ``global_dummy_roi_df``).

    Returns:
        ``(global_df, global_roi_df)``. Both are empty DataFrames when a
        lookup step finds nothing or any error occurs; every exit path returns
        this same pair so callers can always unpack it.
    """
    global global_df
    global global_roi_df
    global global_dummy_apr_df
    global global_dummy_roi_df

    logger.info("==== Starting APR data fetch ====")
    try:
        # Step 1: Find the Optimus agent type
        logger.info("Finding Optimus agent type")
        optimus_type = get_agent_type_by_name("Optimus")
        if not optimus_type:
            logger.error("Optimus agent type not found, using placeholder data")
            return _reset_fetch_globals()
        type_id = optimus_type["type_id"]
        logger.info(f"Found Optimus agent type with ID: {type_id}")

        # Step 2: Find the APR attribute definition
        logger.info("Finding APR attribute definition")
        apr_attr_def = get_attribute_definition_by_name("APR")
        if not apr_attr_def:
            logger.error("APR attribute definition not found, using placeholder data")
            return _reset_fetch_globals()
        attr_def_id = apr_attr_def["attr_def_id"]
        logger.info(f"Found APR attribute definition with ID: {attr_def_id}")

        # Step 3: Get all agents of type Optimus
        logger.info(f"Getting all agents of type Optimus (type_id: {type_id})")
        optimus_agents = get_agents_by_type(type_id)
        if not optimus_agents:
            logger.error("No agents of type 'Optimus' found")
            return _reset_fetch_globals()
        logger.info(f"Found {len(optimus_agents)} Optimus agents")
        logger.debug(f"Optimus agents: {[{'agent_id': a['agent_id'], 'agent_name': a['agent_name']} for a in optimus_agents]}")

        # Step 4: Fetch all APR values for Optimus agents
        logger.info(f"Fetching APR values for all Optimus agents (attr_def_id: {attr_def_id})")
        apr_attributes = get_attribute_values_by_type_and_attr(optimus_agents, attr_def_id)
        if not apr_attributes:
            logger.error("No APR values found for 'Optimus' agents")
            return _reset_fetch_globals()
        logger.info(f"Found {len(apr_attributes)} APR attributes total")

        # Step 5: Extract APR and ROI data
        logger.info("Extracting APR and ROI data from attributes")
        apr_data_list, roi_data_list = _split_metric_records(apr_attributes, optimus_agents)
        logger.info(f"Extracted {len(apr_data_list)} valid APR data points and {len(roi_data_list)} valid ROI data points")

        _log_adjusted_apr_diagnostics(apr_data_list)

        # Convert to DataFrames (an empty list yields an empty frame).
        if not apr_data_list:
            logger.error("No valid APR data extracted")
        global_df = pd.DataFrame(apr_data_list)
        if not roi_data_list:
            logger.error("No valid ROI data extracted")
        global_roi_df = pd.DataFrame(roi_data_list)

        # Dummy rows are generated only when there is real data to extend.
        logger.info("Handling dummy data...")
        if not global_df.empty:
            global_df, global_dummy_apr_df = _merge_dummy_data(global_df, global_dummy_apr_df, "APR")
        if not global_roi_df.empty:
            global_roi_df, global_dummy_roi_df = _merge_dummy_data(global_roi_df, global_dummy_roi_df, "ROI")

        _log_apr_frame_summary(global_df)

        logger.info("Analyzing adjusted_apr data availability...")
        log_adjusted_apr_availability(global_df)
        return global_df, global_roi_df
    except requests.exceptions.RequestException as e:
        logger.error(f"API request error: {e}")
        return _reset_fetch_globals()
    except Exception as e:
        logger.error(f"Error fetching APR data: {e}")
        logger.exception("Exception traceback:")
        return _reset_fetch_globals()
def log_adjusted_apr_availability(df):
    """Analyze and log detailed information about adjusted_apr availability.

    Produces log records only; the frame is not modified and nothing is
    returned.

    Args:
        df: DataFrame of APR data. Beyond ``adjusted_apr`` (checked here), the
            columns ``timestamp``, ``agent_id`` and ``agent_name`` are read.
    """
    if df.empty or 'adjusted_apr' not in df.columns:
        logger.warning("No adjusted_apr data available for analysis")
        return

    has_adjusted = df[df['adjusted_apr'].notna()]
    if has_adjusted.empty:
        logger.warning("No valid adjusted_apr values found in the dataset")
        return

    # 1. When did adjusted_apr data start?
    first_adjusted = has_adjusted['timestamp'].min()
    last_adjusted = has_adjusted['timestamp'].max()
    logger.info(f"ADJUSTED APR SUMMARY: First data point: {first_adjusted}")
    logger.info(f"ADJUSTED APR SUMMARY: Last data point: {last_adjusted}")
    logger.info(f"ADJUSTED APR SUMMARY: Data spans {(last_adjusted - first_adjusted).days} days")

    total_records = len(df)
    records_with_adjusted = len(has_adjusted)
    coverage_pct = (records_with_adjusted / total_records) * 100 if total_records > 0 else 0
    logger.info(f"ADJUSTED APR SUMMARY: {records_with_adjusted} out of {total_records} records have adjusted_apr ({coverage_pct:.2f}%)")

    # 2. How many agents are providing adjusted_apr?
    agents_with_adjusted = has_adjusted['agent_id'].unique()
    logger.info(f"ADJUSTED APR SUMMARY: {len(agents_with_adjusted)} agents providing adjusted_apr")
    logger.info(f"ADJUSTED APR SUMMARY: Agents providing adjusted_apr: {list(agents_with_adjusted)}")

    # 3. May 10th cutoff analysis (only when data exists on both sides).
    may_10_2025 = datetime(2025, 5, 10)
    before_cutoff = df[df['timestamp'] < may_10_2025]
    after_cutoff = df[df['timestamp'] >= may_10_2025]
    spans_cutoff = not before_cutoff.empty and not after_cutoff.empty
    if spans_cutoff:
        before_with_adjusted = before_cutoff['adjusted_apr'].notna().sum()
        before_pct = (before_with_adjusted / len(before_cutoff)) * 100
        after_with_adjusted = after_cutoff['adjusted_apr'].notna().sum()
        after_pct = (after_with_adjusted / len(after_cutoff)) * 100
        logger.info(f"ADJUSTED APR SUMMARY: Before May 10th: {before_with_adjusted}/{len(before_cutoff)} records with adjusted_apr ({before_pct:.2f}%)")
        logger.info(f"ADJUSTED APR SUMMARY: After May 10th: {after_with_adjusted}/{len(after_cutoff)} records with adjusted_apr ({after_pct:.2f}%)")

        agents_before = set(before_cutoff[before_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
        agents_after = set(after_cutoff[after_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
        missing_after = agents_before - agents_after
        if missing_after:
            logger.warning(f"ADJUSTED APR SUMMARY: {len(missing_after)} agents stopped providing adjusted_apr after May 10th: {list(missing_after)}")
        new_after = agents_after - agents_before
        if new_after:
            logger.info(f"ADJUSTED APR SUMMARY: {len(new_after)} agents started providing adjusted_apr after May 10th: {list(new_after)}")

    # 4. Per-agent availability and gaps.
    logger.info("=== DETAILED AGENT ANALYSIS ===")
    for agent_id in df['agent_id'].unique():
        agent_data = df[df['agent_id'] == agent_id]
        agent_name = agent_data['agent_name'].iloc[0] if not agent_data.empty else f"Agent {agent_id}"

        agent_adjusted = agent_data[agent_data['adjusted_apr'].notna()]
        if agent_adjusted.empty:
            logger.info(f"Agent {agent_name} (ID: {agent_id}): No adjusted_apr data available")
            continue

        agent_start = agent_data['timestamp'].min()
        agent_end = agent_data['timestamp'].max()
        adjusted_start = agent_adjusted['timestamp'].min()
        adjusted_end = agent_adjusted['timestamp'].max()

        total_agent_records = len(agent_data)
        agent_with_adjusted = len(agent_adjusted)
        coverage_pct = (agent_with_adjusted / total_agent_records) * 100 if total_agent_records > 0 else 0
        logger.info(f"Agent {agent_name} (ID: {agent_id}): {agent_with_adjusted}/{total_agent_records} records with adjusted_apr ({coverage_pct:.2f}%)")
        logger.info(f"Agent {agent_name} (ID: {agent_id}): APR data from {agent_start} to {agent_end}")
        logger.info(f"Agent {agent_name} (ID: {agent_id}): Adjusted APR data from {adjusted_start} to {adjusted_end}")

        if spans_cutoff:
            agent_before = before_cutoff[before_cutoff['agent_id'] == agent_id]
            agent_after = after_cutoff[after_cutoff['agent_id'] == agent_id]
            has_before = not agent_before.empty and agent_before['adjusted_apr'].notna().any()
            has_after = not agent_after.empty and agent_after['adjusted_apr'].notna().any()
            if has_before and not has_after:
                last_date = agent_before[agent_before['adjusted_apr'].notna()]['timestamp'].max()
                logger.warning(f"Agent {agent_name} (ID: {agent_id}): Stopped providing adjusted_apr after May 10th. Last data point: {last_date}")
            elif not has_before and has_after:
                first_date = agent_after[agent_after['adjusted_apr'].notna()]['timestamp'].min()
                logger.info(f"Agent {agent_name} (ID: {agent_id}): Started providing adjusted_apr after May 10th. First data point: {first_date}")

        # Gaps are periods of more than 24 hours between consecutive
        # adjusted_apr data points; at least two points are needed.
        if len(agent_adjusted) < 2:
            continue

        timestamps = agent_adjusted.sort_values('timestamp')['timestamp']
        # Positional mask: the first diff is NaT and compares False, so every
        # flagged row has a predecessor. Working by position (not by index
        # label) keeps this correct even when the index is not unique.
        is_gap = (timestamps.diff() > pd.Timedelta(hours=24)).to_numpy()
        gap_ends = timestamps[is_gap]
        if not gap_ends.empty:
            logger.info(f"Agent {agent_name} (ID: {agent_id}): Found {len(gap_ends)} gaps in adjusted_apr data")
            gap_starts = timestamps.shift(1)[is_gap]
            for gap_start, gap_end in zip(gap_starts, gap_ends):
                gap_duration = gap_end - gap_start
                logger.info(f"Agent {agent_name} (ID: {agent_id}): Missing adjusted_apr from {gap_start} to {gap_end} ({gap_duration.days} days, {gap_duration.seconds//3600} hours)")
def generate_apr_visualizations():
    """Build the combined APR figure from the APR CSV file.

    The CSV is the only data source used here (no API fallback), which keeps
    this graph consistent with the ROI graph. The loaded frame is also stored
    in the module-level ``global_df``.

    Returns:
        Tuple ``(figure, csv_file)``. When the CSV yields no rows, the figure
        is an empty placeholder carrying a message and ``csv_file`` is None.
    """
    global global_df

    logger.info("Loading APR data from CSV files for consistency with ROI graph...")
    df, csv_file = load_apr_data_from_csv()

    if df.empty:
        # No CSV rows: report it and hand back a placeholder figure.
        logger.error("CSV data not available and API fallback disabled for consistency")
        placeholder = go.Figure()
        placeholder.add_annotation(
            x=0.5, y=0.5,
            text="No APR data available - CSV file missing",
            font=dict(size=20),
            showarrow=False
        )
        hidden_axis = dict(showgrid=False, zeroline=False, showticklabels=False)
        placeholder.update_layout(xaxis=dict(hidden_axis), yaxis=dict(hidden_axis))
        return placeholder, None

    logger.info(f"Successfully loaded APR data from CSV: {len(df)} records")
    global_df = df

    logger.info("Creating APR visualizations from CSV data...")
    return create_combined_time_series_graph(df), csv_file
def generate_roi_visualizations():
    """Build the combined ROI figure, preferring the APR CSV over the API.

    ROI values are read from the ``roi`` column of the APR CSV. Rows are
    restricted to ``metric_type == 'APR'`` (the same filter the APR graph
    uses) and to non-null ROI values. If the CSV has no ``metric_type``
    column, all rows are kept instead of raising ``KeyError``.

    If the CSV path yields no usable ROI rows, the function falls back to
    ``fetch_apr_data_from_db()`` (or the cached ``global_roi_df``). The frame
    that is finally plotted is stored in the module-level ``global_roi_df``.

    Returns:
        Tuple ``(figure, csv_file)``. ``csv_file`` is None whenever the figure
        was not built from the CSV (API data, no data, or an error).
    """
    global global_roi_df

    def _message_figure(text, font):
        """Return an empty figure with hidden axes that only displays `text`."""
        fig = go.Figure()
        fig.add_annotation(x=0.5, y=0.5, text=text, font=font, showarrow=False)
        fig.update_layout(
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
        )
        return fig

    logger.info("Loading ROI data directly from optimus_apr_values.csv...")
    df_apr, csv_file = load_apr_data_from_csv()

    if not df_apr.empty and 'roi' in df_apr.columns:
        logger.info("=== ROI GRAPH DATA FILTERING DEBUG ===")
        logger.info(f"Initial APR data loaded: {len(df_apr)} records")
        logger.info(f"Unique agents in initial data: {df_apr['agent_id'].nunique()}")
        logger.info(f"Agent IDs in initial data: {sorted(df_apr['agent_id'].unique().tolist())}")

        if 'metric_type' in df_apr.columns:
            metric_counts = df_apr['metric_type'].value_counts()
            logger.info(f"Metric type distribution: {metric_counts.to_dict()}")
            # Keep only APR rows so the ROI graph matches the APR graph's input.
            df_apr_filtered = df_apr[df_apr['metric_type'] == 'APR'].copy()
        else:
            logger.warning("No 'metric_type' column found in APR data")
            # Nothing to filter on: keep every row rather than failing with
            # a KeyError on the missing column.
            df_apr_filtered = df_apr.copy()

        logger.info(f"After metric_type == 'APR' filter: {len(df_apr_filtered)} records")
        logger.info(f"Unique agents after APR filter: {df_apr_filtered['agent_id'].nunique()}")
        logger.info(f"Agent IDs after APR filter: {sorted(df_apr_filtered['agent_id'].unique().tolist())}")

        # Keep only rows that actually carry an ROI value.
        df_roi = df_apr_filtered[df_apr_filtered['roi'].notna()].copy()
        logger.info(f"After ROI filter: {len(df_roi)} records")
        logger.info(f"Unique agents after ROI filter: {df_roi['agent_id'].nunique()}")
        logger.info(f"Agent IDs after ROI filter: {sorted(df_roi['agent_id'].unique().tolist())}")

        if not df_roi.empty:
            # Relabel the rows so downstream code sees them as ROI data.
            df_roi['metric_type'] = 'ROI'
            logger.info(f"Successfully loaded {len(df_roi)} ROI records from APR CSV")
            global_roi_df = df_roi

            logger.info("Creating ROI visualizations from APR CSV data...")
            combined_fig = create_combined_roi_time_series_graph(df_roi)
            return combined_fig, csv_file

        logger.warning("No valid ROI data found in APR CSV")
    else:
        logger.warning("APR CSV not available or missing ROI column")

    # FALLBACK: the CSV gave nothing usable, so try the API / cached data.
    logger.info("CSV data not available, falling back to API...")
    try:
        if global_roi_df is None or global_roi_df.empty:
            _, df_roi = fetch_apr_data_from_db()
        else:
            df_roi = global_roi_df

        if df_roi.empty:
            logger.info("No ROI data available from API either. Using fallback visualization.")
            return _message_figure("No ROI data available", dict(size=20)), None

        # Expose the data to other functions through the module-level cache.
        global_roi_df = df_roi

        logger.info("Creating ROI visualizations from API data...")
        combined_fig = create_combined_roi_time_series_graph(df_roi)
        return combined_fig, None
    except Exception as e:
        # Top-level UI boundary: show the error in the figure instead of crashing.
        logger.error(f"Error fetching ROI data from API: {e}")
        return _message_figure(f"Error loading data: {str(e)}", dict(size=16, color="red")), None
def aggregate_daily_data(df, metric_column):
    """
    Aggregate data by date and agent, taking the median of values within each day.

    The previous implementation documented and logged a median but aggregated
    with ``'mean'``; the aggregation now matches the documented contract.

    Args:
        df: DataFrame with 'timestamp' (datetime), 'agent_id', 'agent_name',
            'is_dummy', 'metric_type' and the metric column
        metric_column: Name of the metric column ('apr' or 'roi')

    Returns:
        DataFrame with one row per (date, agent_id) holding the daily median of
        the metric, the first 'agent_name' / 'is_dummy' / 'metric_type' of the
        day, and a 'timestamp' column set to midnight of that date. An empty
        input is returned unchanged.
    """
    if df.empty:
        return df

    # Work on a copy so the caller's frame does not gain a 'date' column.
    df = df.copy()
    df['date'] = df['timestamp'].dt.date

    # DEBUG: log which agents reported on July 8th, 2025.
    july_8_data = df[df['date'] == pd.to_datetime('2025-07-08').date()]
    if not july_8_data.empty:
        july_8_agents = july_8_data['agent_id'].unique()
        logger.info(f"DAILY AGGREGATION DEBUG ({metric_column}) - July 8th agents before aggregation: {len(july_8_agents)}")
        logger.info(f"DAILY AGGREGATION DEBUG ({metric_column}) - July 8th agent IDs: {sorted(july_8_agents.tolist())}")

    # DEBUG: show, for a few agent-days with several samples, how far the
    # mean and the median are apart.
    logger.info(f"=== MEDIAN CALCULATION DEBUG for {metric_column} ===")
    points_per_agent_day = df.groupby(['date', 'agent_id']).size()
    multi_point_days = points_per_agent_day[points_per_agent_day > 1].head(10)
    logger.info(f"Found {len(multi_point_days)} agent-days with multiple data points (showing up to 10):")

    mean_median_differences = []
    for (date, agent_id), count in multi_point_days.items():
        day_data = df[(df['date'] == date) & (df['agent_id'] == agent_id)]
        values = day_data[metric_column].tolist()
        calculated_mean = day_data[metric_column].mean()
        calculated_median = day_data[metric_column].median()
        agent_name = day_data['agent_name'].iloc[0] if not day_data.empty else f"Agent {agent_id}"
        difference = abs(calculated_mean - calculated_median)
        mean_median_differences.append(difference)
        logger.info(f" {agent_name} on {date}: {count} values = {values}")
        logger.info(f" MEAN: {calculated_mean:.4f}, MEDIAN: {calculated_median:.4f}, DIFF: {difference:.4f}")

    if mean_median_differences:
        avg_difference = sum(mean_median_differences) / len(mean_median_differences)
        max_difference = max(mean_median_differences)
        logger.info(f"Mean vs Median differences - Avg: {avg_difference:.4f}, Max: {max_difference:.4f}")
    else:
        logger.info("No days found with multiple data points per agent")

    single_point_days = int((points_per_agent_day == 1).sum())
    multi_point_days_count = int((points_per_agent_day > 1).sum())
    logger.info(f"Data distribution: {single_point_days} agent-days with 1 point, {multi_point_days_count} agent-days with multiple points")

    # Group by date and agent, calculate the median for each day.
    daily_agent_data = df.groupby(['date', 'agent_id']).agg({
        metric_column: 'median',
        'agent_name': 'first',
        'is_dummy': 'first',
        'metric_type': 'first'
    }).reset_index()

    # Convert date back to datetime for plotting.
    daily_agent_data['timestamp'] = pd.to_datetime(daily_agent_data['date'])

    logger.info(f"Sample calculated median values:")
    for _, row in daily_agent_data.head(5).iterrows():
        logger.info(f" {row['agent_name']} on {row['date']}: median {metric_column} = {row[metric_column]:.4f}")
    logger.info(f"Aggregated {len(df)} data points into {len(daily_agent_data)} daily values for {metric_column} using MEDIAN")
    return daily_agent_data
def calculate_daily_medians(daily_agent_data, metric_column):
    """
    Collapse per-agent daily values into one median value per date.

    Args:
        daily_agent_data: DataFrame with a 'date' column and the metric column,
            one row per agent and day
        metric_column: Name of the metric column ('apr' or 'roi')

    Returns:
        DataFrame with columns 'date', the metric column (median across the
        rows of that date) and 'timestamp' (the date as a datetime). An empty
        input is returned unchanged.
    """
    if daily_agent_data.empty:
        return daily_agent_data

    # The median skips missing values, so agents without data on a day are ignored.
    per_day = daily_agent_data.groupby('date')[[metric_column]].median()
    daily_medians = per_day.reset_index()

    # Plotting needs datetimes rather than plain date objects.
    daily_medians['timestamp'] = pd.to_datetime(daily_medians['date'])

    logger.info(f"Calculated {len(daily_medians)} daily median values for {metric_column}")
    return daily_medians
def calculate_moving_average_medians(daily_medians, metric_column, window_days=7):
    """
    Calculate a trailing moving average of daily medians over a time window.

    For each row, the average covers every row whose timestamp lies in
    ``[timestamp - window_days, timestamp]``. Both ends are inclusive, so with
    one row per calendar day the window spans ``window_days + 1`` rows.

    The result column is filled positionally as float64. The previous
    implementation created an object column of ``None`` and wrote into it by
    index label, which produced wrong values when index labels were duplicated.

    Args:
        daily_medians: DataFrame with a 'timestamp' column and the metric column
        metric_column: Name of the metric column ('apr' or 'roi')
        window_days: Number of days for the moving average window

    Returns:
        A copy of the input sorted by 'timestamp' with a float 'moving_avg'
        column added. An empty input is returned unchanged.
    """
    if daily_medians.empty:
        return daily_medians

    ordered = daily_medians.sort_values('timestamp').copy()
    time_window = pd.Timedelta(days=window_days)
    logger.info(f"Calculating {window_days}-day moving average of daily medians for {metric_column}")

    timestamps = ordered['timestamp']
    values = ordered[metric_column]

    averages = []
    for current_time, current_value in zip(timestamps, values):
        window_start = current_time - time_window
        # Positional boolean mask: independent of the frame's index labels.
        in_window = ((timestamps >= window_start) & (timestamps <= current_time)).to_numpy()
        if in_window.any():
            averages.append(values[in_window].mean())
        else:
            # Only reachable when the timestamp is not comparable (e.g. NaT):
            # fall back to the row's own value.
            averages.append(current_value)

    ordered['moving_avg'] = pd.Series(averages, dtype='float64').to_numpy()

    logger.info(f"Calculated {window_days}-day moving averages with {len(ordered)} points")
    return ordered
def _roi_placeholder_figure(message):
    """Return an empty Plotly figure that only shows `message` as an annotation."""
    fig = go.Figure()
    fig.add_annotation(
        text=message,
        x=0.5, y=0.5,
        showarrow=False, font=dict(size=20)
    )
    return fig


def _add_roi_background_shapes(fig, x0, x1):
    """Shade the positive/negative ROI bands and draw the zero line from x0 to x1."""
    # Fixed +/-10 bands so extreme outliers do not stretch the shaded regions.
    fig.add_shape(
        type="rect",
        fillcolor="rgba(230, 243, 255, 0.3)",
        line=dict(width=0),
        y0=0, y1=10,
        x0=x0, x1=x1,
        layer="below"
    )
    fig.add_shape(
        type="rect",
        fillcolor="rgba(255, 230, 230, 0.3)",
        line=dict(width=0),
        y0=-10, y1=0,
        x0=x0, x1=x1,
        layer="below"
    )
    fig.add_shape(
        type="line",
        line=dict(dash="solid", width=1.5, color="black"),
        y0=0, y1=0,
        x0=x0, x1=x1
    )


def _build_simple_roi_figure(daily_medians_with_ma, min_time, max_time):
    """Build the reduced fallback ROI figure (moving-average line only)."""
    bold_tick_font = dict(size=14, family="Arial, sans-serif", color="black", weight="bold")

    simple_fig = go.Figure()
    _add_roi_background_shapes(simple_fig, min_time, max_time)

    if not daily_medians_with_ma.empty:
        simple_fig.add_trace(
            go.Scatter(
                x=daily_medians_with_ma['timestamp'],
                y=daily_medians_with_ma['moving_avg'],
                mode='lines',
                name='Median ROI (7d window)',
                line=dict(width=2, color='blue')
            )
        )

    simple_fig.update_layout(
        title=dict(
            text="Optimus Agents ROI",
            font=dict(
                family="Arial, sans-serif",
                size=22,
                color="black",
                weight="bold"
            )
        ),
        xaxis_title=None,
        yaxis_title=None,
        template="plotly_white",
        height=600,
        autosize=True,
        margin=dict(r=30, l=120, t=40, b=50)
    )
    simple_fig.update_yaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(0,0,0,0.1)',
        range=[-10, 10],
        tickformat=".2f",
        tickfont=bold_tick_font,
        title=None
    )
    simple_fig.update_xaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(0,0,0,0.1)',
        autorange=True,
        tickformat="%b %d",
        tickangle=-30,
        tickfont=bold_tick_font
    )
    return simple_fig


def create_combined_roi_time_series_graph(df):
    """Create a time series graph showing daily median ROI values with 7-day moving average.

    Processing steps:
      1. Average agent runtime (first to last report) is computed for the title.
      2. Rows with null ROI are dropped, ROI values outside [-200, 200] are
         removed, and rows outside the hard-coded date range
         (2025-06-06 .. 2025-07-31) are removed.
      3. Values are aggregated per agent and day, reduced to one median per
         day, and smoothed with ``calculate_moving_average_medians``.
      4. Per-agent daily markers are added hidden by default; the
         moving-average line is visible.

    The figure is also written to ``optimus_roi_graph.html``. If writing fails,
    a simplified figure is built and returned instead.

    Args:
        df: DataFrame with 'timestamp', 'agent_id', 'agent_name', 'roi' and
            'metric_type' columns (plus whatever ``aggregate_daily_data`` needs).

    Returns:
        A ``plotly.graph_objects.Figure``. If the input is empty, or no rows
        survive the filters, the figure only shows "No ROI data available".
    """
    if len(df) == 0:
        logger.error("No data to plot combined ROI graph")
        return _roi_placeholder_figure("No ROI data available")

    # --- Agent runtimes (from each agent's first to last report) -------------
    logger.info(f"Calculating runtime for each agent from their actual start date")
    agent_runtimes = {}
    for agent_id, agent_rows in df.groupby('agent_id', sort=False):
        first_report = agent_rows['timestamp'].min()
        last_report = agent_rows['timestamp'].max()
        agent_runtimes[agent_id] = {
            'agent_name': agent_rows['agent_name'].iloc[0],
            'first_report': first_report,
            'last_report': last_report,
            'runtime_days': (last_report - first_report).total_seconds() / (24 * 3600)
        }

    avg_runtime = (
        sum(data['runtime_days'] for data in agent_runtimes.values()) / len(agent_runtimes)
        if agent_runtimes else 0
    )
    logger.info(f"Average agent runtime from fixed start date: {avg_runtime:.2f} days")
    for agent_id, data in agent_runtimes.items():
        logger.info(f"Agent {data['agent_name']} (ID: {agent_id}): Runtime = {data['runtime_days']:.2f} days, Last report: {data['last_report']}")

    # --- Clean the ROI column -------------------------------------------------
    logger.info("Processing ROI data from CSV...")
    initial_count = len(df)
    # .copy() so the dtype fixes below never write into the caller's frame.
    df = df[df['roi'].notna()].copy()
    removed_count = initial_count - len(df)
    if removed_count > 0:
        logger.warning(f"Removed {removed_count} rows with invalid ROI values")
    if df.empty:
        logger.error("No data to plot combined ROI graph")
        return _roi_placeholder_figure("No ROI data available")

    df['roi'] = df['roi'].astype(float)
    df['metric_type'] = df['metric_type'].astype(str)

    # Extent of the background shapes: the full (pre-filter) time span.
    min_time = df['timestamp'].min()
    max_time = df['timestamp'].max()

    logger.info(f"ROI Graph data - shape: {df.shape}, columns: {df.columns}")
    logger.info(f"ROI Graph data - unique agents: {df['agent_name'].unique().tolist()}")
    logger.info(f"ROI Graph data - min ROI: {df['roi'].min()}, max ROI: {df['roi'].max()}")

    fig = go.Figure()
    _add_roi_background_shapes(fig, min_time, max_time)

    # --- Filters ----------------------------------------------------------------
    # Drop ROI outliers for better visualization (+/-200% range).
    before_outlier_filter = len(df)
    df = df[(df['roi'] <= 200) & (df['roi'] >= -200)]
    after_outlier_filter = len(df)
    excluded_by_outlier = before_outlier_filter - after_outlier_filter
    logger.info(f"ROI outlier filtering: {before_outlier_filter} -> {after_outlier_filter} data points ({excluded_by_outlier} excluded)")

    # Hard-coded date range: June 6 to the end of July 31, 2025.
    min_date = datetime(2025, 6, 6)
    max_date = datetime(2025, 7, 31, 23, 59, 59)
    logger.info(f"Filtering ROI data to date range: {min_date} to {max_date}")
    before_filter_count = len(df)
    df = df[(df['timestamp'] >= min_date) & (df['timestamp'] <= max_date)]
    after_filter_count = len(df)
    excluded_by_date = before_filter_count - after_filter_count
    logger.info(f"ROI Date filtering: {before_filter_count} -> {after_filter_count} data points ({excluded_by_date} excluded)")

    if df.empty:
        # The aggregation helpers return empty input unchanged (no
        # 'moving_avg' column), so stop here instead of failing on it below.
        logger.error("No data to plot combined ROI graph")
        return _roi_placeholder_figure("No ROI data available")

    # --- Daily aggregation ------------------------------------------------------
    # Step 1: one value per agent and day
    daily_agent_data = aggregate_daily_data(df, 'roi')
    # Step 2: one median per day across all agents
    daily_medians = calculate_daily_medians(daily_agent_data, 'roi')
    # Step 3: 7-day moving average of the daily medians
    daily_medians_with_ma = calculate_moving_average_medians(daily_medians, 'roi', window_days=7)
    logger.info(f"NEW APPROACH: Processed {len(df)} raw points → {len(daily_agent_data)} daily agent values → {len(daily_medians)} daily medians")

    valid_ma_rows = daily_medians_with_ma[daily_medians_with_ma['moving_avg'].notna()]
    last_valid_ma_date = valid_ma_rows['timestamp'].max() if not valid_ma_rows.empty else None
    last_valid_date = last_valid_ma_date if last_valid_ma_date is not None else df['timestamp'].max()
    logger.info(f"Last valid moving average date: {last_valid_ma_date}")
    logger.info(f"Using last valid date for graph: {last_valid_date}")

    # --- Per-agent daily markers (hidden by default) ----------------------------
    if not daily_agent_data.empty:
        unique_agents = daily_agent_data['agent_name'].unique()
        palette = px.colors.qualitative.Plotly
        # Cycle through the palette when there are more agents than colours.
        color_map = {agent: palette[i % len(palette)] for i, agent in enumerate(unique_agents)}

        # All agent traces start hidden; users can enable them from the legend.
        is_visible = False
        for agent_name in unique_agents:
            agent_data = daily_agent_data[daily_agent_data['agent_name'] == agent_name]
            x_values = agent_data['timestamp'].tolist()
            y_values = agent_data['roi'].tolist()
            fig.add_trace(
                go.Scatter(
                    x=x_values,
                    y=y_values,
                    mode='markers',
                    marker=dict(
                        color=color_map[agent_name],
                        symbol='circle',
                        size=10,
                        line=dict(width=1, color='black')
                    ),
                    name=f'Agent: {agent_name} (Daily ROI)',
                    hovertemplate='Time: %{x}<br>Daily ROI: %{y:.2f}%<br>Agent: ' + agent_name + '<extra></extra>',
                    visible=is_visible
                )
            )
            logger.info(f"Added daily ROI data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})")

    # --- Moving-average line ------------------------------------------------------
    x_values_ma = daily_medians_with_ma['timestamp'].tolist()
    y_values_ma = daily_medians_with_ma['moving_avg'].tolist()

    hover_data_roi = []
    for _, row in daily_medians_with_ma.iterrows():
        timestamp = row['timestamp']
        formatted_timestamp = timestamp.strftime('%Y-%m-%d')
        # Agents that contributed a daily value on this date.
        agents_on_date = daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique()
        active_agents = len(agents_on_date)
        # DEBUG: log agent counts for July 8th specifically.
        if formatted_timestamp == '2025-07-08':
            logger.info(f"ROI GRAPH - July 8th active agents: {active_agents}")
            logger.info(f"ROI GRAPH - July 8th agent IDs: {sorted(agents_on_date.tolist())}")
        hover_data_roi.append(
            f"Date: {formatted_timestamp}<br>Median ROI (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
        )

    fig.add_trace(
        go.Scatter(
            x=x_values_ma,
            y=y_values_ma,
            mode='lines',
            line=dict(color='blue', width=3, shape='spline', smoothing=1.3),
            name='Median ROI (7d window)',
            hovertext=hover_data_roi,
            hoverinfo='text',
            visible=True
        )
    )
    logger.info(f"Added 7-day moving average of daily median ROI trace with {len(x_values_ma)} points")

    # --- Layout ---------------------------------------------------------------------
    bold_tick_font = dict(size=14, family="Arial, sans-serif", color="black", weight="bold")
    fig.update_layout(
        title=dict(
            text=f"Optimus Agents ROI (over avg. {avg_runtime:.1f} days runtime)",
            font=dict(
                family="Arial, sans-serif",
                size=22,
                color="black",
                weight="bold"
            )
        ),
        xaxis_title=None,  # axis titles are drawn as annotations instead
        yaxis_title=None,
        template="plotly_white",
        height=600,
        autosize=True,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
            groupclick="toggleitem",
            font=dict(
                family="Arial, sans-serif",
                size=14,
                color="black",
                weight="bold"
            )
        ),
        margin=dict(r=30, l=120, t=40, b=50),
        hovermode="closest"
    )

    # Y-axis label as a rotated annotation, placed left of the tick labels.
    fig.add_annotation(
        x=-0.08,
        y=0,
        xref="paper",
        yref="y",
        text="ROI [%]",
        showarrow=False,
        font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"),
        textangle=-90,
        align="center"
    )

    fig.update_yaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(0,0,0,0.1)',
        range=[-5, 10],  # fixed view: bottom at -5, top at 10
        tickformat=".1f",
        tickfont=bold_tick_font,
        title=None
    )

    # Fixed x-axis range: June 6 to July 31, 2025.
    axis_min_date = datetime(2025, 6, 6)
    axis_max_date = datetime(2025, 7, 31)
    logger.info(f"ROI Graph - Hardcoded date range: min_date = {axis_min_date}, max_date = {axis_max_date}")
    fig.update_xaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(0,0,0,0.1)',
        range=[axis_min_date, axis_max_date],
        autorange=False,
        tickformat="%b %d",
        tickangle=-30,
        tickfont=bold_tick_font,
        title=None
    )

    graph_file = "optimus_roi_graph.html"
    try:
        fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
        # PNG export is skipped to avoid Kaleido/Chrome dependencies in HF Spaces.
        logger.info(f"ROI graph saved to {graph_file} (PNG generation skipped for HF Spaces compatibility)")
        return fig
    except Exception as e:
        logger.error(f"Error creating advanced ROI graph: {e}")
        logger.info("Falling back to Simpler ROI graph")

        simple_fig = _build_simple_roi_figure(daily_medians_with_ma, min_time, max_time)
        try:
            simple_fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
        except Exception as save_error:
            # Saving is best-effort here: the caller still gets a usable figure.
            logger.error(f"Error saving simple ROI graph: {save_error}")
        return simple_fig
def save_roi_to_csv(df):
    """Write the ROI DataFrame to ``optimus_roi_values.csv``.

    Args:
        df: DataFrame holding the ROI data.

    Returns:
        The CSV file name, or None (after logging an error) when ``df`` is
        empty and nothing is written.
    """
    if df.empty:
        logger.error("No ROI data to save to CSV")
        return None

    output_path = "optimus_roi_values.csv"
    # The index is omitted from the written file.
    df.to_csv(output_path, index=False)
    logger.info(f"ROI data saved to {output_path}")
    return output_path
def create_time_series_graph_per_agent(df):
    """Create a stacked Plotly figure with one time-series subplot per agent.

    Each subplot shows a purple line through every value of the agent
    (sorted by timestamp), blue circle markers for rows whose
    ``metric_type`` is ``'APR'`` and red square markers for rows whose
    ``metric_type`` is ``'Performance'``. A zero line and tinted
    background bands separate the positive and negative regions.

    Args:
        df: DataFrame with at least the columns ``agent_id``,
            ``agent_name``, ``timestamp``, ``apr`` and ``metric_type``.

    Returns:
        A ``plotly.graph_objects.Figure``. When ``df`` contains no agents
        the figure only carries a "No agent data available" annotation.

    Side effects:
        Tries to write ``optimus_apr_per_agent_graph.html``; a failure to
        write the file is logged and does not prevent the figure from
        being returned.
    """
    unique_agents = df['agent_id'].unique()
    if len(unique_agents) == 0:
        logger.error("No agent data to plot")
        fig = go.Figure()
        fig.add_annotation(
            text="No agent data available",
            x=0.5, y=0.5,
            showarrow=False, font=dict(size=20)
        )
        return fig

    # Slice the frame once per agent and reuse the slices for both the
    # subplot titles and the traces.
    agent_frames = [df[df['agent_id'] == agent_id] for agent_id in unique_agents]
    agent_names = [frame['agent_name'].iloc[0] for frame in agent_frames]
    row_count = len(agent_frames)

    # make_subplots rejects vertical_spacing > 1 / (rows - 1), so a fixed
    # 0.1 fails for 12+ agents and leaves no plot height at 11. Cap the
    # spacing so it never consumes more than half of the figure height.
    vertical_spacing = 0.1
    if row_count > 1:
        vertical_spacing = min(0.1, 0.5 / (row_count - 1))

    fig = make_subplots(
        rows=row_count, cols=1,
        subplot_titles=[f"Agent: {name}" for name in agent_names],
        vertical_spacing=vertical_spacing
    )

    # Show the shared "APR" / "Performance" legend entries on the first
    # agent that actually has such points, not blindly on the first agent.
    apr_in_legend = False
    perf_in_legend = False

    for row, (agent_data, agent_name) in enumerate(zip(agent_frames, agent_names), start=1):
        first_time = agent_data['timestamp'].min()
        last_time = agent_data['timestamp'].max()

        # Clamp the band limits at zero so the blue band never dips below
        # the zero line and the red band never rises above it.
        apr_max = agent_data['apr'].max()
        apr_min = agent_data['apr'].min()
        upper_bound = 10 if pd.isna(apr_max) else max(float(apr_max), 0.0) * 1.1
        lower_bound = -10 if pd.isna(apr_min) else min(float(apr_min), 0.0) * 1.1

        # Zero line separating the positive and negative regions.
        fig.add_shape(
            type="line", line=dict(dash="solid", width=1.5, color="black"),
            y0=0, y1=0, x0=first_time, x1=last_time,
            row=row, col=1
        )
        # Light blue band above zero.
        fig.add_shape(
            type="rect", fillcolor="rgba(230, 243, 255, 0.3)", line=dict(width=0),
            y0=0, y1=upper_bound,
            x0=first_time, x1=last_time,
            row=row, col=1, layer="below"
        )
        # Light red band below zero.
        fig.add_shape(
            type="rect", fillcolor="rgba(255, 230, 230, 0.3)", line=dict(width=0),
            y0=lower_bound, y1=0,
            x0=first_time, x1=last_time,
            row=row, col=1, layer="below"
        )

        apr_data = agent_data[agent_data['metric_type'] == 'APR']
        perf_data = agent_data[agent_data['metric_type'] == 'Performance']
        ordered = agent_data.sort_values('timestamp')

        # Line connecting every point of this agent in time order.
        fig.add_trace(
            go.Scatter(
                x=ordered['timestamp'],
                y=ordered['apr'],
                mode='lines',
                line=dict(color='purple', width=2),
                name=f'{agent_name}',
                legendgroup=agent_name,
                showlegend=(row == 1),  # Only show in legend once
                hovertemplate='Time: %{x}<br>Value: %{y:.2f}<extra></extra>'
            ),
            row=row, col=1
        )

        if not apr_data.empty:
            fig.add_trace(
                go.Scatter(
                    x=apr_data['timestamp'],
                    y=apr_data['apr'],
                    mode='markers',
                    marker=dict(color='blue', size=10, symbol='circle'),
                    name='APR',
                    legendgroup='APR',
                    showlegend=not apr_in_legend,
                    hovertemplate='Time: %{x}<br>APR: %{y:.2f}<extra></extra>'
                ),
                row=row, col=1
            )
            apr_in_legend = True

        if not perf_data.empty:
            fig.add_trace(
                go.Scatter(
                    x=perf_data['timestamp'],
                    y=perf_data['apr'],
                    mode='markers',
                    marker=dict(color='red', size=10, symbol='square'),
                    name='Performance',
                    legendgroup='Performance',
                    showlegend=not perf_in_legend,
                    hovertemplate='Time: %{x}<br>Performance: %{y:.2f}<extra></extra>'
                ),
                row=row, col=1
            )
            perf_in_legend = True

        fig.update_xaxes(title_text="Time", row=row, col=1)
        fig.update_yaxes(title_text="Value", row=row, col=1, gridcolor='rgba(0,0,0,0.1)')

    fig.update_layout(
        height=400 * row_count,
        width=1000,
        title_text="APR and Performance Values per Agent",
        template="plotly_white",
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        ),
        margin=dict(r=20, l=20, t=30, b=20),
        hovermode="closest"
    )

    # The HTML copy is only kept for reference (PNG export is skipped to
    # avoid Kaleido/Chrome dependencies on HF Spaces), so a failed write
    # must not discard the figure that was just built.
    graph_file = "optimus_apr_per_agent_graph.html"
    try:
        fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
    except OSError as exc:
        logger.error(f"Could not save per-agent graph to {graph_file}: {exc}")
    else:
        logger.info(f"Per-agent graph saved to {graph_file} (PNG generation skipped for HF Spaces compatibility)")

    # Return the figure object for direct use in Gradio
    return fig
def write_debug_info(df, fig):
    """No-op placeholder for the former debug-file writer.

    Debug file generation is switched off for HF Spaces to prevent
    Content-Length issues, so both arguments are ignored.

    Returns:
        Always ``True``.
    """
    debug_output_skipped = True
    return debug_output_skipped
def _combined_apr_filter_rows(df):
    """Select the APR rows that feed the combined graph.

    Keeps rows whose ``metric_type`` is ``'APR'``, drops values outside
    +/-500 for rows timestamped before June 22, 2025 (later rows are kept
    unfiltered) and finally restricts the result to the hard-coded window
    June 6, 2025 through the end of July 31, 2025.
    """
    apr_rows = df[df['metric_type'] == 'APR'].copy()
    logger.info(f"After metric_type == 'APR' filter: {len(apr_rows)} records")

    # The +/-500% outlier filter only applies before the cutoff date.
    cutoff_date = datetime(2025, 6, 22)
    before_cutoff = apr_rows[apr_rows['timestamp'] < cutoff_date]
    after_cutoff = apr_rows[apr_rows['timestamp'] >= cutoff_date]
    before_cutoff_filtered = before_cutoff[before_cutoff['apr'].between(-500, 500)]
    logger.info(
        f"APR filtering before June 22, 2025: {len(before_cutoff)} -> "
        f"{len(before_cutoff_filtered)} data points"
    )
    apr_rows = pd.concat([before_cutoff_filtered, after_cutoff], ignore_index=True)

    min_date = datetime(2025, 6, 6)
    max_date = datetime(2025, 7, 31, 23, 59, 59)  # Include all of July 31st
    in_window = (apr_rows['timestamp'] >= min_date) & (apr_rows['timestamp'] <= max_date)
    logger.info(f"Date filtering: {len(apr_rows)} -> {int(in_window.sum())} data points")
    return apr_rows[in_window]


def _combined_apr_load_roi_daily():
    """Load the per-agent daily ROI aggregation used for active-agent counts.

    Returns ``None`` when the CSV data cannot be loaded, is empty or has
    no ``roi`` column, so callers can fall back to the APR-based counts.
    """
    try:
        roi_source, _ = load_apr_data_from_csv()
        if roi_source.empty or 'roi' not in roi_source.columns:
            return None
        roi_rows = roi_source[
            (roi_source['metric_type'] == 'APR') & (roi_source['roi'].notna())
        ].copy()
        return aggregate_daily_data(roi_rows, 'roi')
    except Exception as exc:
        logger.error(f"Could not load ROI data for active agent counts: {exc}")
        return None


def _combined_apr_active_agents(timestamp, roi_daily, fallback_daily):
    """Count the distinct agents recorded on ``timestamp``.

    The ROI aggregation is preferred so the counts match the ROI graph;
    ``fallback_daily`` is used when ROI data is unavailable or unusable,
    and 0 is returned when neither source exists.
    """
    if roi_daily is not None:
        try:
            return len(roi_daily[roi_daily['timestamp'] == timestamp]['agent_id'].unique())
        except Exception as exc:
            logger.error(f"ROI-based agent count failed, using fallback data: {exc}")
    if fallback_daily is None:
        return 0
    return len(fallback_daily[fallback_daily['timestamp'] == timestamp]['agent_id'].unique())


def _combined_apr_hover_text(medians_with_ma, label, roi_daily, fallback_daily):
    """Build one hover string per row of a moving-average frame."""
    hover_text = []
    for _, row in medians_with_ma.iterrows():
        timestamp = row['timestamp']
        formatted_timestamp = timestamp.strftime('%Y-%m-%d')
        active_agents = _combined_apr_active_agents(timestamp, roi_daily, fallback_daily)
        hover_text.append(
            f"Date: {formatted_timestamp}<br>{label}: {row['moving_avg']:.2f}%"
            f"<br>Active agents: {active_agents}"
        )
    return hover_text


def _combined_apr_fallback_figure(apr_data, min_time, max_time):
    """Build the simplified figure used when saving the main graph fails.

    Plots every agent's raw APR points (only the five agents with the
    most points are visible by default) together with a 6-hour trailing
    average and a cumulative average of all APR values.
    """
    max_visible_agents = 5
    bold_tick_font = dict(size=14, family="Arial, sans-serif", color="black", weight="bold")

    simple_fig = go.Figure()
    simple_fig.add_shape(
        type="line",
        line=dict(dash="solid", width=1.5, color="black"),
        y0=0, y1=0,
        x0=min_time, x1=max_time
    )

    unique_agents = apr_data['agent_name'].unique()

    # NOTE(review): the original fallback read an `avg_apr_data` frame that
    # was never assigned (leftover from a removed code path) and therefore
    # always raised. It is rebuilt here as the mean APR per timestamp,
    # which matches how the fallback consumes it; confirm the intent.
    avg_apr_data = (
        apr_data.groupby('timestamp', as_index=False)['apr'].mean().sort_values('timestamp')
    )

    if not avg_apr_data.empty:
        time_window = pd.Timedelta(hours=6)
        moving_avg = []
        infinite_avg = []
        for _, row in avg_apr_data.iterrows():
            current_time = row['timestamp']
            up_to_now = apr_data[apr_data['timestamp'] <= current_time]
            in_window = up_to_now[up_to_now['timestamp'] >= current_time - time_window]
            # Use the point's own value if a window holds no data.
            moving_avg.append(in_window['apr'].mean() if not in_window.empty else row['apr'])
            infinite_avg.append(up_to_now['apr'].mean() if not up_to_now.empty else row['apr'])

        palette = px.colors.qualitative.Plotly
        top_agents = apr_data['agent_name'].value_counts().nlargest(max_visible_agents).index.tolist()

        for position, agent_name in enumerate(unique_agents):
            agent_rows = apr_data[apr_data['agent_name'] == agent_name]
            simple_fig.add_trace(
                go.Scatter(
                    x=agent_rows['timestamp'],
                    y=agent_rows['apr'],
                    mode='markers',
                    name=f'Agent: {agent_name}',
                    marker=dict(size=10, color=palette[position % len(palette)]),
                    hovertemplate=f'Time: %{{x}}<br>APR: %{{y:.2f}}<br>Agent: {agent_name}<extra></extra>',
                    visible=agent_name in top_agents  # Only top agents visible by default
                )
            )

        simple_fig.add_trace(
            go.Scatter(
                x=avg_apr_data['timestamp'],
                y=moving_avg,
                mode='lines',
                name='Average APR (6h window)',
                line=dict(width=2, color='red')
            )
        )
        simple_fig.add_trace(
            go.Scatter(
                x=avg_apr_data['timestamp'],
                y=infinite_avg,
                mode='lines',
                name='Cumulative Average APR (all data)',
                line=dict(width=4, color='green')
            )
        )

    simple_fig.update_layout(
        title=dict(
            text="Optimus Agents",
            font=dict(family="Arial, sans-serif", size=22, color="black", weight="bold")
        ),
        xaxis_title=None,
        yaxis_title=None,
        yaxis=dict(
            range=[-10, 10],
            tickformat=".2f",
            tickfont=dict(size=12)
        ),
        height=600,
        autosize=True,
        template="plotly_white",
        margin=dict(r=30, l=120, t=40, b=50),
        legend=dict(font=dict(bold_tick_font))
    )

    # Rotated annotations act as the titles of the two y-axis regions.
    for y_position, text in ((-25, "Percent drawdown [%]"), (50, "Agent APR [%]")):
        simple_fig.add_annotation(
            x=-0.08,
            y=y_position,
            xref="paper",
            yref="y",
            text=text,
            showarrow=False,
            font=dict(bold_tick_font),
            textangle=-90,
            align="center"
        )

    simple_fig.update_xaxes(
        autorange=True,
        tickformat="%b %d",
        tickangle=-30,
        tickfont=dict(bold_tick_font),
        title=None
    )
    simple_fig.update_yaxes(tickfont=dict(bold_tick_font))

    if len(unique_agents) > max_visible_agents:
        simple_fig.add_annotation(
            text=f"Note: Only showing top {max_visible_agents} agents by default. Toggle others in legend.",
            xref="paper", yref="paper",
            x=0.5, y=1.05,
            showarrow=False,
            font=dict(size=12, color="gray"),
            align="center"
        )

    return simple_fig


def create_combined_time_series_graph(df):
    """Create the combined APR graph across all agents.

    The figure contains, per agent, hidden marker traces of the raw APR
    (and adjusted APR when present) values, plus visible lines with the
    7-day moving average of the daily median APR and, when available,
    of the daily median adjusted APR.

    Args:
        df: DataFrame with at least ``timestamp``, ``apr``,
            ``metric_type``, ``agent_id`` and ``agent_name`` columns and
            optionally ``adjusted_apr``. The frame is not modified.

    Returns:
        A ``plotly.graph_objects.Figure``. For empty input the figure
        only carries a "No data available" annotation. If saving the
        HTML copy fails, a simplified fallback figure is returned.

    Side effects:
        Tries to write ``optimus_apr_combined_graph.html``.
    """
    if len(df) == 0:
        logger.error("No data to plot combined graph")
        fig = go.Figure()
        fig.add_annotation(
            text="No data available",
            x=0.5, y=0.5,
            showarrow=False, font=dict(size=20)
        )
        return fig

    # Work on a copy so the dtype coercion below does not leak into the
    # caller's DataFrame.
    df = df.copy()
    df['apr'] = df['apr'].astype(float)
    df['metric_type'] = df['metric_type'].astype(str)

    min_time = df['timestamp'].min()
    max_time = df['timestamp'].max()

    logger.info(f"Graph data - shape: {df.shape}, columns: {df.columns}")
    logger.info(f"Graph data - unique agents: {df['agent_name'].unique().tolist()}")
    logger.info(f"Graph data - min APR: {df['apr'].min()}, max APR: {df['apr'].max()}")

    fig = go.Figure()

    # Tinted bands use a fixed +/-200 extent so outliers do not stretch them.
    fig.add_shape(
        type="rect",
        fillcolor="rgba(230, 243, 255, 0.3)",
        line=dict(width=0),
        y0=0, y1=200,
        x0=min_time, x1=max_time,
        layer="below"
    )
    fig.add_shape(
        type="rect",
        fillcolor="rgba(255, 230, 230, 0.3)",
        line=dict(width=0),
        y0=-200, y1=0,
        x0=min_time, x1=max_time,
        layer="below"
    )
    fig.add_shape(
        type="line",
        line=dict(dash="solid", width=1.5, color="black"),
        y0=0, y1=0,
        x0=min_time, x1=max_time
    )

    apr_data = _combined_apr_filter_rows(df)

    # Daily mean per agent -> daily median across agents -> 7-day moving average.
    daily_agent_data = aggregate_daily_data(apr_data, 'apr')
    daily_medians = calculate_daily_medians(daily_agent_data, 'apr')
    daily_medians_with_ma = calculate_moving_average_medians(daily_medians, 'apr', window_days=7)

    # Same pipeline for adjusted APR, when the column carries any values.
    daily_agent_data_adjusted = None
    daily_medians_adjusted_with_ma = None
    if 'adjusted_apr' in apr_data.columns and apr_data['adjusted_apr'].notna().any():
        apr_data_with_adjusted = apr_data[apr_data['adjusted_apr'].notna()].copy()
        daily_agent_data_adjusted = aggregate_daily_data(apr_data_with_adjusted, 'adjusted_apr')
        daily_medians_adjusted = calculate_daily_medians(daily_agent_data_adjusted, 'adjusted_apr')
        daily_medians_adjusted_with_ma = calculate_moving_average_medians(
            daily_medians_adjusted, 'adjusted_apr', window_days=7
        )

    logger.info(
        f"NEW APPROACH APR: Processed {len(apr_data)} raw points → "
        f"{len(daily_agent_data)} daily agent values → {len(daily_medians)} daily medians"
    )

    # Raw per-agent points are added hidden; only the median lines show by default.
    palette = px.colors.qualitative.Plotly
    for position, agent_name in enumerate(apr_data['agent_name'].unique()):
        agent_data = apr_data[apr_data['agent_name'] == agent_name]
        agent_color = palette[position % len(palette)]
        timestamps = agent_data['timestamp'].tolist()

        fig.add_trace(
            go.Scatter(
                x=timestamps,
                y=agent_data['apr'].tolist(),
                mode='markers',
                marker=dict(
                    color=agent_color,
                    symbol='circle',
                    size=10,
                    line=dict(width=1, color='black')
                ),
                name=f'Agent: {agent_name} (APR)',
                hovertemplate=f'Time: %{{x}}<br>APR: %{{y:.2f}}<br>Agent: {agent_name}<extra></extra>',
                visible=False
            )
        )

        if 'adjusted_apr' in agent_data.columns and agent_data['adjusted_apr'].notna().any():
            fig.add_trace(
                go.Scatter(
                    x=timestamps,
                    y=agent_data['adjusted_apr'].tolist(),
                    mode='markers',
                    marker=dict(
                        color=agent_color,
                        symbol='diamond',  # Different symbol for adjusted APR
                        size=10,
                        line=dict(width=1, color='black')
                    ),
                    name=f'Agent: {agent_name} (Adjusted APR)',
                    hovertemplate=f'Time: %{{x}}<br>Adjusted APR: %{{y:.2f}}<br>Agent: {agent_name}<extra></extra>',
                    visible=False
                )
            )

    # The ROI aggregation is identical for every hover row, so load it
    # once instead of re-reading the CSV for each day of each line.
    roi_daily_agent_data = _combined_apr_load_roi_daily()

    fig.add_trace(
        go.Scatter(
            x=daily_medians_with_ma['timestamp'].tolist(),
            y=daily_medians_with_ma['moving_avg'].tolist(),
            mode='lines',
            line=dict(color='red', width=3, shape='spline', smoothing=1.3),
            name='Median APR (7d window)',
            hovertext=_combined_apr_hover_text(
                daily_medians_with_ma, 'Median APR (7d window)',
                roi_daily_agent_data, daily_agent_data
            ),
            hoverinfo='text',
            visible=True
        )
    )
    logger.info(f"Added 7-day moving average of daily median APR trace with {len(daily_medians_with_ma)} points")

    if daily_medians_adjusted_with_ma is not None and not daily_medians_adjusted_with_ma.empty:
        fig.add_trace(
            go.Scatter(
                x=daily_medians_adjusted_with_ma['timestamp'].tolist(),
                y=daily_medians_adjusted_with_ma['moving_avg'].tolist(),
                mode='lines',
                line=dict(color='green', width=3, shape='spline', smoothing=1.3),
                name='Median Adjusted APR (7d window)',
                hovertext=_combined_apr_hover_text(
                    daily_medians_adjusted_with_ma, 'Median Adjusted APR (7d window)',
                    roi_daily_agent_data, daily_agent_data_adjusted
                ),
                hoverinfo='text',
                visible=True
            )
        )
        logger.info(
            f"Added 7-day moving average of daily median Adjusted APR trace with "
            f"{len(daily_medians_adjusted_with_ma)} points"
        )
    else:
        logger.warning("No adjusted APR moving average data available to plot")

    bold_tick_font = dict(size=14, family="Arial, sans-serif", color="black", weight="bold")
    bold_label_font = dict(size=16, family="Arial, sans-serif", color="black", weight="bold")

    fig.update_layout(
        title=dict(
            text="Optimus Agents",
            font=dict(family="Arial, sans-serif", size=22, color="black", weight="bold")
        ),
        xaxis_title=None,
        yaxis_title=None,  # Region titles are drawn as annotations below
        template="plotly_white",
        height=600,
        autosize=True,  # No fixed width so the chart stays responsive
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
            groupclick="toggleitem",
            font=dict(bold_tick_font)
        ),
        margin=dict(r=30, l=120, t=40, b=50),
        hovermode="closest"
    )

    # Two rotated annotations replace a single y-axis title: one for the
    # negative (drawdown) region and one for the positive (APR) region.
    for y_position, text in ((-25, "Percent drawdown (%)"), (75, "Agent APR (%)")):
        fig.add_annotation(
            x=-0.08,  # Left of the axis so it does not overlap tick labels
            y=y_position,
            xref="paper",
            yref="y",
            text=text,
            showarrow=False,
            font=dict(bold_label_font),
            textangle=-90,
            align="center"
        )

    fig.update_yaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(0,0,0,0.1)',
        range=[-50, 200],  # Clip bottom at -50, reasonable top at 200
        tickformat=".2f",
        tickfont=dict(bold_tick_font),
        title=None
    )

    # Hard-coded x-axis range: June 6 to July 31, 2025.
    axis_min_date = datetime(2025, 6, 6)
    axis_max_date = datetime(2025, 7, 31)
    logger.info(f"APR Graph - Hardcoded date range: min_date = {axis_min_date}, max_date = {axis_max_date}")
    fig.update_xaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(0,0,0,0.1)',
        range=[axis_min_date, axis_max_date],
        autorange=False,
        tickformat="%b %d",
        tickangle=-30,
        tickfont=dict(bold_tick_font),
        title=None
    )

    try:
        write_debug_info(df, fig)
        # PNG export is skipped to avoid Kaleido/Chrome dependencies on HF Spaces.
        graph_file = "optimus_apr_combined_graph.html"
        fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
        logger.info(f"Combined graph saved to {graph_file} (PNG generation skipped for HF Spaces compatibility)")
        return fig
    except Exception as e:
        logger.error(f"Error creating advanced graph: {e}")
        logger.info("Falling back to Simpler graph")
        return _combined_apr_fallback_figure(apr_data, min_time, max_time)
def save_to_csv(df):
    """Write the APR data and its derived statistics to CSV files.

    ``df`` is written to ``optimus_apr_values.csv`` and the result of
    ``generate_statistics_from_data(df)`` to ``optimus_apr_statistics.csv``,
    both relative to the current working directory. When ``df`` carries
    non-null ``adjusted_apr`` values, per-agent and overall adjusted-APR
    statistics are logged as well.

    Returns the path of the APR CSV file, or ``None`` when ``df`` is empty.
    """
    if df.empty:
        logger.error("No APR data to save to CSV")
        return None

    csv_file = "optimus_apr_values.csv"
    stats_csv = "optimus_apr_statistics.csv"

    # Raw data first, then the statistics derived from it.
    df.to_csv(csv_file, index=False)
    logger.info(f"APR data saved to {csv_file}")

    stats_df = generate_statistics_from_data(df)
    stats_df.to_csv(stats_csv, index=False)
    logger.info(f"Statistics saved to {stats_csv}")

    # Nothing more to report unless adjusted APR values are present.
    has_adjusted = 'adjusted_apr' in df.columns and df['adjusted_apr'].notna().any()
    if not has_adjusted:
        return csv_file

    with_adjusted = stats_df[stats_df['avg_adjusted_apr'].notna()]
    logger.info(f"Agents with adjusted APR data: {len(with_adjusted)} out of {len(stats_df)}")

    # The 'ALL' row is the aggregate; it is reported separately below.
    per_agent = with_adjusted[with_adjusted['agent_id'] != 'ALL']
    for _, row in per_agent.iterrows():
        logger.info(f"Agent {row['agent_name']} adjusted APR stats: avg={row['avg_adjusted_apr']:.2f}, min={row['min_adjusted_apr']:.2f}, max={row['max_adjusted_apr']:.2f}")

    overall = stats_df[stats_df['agent_id'] == 'ALL']
    if not overall.empty and pd.notna(overall['avg_adjusted_apr'].iloc[0]):
        avg_value = overall['avg_adjusted_apr'].iloc[0]
        min_value = overall['min_adjusted_apr'].iloc[0]
        max_value = overall['max_adjusted_apr'].iloc[0]
        logger.info(f"Overall adjusted APR stats: avg={avg_value:.2f}, min={min_value:.2f}, max={max_value:.2f}")

    return csv_file
def _format_latest_timestamp(timestamps):
    """Return the newest value in ``timestamps`` as 'YYYY-MM-DD HH:MM:SS'.

    Values are coerced with ``pd.to_datetime`` so string timestamps (e.g. a
    frame reloaded from CSV) work too. Returns ``None`` when no valid
    timestamp exists, because ``NaT`` does not support ``strftime``.
    """
    latest = pd.to_datetime(timestamps, errors='coerce').max()
    if pd.isna(latest):
        return None
    return latest.strftime('%Y-%m-%d %H:%M:%S')


def _summarize_metric_rows(rows, agent_id, agent_name):
    """Build one statistics record for ``rows`` labelled with the given agent."""
    apr_rows = rows[rows['metric_type'] == 'APR']
    perf_rows = rows[rows['metric_type'] == 'Performance']

    # Adjusted-APR statistics are only reported when at least one value exists.
    has_adjusted = (
        'adjusted_apr' in apr_rows.columns
        and apr_rows['adjusted_apr'].notna().any()
    )

    return {
        'agent_id': agent_id,
        'agent_name': agent_name,
        'total_points': len(rows),
        'apr_points': len(apr_rows),
        'performance_points': len(perf_rows),
        # ``eq(False)`` mirrors ``== False``: missing flags do not count as real.
        'real_apr_points': len(apr_rows[apr_rows['is_dummy'].eq(False)]),
        'real_performance_points': len(perf_rows[perf_rows['is_dummy'].eq(False)]),
        'avg_apr': apr_rows['apr'].mean() if not apr_rows.empty else None,
        # Performance values are read from the same 'apr' column.
        'avg_performance': perf_rows['apr'].mean() if not perf_rows.empty else None,
        'max_apr': apr_rows['apr'].max() if not apr_rows.empty else None,
        'min_apr': apr_rows['apr'].min() if not apr_rows.empty else None,
        'avg_adjusted_apr': apr_rows['adjusted_apr'].mean() if has_adjusted else None,
        'max_adjusted_apr': apr_rows['adjusted_apr'].max() if has_adjusted else None,
        'min_adjusted_apr': apr_rows['adjusted_apr'].min() if has_adjusted else None,
        'latest_timestamp': _format_latest_timestamp(rows['timestamp']) if not rows.empty else None,
    }


def generate_statistics_from_data(df):
    """Generate per-agent and overall statistics from the APR data.

    Args:
        df: DataFrame with the columns ``agent_id``, ``agent_name``,
            ``metric_type`` ('APR' / 'Performance'), ``is_dummy``, ``apr``
            and ``timestamp``; ``adjusted_apr`` is optional.

    Returns:
        A DataFrame with one row per distinct ``agent_id`` (in order of first
        appearance) followed by an aggregate row whose ``agent_id`` is
        ``'ALL'``. An empty DataFrame is returned when ``df`` is empty.
        ``latest_timestamp`` is ``None`` for groups without a valid timestamp.
    """
    if df.empty:
        return pd.DataFrame()

    stats_list = []
    for agent_id in df['agent_id'].unique():
        agent_rows = df[df['agent_id'] == agent_id]
        agent_name = agent_rows['agent_name'].iloc[0]
        stats_list.append(_summarize_metric_rows(agent_rows, agent_id, agent_name))

    # Aggregate row over every agent.
    stats_list.append(_summarize_metric_rows(df, 'ALL', 'All Agents'))
    return pd.DataFrame(stats_list)
| # Create dummy functions for the commented out imports | |
def create_transcation_visualizations():
    """Stand-in for the disabled transaction charts.

    Returns an empty Plotly figure carrying a single centered notice.
    """
    notice = dict(
        text="Blockchain data loading disabled - placeholder visualization",
        x=0.5,
        y=0.5,
        xref="paper",
        yref="paper",
        showarrow=False,
        font=dict(size=20),
    )
    placeholder = go.Figure()
    placeholder.add_annotation(**notice)
    return placeholder
def create_active_agents_visualizations():
    """Stand-in for the disabled active-agents charts.

    Returns an empty Plotly figure carrying a single centered notice.
    """
    notice = dict(
        text="Blockchain data loading disabled - placeholder visualization",
        x=0.5,
        y=0.5,
        xref="paper",
        yref="paper",
        showarrow=False,
        font=dict(size=20),
    )
    placeholder = go.Figure()
    placeholder.add_annotation(**notice)
    return placeholder
| # Dummy blockchain functions to replace the commented ones | |
def get_transfers(integrator: str, wallet: str) -> Dict[str, List[Any]]:
    """Dummy replacement for the disabled blockchain transfer lookup.

    Args:
        integrator: Ignored by this stub.
        wallet: Ignored by this stub.

    Returns:
        A fresh ``{"transfers": []}`` mapping on every call. The return
        annotation previously claimed ``str`` although a dict is returned.
    """
    return {"transfers": []}
def fetch_and_aggregate_transactions():
    """Stub: return an empty list and an empty dict as a 2-tuple."""
    no_transactions = []
    no_aggregates = {}
    return no_transactions, no_aggregates
| # Function to parse the transaction data and prepare it for visualization | |
def process_transactions_and_agents(data):
    """Stub that ignores ``data`` and returns three empty DataFrames.

    The result is ``(transactions, agents, agents_weekly)``; only the agents
    frame has predefined columns (``date`` and ``agent_count``).
    """
    empty_agents = pd.DataFrame(columns=['date', 'agent_count'])
    return pd.DataFrame(), empty_agents, pd.DataFrame()
| # Function to create visualizations based on the metrics | |
def create_visualizations():
    """Return four independent placeholder figures.

    The tuple order is (swaps per chain, bridges per chain, registered
    agents, TVL); every figure only shows a "loading disabled" notice.
    """
    def _disabled_placeholder():
        # Each caller gets its own Figure instance.
        placeholder = go.Figure()
        placeholder.add_annotation(
            text="Blockchain data loading disabled - placeholder visualization",
            x=0.5, y=0.5, xref="paper", yref="paper",
            showarrow=False, font=dict(size=20)
        )
        return placeholder

    fig_swaps_chain, fig_bridges_chain, fig_agents_registered, fig_tvl = (
        _disabled_placeholder() for _ in range(4)
    )
    return fig_swaps_chain, fig_bridges_chain, fig_agents_registered, fig_tvl
| # Modify dashboard function to make the plot container responsive | |
def dashboard():
    """Build the Gradio Blocks app with the APR and ROI tabs.

    Each tab contains a refresh button, a Plotly graph, checkboxes that toggle
    the moving-average lines, and a read-only status box. The graphs start as
    placeholders and are populated when the refresh button is clicked.

    Returns:
        The configured ``gr.Blocks`` instance (not yet launched).
    """
    def _message_figure(text, color=None):
        """Return an empty figure showing ``text`` centered in the plot area."""
        font = dict(size=15)
        if color is not None:
            font["color"] = color
        fig = go.Figure()
        fig.add_annotation(
            text=text,
            # Paper coordinates keep the message centered on an axis-less figure.
            x=0.5, y=0.5, xref="paper", yref="paper",
            showarrow=False,
            font=font
        )
        return fig

    with gr.Blocks() as demo:
        gr.Markdown("# Average Optimus Agent Performance")
        # Create tabs for APR and ROI metrics
        with gr.Tabs():
            # APR Metrics tab
            with gr.Tab("APR Metrics"):
                with gr.Column():
                    refresh_apr_btn = gr.Button("Refresh APR Data")
                # Create container for plotly figure with responsive sizing
                with gr.Column():
                    combined_apr_graph = gr.Plot(label="APR for All Agents", elem_id="responsive_apr_plot")
                # Create compact toggle controls at the bottom of the graph
                with gr.Row(visible=True):
                    gr.Markdown("##### Toggle Graph Lines", elem_id="apr_toggle_title")
                with gr.Row():
                    with gr.Column():
                        with gr.Row(elem_id="apr_toggle_container"):
                            with gr.Column(scale=1, min_width=150):
                                apr_toggle = gr.Checkbox(label="APR Average", value=True, elem_id="apr_toggle")
                            with gr.Column(scale=1, min_width=150):
                                adjusted_apr_toggle = gr.Checkbox(label="ETH Adjusted APR Average", value=True, elem_id="adjusted_apr_toggle")
                # Add a text area for status messages
                apr_status_text = gr.Textbox(label="Status", value="Ready", interactive=False)
            # ROI Metrics tab
            with gr.Tab("ROI Metrics"):
                with gr.Column():
                    refresh_roi_btn = gr.Button("Refresh ROI Data")
                # Create container for plotly figure with responsive sizing
                with gr.Column():
                    combined_roi_graph = gr.Plot(label="ROI for All Agents", elem_id="responsive_roi_plot")
                # Create compact toggle controls at the bottom of the graph
                with gr.Row(visible=True):
                    gr.Markdown("##### Toggle Graph Lines", elem_id="roi_toggle_title")
                with gr.Row():
                    with gr.Column():
                        with gr.Row(elem_id="roi_toggle_container"):
                            with gr.Column(scale=1, min_width=150):
                                roi_toggle = gr.Checkbox(label="ROI Average", value=True, elem_id="roi_toggle")
                # Add a text area for status messages
                roi_status_text = gr.Textbox(label="Status", value="Ready", interactive=False)
        # Add custom CSS for making the plots responsive
        gr.HTML("""
        <style>
        /* Make plots responsive */
        #responsive_apr_plot, #responsive_roi_plot {
        width: 100% !important;
        max-width: 100% !important;
        }
        #responsive_apr_plot > div, #responsive_roi_plot > div {
        width: 100% !important;
        height: auto !important;
        min-height: 500px !important;
        }
        /* Toggle checkbox styling */
        #apr_toggle .gr-checkbox {
        accent-color: #e74c3c !important;
        }
        #adjusted_apr_toggle .gr-checkbox {
        accent-color: #2ecc71 !important;
        }
        #roi_toggle .gr-checkbox {
        accent-color: #3498db !important;
        }
        /* Make the toggle section more compact */
        #apr_toggle_title, #roi_toggle_title {
        margin-bottom: 0;
        margin-top: 10px;
        }
        #apr_toggle_container, #roi_toggle_container {
        margin-top: 5px;
        }
        /* Style the checkbox labels */
        .gr-form.gr-box {
        border: none !important;
        background: transparent !important;
        }
        /* Make checkboxes and labels appear on the same line */
        .gr-checkbox-container {
        display: flex !important;
        align-items: center !important;
        }
        /* Colored indicators removed to fix HF Spaces rendering issues */
        /* The checkbox accent colors above provide sufficient visual distinction */
        </style>
        """)

        def update_apr_graph(show_apr_ma=True, show_adjusted_apr_ma=True):
            """Regenerate the APR figure and apply the line-visibility toggles."""
            try:
                combined_fig, _ = generate_apr_visualizations()
                # Only the two moving-average traces are controlled by the toggles.
                for trace in combined_fig.data:
                    if trace.name == 'Median APR (7d window)':
                        trace.visible = show_apr_ma
                    elif trace.name == 'Average ETH Adjusted APR (3d window)':
                        trace.visible = show_adjusted_apr_ma
                return combined_fig
            except Exception as e:
                # UI boundary: show the failure in the plot instead of crashing the event.
                logger.exception("Error generating APR visualization")
                return _message_figure(f"Error: {str(e)}", color="red")

        def update_roi_graph(show_roi_ma=True):
            """Regenerate the ROI figure and apply the line-visibility toggle."""
            try:
                combined_fig, _ = generate_roi_visualizations()
                for trace in combined_fig.data:
                    if trace.name == 'Median ROI (7d window)':
                        trace.visible = show_roi_ma
                return combined_fig
            except Exception as e:
                logger.exception("Error generating ROI visualization")
                return _message_figure(f"Error: {str(e)}", color="red")

        # Initialize both graphs with a placeholder until the user refreshes
        combined_apr_graph.value = _message_figure("Click 'Refresh APR Data' to load APR graph")
        combined_roi_graph.value = _message_figure("Click 'Refresh ROI Data' to load ROI graph")

        def refresh_apr_data(show_apr_ma=True, show_adjusted_apr_ma=True):
            """Refresh APR data from the database and update the visualization.

            The toggle states arrive as event inputs: a component's ``.value``
            attribute only holds its construction-time default, not what the
            user currently has selected.
            """
            try:
                logger.info("Manually refreshing APR data...")
                fetch_apr_data_from_db()
                # Verify data was fetched successfully
                if global_df is None or len(global_df) == 0:
                    logger.error("Failed to fetch APR data")
                    return combined_apr_graph.value, "Error: Failed to fetch APR data. Check the logs for details."
                # Log info about fetched data with focus on adjusted_apr
                may_10_2025 = datetime(2025, 5, 10)
                if 'timestamp' in global_df and 'adjusted_apr' in global_df:
                    after_may_10 = global_df[global_df['timestamp'] >= may_10_2025]
                    with_adjusted_after_may_10 = after_may_10[after_may_10['adjusted_apr'].notna()]
                    logger.info(f"Data points after May 10th, 2025: {len(after_may_10)}")
                    logger.info(f"Data points with adjusted_apr after May 10th, 2025: {len(with_adjusted_after_may_10)}")
                logger.info("Generating new APR visualization...")
                new_graph = update_apr_graph(show_apr_ma, show_adjusted_apr_ma)
                return new_graph, "APR data refreshed successfully"
            except Exception as e:
                logger.exception(f"Error refreshing APR data: {e}")
                return combined_apr_graph.value, f"Error: {str(e)}"

        def refresh_roi_data(show_roi_ma=True):
            """Refresh ROI data from the database and update the visualization.

            The toggle state arrives as an event input for the same reason as
            in ``refresh_apr_data``.
            """
            try:
                logger.info("Manually refreshing ROI data...")
                fetch_apr_data_from_db()  # This also fetches ROI data
                # Verify data was fetched successfully
                if global_roi_df is None or len(global_roi_df) == 0:
                    logger.error("Failed to fetch ROI data")
                    return combined_roi_graph.value, "Error: Failed to fetch ROI data. Check the logs for details."
                logger.info("Generating new ROI visualization...")
                new_graph = update_roi_graph(show_roi_ma)
                return new_graph, "ROI data refreshed successfully"
            except Exception as e:
                logger.exception(f"Error refreshing ROI data: {e}")
                return combined_roi_graph.value, f"Error: {str(e)}"

        # Refresh buttons: pass the live checkbox states as inputs
        refresh_apr_btn.click(
            fn=refresh_apr_data,
            inputs=[apr_toggle, adjusted_apr_toggle],
            outputs=[combined_apr_graph, apr_status_text]
        )
        refresh_roi_btn.click(
            fn=refresh_roi_data,
            inputs=[roi_toggle],
            outputs=[combined_roi_graph, roi_status_text]
        )
        # Toggle switches for APR: either checkbox redraws with both states
        apr_toggle.change(
            fn=update_apr_graph,
            inputs=[apr_toggle, adjusted_apr_toggle],
            outputs=[combined_apr_graph]
        )
        adjusted_apr_toggle.change(
            fn=update_apr_graph,
            inputs=[apr_toggle, adjusted_apr_toggle],
            outputs=[combined_apr_graph]
        )
        # Toggle switch for ROI
        roi_toggle.change(
            fn=update_roi_graph,
            inputs=[roi_toggle],
            outputs=[combined_roi_graph]
        )
    return demo
| # Launch the dashboard | |
if __name__ == "__main__":
    # NOTE(review): launch() presumably blocks when run as a script, so any
    # definition placed after this guard would not be bound while the app is
    # serving - confirm and consider moving the guard to the end of the file.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
        "quiet": True,  # Reduce startup logging
    }
    dashboard().launch(**launch_options)
def generate_adjusted_apr_report():
    """Generate a report about ``adjusted_apr`` availability in ``global_df``.

    The report is written to ``adjusted_apr_report.txt`` in the current
    working directory and contains overall coverage, per-agent coverage
    (sorted by coverage, highest first) and a comparison of coverage before
    and after May 10th, 2025.

    Returns:
        The report path, or ``None`` when ``global_df`` is missing, empty or
        has no ``adjusted_apr`` column.
    """
    global global_df
    if global_df is None or global_df.empty or 'adjusted_apr' not in global_df.columns:
        logger.warning("No adjusted_apr data available for report generation")
        return None

    def _coverage(frame):
        """Return (row count, rows with adjusted_apr, coverage percentage)."""
        total = len(frame)
        with_adjusted = frame['adjusted_apr'].notna().sum()
        pct = (with_adjusted / total) * 100 if total > 0 else 0
        return total, with_adjusted, pct

    report_path = "adjusted_apr_report.txt"
    # Explicit UTF-8 so agent names are written the same way on every platform.
    with open(report_path, "w", encoding="utf-8") as f:
        f.write("======== ADJUSTED APR DATA AVAILABILITY REPORT ========\n\n")

        # Summary statistics
        total_records, records_with_adjusted, pct_with_adjusted = _coverage(global_df)
        f.write(f"Total APR records: {total_records}\n")
        f.write(f"Records with adjusted_apr: {records_with_adjusted} ({pct_with_adjusted:.2f}%)\n\n")

        # First and last data points
        if records_with_adjusted > 0:
            has_adjusted = global_df[global_df['adjusted_apr'].notna()]
            first_date = has_adjusted['timestamp'].min()
            last_date = has_adjusted['timestamp'].max()
            f.write(f"First adjusted_apr record: {first_date}\n")
            f.write(f"Last adjusted_apr record: {last_date}\n")
            f.write(f"Date range: {(last_date - first_date).days} days\n\n")

        # Agent statistics
        f.write("===== AGENT STATISTICS =====\n\n")
        agent_stats = []
        for agent_id in global_df['agent_id'].unique():
            agent_data = global_df[global_df['agent_id'] == agent_id]
            agent_name = agent_data['agent_name'].iloc[0] if not agent_data.empty else f"Agent {agent_id}"
            total_agent_records, agent_with_adjusted, coverage_pct = _coverage(agent_data)
            agent_stats.append({
                'agent_id': agent_id,
                'agent_name': agent_name,
                'total_records': total_agent_records,
                'with_adjusted': agent_with_adjusted,
                'coverage_pct': coverage_pct,
                # Kept so the date range below needs no second filtering pass.
                'adjusted_rows': agent_data[agent_data['adjusted_apr'].notna()],
            })

        # Sort by coverage percentage (descending)
        agent_stats.sort(key=lambda x: x['coverage_pct'], reverse=True)

        for agent in agent_stats:
            f.write(f"Agent: {agent['agent_name']} (ID: {agent['agent_id']})\n")
            f.write(f" Records: {agent['total_records']}\n")
            f.write(f" With adjusted_apr: {agent['with_adjusted']} ({agent['coverage_pct']:.2f}%)\n")
            # If agent has adjusted data, show date range
            agent_adjusted = agent['adjusted_rows']
            if not agent_adjusted.empty:
                first = agent_adjusted['timestamp'].min()
                last = agent_adjusted['timestamp'].max()
                f.write(f" First adjusted_apr: {first}\n")
                f.write(f" Last adjusted_apr: {last}\n")
            f.write("\n")

        # Check for May 10th cutoff issue
        f.write("===== MAY 10TH CUTOFF ANALYSIS =====\n\n")
        # NOTE(review): naive datetime - assumes 'timestamp' is timezone-naive; confirm.
        may_10_2025 = datetime(2025, 5, 10)
        before_cutoff = global_df[global_df['timestamp'] < may_10_2025]
        after_cutoff = global_df[global_df['timestamp'] >= may_10_2025]

        # Calculate coverage before and after
        before_total, before_with_adjusted, before_pct = _coverage(before_cutoff)
        after_total, after_with_adjusted, after_pct = _coverage(after_cutoff)
        f.write("Before May 10th, 2025:\n")
        f.write(f" Records: {before_total}\n")
        f.write(f" With adjusted_apr: {before_with_adjusted} ({before_pct:.2f}%)\n\n")
        f.write("After May 10th, 2025:\n")
        f.write(f" Records: {after_total}\n")
        f.write(f" With adjusted_apr: {after_with_adjusted} ({after_pct:.2f}%)\n\n")

        # Check for agents that had data before but not after
        if before_total > 0 and after_total > 0:
            agents_before = set(before_cutoff[before_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
            agents_after = set(after_cutoff[after_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
            missing_after = agents_before - agents_after
            new_after = agents_after - agents_before
            if missing_after:
                f.write(f"Agents with adjusted_apr before May 10th but not after: {list(missing_after)}\n")
                # For each missing agent, show the last date with adjusted_apr
                for agent_id in missing_after:
                    agent_data = before_cutoff[(before_cutoff['agent_id'] == agent_id) &
                                               (before_cutoff['adjusted_apr'].notna())]
                    if not agent_data.empty:
                        last_date = agent_data['timestamp'].max()
                        agent_name = agent_data['agent_name'].iloc[0]
                        f.write(f" {agent_name} (ID: {agent_id}): Last adjusted_apr on {last_date}\n")
            if new_after:
                f.write(f"\nAgents with adjusted_apr after May 10th but not before: {list(new_after)}\n")

    logger.info(f"Adjusted APR report generated: {report_path}")
    return report_path