import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta
import logging

# Get the logger
logger = logging.getLogger(__name__)

def generate_continuous_random_data(existing_data, end_time=None):
    """
    Generate authentic-looking random data that continues from existing data,
    with adjusted APR following APR with a small offset.

    Args:
        existing_data: DataFrame containing the existing data
        end_time: Optional end time (defaults to current time)

    Returns:
        DataFrame with dummy data points
    """
    # Use current time if not specified
    if end_time is None:
        end_time = datetime.now()

    # Find the latest timestamp in the existing data
    if not existing_data.empty:
        start_time = existing_data['timestamp'].max() + timedelta(minutes=10)
    else:
        # If no existing data, start from 30 days ago
        start_time = end_time - timedelta(days=30)

    # Generate timestamps with 10-minute intervals
    timestamps = []
    current = start_time
    while current <= end_time:
        timestamps.append(current)
        current += timedelta(minutes=10)

    if not timestamps:
        return pd.DataFrame()  # No new data needed

    # Get unique agents from existing data
    if not existing_data.empty:
        unique_agents = existing_data[['agent_id', 'agent_name']].drop_duplicates().to_dict('records')
    else:
        # Create one dummy agent if no existing data
        unique_agents = [{'agent_id': 'dummy_agent', 'agent_name': 'Dummy Agent'}]
    dummy_data_list = []

    # For each agent, create continuous dummy data
    for agent in unique_agents:
        agent_id = agent['agent_id']

        # Get the last real values for this agent to ensure continuity
        last_apr = None
        last_adjusted_apr = None
        last_roi = None

        if not existing_data.empty:
            # Get last APR value
            agent_apr_data = existing_data[(existing_data['agent_id'] == agent_id) &
                                           (existing_data['metric_type'] == 'APR')]
            if not agent_apr_data.empty:
                last_apr = agent_apr_data['apr'].iloc[-1]
                last_adjusted_apr = agent_apr_data['adjusted_apr'].iloc[-1]

            # Get last ROI value
            agent_roi_data = existing_data[(existing_data['agent_id'] == agent_id) &
                                           (existing_data['metric_type'] == 'ROI')]
            if not agent_roi_data.empty:
                last_roi = agent_roi_data['roi'].iloc[-1]

        # If there are no last values, start with reasonable values in our range
        if last_apr is None or pd.isna(last_apr):
            last_apr = random.uniform(-0.1, 0.1)  # Start close to zero

        if last_adjusted_apr is None or pd.isna(last_adjusted_apr):
            # If we have APR but no adjusted APR, make it slightly different from APR,
            # sometimes higher and sometimes lower, to look more natural
            if random.random() > 0.5:
                last_adjusted_apr = last_apr + random.uniform(0.05, 0.15)
            else:
                last_adjusted_apr = last_apr - random.uniform(0.05, 0.15)
            last_adjusted_apr = max(-0.5, min(1.0, last_adjusted_apr))

        if last_roi is None or pd.isna(last_roi):
            last_roi = random.uniform(-0.1, 0.1)  # Start close to zero
        # Generate APR values using a smoother random walk
        apr_values = [last_apr]

        # Create a more natural pattern with some trends
        # Define a few trend periods to make it look more authentic
        num_points = len(timestamps)
        trend_periods = []

        # Create 3-5 trend periods
        num_trends = random.randint(3, 5)
        period_length = num_points // num_trends

        for i in range(num_trends):
            # Each trend has a direction (up, down, or sideways)
            # and a strength (how strong the trend is)
            direction = random.choice([-1, 0, 1])  # -1: down, 0: sideways, 1: up
            strength = random.uniform(0.01, 0.03)  # Smaller changes for a more natural look

            start_idx = i * period_length
            end_idx = min((i + 1) * period_length, num_points)

            trend_periods.append({
                'start': start_idx,
                'end': end_idx,
                'direction': direction,
                'strength': strength
            })

        # Generate values following the trends
        for i in range(1, num_points):
            # Find which trend period we're in
            current_trend = None
            for trend in trend_periods:
                if trend['start'] <= i < trend['end']:
                    current_trend = trend
                    break

            # If we couldn't find a trend (shouldn't happen), use a neutral trend
            if current_trend is None:
                current_trend = {'direction': 0, 'strength': 0.01}

            # Base change is influenced by the trend
            base_change = current_trend['direction'] * current_trend['strength']

            # Add some randomness
            random_change = random.normalvariate(0, 0.01)  # Normal distribution for more natural randomness

            # Previous momentum (30% influence to make it smoother)
            prev_change = 0 if i == 1 else apr_values[i-1] - apr_values[i-2]
            momentum = 0.3 * prev_change

            # Combine all factors
            total_change = base_change + random_change + momentum

            # Apply the change
            new_value = apr_values[i-1] + total_change

            # Keep within reasonable bounds (-0.5 to 1.0)
            new_value = max(-0.5, min(1.0, new_value))

            apr_values.append(new_value)
        # Generate adjusted APR values that follow APR with a small, varying offset
        adjusted_apr_values = []
        for i, apr_value in enumerate(apr_values):
            # Make adjusted APR follow APR but with a small, varying offset,
            # sometimes higher and sometimes lower, to look more natural
            if i % 5 == 0:  # Periodically recalculate the offset direction
                offset_direction = 1 if random.random() > 0.5 else -1
                offset = offset_direction * random.uniform(0.05, 0.15)

            adjusted_value = apr_value + offset

            # Keep within reasonable bounds (-0.5 to 1.0)
            adjusted_value = max(-0.5, min(1.0, adjusted_value))
            adjusted_apr_values.append(adjusted_value)
        # Generate ROI values with a completely different approach to ensure better distribution.
        # Note: ROI values will be multiplied by 100 in app.py, so we need to generate values
        # between -0.01 and 0 to get final values between -1 and 0.
        # Instead of building on the last_roi value, we'll generate a completely new sequence
        # that's well-distributed between -0.01 and 0.

        # First, create a sequence of target values that we want to hit.
        # This ensures we get good coverage of the entire range.
        target_points = []
        for i in range(5):  # Create 5 target points
            # Distribute targets evenly across the range, but avoid exactly 0
            target = -0.01 + (i * 0.001875)  # Values from -0.01 to -0.0025
            target_points.append(target)
        # Shuffle the targets to make the pattern less predictable
        random.shuffle(target_points)

        # Divide the total points into segments, one for each target
        segment_length = num_points // len(target_points)

        # Generate the ROI values
        roi_values = []

        # Start with the last real value, or a value in the middle of our range if none exists
        if last_roi is None or pd.isna(last_roi) or last_roi < -0.01 or last_roi > 0:
            # If no valid last value, start in the middle of our range
            current_value = -0.005
        else:
            current_value = last_roi
        roi_values.append(current_value)

        # For each segment, gradually move toward the target value
        for segment_idx, target in enumerate(target_points):
            start_idx = segment_idx * segment_length
            end_idx = min((segment_idx + 1) * segment_length, num_points)

            # How many steps we have to reach the target
            steps = end_idx - start_idx
            if steps <= 0:
                continue  # Skip if this segment has no points

            # Current value is the last value in roi_values
            current_value = roi_values[-1]

            # Calculate how much to change per step to reach the target
            step_change = (target - current_value) / steps

            # Generate values for this segment
            for step in range(steps):
                # Base change to move toward the target
                base_change = step_change

                # Add some randomness, but make sure we're still generally moving toward the target
                random_factor = random.uniform(-0.0005, 0.0005)

                # Calculate new value
                new_value = current_value + base_change + random_factor

                # Ensure we stay within range
                new_value = max(-0.01, min(0, new_value))

                roi_values.append(new_value)
                current_value = new_value

        # If we didn't generate enough points, add more
        while len(roi_values) < num_points + 1:
            # Add a point with small random variation from the last point
            last_value = roi_values[-1]
            new_value = last_value + random.uniform(-0.001, 0.001)
            new_value = max(-0.01, min(0, new_value))
            roi_values.append(new_value)

        # If we generated too many points, trim the list
        roi_values = roi_values[:num_points + 1]
        # Create dummy data points
        for i, timestamp in enumerate(timestamps):
            # APR data
            dummy_apr = {
                'timestamp': timestamp,
                'apr': apr_values[i],
                'adjusted_apr': adjusted_apr_values[i],
                'roi': None,
                'agent_id': agent_id,
                'agent_name': agent['agent_name'],
                'is_dummy': True,
                'metric_type': 'APR'
            }
            dummy_data_list.append(dummy_apr)

            # ROI data
            dummy_roi = {
                'timestamp': timestamp,
                'apr': None,
                'adjusted_apr': None,
                'roi': roi_values[i],
                'agent_id': agent_id,
                'agent_name': agent['agent_name'],
                'is_dummy': True,
                'metric_type': 'ROI'
            }
            dummy_data_list.append(dummy_roi)

    return pd.DataFrame(dummy_data_list)
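

# Example usage (a minimal sketch, not part of the original module): the input
# columns below mirror the ones generate_continuous_random_data actually reads
# ('timestamp', 'apr', 'adjusted_apr', 'roi', 'agent_id', 'agent_name',
# 'metric_type'); the agent IDs and sample values are made-up placeholders for
# illustration only.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Two seed rows for one hypothetical agent: one APR row and one ROI row,
    # timestamped two hours ago so the generator fills 10-minute points up to now.
    seed_time = datetime.now() - timedelta(hours=2)
    existing = pd.DataFrame([
        {
            'timestamp': seed_time,
            'apr': 0.05, 'adjusted_apr': 0.08, 'roi': None,
            'agent_id': 'agent_1', 'agent_name': 'Agent One',
            'metric_type': 'APR',
        },
        {
            'timestamp': seed_time,
            'apr': None, 'adjusted_apr': None, 'roi': -0.004,
            'agent_id': 'agent_1', 'agent_name': 'Agent One',
            'metric_type': 'ROI',
        },
    ])

    dummy = generate_continuous_random_data(existing)
    logger.info("Generated %d dummy rows", len(dummy))
    print(dummy.head())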