import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta
import logging

# Get the logger
logger = logging.getLogger(__name__)

def generate_continuous_random_data(existing_data, end_time=None):
    """
    Generate authentic-looking random data that continues from existing data,
    with adjusted APR following APR with a small offset.

    Args:
        existing_data: DataFrame containing the existing data
        end_time: Optional end time (defaults to current time)

    Returns:
        DataFrame with dummy data points
    """
    # Use current time if not specified
    if end_time is None:
        end_time = datetime.now()

    # Find the latest timestamp in the existing data
    if not existing_data.empty:
        start_time = existing_data['timestamp'].max() + timedelta(minutes=10)
    else:
        # If no existing data, start from 30 days ago
        start_time = end_time - timedelta(days=30)

    # Generate timestamps with 10-minute intervals
    timestamps = []
    current = start_time
    while current <= end_time:
        timestamps.append(current)
        current += timedelta(minutes=10)

    if not timestamps:
        return pd.DataFrame()  # No new data needed

    # Get unique agents from existing data
    if not existing_data.empty:
        unique_agents = existing_data[['agent_id', 'agent_name']].drop_duplicates().to_dict('records')
    else:
        # Create one dummy agent if no existing data
        unique_agents = [{'agent_id': 'dummy_agent', 'agent_name': 'Dummy Agent'}]
    dummy_data_list = []

    # For each agent, create continuous dummy data
    for agent in unique_agents:
        agent_id = agent['agent_id']

        # Get the last real values for this agent to ensure continuity
        last_apr = None
        last_adjusted_apr = None
        last_roi = None

        if not existing_data.empty:
            # Get last APR value
            agent_apr_data = existing_data[(existing_data['agent_id'] == agent_id) &
                                           (existing_data['metric_type'] == 'APR')]
            if not agent_apr_data.empty:
                last_apr = agent_apr_data['apr'].iloc[-1]
                last_adjusted_apr = agent_apr_data['adjusted_apr'].iloc[-1]

            # Get last ROI value
            agent_roi_data = existing_data[(existing_data['agent_id'] == agent_id) &
                                           (existing_data['metric_type'] == 'ROI')]
            if not agent_roi_data.empty:
                last_roi = agent_roi_data['roi'].iloc[-1]

        # If there are no last values, start with reasonable values in our range
        if last_apr is None or pd.isna(last_apr):
            last_apr = random.uniform(-0.1, 0.1)  # Start close to zero

        if last_adjusted_apr is None or pd.isna(last_adjusted_apr):
            # If we have APR but no adjusted APR, make it slightly different from APR,
            # sometimes higher and sometimes lower, to look more natural
            if random.random() > 0.5:
                last_adjusted_apr = last_apr + random.uniform(0.05, 0.15)
            else:
                last_adjusted_apr = last_apr - random.uniform(0.05, 0.15)
            last_adjusted_apr = max(-0.5, min(1.0, last_adjusted_apr))

        if last_roi is None or pd.isna(last_roi):
            last_roi = random.uniform(-0.1, 0.1)  # Start close to zero
        # Generate APR values using a smoother random walk
        apr_values = [last_apr]

        # Create a more natural pattern with some trends
        # Define a few trend periods to make it look more authentic
        num_points = len(timestamps)
        trend_periods = []

        # Create 3-5 trend periods
        num_trends = random.randint(3, 5)
        period_length = num_points // num_trends

        for i in range(num_trends):
            # Each trend has a direction (up, down, or sideways)
            # and a strength (how strong the trend is)
            direction = random.choice([-1, 0, 1])  # -1: down, 0: sideways, 1: up
            strength = random.uniform(0.01, 0.03)  # Smaller changes for a more natural look

            start_idx = i * period_length
            end_idx = min((i + 1) * period_length, num_points)

            trend_periods.append({
                'start': start_idx,
                'end': end_idx,
                'direction': direction,
                'strength': strength
            })

        # Generate values following the trends
        for i in range(1, num_points):
            # Find which trend period we're in
            current_trend = None
            for trend in trend_periods:
                if trend['start'] <= i < trend['end']:
                    current_trend = trend
                    break

            # If we couldn't find a trend (shouldn't happen), use a neutral trend
            if current_trend is None:
                current_trend = {'direction': 0, 'strength': 0.01}

            # Base change is influenced by the trend
            base_change = current_trend['direction'] * current_trend['strength']

            # Add some randomness
            random_change = random.normalvariate(0, 0.01)  # Normal distribution for more natural randomness

            # Previous momentum (30% influence to make it smoother)
            prev_change = 0 if i == 1 else apr_values[i-1] - apr_values[i-2]
            momentum = 0.3 * prev_change

            # Combine all factors
            total_change = base_change + random_change + momentum

            # Apply the change
            new_value = apr_values[i-1] + total_change

            # Keep within reasonable bounds (-0.5 to 1.0)
            new_value = max(-0.5, min(1.0, new_value))

            apr_values.append(new_value)
        # Generate adjusted APR values that follow APR with a small, varying offset
        adjusted_apr_values = []
        for i, apr_value in enumerate(apr_values):
            # Make adjusted APR follow APR but with a small, varying offset,
            # sometimes higher and sometimes lower, to look more natural
            if i % 5 == 0:  # Periodically recalculate the offset direction
                offset_direction = 1 if random.random() > 0.5 else -1
                offset = offset_direction * random.uniform(0.05, 0.15)

            adjusted_value = apr_value + offset

            # Keep within reasonable bounds (-0.5 to 1.0)
            adjusted_value = max(-0.5, min(1.0, adjusted_value))
            adjusted_apr_values.append(adjusted_value)
        # Generate ROI values with a completely different approach to ensure better distribution.
        # Note: ROI values will be multiplied by 100 in app.py, so we need to generate values
        # between -0.01 and 0 to get final values between -1 and 0.
        # Instead of building on the last_roi value, we'll generate a completely new sequence
        # that's well-distributed between -0.01 and 0.

        # First, create a sequence of target values that we want to hit.
        # This ensures we get good coverage of the entire range.
        target_points = []
        for i in range(5):  # Create 5 target points
            # Distribute targets evenly across the range, but avoid exactly 0
            target = -0.01 + (i * 0.001875)  # Values from -0.01 to -0.0025
            target_points.append(target)
        # Shuffle the targets to make the pattern less predictable
        random.shuffle(target_points)

        # Divide the total points into segments, one for each target
        segment_length = num_points // len(target_points)

        # Generate the ROI values
        roi_values = []

        # Start with the last real value, or a value in the middle of our range if none exists
        if last_roi is None or pd.isna(last_roi) or last_roi < -0.01 or last_roi > 0:
            # If no valid last value, start in the middle of our range
            current_value = -0.005
        else:
            current_value = last_roi
        roi_values.append(current_value)

        # For each segment, gradually move toward the target value
        for segment_idx, target in enumerate(target_points):
            start_idx = segment_idx * segment_length
            end_idx = min((segment_idx + 1) * segment_length, num_points)

            # How many steps we have to reach the target
            steps = end_idx - start_idx
            if steps <= 0:
                continue  # Skip if this segment has no points

            # Current value is the last value in roi_values
            current_value = roi_values[-1]

            # Calculate how much to change per step to reach the target
            step_change = (target - current_value) / steps

            # Generate values for this segment
            for step in range(steps):
                # Base change to move toward the target
                base_change = step_change

                # Add some randomness, but make sure we're still generally moving toward the target
                random_factor = random.uniform(-0.0005, 0.0005)

                # Calculate new value
                new_value = current_value + base_change + random_factor

                # Ensure we stay within range
                new_value = max(-0.01, min(0, new_value))

                roi_values.append(new_value)
                current_value = new_value

        # If we didn't generate enough points, add more
        while len(roi_values) < num_points + 1:
            # Add a point with small random variation from the last point
            last_value = roi_values[-1]
            new_value = last_value + random.uniform(-0.001, 0.001)
            new_value = max(-0.01, min(0, new_value))
            roi_values.append(new_value)

        # If we generated too many points, trim the list
        roi_values = roi_values[:num_points + 1]
        # Create dummy data points
        for i, timestamp in enumerate(timestamps):
            # APR data
            dummy_apr = {
                'timestamp': timestamp,
                'apr': apr_values[i],
                'adjusted_apr': adjusted_apr_values[i],
                'roi': None,
                'agent_id': agent_id,
                'agent_name': agent['agent_name'],
                'is_dummy': True,
                'metric_type': 'APR'
            }
            dummy_data_list.append(dummy_apr)

            # ROI data
            dummy_roi = {
                'timestamp': timestamp,
                'apr': None,
                'adjusted_apr': None,
                'roi': roi_values[i],
                'agent_id': agent_id,
                'agent_name': agent['agent_name'],
                'is_dummy': True,
                'metric_type': 'ROI'
            }
            dummy_data_list.append(dummy_roi)

    return pd.DataFrame(dummy_data_list)
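

# Example usage (a minimal sketch, not part of the original module): the input
# columns below mirror the ones generate_continuous_random_data actually reads
# ('timestamp', 'apr', 'adjusted_apr', 'roi', 'agent_id', 'agent_name',
# 'metric_type'); the agent IDs and sample values are made-up placeholders for
# illustration only.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Two seed rows for one hypothetical agent: one APR row and one ROI row,
    # timestamped two hours ago so the generator fills 10-minute points up to now.
    seed_time = datetime.now() - timedelta(hours=2)
    existing = pd.DataFrame([
        {
            'timestamp': seed_time,
            'apr': 0.05, 'adjusted_apr': 0.08, 'roi': None,
            'agent_id': 'agent_1', 'agent_name': 'Agent One',
            'metric_type': 'APR',
        },
        {
            'timestamp': seed_time,
            'apr': None, 'adjusted_apr': None, 'roi': -0.004,
            'agent_id': 'agent_1', 'agent_name': 'Agent One',
            'metric_type': 'ROI',
        },
    ])

    dummy = generate_continuous_random_data(existing)
    logger.info("Generated %d dummy rows", len(dummy))
    print(dummy.head())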