# Source: Hugging Face Space "mic3333/dash-mcp" (Space status at capture: Sleeping)
# File size: 36,942 Bytes
# Revision: 20706fe


import os
import base64
import io
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, html, dcc, Input, Output, State, callback_context
import dash_bootstrap_components as dbc
from typing import Optional
from dotenv import load_dotenv
from pydantic import Field, SecretStr
import numpy as np

# Langchain imports - simplified without embeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain_core.prompts import PromptTemplate

# Load environment variables
load_dotenv()

# Simplified - no OpenRouter for now
# Both names are placeholders: nothing in the visible part of this file
# reads or reassigns them.
AI_AVAILABLE = False
openrouter_model = None

# Initialize Dash app
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
# Module-level alias for the app's server object.
# NOTE(review): presumably exported so a WSGI host can import it - confirm
# against the deployment configuration.
server = app.server

# Global variables
# Never assigned in the visible code; create_vector_store() is a stub that
# does not touch it.
vector_store = None

# Built-in datasets
def create_builtin_datasets():
    """Create the built-in sample datasets.

    All data except Iris is synthetic and generated from NumPy's global
    random state, which is seeded with 42 at the start of the call (this
    reseeds the process-wide generator as a side effect).

    Returns:
        dict: Maps the dataset names 'Gapminder', 'Iris', 'Tips',
        'Stock Data' and 'Wind Data' to pandas DataFrames.
    """
    datasets = {}

    # Gapminder dataset
    np.random.seed(42)
    countries = ['USA', 'China', 'India', 'Germany', 'UK', 'France', 'Japan', 'Brazil', 'Canada', 'Australia']
    # Countries missing from this table fall back to 'Oceania'.
    continent_by_country = {
        'China': 'Asia', 'India': 'Asia', 'Japan': 'Asia',
        'Germany': 'Europe', 'UK': 'Europe', 'France': 'Europe',
        'USA': 'Americas', 'Brazil': 'Americas', 'Canada': 'Americas',
    }
    years = list(range(2000, 2021))
    gapminder_data = []
    for country in countries:
        base_gdp = np.random.uniform(20000, 80000)
        base_life_exp = np.random.uniform(70, 85)
        base_pop = np.random.uniform(10000000, 100000000)
        for year in years:
            elapsed = year - 2000
            # Keep the draw order (gdp, lifeExp, pop) so the seeded output
            # stays reproducible.
            gapminder_data.append({
                'country': country,
                'year': year,
                'gdpPercap': base_gdp * (1 + np.random.uniform(-0.1, 0.15)) * (elapsed*0.02 + 1),
                'lifeExp': base_life_exp + np.random.uniform(-2, 3) + elapsed*0.1,
                'pop': base_pop * (1.01 + np.random.uniform(-0.005, 0.015))**elapsed,
                'continent': continent_by_country.get(country, 'Oceania'),
            })
    datasets['Gapminder'] = pd.DataFrame(gapminder_data)

    # Iris dataset
    try:
        # The import must live inside the try block: otherwise a missing
        # scikit-learn raises before the fallback below can take over.
        from sklearn.datasets import load_iris

        iris = load_iris()
        iris_frame = pd.DataFrame(iris.data, columns=iris.feature_names)
        iris_frame['species'] = [iris.target_names[i] for i in iris.target]
        datasets['Iris'] = iris_frame
    except ImportError:
        # Fallback if sklearn not available: random stand-in data. Note the
        # column names differ from the ones scikit-learn provides.
        iris_data = {
            'sepal_length': np.random.normal(5.8, 0.8, 150),
            'sepal_width': np.random.normal(3.0, 0.4, 150),
            'petal_length': np.random.normal(3.8, 1.8, 150),
            'petal_width': np.random.normal(1.2, 0.8, 150),
            'species': ['setosa']*50 + ['versicolor']*50 + ['virginica']*50
        }
        datasets['Iris'] = pd.DataFrame(iris_data)

    # Tips dataset
    tips_data = {
        'total_bill': np.random.uniform(10, 50, 200),
        'tip': np.random.uniform(1, 10, 200),
        'sex': np.random.choice(['Male', 'Female'], 200),
        'smoker': np.random.choice(['Yes', 'No'], 200),
        'day': np.random.choice(['Thur', 'Fri', 'Sat', 'Sun'], 200),
        'time': np.random.choice(['Lunch', 'Dinner'], 200),
        'size': np.random.choice([1, 2, 3, 4, 5, 6], 200)
    }
    datasets['Tips'] = pd.DataFrame(tips_data)

    # Stock Data: a daily random walk starting at 100
    dates = pd.date_range('2020-01-01', '2023-12-31', freq='D')
    stock_price = 100
    stock_data = []
    for date in dates:
        daily_return = np.random.normal(0.001, 0.02)
        stock_price *= (1 + daily_return)
        stock_data.append({
            'date': date,
            'price': stock_price,
            'volume': np.random.randint(1000000, 5000000),
            'high': stock_price * (1 + abs(np.random.normal(0, 0.01))),
            'low': stock_price * (1 - abs(np.random.normal(0, 0.01))),
            'open': stock_price * (1 + np.random.normal(0, 0.005))
        })
    datasets['Stock Data'] = pd.DataFrame(stock_data)

    # Wind Data: hourly readings for 28 days in each of 12 months
    hours = list(range(24))
    wind_data = []
    for month in range(1, 13):
        for day in range(1, 29):
            for hour in hours:
                wind_data.append({
                    'month': month,
                    'day': day,
                    'hour': hour,
                    'wind_speed': abs(np.random.normal(15, 8)) + 5*np.sin(hour/24*2*np.pi),
                    'temperature': np.random.normal(20, 15) + 10*np.cos(month/12*2*np.pi),
                    'humidity': np.random.uniform(30, 90),
                    'pressure': np.random.normal(1013, 20)
                })
    datasets['Wind Data'] = pd.DataFrame(wind_data)

    return datasets

# Initialize built-in datasets
# Built once at import time; manage_datasets() looks datasets up in this dict
# by name instead of regenerating them per request.
builtin_datasets = create_builtin_datasets()

# App layout
# A page header, four tabs (dataset management, AI assistant, visualizations,
# data explorer) and a set of dcc.Store components that carry state between
# callbacks.
app.layout = dbc.Container([
    dbc.Row([
        dbc.Col([
            html.H1("🤖 AI-Powered Data Analytics", className="text-center mb-4"),
            html.P("Upload data, ask questions, and get AI-powered insights!", 
                   className="text-center text-muted"),
            html.Hr(),
        ], width=12)
    ]),
    
    # Tabbed interface
    dbc.Tabs([
        # Tab 1: Dataset Management
        dbc.Tab(label="📁 Dataset Management", tab_id="dataset-management", children=[
            dbc.Row([
                dbc.Col([
                    dbc.Card([
                        dbc.CardBody([
                            html.H4("Load Built-in Dataset", className="card-title"),
                            # Values here match the keys returned by
                            # create_builtin_datasets().
                            dcc.Dropdown(
                                id="builtin-choice",
                                options=[
                                    {"label": "Gapminder", "value": "Gapminder"},
                                    {"label": "Iris", "value": "Iris"},
                                    {"label": "Tips", "value": "Tips"},
                                    {"label": "Stock Data", "value": "Stock Data"},
                                    {"label": "Wind Data", "value": "Wind Data"}
                                ],
                                value="Gapminder",
                                className="mb-2"
                            ),
                            dbc.Button("Load Dataset", id="load-builtin-btn", color="primary", className="mb-3"),
                            
                            html.Hr(),
                            html.H4("Upload Custom Dataset", className="card-title"),
                            # manage_datasets() is triggered by this
                            # component's `contents` property.
                            dcc.Upload(
                                id='file-upload',
                                children=html.Div([
                                    'Drag and Drop or ',
                                    html.A('Select CSV/Excel Files')
                                ]),
                                style={
                                    'width': '100%',
                                    'height': '60px',
                                    'lineHeight': '60px',
                                    'borderWidth': '1px',
                                    'borderStyle': 'dashed',
                                    'borderRadius': '5px',
                                    'textAlign': 'center',
                                    'margin': '10px'
                                },
                                multiple=False,
                                accept='.csv,.xlsx,.xls'
                            ),
                            
                            dbc.Input(
                                id="custom-name",
                                placeholder="Dataset Name (optional)",
                                type="text",
                                className="mb-2"
                            ),
                            # NOTE(review): no callback in the visible part of
                            # this file references "upload-btn"; uploads are
                            # processed as soon as file-upload.contents changes.
                            dbc.Button("Upload", id="upload-btn", color="primary", className="mb-3"),
                            
                            html.Hr(),
                            html.H4("Active Datasets", className="card-title"),
                            # Options are replaced by manage_datasets() from the
                            # dataset registry; Gapminder is only the initial entry.
                            dcc.Dropdown(
                                id="dataset-selector",
                                options=[{"label": "Gapminder", "value": "Gapminder"}],
                                value="Gapminder",
                                className="mb-2"
                            ),
                            
                            html.Hr(),
                            html.Div(id="status-msg", children=[
                                dbc.Alert("Ready to load data", color="info")
                            ]),
                            html.Div(id="data-info")
                        ])
                    ])
                ], width=4),
                
                dbc.Col([
                    dbc.Card([
                        dbc.CardBody([
                            html.H4("Data Preview (First 10 rows)", className="card-title"),
                            html.Div(id="data-preview", className="mb-4"),
                            html.H4("Quick Analytics", className="card-title"),
                            html.Div(id="auto-analytics")
                        ])
                    ])
                ], width=8)
            ], className="mt-4")
        ]),
        
        # Tab 2: AI Assistant
        dbc.Tab(label="🤖 AI Assistant", tab_id="ai-assistant", children=[
            dbc.Row([
                dbc.Col([
                    dbc.Card([
                        dbc.CardBody([
                            html.H4("🤖 AI Assistant", className="card-title"),
                            # Placeholder content; update_ai_dataset_info()
                            # overwrites it whenever the stored data changes.
                            html.Div(id="ai-dataset-info", className="mb-3", children=[
                                dbc.Alert("No dataset loaded. Please load a dataset in the Dataset Management tab first.", 
                                         color="warning", className="mb-3")
                            ]),
                            dbc.InputGroup([
                                dbc.Input(
                                    id="ai-question",
                                    placeholder="Ask questions about your data...",
                                    type="text",
                                    style={"fontSize": "14px"}
                                ),
                                dbc.Button(
                                    "Ask AI", 
                                    id="ask-button", 
                                    color="primary",
                                    n_clicks=0
                                )
                            ]),
                            
                            html.Div(id="ai-response", className="mt-3")
                        ])
                    ])
                ], width=12)
            ], className="mt-4")
        ]),
        
        # Tab 3: Visualizations
        dbc.Tab(label="📈 Visualizations", tab_id="visualizations", children=[
            dbc.Row([
                dbc.Col([
                    dbc.Card([
                        dbc.CardBody([
                            html.H4("📈 Visualizations", className="card-title"),
                            
                            # Chart controls
                            dbc.Row([
                                dbc.Col([
                                    html.Label("Chart Type:", className="form-label"),
                                    dcc.Dropdown(
                                        id='chart-type',
                                        options=[
                                            {'label': 'Scatter Plot', 'value': 'scatter'},
                                            {'label': 'Line Chart', 'value': 'line'},
                                            {'label': 'Bar Chart', 'value': 'bar'},
                                            {'label': 'Histogram', 'value': 'histogram'},
                                            {'label': 'Box Plot', 'value': 'box'},
                                            {'label': 'Heatmap', 'value': 'heatmap'},
                                            {'label': 'Pie Chart', 'value': 'pie'}
                                        ],
                                        value='scatter',
                                        className="mb-2"
                                    )
                                ], width=6),
                                dbc.Col([
                                    html.Label("Color By:", className="form-label"),
                                    dcc.Dropdown(
                                        id='color-column',
                                        placeholder="Select column (optional)",
                                        className="mb-2"
                                    )
                                ], width=6)
                            ]),
                            
                            # Axis dropdowns start empty; manage_datasets()
                            # fills their options and default values.
                            dbc.Row([
                                dbc.Col([
                                    html.Label("X-Axis:", className="form-label"),
                                    dcc.Dropdown(
                                        id='x-column',
                                        placeholder="Select X column"
                                    )
                                ], width=6),
                                dbc.Col([
                                    html.Label("Y-Axis:", className="form-label"),
                                    dcc.Dropdown(
                                        id='y-column',
                                        placeholder="Select Y column"
                                    )
                                ], width=6)
                            ], className="mb-3"),
                            
                            dcc.Graph(id='main-graph', style={'height': '500px'}),
                        ])
                    ])
                ], width=12)
            ], className="mt-4")
        ]),
        
        # Tab 4: Data Explorer
        dbc.Tab(label="🔍 Data Explorer", tab_id="data-explorer", children=[
            dbc.Row([
                dbc.Col([
                    dbc.Card([
                        dbc.CardBody([
                            html.H4("🔍 Data Explorer", className="card-title"),
                            html.Div(id='data-table')
                        ])
                    ])
                ], width=12)
            ], className="mt-4")
        ])
    ], id="main-tabs", active_tab="dataset-management"),
    
    # Store components
    # 'stored-data' holds the active dataset as a list of row records,
    # 'dataset-registry' maps dataset name -> "builtin"/"custom", and
    # 'current-dataset-name' holds the active dataset's name.
    # NOTE(review): 'data-context' is not used by any callback visible here.
    dcc.Store(id='stored-data'),
    dcc.Store(id='data-context'),
    dcc.Store(id='dataset-registry', data={"Gapminder": "builtin"}),
    dcc.Store(id='current-dataset-name', data="Gapminder")
], fluid=True)

def create_vector_store(df):
    """Placeholder for vector-store creation.

    The DataFrame argument is currently ignored and no store is built; the
    function always reports success so callers show the "AI Ready" status.
    """
    store_ready = True
    return store_ready

# Import AI assistant module
from ai_assistant import get_ai_response

def create_auto_analytics(df):
    """Build the "Quick Analytics" panel for a dataset.

    Args:
        df: pandas DataFrame to summarise.

    Returns:
        list: Dash components, in display order: summary statistics (only
        when numeric columns exist), a missing-data report, a column-type
        breakdown, and the strongest pairwise correlations (only when at
        least one pair has |r| > 0.5).
    """
    analytics_components = []

    # Summary Statistics
    numeric_cols = df.select_dtypes(include=['number']).columns
    if len(numeric_cols) > 0:
        stats = df[numeric_cols].describe()
        analytics_components.extend([
            html.H6("📊 Summary Statistics", className="mt-2"),
            dbc.Table.from_dataframe(
                stats.reset_index().round(2),
                size='sm',
                striped=True,
                hover=True
            )
        ])

    # Missing Data Analysis: keep only the columns that have gaps
    missing_data = df.isnull().sum()
    missing_data = missing_data[missing_data > 0]
    if not missing_data.empty:
        analytics_components.extend([
            html.H6("⚠️ Missing Data", className="mt-3"),
            dbc.Alert([
                html.Pre(missing_data.to_string())
            ], color="warning")
        ])
    else:
        analytics_components.extend([
            html.H6("✅ Data Quality", className="mt-3"),
            dbc.Alert("No missing values found!", color="success")
        ])

    # Data Types Analysis
    analytics_components.extend([
        html.H6("🔍 Data Types", className="mt-3"),
        dbc.Alert([
            html.P(f"📈 Numeric columns: {len(numeric_cols)}"),
            html.P(f"📝 Text columns: {len(df.select_dtypes(include=['object']).columns)}"),
            html.P(f"📅 DateTime columns: {len(df.select_dtypes(include=['datetime64']).columns)}"),
            html.P(f"🔢 Boolean columns: {len(df.select_dtypes(include=['bool']).columns)}")
        ], color="light")
    ])

    # Correlation Analysis for numeric columns
    if len(numeric_cols) > 1:
        corr_matrix = df[numeric_cols].corr()
        names = corr_matrix.columns
        # Walk the upper triangle only so each pair is reported once.
        # NaN correlations fail the comparison and are skipped.
        corr_pairs = []
        for i in range(len(names)):
            for j in range(i + 1, len(names)):
                corr_val = corr_matrix.iloc[i, j]
                if abs(corr_val) > 0.5:  # Only show strong correlations
                    corr_pairs.append((names[i], names[j], corr_val))

        # Rank by strength so the five shown really are the top five,
        # not merely the first five encountered in column order.
        corr_pairs.sort(key=lambda pair: abs(pair[2]), reverse=True)

        if corr_pairs:
            analytics_components.extend([
                html.H6("🔗 Strong Correlations (>0.5)", className="mt-3"),
                dbc.Alert([
                    html.P(f"{pair[0]} ↔ {pair[1]}: {pair[2]:.3f}") for pair in corr_pairs[:5]
                ], color="info")
            ])

    return analytics_components

def parse_contents(contents, filename):
    """Parse the contents of an uploaded file into a DataFrame.

    Args:
        contents: Upload payload of the form "<content type>,<base64 data>".
        filename: Name of the uploaded file. Its extension (compared
            case-insensitively) selects the parser: ".csv" is read as CSV,
            anything starting with ".xls" is read as Excel.

    Returns:
        tuple: ``(df, None)`` on success, or ``(None, error_message)`` when
        the file type is unsupported or the payload cannot be parsed. The
        function never raises for bad input.
    """
    # Decide on the real extension, not on a substring of the name, so that
    # "DATA.CSV" is accepted and "csv_notes.txt" is rejected.
    extension = os.path.splitext(filename or "")[1].lower()
    is_csv = extension == '.csv'
    is_excel = extension.startswith('.xls')
    if not (is_csv or is_excel):
        return None, "Unsupported file type"

    try:
        # Splitting and decoding happen inside the try block so a malformed
        # payload is reported to the user instead of crashing the callback.
        _content_type, content_string = contents.split(',', 1)
        decoded = base64.b64decode(content_string)

        if is_csv:
            # utf-8-sig drops a leading byte-order mark if one is present
            # and otherwise behaves exactly like utf-8.
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8-sig')))
        else:
            df = pd.read_excel(io.BytesIO(decoded))

        return df, None
    except Exception as e:
        # Boundary with user-supplied data: convert any parser failure into
        # an error message for the UI.
        return None, f"Error processing file: {str(e)}"

# Dataset management callbacks
def _selector_options(registry):
    """Return dropdown options listing every dataset name in the registry."""
    return [{"label": name, "value": name} for name in registry.keys()]


def _empty_dataset_outputs(registry, status_msg=""):
    """Return the 13 callback outputs used when no dataset is available."""
    return (None, status_msg, "", "", "", [], [], [], None, None,
            registry, _selector_options(registry), None)


def _loaded_dataset_outputs(df, dataset_name, status_text, info_title, registry):
    """Return the 13 callback outputs for a successfully loaded DataFrame."""
    # Create vector store for AI
    vector_success = create_vector_store(df)

    # Create data table preview
    table = dbc.Table.from_dataframe(
        df.head(10),
        striped=True,
        bordered=True,
        hover=True,
        size='sm'
    )

    ai_status = "🤖 AI Ready" if vector_success else "⚠️ AI Limited"
    status_msg = dbc.Alert(f"✅ {status_text} {ai_status}", color="success")

    column_names = list(df.columns)
    numeric_names = list(df.select_dtypes(include=['number']).columns)
    text_count = len(df.select_dtypes(include=['object']).columns)

    data_info = dbc.Alert([
        html.H6(info_title),
        html.P(f"Shape: {df.shape[0]} rows × {df.shape[1]} columns"),
        # str() guards against non-string column labels (for example a
        # spreadsheet whose header row contains numbers).
        html.P(f"Columns: {', '.join(map(str, column_names))}"),
        html.P(f"Data types: {len(numeric_names)} numeric, {text_count} categorical")
    ], color="light")

    # Create automatic analytics
    auto_analytics = create_auto_analytics(df)

    # Create column options for dropdowns
    all_columns = [{'label': col, 'value': col} for col in column_names]

    # Set default values - prefer numeric columns for x and y
    if numeric_names:
        default_x = numeric_names[0]
    elif column_names:
        default_x = column_names[0]
    else:
        default_x = None

    if len(numeric_names) > 1:
        default_y = numeric_names[1]
    elif numeric_names:
        default_y = numeric_names[0]
    elif len(column_names) > 1:
        default_y = column_names[1]
    else:
        default_y = None

    return (df.to_dict('records'), status_msg, table, data_info, auto_analytics,
            all_columns, all_columns, all_columns, default_x, default_y,
            registry, _selector_options(registry), dataset_name)


@app.callback(
    [Output('stored-data', 'data'),
     Output('status-msg', 'children'),
     Output('data-preview', 'children'),
     Output('data-info', 'children'),
     Output('auto-analytics', 'children'),
     Output('x-column', 'options'),
     Output('y-column', 'options'),
     Output('color-column', 'options'),
     Output('x-column', 'value'),
     Output('y-column', 'value'),
     Output('dataset-registry', 'data'),
     Output('dataset-selector', 'options'),
     Output('current-dataset-name', 'data')],
    [Input('load-builtin-btn', 'n_clicks'),
     Input('file-upload', 'contents'),
     Input('dataset-selector', 'value')],
    [State('builtin-choice', 'value'),
     State('file-upload', 'filename'),
     State('custom-name', 'value'),
     State('dataset-registry', 'data')]
)
def manage_datasets(builtin_clicks, file_contents, selected_dataset, builtin_choice, filename, custom_name, registry):
    """Handle dataset loading and switching.

    Depending on what triggered the callback this loads the default
    Gapminder dataset (initial call), a chosen built-in dataset, an uploaded
    file, or the dataset picked in the "Active Datasets" dropdown.

    Args:
        builtin_clicks: Click count of the "Load Dataset" button.
        file_contents: Contents of the uploaded file, if any.
        selected_dataset: Name chosen in the dataset selector.
        builtin_choice: Name chosen in the built-in dataset dropdown.
        filename: Name of the uploaded file.
        custom_name: Optional user-supplied name for the uploaded dataset.
        registry: Mapping of dataset name to "builtin" or "custom".

    Returns:
        tuple: 13 values matching the declared outputs: row records, status
        alert, preview table, dataset info, analytics components, x/y/color
        column options, default x and y columns, the registry, the selector
        options and the active dataset name. When nothing can be loaded the
        data-related outputs are empty and the dataset name is None.
    """
    ctx = callback_context

    # Initialize defaults
    registry = registry or {"Gapminder": "builtin"}

    if not ctx.triggered:
        # Initial load - load Gapminder dataset
        return _loaded_dataset_outputs(
            builtin_datasets["Gapminder"],
            "Gapminder",
            "Gapminder dataset loaded!",
            "Dataset Information:",
            registry,
        )

    trigger_id = ctx.triggered[0]['prop_id'].split('.')[0]

    if trigger_id == 'load-builtin-btn' and builtin_clicks:
        # Load built-in dataset
        if builtin_choice in builtin_datasets:
            registry[builtin_choice] = "builtin"
            return _loaded_dataset_outputs(
                builtin_datasets[builtin_choice],
                builtin_choice,
                f"{builtin_choice} dataset loaded!",
                f"{builtin_choice} Dataset Information:",
                registry,
            )

    elif trigger_id == 'file-upload' and file_contents:
        # Upload custom dataset
        df, error = parse_contents(file_contents, filename)

        if error:
            return _empty_dataset_outputs(registry, dbc.Alert(error, color="danger"))

        # Determine dataset name. Strip only the final extension so that a
        # name such as "sales.2023.csv" becomes "sales.2023", not "sales".
        dataset_name = (
            (custom_name or "").strip()
            or os.path.splitext(filename or "")[0]
            or "uploaded_data"
        )
        registry[dataset_name] = "custom"

        return _loaded_dataset_outputs(
            df,
            dataset_name,
            f"{dataset_name} uploaded successfully!",
            f"{dataset_name} Dataset Information:",
            registry,
        )

    elif trigger_id == 'dataset-selector' and selected_dataset:
        # Switch between datasets
        if selected_dataset in registry:
            # NOTE: uploaded (custom) datasets are not persisted anywhere,
            # so only built-in data can be reloaded; any other selection
            # falls back to Gapminder.
            if selected_dataset not in builtin_datasets:
                selected_dataset = "Gapminder"

            return _loaded_dataset_outputs(
                builtin_datasets[selected_dataset],
                selected_dataset,
                f"Switched to {selected_dataset} dataset!",
                f"{selected_dataset} Dataset Information:",
                registry,
            )

    # Default fallback
    return _empty_dataset_outputs(registry)

# Updated callback for data table (now shared across tabs)
@app.callback(
    Output('data-table', 'children'),
    [Input('stored-data', 'data')]
)
def update_data_table(data):
    """Render the first 20 stored rows as a table for the Data Explorer tab.

    Returns a muted "No data loaded" paragraph when the store is empty.
    """
    if data:
        first_rows = pd.DataFrame(data).head(20)
        table_options = dict(
            striped=True,
            bordered=True,
            hover=True,
            size='sm',
            responsive=True,
        )
        return dbc.Table.from_dataframe(first_rows, **table_options)

    return html.P("No data loaded", className="text-muted")

# Callback to update AI assistant tab with current dataset info
@app.callback(
    Output('ai-dataset-info', 'children'),
    [Input('stored-data', 'data'),
     Input('current-dataset-name', 'data')]
)
def update_ai_dataset_info(data, dataset_name):
    """Summarise the active dataset at the top of the AI Assistant tab.

    Shows a warning when either the stored rows or the dataset name are
    missing; otherwise shows the name, shape, up to five column names and
    a numeric/categorical column count.
    """
    if data and dataset_name:
        frame = pd.DataFrame(data)
        row_count, column_count = frame.shape

        # Only the first five column names are listed; an ellipsis marks
        # that more exist.
        shown_columns = ', '.join(frame.columns.tolist()[:5])
        ellipsis = '...' if len(frame.columns) > 5 else ''

        numeric_count = len(frame.select_dtypes(include=['number']).columns)
        categorical_count = len(frame.select_dtypes(include=['object']).columns)

        summary = [
            html.H6(f"📊 Current Dataset: {dataset_name}"),
            html.P(f"Shape: {row_count:,} rows × {column_count} columns"),
            html.P(f"Columns: {shown_columns}{ellipsis}"),
            html.P(f"Data types: {numeric_count} numeric, {categorical_count} categorical"),
            html.Small("✨ AI is ready to answer questions about this data!", className="text-muted"),
        ]
        return dbc.Alert(summary, color="success", className="mb-3")

    return dbc.Alert("No dataset loaded. Please load a dataset in the Dataset Management tab first.",
                     color="warning", className="mb-3")

@app.callback(
    Output('ai-response', 'children'),
    [Input('ask-button', 'n_clicks')],
    [State('ai-question', 'value'),
     State('stored-data', 'data'),
     State('current-dataset-name', 'data')]
)
def handle_ai_question(n_clicks, question, data, dataset_name):
    """Answer the user's question about the stored dataset.

    Returns an empty string until the button has been clicked with both a
    question and stored data present, a warning alert when no dataset name
    is set, and otherwise the response from get_ai_response() rendered as
    Markdown inside an info alert.
    """
    ready_to_ask = n_clicks and question and data
    if not ready_to_ask:
        return ""

    if dataset_name:
        frame = pd.DataFrame(data)
        answer = get_ai_response(question, frame)
        return dbc.Alert(dcc.Markdown(answer), color="info")

    return dbc.Alert("Please load a dataset first in the Dataset Management tab.", color="warning")


def _message_figure(message, font=None):
    """Return an otherwise empty figure holding one text annotation at (0.5, 0.5)."""
    annotation = dict(text=message, x=0.5, y=0.5, showarrow=False)
    if font is not None:
        annotation['font'] = font
    fig = go.Figure()
    fig.add_annotation(**annotation)
    return fig


@app.callback(
    Output('main-graph', 'figure'),
    [Input('stored-data', 'data'),
     Input('chart-type', 'value'),
     Input('x-column', 'value'),
     Input('y-column', 'value'),
     Input('color-column', 'value')]
)
def update_main_graph(data, chart_type, x_col, y_col, color_col):
    """Update main visualization based on user selections.

    Args:
        data: Value of the 'stored-data' component; passed to ``pd.DataFrame``.
        chart_type: One of 'scatter', 'line', 'bar', 'histogram', 'box',
            'heatmap' or 'pie'; any other value yields a prompt figure.
        x_col: Selected X column name, or a falsy value when unset.
        y_col: Selected Y column name, or a falsy value when unset.
        color_col: Optional column used for colour grouping (and as the
            X grouping for box plots when a Y column is selected).

    Returns:
        A Plotly figure: the requested chart, a placeholder figure with a
        prompt when required selections are missing, or a figure carrying the
        error text if chart construction raises.
    """
    prompt_font = dict(size=16, color="gray")

    if not data:
        fig = _message_figure("Upload data to see visualizations", font=prompt_font)
        fig.update_layout(template="plotly_white")
        return fig

    df = pd.DataFrame(data)

    # The heatmap is built from all numeric columns and ignores the X/Y
    # selection, so it must not be blocked by the "select columns" prompt.
    if chart_type != 'heatmap' and not x_col and not y_col:
        fig = _message_figure("Select columns to create visualization", font=prompt_font)
        fig.update_layout(template="plotly_white")
        return fig

    # Treat an empty selection as "no colour grouping".
    color_col = color_col or None

    try:
        # Create visualization based on chart type
        if chart_type == 'scatter':
            if x_col and y_col:
                fig = px.scatter(df, x=x_col, y=y_col, color=color_col,
                                 title=f"Scatter Plot: {y_col} vs {x_col}")
            else:
                fig = _message_figure("Select both X and Y columns for scatter plot")

        elif chart_type == 'line':
            if x_col and y_col:
                fig = px.line(df, x=x_col, y=y_col, color=color_col,
                              title=f"Line Chart: {y_col} vs {x_col}")
            else:
                fig = _message_figure("Select both X and Y columns for line chart")

        elif chart_type == 'bar':
            if x_col and y_col:
                fig = px.bar(df, x=x_col, y=y_col, color=color_col,
                             title=f"Bar Chart: {y_col} by {x_col}")
            elif x_col:
                # Pass plain lists instead of value_counts().reset_index():
                # the column names produced by reset_index() differ between
                # pandas 1.x ('index', <col>) and 2.x (<col>, 'count').
                counts = df[x_col].value_counts()
                fig = px.bar(x=counts.index.tolist(), y=counts.tolist(),
                             labels={'x': x_col, 'y': 'count'},
                             title=f"Value Counts: {x_col}")
            else:
                fig = _message_figure("Select at least X column for bar chart")

        elif chart_type == 'histogram':
            if x_col:
                fig = px.histogram(df, x=x_col, color=color_col,
                                   title=f"Histogram: {x_col}")
            else:
                fig = _message_figure("Select X column for histogram")

        elif chart_type == 'box':
            if y_col:
                # The colour column doubles as the grouping axis here.
                fig = px.box(df, x=color_col, y=y_col,
                             title=f"Box Plot: {y_col}" + (f" by {color_col}" if color_col else ""))
            elif x_col:
                fig = px.box(df, y=x_col,
                             title=f"Box Plot: {x_col}")
            else:
                fig = _message_figure("Select a column for box plot")

        elif chart_type == 'heatmap':
            numeric_cols = df.select_dtypes(include=['number']).columns
            if len(numeric_cols) > 1:
                corr_matrix = df[numeric_cols].corr()
                fig = px.imshow(corr_matrix,
                                text_auto=True,
                                aspect="auto",
                                title="Correlation Heatmap",
                                color_continuous_scale='RdBu_r')
            else:
                fig = _message_figure("Need at least 2 numeric columns for heatmap")

        elif chart_type == 'pie':
            if x_col:
                value_counts = df[x_col].value_counts()
                fig = px.pie(values=value_counts.values,
                             names=value_counts.index,
                             title=f"Pie Chart: {x_col}")
            else:
                fig = _message_figure("Select X column for pie chart")

        else:
            fig = _message_figure("Select a chart type")

        fig.update_layout(template="plotly_white", height=500)
        return fig

    except Exception as e:
        # UI boundary: show the failure inside the graph area rather than
        # letting the callback raise.
        fig = _message_figure(f"Error creating chart: {str(e)}", font=dict(color="red"))
        fig.update_layout(template="plotly_white")
        return fig

if __name__ == '__main__':
    # Start the Dash server only when the file is executed as a script:
    # listen on all interfaces, port 7860, with debug mode off.
    app.run(debug=False, port=7860, host='0.0.0.0')