import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
import io

def test_apriori_improved(csv_content, min_support=0.1, min_threshold=0.7, metric="lift", has_header=False):
    """Mine association rules from CSV text and print a short report.

    Each CSV row is treated as one transaction. Cells are converted to
    strings and stripped; missing and blank cells are dropped, duplicates
    within a row are collapsed, and rows left with no items are skipped.

    Args:
        csv_content: CSV text, one transaction per row.
        min_support: Minimum support passed to ``apriori``.
        min_threshold: Minimum value of ``metric`` passed to
            ``association_rules``.
        metric: Rule metric passed to ``association_rules``.
        has_header: When true, the first row is read as column names and is
            not treated as a transaction; otherwise every row is data.

    Returns:
        None. All results are reported on stdout. The function stops early
        (after printing a message) when there are no transactions, no
        frequent itemsets, or no rules.

    Note:
        Errors raised by ``pd.read_csv`` are not caught here.
    """
    print(f"\n--- Testing: support={min_support}, threshold={min_threshold}, metric={metric}, header={has_header} ---")

    # Simulate the file reading logic in app.py
    df = pd.read_csv(io.StringIO(csv_content), header=0 if has_header else None)

    transactions = []
    for row in df.values.tolist():
        # Strip each cell once. The set removes duplicates within a row and
        # sorting keeps the item order deterministic.
        items = {str(cell).strip() for cell in row if pd.notna(cell)}
        items.discard("")
        if items:
            transactions.append(sorted(items))

    print(f"Transactions count: {len(transactions)}")

    if not transactions:
        # Every row was blank/NaN: there is nothing to encode or mine, so
        # stop here instead of handing an empty table to the encoder.
        print("No transactions found.")
        return

    te = TransactionEncoder()
    te_ary = te.fit(transactions).transform(transactions)
    encoded_df = pd.DataFrame(te_ary, columns=te.columns_)

    frequent_itemsets = apriori(encoded_df, min_support=min_support, use_colnames=True)
    if frequent_itemsets.empty:
        print("No frequent itemsets found.")
        return

    rules = association_rules(frequent_itemsets, metric=metric, min_threshold=min_threshold)
    if rules.empty:
        print("No rules found.")
        return

    print(f"Rules Found: {len(rules)}")
    print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']].head())


# The name matches pytest's ``test_*`` pattern, but ``csv_content`` is a
# required argument rather than a fixture. Opt out of collection so pytest
# does not report a "fixture not found" error for this helper.
test_apriori_improved.__test__ = False

# Sample basket data: a header row naming the columns, then five transactions.
data_with_header = "\n".join(
    [
        "Item1,Item2,Item3",
        "Milk,Bread,Eggs",
        "Milk,Bread",
        "Milk,Eggs",
        "Bread,Eggs",
        "Milk,Bread,Eggs",
    ]
)

# Scenarios, executed in this order:
#   1. confidence metric, first row consumed as the header;
#   2. confidence metric with header=False, so Item1, Item2 and Item3 should
#      show up as products in the transactions;
#   3. lift metric with a 1.1 threshold, first row consumed as the header.
for _scenario in (
    {"metric": "confidence", "has_header": True},
    {"metric": "confidence", "has_header": False},
    {"metric": "lift", "min_threshold": 1.1, "has_header": True},
):
    test_apriori_improved(data_with_header, **_scenario)