Unverified commit 3cb575fc, authored by Claude
Browse files

Refactor calculate_results to use pandas DataFrames

- Complete rewrite of data processing pipeline using pandas
- Single source of truth: main DataFrame with all predictions and actuals
- Column-wise (vectorized) calculations for the financial metrics; excess inventory and error percentage are still computed with `apply`
- Clean separation: data collection → transformation → aggregation
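- Replaced manual accumulation of running totals with DataFrame aggregations (row collection, the month-by-month inventory roll-forward, and the monthly comparison output still use explicit loops)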
- Much more maintainable and easier to debug
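- Includes the DataFrame in the returned results under the 'dataframe' key for future enhancements (a DataFrame is not JSON-serializable as-is, so callers that serialize the results must drop or convert it)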
parent 0f09420d
Loading
Loading
Loading
Loading
+146 −138
Original line number Diff line number Diff line
@@ -10,6 +10,9 @@ import json
import os
from datetime import datetime
from io import BytesIO
import pandas as pd
import numpy as np

try:
    from openpyxl import Workbook
    from openpyxl.styles import Font, PatternFill, Alignment
@@ -108,7 +111,7 @@ def calculate_accuracy(actual_values, predicted_values):

def calculate_results(predictions, test_data):
    """
    Calculate financial results comparing predictions to actual data
    Calculate financial results comparing predictions to actual data using pandas

    Args:
        predictions: Dict of {watch_id: [12 monthly predictions]}
@@ -117,162 +120,167 @@ def calculate_results(predictions, test_data):
    Returns:
        Dictionary with detailed results
    """
    watches = load_training_data()['metadata']['watches']
    watches_meta = load_training_data()['metadata']['watches']
    watch_lookup = {w['id']: w for w in watches_meta}

    results = {
        'monthly_comparison': [],
        'watch_performance': {},
        'financial_summary': {
            'total_revenue': 0,
            'total_costs': 0,
            'total_profit': 0,
            'lost_revenue': 0,
            'excess_costs': 0
        },
        'prediction_accuracy': {}
    }

    # Process each month
    # Build the main DataFrame with all prediction and actual data
    rows = []
    for month_idx, month_data in enumerate(test_data):
        month_comparison = {
            'month': month_idx + 1,
            'date': month_data['date'],
            'watches': []
        }

        # Process each watch
        for watch_data in month_data['watches']:
            watch_id = watch_data['watch_id']
            watch = next(w for w in watches if w['id'] == watch_id)

            # Get student's prediction
            predicted_demand = predictions.get(str(watch_id), [0] * 12)[month_idx]
            actual_demand = watch_data['demand']

            # Calculate production based on prediction
            # Student's prediction drives production
            production = int(predicted_demand)
            rows.append({
                'month': month_idx + 1,
                'date': month_data['date'],
                'watch_id': watch_id,
                'watch_name': watch_lookup[watch_id]['name'],
                'predicted_demand': predicted_demand,
                'actual_demand': watch_data['demand'],
                'sell_price': watch_lookup[watch_id]['sell_price'],
                'base_cost': watch_lookup[watch_id]['base_cost']
            })

            # Calculate inventory (simple model: start with previous month's end)
            if month_idx == 0:
                inventory_start = 100  # Starting inventory for year 11
            else:
                prev_watch = [w for w in results['monthly_comparison'][month_idx-1]['watches']
                             if w['watch_id'] == watch_id][0]
                inventory_start = prev_watch['inventory_end']
    df = pd.DataFrame(rows)

            # Calculate what actually happens
    # Calculate inventory and operations month by month
    inventory_data = []
    for watch_id in df['watch_id'].unique():
        watch_df = df[df['watch_id'] == watch_id].sort_values('month')
        inventory_start = 100  # Initial inventory

        for idx, row in watch_df.iterrows():
            production = int(row['predicted_demand'])
            available = inventory_start + production
            units_sold = min(actual_demand, available)
            units_sold = min(row['actual_demand'], available)
            inventory_end = available - units_sold
            stockout = max(0, actual_demand - units_sold)

            # Financial calculations
            revenue = units_sold * watch['sell_price']
            production_cost = production * watch['base_cost']
            labor_cost = production * 20.0
            holding_cost = inventory_end * watch['base_cost'] * 0.02

            # Lost revenue from stockouts
            lost_revenue = stockout * watch['sell_price']

            # Excess costs from overproduction
            excess_inventory = max(0, inventory_end - 50)  # 50 is healthy safety stock
            excess_cost = excess_inventory * watch['base_cost'] * 0.05  # 5% waste/obsolescence
            stockout = max(0, row['actual_demand'] - units_sold)

            total_costs = production_cost + labor_cost + holding_cost + excess_cost
            profit = revenue - total_costs

            # Prediction error
            error = abs(predicted_demand - actual_demand)
            error_pct = (error / actual_demand * 100) if actual_demand > 0 else 0

            watch_result = {
                'watch_id': watch_id,
                'watch_name': watch['name'],
                'predicted_demand': predicted_demand,
                'actual_demand': actual_demand,
                'production': production,
            inventory_data.append({
                'month': row['month'],
                'watch_id': row['watch_id'],
                'inventory_start': inventory_start,
                'inventory_end': inventory_end,
                'production': production,
                'units_sold': units_sold,
                'stockout': stockout,
                'revenue': round(revenue, 2),
                'production_cost': round(production_cost, 2),
                'labor_cost': round(labor_cost, 2),
                'holding_cost': round(holding_cost, 2),
                'excess_cost': round(excess_cost, 2),
                'lost_revenue': round(lost_revenue, 2),
                'total_costs': round(total_costs, 2),
                'profit': round(profit, 2),
                'error': error,
                'error_pct': round(error_pct, 1)
            }
                'inventory_end': inventory_end,
                'stockout': stockout
            })

            month_comparison['watches'].append(watch_result)

            # Accumulate totals
            results['financial_summary']['total_revenue'] += revenue
            results['financial_summary']['total_costs'] += total_costs
            results['financial_summary']['total_profit'] += profit
            results['financial_summary']['lost_revenue'] += lost_revenue
            results['financial_summary']['excess_costs'] += excess_cost

            # Accumulate watch-level statistics
            if watch_id not in results['watch_performance']:
                results['watch_performance'][watch_id] = {
                    'watch_name': watch['name'],
                    'total_predicted': 0,
                    'total_actual': 0,
                    'total_sold': 0,
                    'total_stockout': 0,
                    'total_revenue': 0,
                    'total_profit': 0,
                    'actual_values': [],
                    'predicted_values': []
                }
            inventory_start = inventory_end  # Next month starts with this month's end

    inventory_df = pd.DataFrame(inventory_data)

            perf = results['watch_performance'][watch_id]
            perf['total_predicted'] += predicted_demand
            perf['total_actual'] += actual_demand
            perf['total_sold'] += units_sold
            perf['total_stockout'] += stockout
            perf['total_revenue'] += revenue
            perf['total_profit'] += profit
            perf['actual_values'].append(actual_demand)
            perf['predicted_values'].append(predicted_demand)
    # Merge inventory data back to main df
    df = df.merge(inventory_df, on=['month', 'watch_id'], how='left')

        results['monthly_comparison'].append(month_comparison)
    # Calculate financial metrics
    df['revenue'] = df['units_sold'] * df['sell_price']
    df['production_cost'] = df['production'] * df['base_cost']
    df['labor_cost'] = df['production'] * 20.0
    df['holding_cost'] = df['inventory_end'] * df['base_cost'] * 0.02
    df['lost_revenue'] = df['stockout'] * df['sell_price']

    # Round financial summary
    for key in results['financial_summary']:
        results['financial_summary'][key] = round(results['financial_summary'][key], 2)
    # Excess costs from overproduction (inventory > 50 safety stock)
    df['excess_inventory'] = df['inventory_end'].apply(lambda x: max(0, x - 50))
    df['excess_cost'] = df['excess_inventory'] * df['base_cost'] * 0.05

    # Calculate prediction metrics using proper formulas
    all_actual_values = []
    all_predicted_values = []
    df['total_costs'] = df['production_cost'] + df['labor_cost'] + df['holding_cost'] + df['excess_cost']
    df['profit'] = df['revenue'] - df['total_costs']

    for watch_id, perf in results['watch_performance'].items():
        # Calculate MAPE for this watch using the proper formula
        perf['mape'] = round(calculate_mape(perf['actual_values'], perf['predicted_values']), 1)
    # Calculate prediction errors
    df['error'] = (df['predicted_demand'] - df['actual_demand']).abs()
    df['error_pct'] = df.apply(
        lambda row: (row['error'] / row['actual_demand'] * 100) if row['actual_demand'] > 0 else 0,
        axis=1
    )

        # Calculate accuracy for this watch using the proper formula
        perf['accuracy'] = round(calculate_accuracy(perf['actual_values'], perf['predicted_values']), 1)
    # Round financial columns
    financial_cols = ['revenue', 'production_cost', 'labor_cost', 'holding_cost',
                      'excess_cost', 'lost_revenue', 'total_costs', 'profit']
    for col in financial_cols:
        df[col] = df[col].round(2)
    df['error_pct'] = df['error_pct'].round(1)

    # Calculate watch-level performance metrics
    watch_performance = {}
    for watch_id in df['watch_id'].unique():
        watch_df = df[df['watch_id'] == watch_id]

        actual_values = watch_df['actual_demand'].tolist()
        predicted_values = watch_df['predicted_demand'].tolist()

        watch_performance[watch_id] = {
            'watch_name': watch_df['watch_name'].iloc[0],
            'total_predicted': int(watch_df['predicted_demand'].sum()),
            'total_actual': int(watch_df['actual_demand'].sum()),
            'total_sold': int(watch_df['units_sold'].sum()),
            'total_stockout': int(watch_df['stockout'].sum()),
            'total_revenue': float(watch_df['revenue'].sum()),
            'total_profit': float(watch_df['profit'].sum()),
            'mape': round(calculate_mape(actual_values, predicted_values), 1),
            'accuracy': round(calculate_accuracy(actual_values, predicted_values), 1)
        }

        # Accumulate for overall metrics
        all_actual_values.extend(perf['actual_values'])
        all_predicted_values.extend(perf['predicted_values'])
    # Calculate overall prediction accuracy
    all_actual = df['actual_demand'].tolist()
    all_predicted = df['predicted_demand'].tolist()

    # Calculate overall metrics using proper formulas
    overall_mape = calculate_mape(all_actual_values, all_predicted_values)
    overall_accuracy = calculate_accuracy(all_actual_values, all_predicted_values)
    prediction_accuracy = {
        'mape': round(calculate_mape(all_actual, all_predicted), 1),
        'accuracy': round(calculate_accuracy(all_actual, all_predicted), 1)
    }

    results['prediction_accuracy'] = {
        'mape': round(overall_mape, 1),
        'accuracy': round(overall_accuracy, 1)
    # Calculate financial summary
    financial_summary = {
        'total_revenue': round(df['revenue'].sum(), 2),
        'total_costs': round(df['total_costs'].sum(), 2),
        'total_profit': round(df['profit'].sum(), 2),
        'lost_revenue': round(df['lost_revenue'].sum(), 2),
        'excess_costs': round(df['excess_cost'].sum(), 2)
    }

    return results
    # Build monthly comparison structure for templates
    monthly_comparison = []
    for month in df['month'].unique():
        month_df = df[df['month'] == month]

        watches_list = []
        for _, row in month_df.iterrows():
            watches_list.append({
                'watch_id': int(row['watch_id']),
                'watch_name': row['watch_name'],
                'predicted_demand': int(row['predicted_demand']),
                'actual_demand': int(row['actual_demand']),
                'production': int(row['production']),
                'inventory_start': int(row['inventory_start']),
                'inventory_end': int(row['inventory_end']),
                'units_sold': int(row['units_sold']),
                'stockout': int(row['stockout']),
                'revenue': float(row['revenue']),
                'production_cost': float(row['production_cost']),
                'labor_cost': float(row['labor_cost']),
                'holding_cost': float(row['holding_cost']),
                'excess_cost': float(row['excess_cost']),
                'lost_revenue': float(row['lost_revenue']),
                'total_costs': float(row['total_costs']),
                'profit': float(row['profit']),
                'error': float(row['error']),
                'error_pct': float(row['error_pct'])
            })

        monthly_comparison.append({
            'month': int(month),
            'date': month_df['date'].iloc[0],
            'watches': watches_list
        })

    return {
        'monthly_comparison': monthly_comparison,
        'watch_performance': watch_performance,
        'financial_summary': financial_summary,
        'prediction_accuracy': prediction_accuracy,
        'dataframe': df  # Include for potential future use
    }


@app.route('/')