Unverified Commit b7e6e8b7 authored by utorque's avatar utorque Committed by GitHub
Browse files

Merge pull request #9 from utorque/claude/fix-accuracy-metrics-01BnyYDEPpgpCjQ7TjqWYXDV

Integrated with original-like sim and fixed metrics
parents 32842673 e4b6f3e9
Loading
Loading
Loading
Loading
+209 −139
Original line number Diff line number Diff line
@@ -10,6 +10,9 @@ import json
import os
from datetime import datetime
from io import BytesIO
import pandas as pd
import numpy as np

try:
    from openpyxl import Workbook
    from openpyxl.styles import Font, PatternFill, Alignment
@@ -23,8 +26,8 @@ app.config['SESSION_TYPE'] = 'filesystem'

# Load dataset
DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
TRAINING_DATA_PATH = os.path.join(DATA_DIR, 'sim_supply_chain_data_training.json')
TEST_DATA_PATH = os.path.join(DATA_DIR, 'sim_supply_chain_data_test.json')
TRAINING_DATA_PATH = os.path.join(DATA_DIR, 'sim2_supply_chain_data_training.json')
TEST_DATA_PATH = os.path.join(DATA_DIR, 'sim2_supply_chain_data_test.json')


def load_training_data():
@@ -39,9 +42,76 @@ def load_test_data():
        return json.load(f)


def calculate_mape(actual_values, predicted_values):
    """
    Return the Mean Absolute Percentage Error (MAPE) as a percentage.

    MAPE = (1/n) * Σ(|actual - predicted| / |actual|) * 100,
    where n counts only the pairs whose actual value is non-zero
    (zero actuals are excluded to avoid division by zero).

    Args:
        actual_values: List of actual values.
        predicted_values: List of predicted values.

    Returns:
        MAPE as a percentage; 0.0 for empty, mismatched-length, or
        all-zero-actual inputs.
    """
    # Guard against empty or mismatched inputs.
    if not actual_values or not predicted_values:
        return 0.0
    if len(actual_values) != len(predicted_values):
        return 0.0

    # Per-pair relative errors, skipping zero actuals.
    relative_errors = [
        abs(actual - predicted) / abs(actual)
        for actual, predicted in zip(actual_values, predicted_values)
        if actual != 0
    ]

    if not relative_errors:
        return 0.0

    return (sum(relative_errors) / len(relative_errors)) * 100


def calculate_accuracy(actual_values, predicted_values):
    """
    Return prediction accuracy as the complement of the total error ratio.

    Accuracy = (1 - Σ|actual - predicted| / Σactual) * 100

    Args:
        actual_values: List of actual values.
        predicted_values: List of predicted values.

    Returns:
        Accuracy as a percentage; 0.0 for empty, mismatched-length, or
        zero-total-actual inputs. Can be negative when the summed error
        exceeds the summed actuals.
    """
    # Guard against empty or mismatched inputs.
    if not actual_values or not predicted_values:
        return 0.0
    if len(actual_values) != len(predicted_values):
        return 0.0

    total_actual = sum(actual_values)
    if total_actual == 0:
        return 0.0

    total_absolute_error = sum(
        abs(actual - predicted)
        for actual, predicted in zip(actual_values, predicted_values)
    )

    return (1 - (total_absolute_error / total_actual)) * 100


def calculate_results(predictions, test_data):
    """
    Calculate financial results comparing predictions to actual data using pandas.

    Simulates a simple monthly inventory model per watch — production is driven
    by the student's prediction, sales are capped by available stock, and each
    month's starting inventory is the previous month's ending inventory — then
    derives financial metrics, per-watch performance, and overall accuracy.

    Args:
        predictions: Dict of {watch_id (str): [12 monthly predictions]}.
        test_data: List of monthly records, each with a 'date' and a 'watches'
            list of {'watch_id': ..., 'demand': ...} entries.

    Returns:
        Dict with keys 'monthly_comparison', 'watch_performance',
        'financial_summary', and 'prediction_accuracy', all built from plain
        Python types so the result is JSON/Jinja friendly.
    """
    watches_meta = load_training_data()['metadata']['watches']
    watch_lookup = {w['id']: w for w in watches_meta}

    # Build the main DataFrame with all prediction and actual data.
    rows = []
    for month_idx, month_data in enumerate(test_data):
        for watch_data in month_data['watches']:
            watch_id = watch_data['watch_id']
            # Missing predictions default to zero production for every month.
            predicted_demand = predictions.get(str(watch_id), [0] * 12)[month_idx]
            rows.append({
                'month': month_idx + 1,
                'date': month_data['date'],
                'watch_id': watch_id,
                'watch_name': watch_lookup[watch_id]['name'],
                'predicted_demand': predicted_demand,
                'actual_demand': watch_data['demand'],
                'sell_price': watch_lookup[watch_id]['sell_price'],
                'base_cost': watch_lookup[watch_id]['base_cost']
            })

    df = pd.DataFrame(rows)

    # Simulate inventory month by month for each watch. This part is
    # inherently sequential (stock carries over), so it stays a loop.
    inventory_data = []
    for watch_id in df['watch_id'].unique():
        watch_df = df[df['watch_id'] == watch_id].sort_values('month')
        inventory_start = 100  # Initial inventory

        for _, row in watch_df.iterrows():
            # The student's prediction drives production.
            production = int(row['predicted_demand'])
            available = inventory_start + production
            units_sold = min(row['actual_demand'], available)
            inventory_end = available - units_sold
            stockout = max(0, row['actual_demand'] - units_sold)

            inventory_data.append({
                'month': row['month'],
                'watch_id': row['watch_id'],
                'inventory_start': inventory_start,
                'production': production,
                'units_sold': units_sold,
                'inventory_end': inventory_end,
                'stockout': stockout
            })

            inventory_start = inventory_end  # Next month starts with this month's end

    inventory_df = pd.DataFrame(inventory_data)

    # Merge inventory data back into the main df.
    df = df.merge(inventory_df, on=['month', 'watch_id'], how='left')

    # Financial metrics (vectorized).
    df['revenue'] = df['units_sold'] * df['sell_price']
    df['production_cost'] = df['production'] * df['base_cost']
    df['labor_cost'] = df['production'] * 20.0
    df['holding_cost'] = df['inventory_end'] * df['base_cost'] * 0.02
    df['lost_revenue'] = df['stockout'] * df['sell_price']

    # Excess costs from overproduction: inventory above the 50-unit safety
    # stock incurs 5% waste/obsolescence on base cost. clip(lower=0) is the
    # vectorized equivalent of max(0, inventory_end - 50) per row.
    df['excess_inventory'] = (df['inventory_end'] - 50).clip(lower=0)
    df['excess_cost'] = df['excess_inventory'] * df['base_cost'] * 0.05

    df['total_costs'] = df['production_cost'] + df['labor_cost'] + df['holding_cost'] + df['excess_cost']
    df['profit'] = df['revenue'] - df['total_costs']

    # Per-row prediction errors; error_pct is guarded against zero actuals.
    df['error'] = (df['predicted_demand'] - df['actual_demand']).abs()
    df['error_pct'] = df.apply(
        lambda row: (row['error'] / row['actual_demand'] * 100) if row['actual_demand'] > 0 else 0,
        axis=1
    )

    # Round financial columns for presentation.
    financial_cols = ['revenue', 'production_cost', 'labor_cost', 'holding_cost',
                      'excess_cost', 'lost_revenue', 'total_costs', 'profit']
    for col in financial_cols:
        df[col] = df[col].round(2)
    df['error_pct'] = df['error_pct'].round(1)

    # Watch-level performance metrics.
    watch_performance = {}
    for watch_id in df['watch_id'].unique():
        watch_df = df[df['watch_id'] == watch_id]

        actual_values = watch_df['actual_demand'].tolist()
        predicted_values = watch_df['predicted_demand'].tolist()

        watch_performance[int(watch_id)] = {  # Convert numpy.int64 to Python int
            'watch_name': watch_df['watch_name'].iloc[0],
            'total_predicted': int(watch_df['predicted_demand'].sum()),
            'total_actual': int(watch_df['actual_demand'].sum()),
            'total_sold': int(watch_df['units_sold'].sum()),
            'total_stockout': int(watch_df['stockout'].sum()),
            'total_revenue': float(watch_df['revenue'].sum()),
            'total_profit': float(watch_df['profit'].sum()),
            'mape': round(calculate_mape(actual_values, predicted_values), 1),
            'accuracy': round(calculate_accuracy(actual_values, predicted_values), 1)
        }

    # Overall prediction accuracy across all watches and months.
    all_actual = df['actual_demand'].tolist()
    all_predicted = df['predicted_demand'].tolist()
    prediction_accuracy = {
        'mape': round(calculate_mape(all_actual, all_predicted), 1),
        'accuracy': round(calculate_accuracy(all_actual, all_predicted), 1)
    }

    # Financial summary (float() strips numpy scalar types).
    financial_summary = {
        'total_revenue': round(float(df['revenue'].sum()), 2),
        'total_costs': round(float(df['total_costs'].sum()), 2),
        'total_profit': round(float(df['profit'].sum()), 2),
        'lost_revenue': round(float(df['lost_revenue'].sum()), 2),
        'excess_costs': round(float(df['excess_cost'].sum()), 2)
    }

    # Monthly comparison structure for the templates.
    monthly_comparison = []
    for month in sorted(df['month'].unique()):
        month_df = df[df['month'] == month]

        watches_list = []
        for _, row in month_df.iterrows():
            watches_list.append({
                'watch_id': int(row['watch_id']),
                'watch_name': row['watch_name'],
                'predicted_demand': int(row['predicted_demand']),
                'actual_demand': int(row['actual_demand']),
                'production': int(row['production']),
                'inventory_start': int(row['inventory_start']),
                'inventory_end': int(row['inventory_end']),
                'units_sold': int(row['units_sold']),
                'stockout': int(row['stockout']),
                'revenue': float(row['revenue']),
                'production_cost': float(row['production_cost']),
                'labor_cost': float(row['labor_cost']),
                'holding_cost': float(row['holding_cost']),
                'excess_cost': float(row['excess_cost']),
                'lost_revenue': float(row['lost_revenue']),
                'total_costs': float(row['total_costs']),
                'profit': float(row['profit']),
                'error': float(row['error']),
                'error_pct': float(row['error_pct'])
            })

        monthly_comparison.append({
            'month': int(month),
            'date': month_df['date'].iloc[0],
            'watches': watches_list
        })

    return {
        'monthly_comparison': monthly_comparison,
        'watch_performance': watch_performance,
        'financial_summary': financial_summary,
        'prediction_accuracy': prediction_accuracy
    }


@app.route('/')
+536 −0

File added.

Preview size limit exceeded, changes collapsed.

+19 −12
Original line number Diff line number Diff line
@@ -14,15 +14,22 @@

<section class="section">
    <div class="container">
        <!-- Accuracy Metrics: MAPE (lower is better) and overall accuracy (higher is better) -->
        <div class="columns">
            <div class="column is-6">
                <div class="box has-text-centered" style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white;">
                    <h3 class="title is-4" style="color: white;">MAPE</h3>
                    <p class="subtitle is-6" style="color: white; margin-bottom: 0.5rem;">Mean Absolute Percentage Error</p>
                    <div class="score-display">{{ results.prediction_accuracy.mape }}%</div>
                    <p class="subtitle is-6" style="color: white; margin-top: 0.5rem;">Lower is better</p>
                </div>
            </div>
            <div class="column is-6">
                <div class="box has-text-centered" style="background: linear-gradient(135deg, #764ba2 0%, #667eea 100%); color: white;">
                    <h3 class="title is-4" style="color: white;">Prediction Accuracy</h3>
                    <p class="subtitle is-6" style="color: white; margin-bottom: 0.5rem;">1 - (Total Error / Total Actual)</p>
                    <div class="score-display">{{ results.prediction_accuracy.accuracy }}%</div>
                    <p class="subtitle is-6" style="color: white; margin-top: 0.5rem;">Higher is better</p>
                </div>
            </div>
        </div>
@@ -195,19 +202,19 @@
        <div class="box">
            <h3 class="title is-4">Key Insights</h3>
            <div class="content">
                {# Tiered feedback keyed on the overall accuracy metric #}
                {% if results.prediction_accuracy.accuracy >= 90 %}
                <div class="notification is-success">
                    <p><strong>Excellent work!</strong> Your predictions were highly accurate ({{ results.prediction_accuracy.accuracy }}% accuracy, {{ results.prediction_accuracy.mape }}% MAPE).</p>
                    <p>You demonstrated strong understanding of demand patterns and seasonality.</p>
                </div>
                {% elif results.prediction_accuracy.accuracy >= 75 %}
                <div class="notification is-info">
                    <p><strong>Good job!</strong> Your predictions were reasonably accurate ({{ results.prediction_accuracy.accuracy }}% accuracy, {{ results.prediction_accuracy.mape }}% MAPE).</p>
                    <p>There's room for improvement - review the monthly patterns to identify where you can refine your forecasts.</p>
                </div>
                {% else %}
                <div class="notification is-warning">
                    <p><strong>Keep learning!</strong> Your predictions had significant variance from actual demand ({{ results.prediction_accuracy.accuracy }}% accuracy, {{ results.prediction_accuracy.mape }}% MAPE).</p>
                    <p>Consider: Did you account for seasonal patterns? Did you follow the growth trends?</p>
                </div>
                {% endif %}