Unverified Commit b7e6e8b7 authored by utorque's avatar utorque Committed by GitHub
Browse files

Merge pull request #9 from utorque/claude/fix-accuracy-metrics-01BnyYDEPpgpCjQ7TjqWYXDV

Integrated with original-like sim and fixed metrics
parents 32842673 e4b6f3e9
Loading
Loading
Loading
Loading
+209 −139
Original line number Diff line number Diff line
@@ -10,6 +10,9 @@ import json
import os
from datetime import datetime
from io import BytesIO
import pandas as pd
import numpy as np

try:
    from openpyxl import Workbook
    from openpyxl.styles import Font, PatternFill, Alignment
@@ -23,8 +26,8 @@ app.config['SESSION_TYPE'] = 'filesystem'

# Load dataset
DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
TRAINING_DATA_PATH = os.path.join(DATA_DIR, 'sim_supply_chain_data_training.json')
TEST_DATA_PATH = os.path.join(DATA_DIR, 'sim_supply_chain_data_test.json')
TRAINING_DATA_PATH = os.path.join(DATA_DIR, 'sim2_supply_chain_data_training.json')
TEST_DATA_PATH = os.path.join(DATA_DIR, 'sim2_supply_chain_data_test.json')


def load_training_data():
@@ -39,9 +42,76 @@ def load_test_data():
        return json.load(f)


def calculate_mape(actual_values, predicted_values):
    """
    Return the Mean Absolute Percentage Error (MAPE) as a percentage.

    MAPE = (1/n) * Σ(|actual - predicted| / |actual|) * 100,
    where n counts only the pairs whose actual value is non-zero
    (zero actuals are excluded to avoid division by zero).

    Args:
        actual_values: List of actual values.
        predicted_values: List of predicted values.

    Returns:
        MAPE as a percentage; 0.0 for empty, mismatched-length, or
        all-zero-actual inputs.
    """
    # Guard against empty or mismatched inputs.
    if not actual_values or not predicted_values:
        return 0.0
    if len(actual_values) != len(predicted_values):
        return 0.0

    # Per-pair relative errors, skipping zero actuals.
    relative_errors = [
        abs(actual - predicted) / abs(actual)
        for actual, predicted in zip(actual_values, predicted_values)
        if actual != 0
    ]

    if not relative_errors:
        return 0.0

    return (sum(relative_errors) / len(relative_errors)) * 100


def calculate_accuracy(actual_values, predicted_values):
    """
    Return prediction accuracy as the complement of the total error ratio.

    Accuracy = (1 - Σ|actual - predicted| / Σactual) * 100

    Args:
        actual_values: List of actual values.
        predicted_values: List of predicted values.

    Returns:
        Accuracy as a percentage; 0.0 for empty, mismatched-length, or
        zero-total-actual inputs. Can be negative when the summed error
        exceeds the summed actuals.
    """
    # Guard against empty or mismatched inputs.
    if not actual_values or not predicted_values:
        return 0.0
    if len(actual_values) != len(predicted_values):
        return 0.0

    total_actual = sum(actual_values)
    if total_actual == 0:
        return 0.0

    total_absolute_error = sum(
        abs(actual - predicted)
        for actual, predicted in zip(actual_values, predicted_values)
    )

    return (1 - (total_absolute_error / total_actual)) * 100


def calculate_results(predictions, test_data):
    """
    Calculate financial results comparing predictions to actual data using pandas.

    Simulates a simple monthly inventory model per watch — production is driven
    by the student's prediction, sales are capped by available stock, and each
    month's starting inventory is the previous month's ending inventory — then
    derives financial metrics, per-watch performance, and overall accuracy.

    Args:
        predictions: Dict of {watch_id (str): [12 monthly predictions]}.
        test_data: List of monthly records, each with a 'date' and a 'watches'
            list of {'watch_id': ..., 'demand': ...} entries.

    Returns:
        Dict with keys 'monthly_comparison', 'watch_performance',
        'financial_summary', and 'prediction_accuracy', all built from plain
        Python types so the result is JSON/Jinja friendly.
    """
    watches_meta = load_training_data()['metadata']['watches']
    watch_lookup = {w['id']: w for w in watches_meta}

    # Build the main DataFrame with all prediction and actual data.
    rows = []
    for month_idx, month_data in enumerate(test_data):
        for watch_data in month_data['watches']:
            watch_id = watch_data['watch_id']
            # Missing predictions default to zero production for every month.
            predicted_demand = predictions.get(str(watch_id), [0] * 12)[month_idx]
            rows.append({
                'month': month_idx + 1,
                'date': month_data['date'],
                'watch_id': watch_id,
                'watch_name': watch_lookup[watch_id]['name'],
                'predicted_demand': predicted_demand,
                'actual_demand': watch_data['demand'],
                'sell_price': watch_lookup[watch_id]['sell_price'],
                'base_cost': watch_lookup[watch_id]['base_cost']
            })

    df = pd.DataFrame(rows)

    # Simulate inventory month by month for each watch. This part is
    # inherently sequential (stock carries over), so it stays a loop.
    inventory_data = []
    for watch_id in df['watch_id'].unique():
        watch_df = df[df['watch_id'] == watch_id].sort_values('month')
        inventory_start = 100  # Initial inventory

        for _, row in watch_df.iterrows():
            # The student's prediction drives production.
            production = int(row['predicted_demand'])
            available = inventory_start + production
            units_sold = min(row['actual_demand'], available)
            inventory_end = available - units_sold
            stockout = max(0, row['actual_demand'] - units_sold)

            inventory_data.append({
                'month': row['month'],
                'watch_id': row['watch_id'],
                'inventory_start': inventory_start,
                'production': production,
                'units_sold': units_sold,
                'inventory_end': inventory_end,
                'stockout': stockout
            })

            inventory_start = inventory_end  # Next month starts with this month's end

    inventory_df = pd.DataFrame(inventory_data)

    # Merge inventory data back into the main df.
    df = df.merge(inventory_df, on=['month', 'watch_id'], how='left')

    # Financial metrics (vectorized).
    df['revenue'] = df['units_sold'] * df['sell_price']
    df['production_cost'] = df['production'] * df['base_cost']
    df['labor_cost'] = df['production'] * 20.0
    df['holding_cost'] = df['inventory_end'] * df['base_cost'] * 0.02
    df['lost_revenue'] = df['stockout'] * df['sell_price']

    # Excess costs from overproduction: inventory above the 50-unit safety
    # stock incurs 5% waste/obsolescence on base cost. clip(lower=0) is the
    # vectorized equivalent of max(0, inventory_end - 50) per row.
    df['excess_inventory'] = (df['inventory_end'] - 50).clip(lower=0)
    df['excess_cost'] = df['excess_inventory'] * df['base_cost'] * 0.05

    df['total_costs'] = df['production_cost'] + df['labor_cost'] + df['holding_cost'] + df['excess_cost']
    df['profit'] = df['revenue'] - df['total_costs']

    # Per-row prediction errors; error_pct is guarded against zero actuals.
    df['error'] = (df['predicted_demand'] - df['actual_demand']).abs()
    df['error_pct'] = df.apply(
        lambda row: (row['error'] / row['actual_demand'] * 100) if row['actual_demand'] > 0 else 0,
        axis=1
    )

    # Round financial columns for presentation.
    financial_cols = ['revenue', 'production_cost', 'labor_cost', 'holding_cost',
                      'excess_cost', 'lost_revenue', 'total_costs', 'profit']
    for col in financial_cols:
        df[col] = df[col].round(2)
    df['error_pct'] = df['error_pct'].round(1)

    # Watch-level performance metrics.
    watch_performance = {}
    for watch_id in df['watch_id'].unique():
        watch_df = df[df['watch_id'] == watch_id]

        actual_values = watch_df['actual_demand'].tolist()
        predicted_values = watch_df['predicted_demand'].tolist()

        watch_performance[int(watch_id)] = {  # Convert numpy.int64 to Python int
            'watch_name': watch_df['watch_name'].iloc[0],
            'total_predicted': int(watch_df['predicted_demand'].sum()),
            'total_actual': int(watch_df['actual_demand'].sum()),
            'total_sold': int(watch_df['units_sold'].sum()),
            'total_stockout': int(watch_df['stockout'].sum()),
            'total_revenue': float(watch_df['revenue'].sum()),
            'total_profit': float(watch_df['profit'].sum()),
            'mape': round(calculate_mape(actual_values, predicted_values), 1),
            'accuracy': round(calculate_accuracy(actual_values, predicted_values), 1)
        }

    # Overall prediction accuracy across all watches and months.
    all_actual = df['actual_demand'].tolist()
    all_predicted = df['predicted_demand'].tolist()
    prediction_accuracy = {
        'mape': round(calculate_mape(all_actual, all_predicted), 1),
        'accuracy': round(calculate_accuracy(all_actual, all_predicted), 1)
    }

    # Financial summary (float() strips numpy scalar types).
    financial_summary = {
        'total_revenue': round(float(df['revenue'].sum()), 2),
        'total_costs': round(float(df['total_costs'].sum()), 2),
        'total_profit': round(float(df['profit'].sum()), 2),
        'lost_revenue': round(float(df['lost_revenue'].sum()), 2),
        'excess_costs': round(float(df['excess_cost'].sum()), 2)
    }

    # Monthly comparison structure for the templates.
    monthly_comparison = []
    for month in sorted(df['month'].unique()):
        month_df = df[df['month'] == month]

        watches_list = []
        for _, row in month_df.iterrows():
            watches_list.append({
                'watch_id': int(row['watch_id']),
                'watch_name': row['watch_name'],
                'predicted_demand': int(row['predicted_demand']),
                'actual_demand': int(row['actual_demand']),
                'production': int(row['production']),
                'inventory_start': int(row['inventory_start']),
                'inventory_end': int(row['inventory_end']),
                'units_sold': int(row['units_sold']),
                'stockout': int(row['stockout']),
                'revenue': float(row['revenue']),
                'production_cost': float(row['production_cost']),
                'labor_cost': float(row['labor_cost']),
                'holding_cost': float(row['holding_cost']),
                'excess_cost': float(row['excess_cost']),
                'lost_revenue': float(row['lost_revenue']),
                'total_costs': float(row['total_costs']),
                'profit': float(row['profit']),
                'error': float(row['error']),
                'error_pct': float(row['error_pct'])
            })

        monthly_comparison.append({
            'month': int(month),
            'date': month_df['date'].iloc[0],
            'watches': watches_list
        })

    return {
        'monthly_comparison': monthly_comparison,
        'watch_performance': watch_performance,
        'financial_summary': financial_summary,
        'prediction_accuracy': prediction_accuracy
    }


@app.route('/')
+536 −0

File added.

Preview size limit exceeded, changes collapsed.

+19 −12
Original line number Diff line number Diff line
@@ -14,15 +14,22 @@

<section class="section">
    <div class="container">
        <!-- Accuracy Metrics: MAPE (lower is better) and overall accuracy (higher is better) -->
        <div class="columns">
            <div class="column is-6">
                <div class="box has-text-centered" style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white;">
                    <h3 class="title is-4" style="color: white;">MAPE</h3>
                    <p class="subtitle is-6" style="color: white; margin-bottom: 0.5rem;">Mean Absolute Percentage Error</p>
                    <div class="score-display">{{ results.prediction_accuracy.mape }}%</div>
                    <p class="subtitle is-6" style="color: white; margin-top: 0.5rem;">Lower is better</p>
                </div>
            </div>
            <div class="column is-6">
                <div class="box has-text-centered" style="background: linear-gradient(135deg, #764ba2 0%, #667eea 100%); color: white;">
                    <h3 class="title is-4" style="color: white;">Prediction Accuracy</h3>
                    <p class="subtitle is-6" style="color: white; margin-bottom: 0.5rem;">1 - (Total Error / Total Actual)</p>
                    <div class="score-display">{{ results.prediction_accuracy.accuracy }}%</div>
                    <p class="subtitle is-6" style="color: white; margin-top: 0.5rem;">Higher is better</p>
                </div>
            </div>
        </div>
@@ -195,19 +202,19 @@
        <div class="box">
            <h3 class="title is-4">Key Insights</h3>
            <div class="content">
                {# Tiered feedback keyed on the overall accuracy metric #}
                {% if results.prediction_accuracy.accuracy >= 90 %}
                <div class="notification is-success">
                    <p><strong>Excellent work!</strong> Your predictions were highly accurate ({{ results.prediction_accuracy.accuracy }}% accuracy, {{ results.prediction_accuracy.mape }}% MAPE).</p>
                    <p>You demonstrated strong understanding of demand patterns and seasonality.</p>
                </div>
                {% elif results.prediction_accuracy.accuracy >= 75 %}
                <div class="notification is-info">
                    <p><strong>Good job!</strong> Your predictions were reasonably accurate ({{ results.prediction_accuracy.accuracy }}% accuracy, {{ results.prediction_accuracy.mape }}% MAPE).</p>
                    <p>There's room for improvement - review the monthly patterns to identify where you can refine your forecasts.</p>
                </div>
                {% else %}
                <div class="notification is-warning">
                    <p><strong>Keep learning!</strong> Your predictions had significant variance from actual demand ({{ results.prediction_accuracy.accuracy }}% accuracy, {{ results.prediction_accuracy.mape }}% MAPE).</p>
                    <p>Consider: Did you account for seasonal patterns? Did you follow the growth trends?</p>
                </div>
                {% endif %}