Refactor calculate_results to use pandas DataFrames (3cb575fc) · Commits · Barthelet Thibault / horloml

forecast_app/app.py

+146 −138

Original line number	Diff line number	Diff line
		@@ -10,6 +10,9 @@ import json
		import os
		from datetime import datetime
		from io import BytesIO
		import pandas as pd
		import numpy as np

		try:
		from openpyxl import Workbook
		from openpyxl.styles import Font, PatternFill, Alignment
		@@ -108,7 +111,7 @@ def calculate_accuracy(actual_values, predicted_values):

		def calculate_results(predictions, test_data):
		"""
		Calculate financial results comparing predictions to actual data
		Calculate financial results comparing predictions to actual data using pandas

		Args:
		predictions: Dict of {watch_id: [12 monthly predictions]}
		@@ -117,162 +120,167 @@ def calculate_results(predictions, test_data):
		Returns:
		Dictionary with detailed results
		"""
		watches = load_training_data()['metadata']['watches']
		watches_meta = load_training_data()['metadata']['watches']
		watch_lookup = {w['id']: w for w in watches_meta}

		results = {
		'monthly_comparison': [],
		'watch_performance': {},
		'financial_summary': {
		'total_revenue': 0,
		'total_costs': 0,
		'total_profit': 0,
		'lost_revenue': 0,
		'excess_costs': 0
		},
		'prediction_accuracy': {}
		}

		# Process each month
		# Build the main DataFrame with all prediction and actual data
		rows = []
		for month_idx, month_data in enumerate(test_data):
		month_comparison = {
		'month': month_idx + 1,
		'date': month_data['date'],
		'watches': []
		}

		# Process each watch
		for watch_data in month_data['watches']:
		watch_id = watch_data['watch_id']
		watch = next(w for w in watches if w['id'] == watch_id)

		# Get student's prediction
		predicted_demand = predictions.get(str(watch_id), [0] * 12)[month_idx]
		actual_demand = watch_data['demand']

		# Calculate production based on prediction
		# Student's prediction drives production
		production = int(predicted_demand)
		rows.append({
		'month': month_idx + 1,
		'date': month_data['date'],
		'watch_id': watch_id,
		'watch_name': watch_lookup[watch_id]['name'],
		'predicted_demand': predicted_demand,
		'actual_demand': watch_data['demand'],
		'sell_price': watch_lookup[watch_id]['sell_price'],
		'base_cost': watch_lookup[watch_id]['base_cost']
		})

		# Calculate inventory (simple model: start with previous month's end)
		if month_idx == 0:
		inventory_start = 100 # Starting inventory for year 11
		else:
		prev_watch = [w for w in results['monthly_comparison'][month_idx-1]['watches']
		if w['watch_id'] == watch_id][0]
		inventory_start = prev_watch['inventory_end']
		df = pd.DataFrame(rows)

		# Calculate what actually happens
		# Calculate inventory and operations month by month
		inventory_data = []
		for watch_id in df['watch_id'].unique():
		watch_df = df[df['watch_id'] == watch_id].sort_values('month')
		inventory_start = 100 # Initial inventory

		for idx, row in watch_df.iterrows():
		production = int(row['predicted_demand'])
		available = inventory_start + production
		units_sold = min(actual_demand, available)
		units_sold = min(row['actual_demand'], available)
		inventory_end = available - units_sold
		stockout = max(0, actual_demand - units_sold)

		# Financial calculations
		revenue = units_sold * watch['sell_price']
		production_cost = production * watch['base_cost']
		labor_cost = production * 20.0
		holding_cost = inventory_end * watch['base_cost'] * 0.02

		# Lost revenue from stockouts
		lost_revenue = stockout * watch['sell_price']

		# Excess costs from overproduction
		excess_inventory = max(0, inventory_end - 50) # 50 is healthy safety stock
		excess_cost = excess_inventory * watch['base_cost'] * 0.05 # 5% waste/obsolescence
		stockout = max(0, row['actual_demand'] - units_sold)

		total_costs = production_cost + labor_cost + holding_cost + excess_cost
		profit = revenue - total_costs

		# Prediction error
		error = abs(predicted_demand - actual_demand)
		error_pct = (error / actual_demand * 100) if actual_demand > 0 else 0

		watch_result = {
		'watch_id': watch_id,
		'watch_name': watch['name'],
		'predicted_demand': predicted_demand,
		'actual_demand': actual_demand,
		'production': production,
		inventory_data.append({
		'month': row['month'],
		'watch_id': row['watch_id'],
		'inventory_start': inventory_start,
		'inventory_end': inventory_end,
		'production': production,
		'units_sold': units_sold,
		'stockout': stockout,
		'revenue': round(revenue, 2),
		'production_cost': round(production_cost, 2),
		'labor_cost': round(labor_cost, 2),
		'holding_cost': round(holding_cost, 2),
		'excess_cost': round(excess_cost, 2),
		'lost_revenue': round(lost_revenue, 2),
		'total_costs': round(total_costs, 2),
		'profit': round(profit, 2),
		'error': error,
		'error_pct': round(error_pct, 1)
		}
		'inventory_end': inventory_end,
		'stockout': stockout
		})

		month_comparison['watches'].append(watch_result)

		# Accumulate totals
		results['financial_summary']['total_revenue'] += revenue
		results['financial_summary']['total_costs'] += total_costs
		results['financial_summary']['total_profit'] += profit
		results['financial_summary']['lost_revenue'] += lost_revenue
		results['financial_summary']['excess_costs'] += excess_cost

		# Accumulate watch-level statistics
		if watch_id not in results['watch_performance']:
		results['watch_performance'][watch_id] = {
		'watch_name': watch['name'],
		'total_predicted': 0,
		'total_actual': 0,
		'total_sold': 0,
		'total_stockout': 0,
		'total_revenue': 0,
		'total_profit': 0,
		'actual_values': [],
		'predicted_values': []
		}
		inventory_start = inventory_end # Next month starts with this month's end

		inventory_df = pd.DataFrame(inventory_data)

		perf = results['watch_performance'][watch_id]
		perf['total_predicted'] += predicted_demand
		perf['total_actual'] += actual_demand
		perf['total_sold'] += units_sold
		perf['total_stockout'] += stockout
		perf['total_revenue'] += revenue
		perf['total_profit'] += profit
		perf['actual_values'].append(actual_demand)
		perf['predicted_values'].append(predicted_demand)
		# Merge inventory data back to main df
		df = df.merge(inventory_df, on=['month', 'watch_id'], how='left')

		results['monthly_comparison'].append(month_comparison)
		# Calculate financial metrics
		df['revenue'] = df['units_sold'] * df['sell_price']
		df['production_cost'] = df['production'] * df['base_cost']
		df['labor_cost'] = df['production'] * 20.0
		df['holding_cost'] = df['inventory_end'] * df['base_cost'] * 0.02
		df['lost_revenue'] = df['stockout'] * df['sell_price']

		# Round financial summary
		for key in results['financial_summary']:
		results['financial_summary'][key] = round(results['financial_summary'][key], 2)
		# Excess costs from overproduction (inventory > 50 safety stock)
		df['excess_inventory'] = df['inventory_end'].apply(lambda x: max(0, x - 50))
		df['excess_cost'] = df['excess_inventory'] * df['base_cost'] * 0.05

		# Calculate prediction metrics using proper formulas
		all_actual_values = []
		all_predicted_values = []
		df['total_costs'] = df['production_cost'] + df['labor_cost'] + df['holding_cost'] + df['excess_cost']
		df['profit'] = df['revenue'] - df['total_costs']

		for watch_id, perf in results['watch_performance'].items():
		# Calculate MAPE for this watch using the proper formula
		perf['mape'] = round(calculate_mape(perf['actual_values'], perf['predicted_values']), 1)
		# Calculate prediction errors
		df['error'] = (df['predicted_demand'] - df['actual_demand']).abs()
		df['error_pct'] = df.apply(
		lambda row: (row['error'] / row['actual_demand'] * 100) if row['actual_demand'] > 0 else 0,
		axis=1
		)

		# Calculate accuracy for this watch using the proper formula
		perf['accuracy'] = round(calculate_accuracy(perf['actual_values'], perf['predicted_values']), 1)
		# Round financial columns
		financial_cols = ['revenue', 'production_cost', 'labor_cost', 'holding_cost',
		'excess_cost', 'lost_revenue', 'total_costs', 'profit']
		for col in financial_cols:
		df[col] = df[col].round(2)
		df['error_pct'] = df['error_pct'].round(1)

		# Calculate watch-level performance metrics
		watch_performance = {}
		for watch_id in df['watch_id'].unique():
		watch_df = df[df['watch_id'] == watch_id]

		actual_values = watch_df['actual_demand'].tolist()
		predicted_values = watch_df['predicted_demand'].tolist()

		watch_performance[watch_id] = {
		'watch_name': watch_df['watch_name'].iloc[0],
		'total_predicted': int(watch_df['predicted_demand'].sum()),
		'total_actual': int(watch_df['actual_demand'].sum()),
		'total_sold': int(watch_df['units_sold'].sum()),
		'total_stockout': int(watch_df['stockout'].sum()),
		'total_revenue': float(watch_df['revenue'].sum()),
		'total_profit': float(watch_df['profit'].sum()),
		'mape': round(calculate_mape(actual_values, predicted_values), 1),
		'accuracy': round(calculate_accuracy(actual_values, predicted_values), 1)
		}

		# Accumulate for overall metrics
		all_actual_values.extend(perf['actual_values'])
		all_predicted_values.extend(perf['predicted_values'])
		# Calculate overall prediction accuracy
		all_actual = df['actual_demand'].tolist()
		all_predicted = df['predicted_demand'].tolist()

		# Calculate overall metrics using proper formulas
		overall_mape = calculate_mape(all_actual_values, all_predicted_values)
		overall_accuracy = calculate_accuracy(all_actual_values, all_predicted_values)
		prediction_accuracy = {
		'mape': round(calculate_mape(all_actual, all_predicted), 1),
		'accuracy': round(calculate_accuracy(all_actual, all_predicted), 1)
		}

		results['prediction_accuracy'] = {
		'mape': round(overall_mape, 1),
		'accuracy': round(overall_accuracy, 1)
		# Calculate financial summary
		financial_summary = {
		'total_revenue': round(df['revenue'].sum(), 2),
		'total_costs': round(df['total_costs'].sum(), 2),
		'total_profit': round(df['profit'].sum(), 2),
		'lost_revenue': round(df['lost_revenue'].sum(), 2),
		'excess_costs': round(df['excess_cost'].sum(), 2)
		}

		return results
		# Build monthly comparison structure for templates
		monthly_comparison = []
		for month in df['month'].unique():
		month_df = df[df['month'] == month]

		watches_list = []
		for _, row in month_df.iterrows():
		watches_list.append({
		'watch_id': int(row['watch_id']),
		'watch_name': row['watch_name'],
		'predicted_demand': int(row['predicted_demand']),
		'actual_demand': int(row['actual_demand']),
		'production': int(row['production']),
		'inventory_start': int(row['inventory_start']),
		'inventory_end': int(row['inventory_end']),
		'units_sold': int(row['units_sold']),
		'stockout': int(row['stockout']),
		'revenue': float(row['revenue']),
		'production_cost': float(row['production_cost']),
		'labor_cost': float(row['labor_cost']),
		'holding_cost': float(row['holding_cost']),
		'excess_cost': float(row['excess_cost']),
		'lost_revenue': float(row['lost_revenue']),
		'total_costs': float(row['total_costs']),
		'profit': float(row['profit']),
		'error': float(row['error']),
		'error_pct': float(row['error_pct'])
		})

		monthly_comparison.append({
		'month': int(month),
		'date': month_df['date'].iloc[0],
		'watches': watches_list
		})

		return {
		'monthly_comparison': monthly_comparison,
		'watch_performance': watch_performance,
		'financial_summary': financial_summary,
		'prediction_accuracy': prediction_accuracy,
		'dataframe': df # Include for potential future use
		}


		@app.route('/')