# forecast_app/data_generator.py
"""
Data Generator for Supply Chain Forecasting Educational App

Generates 11 years of realistic monthly sales and supply chain data for 3 watch models.
Includes seasonal patterns, trends, and realistic variance.
"""

import json
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Tuple

import numpy as np


def _write_json(payload, filepath: str) -> None:
    """Write ``payload`` to ``filepath`` as indented JSON.

    The parent directory is created first so that writing to a path such as
    ``data/output.json`` does not fail when ``data/`` does not exist yet.
    """
    target = Path(filepath)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open('w', encoding='utf-8') as f:
        json.dump(payload, f, indent=2)


class SupplyChainDataGenerator:
    """Generates realistic supply chain data with seasonal patterns and trends"""

    # First month of the generated history.
    START_DATE = datetime(2014, 1, 1)
    # Units of each watch in stock before the first generated month.
    INITIAL_INVENTORY = 100
    # Historical production is demand scaled by this factor (5% surplus).
    SAFETY_STOCK_FACTOR = 1.05
    # Lower bound applied to every generated monthly demand.
    MIN_DEMAND = 10
    # Standard deviation of the multiplicative noise (mean 1.0).
    NOISE_STD = 0.1
    # Labor cost charged per produced unit.
    LABOR_COST_PER_UNIT = 20.0
    # Monthly holding cost per unit, as a fraction of the unit's base cost.
    HOLDING_COST_RATE = 0.02
    # Penalty per unit of unmet demand, as a fraction of the sell price.
    STOCKOUT_PENALTY_RATE = 0.3

    def __init__(self, seed: int = 42):
        """
        Initialize the data generator

        Args:
            seed: Random seed for reproducibility
        """
        # NOTE: this seeds NumPy's *global* random state, which is what
        # _generate_monthly_demand draws from.
        np.random.seed(seed)

        # Define the 3 watch models
        self.watches = [
            {
                'id': 1,
                'name': 'Luxury Classic',
                'category': 'luxury',
                'base_cost': 150.0,  # Cost to produce
                'sell_price': 500.0,  # Retail price
                'base_demand': 80,  # Base monthly demand
                'seasonality_amplitude': 0.3,  # 30% seasonal variation
                'trend': 0.005,  # 0.5% monthly growth
                'peak_months': [11, 12, 1]  # November, December, January (holidays)
            },
            {
                'id': 2,
                'name': 'Sport Pro',
                'category': 'sport',
                'base_cost': 80.0,
                'sell_price': 220.0,
                'base_demand': 150,
                'seasonality_amplitude': 0.25,
                'trend': 0.008,  # 0.8% monthly growth (growing category)
                'peak_months': [4, 5, 6, 7]  # Spring/Summer (outdoor season)
            },
            {
                'id': 3,
                'name': 'Casual Style',
                'category': 'casual',
                'base_cost': 40.0,
                'sell_price': 120.0,
                'base_demand': 200,
                'seasonality_amplitude': 0.15,  # Less seasonal
                'trend': 0.003,  # 0.3% monthly growth
                'peak_months': [9, 10]  # Back to school season
            }
        ]

    def _generate_monthly_demand(self, watch: Dict, month_index: int, total_months: int) -> int:
        """
        Generate realistic monthly demand for a watch model

        Args:
            watch: Watch model configuration
            month_index: Current month index (0-based)
            total_months: Total number of months (currently unused; kept for
                interface compatibility)

        Returns:
            Monthly demand as a whole number of units, never below MIN_DEMAND
        """
        # Base demand
        demand = watch['base_demand']

        # Add long-term trend. The growth is linear in the month index
        # (it does not compound).
        trend_factor = 1 + (watch['trend'] * month_index)
        demand *= trend_factor

        # Add seasonality
        calendar_month = (month_index % 12) + 1  # 1-12

        if calendar_month in watch['peak_months']:
            # Peak months receive the full amplitude as a flat boost
            seasonal_factor = 1 + watch['seasonality_amplitude']
        else:
            # Other months follow a half-amplitude sine wave with a 12-month period
            phase = (2 * np.pi * month_index) / 12
            seasonal_factor = 1 + (watch['seasonality_amplitude'] * 0.5 * np.sin(phase))

        demand *= seasonal_factor

        # Add random noise (normal, mean 1.0, sigma NOISE_STD)
        noise = np.random.normal(1.0, self.NOISE_STD)
        demand *= noise

        # Ensure demand is positive and a built-in int. The explicit int()
        # keeps the result type independent of how round() treats NumPy floats.
        return max(self.MIN_DEMAND, int(round(demand)))

    def _calculate_costs_and_revenue(self, watch: Dict, demand: float,
                                     production: int, inventory_start: int) -> Dict:
        """
        Calculate monthly costs and revenue based on production decisions

        Args:
            watch: Watch model configuration (reads 'sell_price' and 'base_cost')
            demand: Actual customer demand
            production: Units produced
            inventory_start: Starting inventory

        Returns:
            Dictionary with financial metrics; monetary values are rounded
            to 2 decimals
        """
        # Calculate what we can actually sell (limited by inventory + production)
        available_units = inventory_start + production
        units_sold = min(demand, available_units)

        # Calculate ending inventory
        inventory_end = available_units - units_sold

        # Revenue from sales
        revenue = units_sold * watch['sell_price']

        # Costs
        production_cost = production * watch['base_cost']
        labor_cost = production * self.LABOR_COST_PER_UNIT

        # Inventory holding cost (fraction of product cost per month per unit)
        holding_cost = inventory_end * watch['base_cost'] * self.HOLDING_COST_RATE

        # Stockout cost (penalty on lost revenue from unmet demand)
        stockout_units = max(0, demand - units_sold)
        stockout_cost = stockout_units * watch['sell_price'] * self.STOCKOUT_PENALTY_RATE

        total_costs = production_cost + labor_cost + holding_cost + stockout_cost
        profit = revenue - total_costs

        return {
            'demand': int(demand),
            'production': production,
            'inventory_start': inventory_start,
            'inventory_end': inventory_end,
            'units_sold': int(units_sold),
            'stockout_units': int(stockout_units),
            'revenue': round(revenue, 2),
            'production_cost': round(production_cost, 2),
            'labor_cost': round(labor_cost, 2),
            'holding_cost': round(holding_cost, 2),
            'stockout_cost': round(stockout_cost, 2),
            'total_costs': round(total_costs, 2),
            'profit': round(profit, 2)
        }

    def generate_dataset(self, years: int = 11) -> Dict:
        """
        Generate complete dataset for specified number of years

        Args:
            years: Number of years to generate (default 11)

        Returns:
            Dictionary with a 'metadata' entry and a 'historical_data' list
            holding one record per month, each with one entry per watch
        """
        total_months = years * 12
        start_date = self.START_DATE

        dataset = {
            'metadata': {
                'generated_date': datetime.now().isoformat(),
                'years': years,
                'total_months': total_months,
                'start_date': start_date.isoformat(),
                'watches': self.watches
            },
            'historical_data': []
        }

        # Track inventory for each watch
        inventory = {watch['id']: self.INITIAL_INVENTORY for watch in self.watches}

        # Generate data month by month
        for month_idx in range(total_months):
            # Step by calendar months. Adding 30-day timedeltas drifts against
            # the calendar (index 1 would land on January 31st and repeat the
            # '2014-01' label).
            months_from_origin = (start_date.month - 1) + month_idx
            current_date = datetime(
                start_date.year + months_from_origin // 12,
                months_from_origin % 12 + 1,
                1,
            )

            month_data = {
                'month_index': month_idx,
                'year': (month_idx // 12) + 1,  # 1-based year of the simulation
                'month': (month_idx % 12) + 1,
                'date': current_date.strftime('%Y-%m'),
                'watches': []
            }

            # Generate data for each watch
            for watch in self.watches:
                demand = self._generate_monthly_demand(watch, month_idx, total_months)

                # Simple production strategy: produce 5% more than demand.
                # Production is therefore never below demand, so the generated
                # history contains no stockouts and inventory never decreases.
                production = int(demand * self.SAFETY_STOCK_FACTOR)

                # Calculate financials
                watch_data = self._calculate_costs_and_revenue(
                    watch, demand, production, inventory[watch['id']]
                )

                # Update inventory for next month
                inventory[watch['id']] = watch_data['inventory_end']

                # Add watch info
                watch_data['watch_id'] = watch['id']
                watch_data['watch_name'] = watch['name']

                month_data['watches'].append(watch_data)

            dataset['historical_data'].append(month_data)

        return dataset

    def save_dataset(self, dataset: Dict, filepath: str = 'supply_chain_data.json'):
        """Save dataset to JSON file, creating the target directory if needed"""
        _write_json(dataset, filepath)
        print(f"Dataset saved to {filepath}")

    def get_training_data(self, dataset: Dict, training_years: int = 10) -> Dict:
        """
        Extract training data (first N years) from full dataset

        Args:
            dataset: Full dataset
            training_years: Number of years to include in training set

        Returns:
            Dictionary with training data only; its metadata is a shallow copy
            of the original with 'years', 'total_months' and 'note' replaced
        """
        training_months = training_years * 12

        # Copy the metadata so the overrides below do not touch the full dataset.
        metadata = dict(dataset['metadata'])
        metadata['years'] = training_years
        metadata['total_months'] = training_months
        metadata['note'] = f"Training data: first {training_years} years"

        return {
            'metadata': metadata,
            'historical_data': dataset['historical_data'][:training_months]
        }

    def get_test_data(self, dataset: Dict, test_year: int = 11) -> List[Dict]:
        """
        Extract test data (year to predict)

        Args:
            dataset: Full dataset
            test_year: Year number to extract (1-based)

        Returns:
            List of monthly data for the test year
        """
        start_idx = (test_year - 1) * 12
        end_idx = test_year * 12
        return dataset['historical_data'][start_idx:end_idx]


def main():
    """Generate and save the dataset"""
    print("Generating Supply Chain Dataset...")
    print("-" * 50)

    generator = SupplyChainDataGenerator(seed=42)

    # Generate full 11-year dataset
    full_dataset = generator.generate_dataset(years=11)

    # Save full dataset
    generator.save_dataset(full_dataset, 'data/supply_chain_data_full.json')

    # Save training data (10 years)
    training_data = generator.get_training_data(full_dataset, training_years=10)
    generator.save_dataset(training_data, 'data/supply_chain_data_training.json')

    # Save test data (year 11)
    test_data = generator.get_test_data(full_dataset, test_year=11)
    _write_json(test_data, 'data/supply_chain_data_test.json')

    print("-" * 50)
    print("Dataset Generation Complete!")
    print(f"Total months: {len(full_dataset['historical_data'])}")
    print(f"Training months: {len(training_data['historical_data'])}")
    print(f"Test months: {len(test_data)}")

    # Print sample statistics
    print("\n" + "=" * 50)
    print("Sample Statistics (Year 1):")
    print("=" * 50)

    year1_data = full_dataset['historical_data'][:12]

    for watch in full_dataset['metadata']['watches']:
        watch_id = watch['id']
        watch_name = watch['name']

        # Select rows by watch id rather than by list position, so the
        # statistics stay correct if ids are not 1..n in list order.
        rows = [
            entry
            for month in year1_data
            for entry in month['watches']
            if entry['watch_id'] == watch_id
        ]
        demands = [entry['demand'] for entry in rows]
        revenues = [entry['revenue'] for entry in rows]

        print(f"\n{watch_name}:")
        print(f" Avg Monthly Demand: {np.mean(demands):.1f} units")
        print(f" Avg Monthly Revenue: CHF {np.mean(revenues):,.2f}")
        print(f" Annual Revenue: CHF {np.sum(revenues):,.2f}")


if __name__ == "__main__":
    main()