Commit b125df6e authored by Barthelet Thibault's avatar Barthelet Thibault
Browse files

removed old stuff

parent 257e9eef
Loading
Loading
Loading
Loading

forecast_app/data_generator.py

deleted100644 → 0
+0 −320
Original line number Diff line number Diff line
"""
Data Generator for Supply Chain Forecasting Educational App

Generates 11 years of realistic monthly sales and supply chain data for 3 watch models.
Includes seasonal patterns, trends, and realistic variance.
"""

import numpy as np
import json
from datetime import datetime, timedelta
from typing import Dict, List, Tuple


class SupplyChainDataGenerator:
    """Generate synthetic monthly supply chain data for three watch models.

    Demand for each model combines a linear trend, a seasonal factor and
    multiplicative Gaussian noise. Production, inventory, revenue and cost
    figures are then derived from that demand with a fixed production policy.

    Randomness comes from a per-instance generator, so two instances created
    with the same seed produce identical datasets and NumPy's global random
    state is left untouched.
    """

    # Economic parameters shared by all watch models.
    LABOR_COST_PER_UNIT = 20.0     # Fixed labor cost per produced unit
    HOLDING_COST_RATE = 0.02       # Monthly holding cost as a share of unit cost
    STOCKOUT_PENALTY_RATE = 0.3    # Penalty per unmet unit as a share of sell price
    PRODUCTION_BUFFER = 1.05       # Historical policy: produce 5% above demand
    INITIAL_INVENTORY = 100        # Units in stock per watch before month 0
    MIN_MONTHLY_DEMAND = 10        # Floor applied to the generated demand

    def __init__(self, seed: int = 42):
        """
        Initialize the data generator

        Args:
            seed: Random seed for reproducibility
        """
        # A private legacy RandomState yields the same draw sequence as
        # ``np.random.seed(seed)`` followed by ``np.random.normal`` calls,
        # but without mutating the process-wide NumPy RNG.
        self._rng = np.random.RandomState(seed)

        # Define the 3 watch models
        self.watches = [
            {
                'id': 1,
                'name': 'Luxury Classic',
                'category': 'luxury',
                'base_cost': 150.0,      # Cost to produce
                'sell_price': 500.0,     # Retail price
                'base_demand': 80,       # Base monthly demand
                'seasonality_amplitude': 0.3,  # 30% seasonal variation
                'trend': 0.005,          # 0.5% of base demand added per month
                'peak_months': [11, 12, 1]  # November, December, January (holidays)
            },
            {
                'id': 2,
                'name': 'Sport Pro',
                'category': 'sport',
                'base_cost': 80.0,
                'sell_price': 220.0,
                'base_demand': 150,
                'seasonality_amplitude': 0.25,
                'trend': 0.008,          # 0.8% of base demand added per month (growing category)
                'peak_months': [4, 5, 6, 7]  # Spring/Summer (outdoor season)
            },
            {
                'id': 3,
                'name': 'Casual Style',
                'category': 'casual',
                'base_cost': 40.0,
                'sell_price': 120.0,
                'base_demand': 200,
                'seasonality_amplitude': 0.15,  # Less seasonal
                'trend': 0.003,          # 0.3% of base demand added per month
                'peak_months': [9, 10]   # Back to school season
            }
        ]

    def _generate_monthly_demand(self, watch: Dict, month_index: int, total_months: int) -> int:
        """
        Generate realistic monthly demand for a watch model

        Args:
            watch: Watch model configuration
            month_index: Current month index (0-based)
            total_months: Total number of months (currently unused; kept so
                existing callers keep working)

        Returns:
            Monthly demand as a whole number of units, never below
            MIN_MONTHLY_DEMAND
        """
        # Base demand
        demand = watch['base_demand']

        # Add long-term trend (linear in the month index, not compounding)
        trend_factor = 1 + (watch['trend'] * month_index)
        demand *= trend_factor

        # Add seasonality
        calendar_month = (month_index % 12) + 1  # 1-12

        if calendar_month in watch['peak_months']:
            # Peak months get the full seasonal boost
            seasonal_factor = 1 + watch['seasonality_amplitude']
        else:
            # Off-peak months follow a sine wave at half the amplitude
            phase = (2 * np.pi * month_index) / 12
            seasonal_factor = 1 + (watch['seasonality_amplitude'] * 0.5 * np.sin(phase))

        demand *= seasonal_factor

        # Multiplicative Gaussian noise: mean 1.0, standard deviation 10%.
        # The draw is unbounded, hence the floor applied below.
        noise = self._rng.normal(1.0, 0.1)
        demand *= noise

        # Round to whole units, enforce the floor, and return a plain int so
        # downstream arithmetic and JSON serialization see a built-in type.
        return int(max(self.MIN_MONTHLY_DEMAND, round(demand)))

    def _calculate_costs_and_revenue(self, watch: Dict, demand: float,
                                     production: int, inventory_start: int) -> Dict:
        """
        Calculate monthly costs and revenue based on production decisions

        Args:
            watch: Watch model configuration
            demand: Actual customer demand
            production: Units produced
            inventory_start: Starting inventory

        Returns:
            Dictionary with financial metrics
        """
        # Calculate what we can actually sell (limited by inventory + production)
        available_units = inventory_start + production
        units_sold = min(demand, available_units)

        # Calculate ending inventory
        inventory_end = available_units - units_sold

        # Revenue from sales
        revenue = units_sold * watch['sell_price']

        # Costs
        production_cost = production * watch['base_cost']
        labor_cost = production * self.LABOR_COST_PER_UNIT

        # Inventory holding cost (share of product cost per month per unit)
        holding_cost = inventory_end * watch['base_cost'] * self.HOLDING_COST_RATE

        # Stockout cost (penalty on the revenue lost to unmet demand)
        stockout_units = max(0, demand - units_sold)
        stockout_cost = stockout_units * watch['sell_price'] * self.STOCKOUT_PENALTY_RATE

        total_costs = production_cost + labor_cost + holding_cost + stockout_cost
        profit = revenue - total_costs

        return {
            'demand': int(demand),
            'production': production,
            'inventory_start': inventory_start,
            'inventory_end': inventory_end,
            'units_sold': int(units_sold),
            'stockout_units': int(stockout_units),
            'revenue': round(revenue, 2),
            'production_cost': round(production_cost, 2),
            'labor_cost': round(labor_cost, 2),
            'holding_cost': round(holding_cost, 2),
            'stockout_cost': round(stockout_cost, 2),
            'total_costs': round(total_costs, 2),
            'profit': round(profit, 2)
        }

    def generate_dataset(self, years: int = 11) -> Dict:
        """
        Generate complete dataset for specified number of years

        Args:
            years: Number of years to generate (default 11)

        Returns:
            Dictionary with a 'metadata' entry and a 'historical_data' list
            holding one record per month, each with one entry per watch
        """
        total_months = years * 12
        start_date = datetime(2014, 1, 1)  # Start from January 2014

        dataset = {
            'metadata': {
                'generated_date': datetime.now().isoformat(),
                'years': years,
                'total_months': total_months,
                'start_date': start_date.isoformat(),
                'watches': self.watches
            },
            'historical_data': []
        }

        # Track inventory for each watch
        inventory = {watch['id']: self.INITIAL_INVENTORY for watch in self.watches}

        # Generate data month by month
        for month_idx in range(total_months):
            year = (month_idx // 12) + 1
            month_in_year = (month_idx % 12) + 1

            # Derive the label by calendar arithmetic. Stepping in 30-day
            # increments drifts: month 1 would land on 2014-01-31 and repeat
            # "2014-01". The series starts in January, so the calendar month
            # equals month_in_year.
            calendar_year = start_date.year + (month_idx // 12)

            month_data = {
                'month_index': month_idx,
                'year': year,
                'month': month_in_year,
                'date': f"{calendar_year:04d}-{month_in_year:02d}",
                'watches': []
            }

            # Generate data for each watch
            for watch in self.watches:
                # Generate demand
                demand = self._generate_monthly_demand(watch, month_idx, total_months)

                # Simple production strategy: produce based on demand + safety stock
                # In the "historical reality", the company had a decent strategy
                production = int(demand * self.PRODUCTION_BUFFER)

                # Calculate financials
                watch_data = self._calculate_costs_and_revenue(
                    watch, demand, production, inventory[watch['id']]
                )

                # Update inventory for next month
                inventory[watch['id']] = watch_data['inventory_end']

                # Add watch info
                watch_data['watch_id'] = watch['id']
                watch_data['watch_name'] = watch['name']

                month_data['watches'].append(watch_data)

            dataset['historical_data'].append(month_data)

        return dataset

    def save_dataset(self, dataset: Dict, filepath: str = 'supply_chain_data.json'):
        """
        Save dataset to JSON file

        Args:
            dataset: JSON-serializable data to write
            filepath: Destination path; missing parent directories are created
        """
        import os  # local import: only this method touches the filesystem layout

        # Create the target directory so a path such as 'data/x.json' works
        # on a fresh checkout instead of raising FileNotFoundError.
        parent_dir = os.path.dirname(filepath)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(dataset, f, indent=2)
        print(f"Dataset saved to {filepath}")

    def get_training_data(self, dataset: Dict, training_years: int = 10) -> Dict:
        """
        Extract training data (first N years) from full dataset

        Args:
            dataset: Full dataset
            training_years: Number of years to include in training set

        Returns:
            Dictionary with training data only. The metadata is a shallow
            copy, so the month records and the 'watches' list are shared
            with the input dataset.
        """
        training_months = training_years * 12

        training_data = {
            'metadata': dataset['metadata'].copy(),
            'historical_data': dataset['historical_data'][:training_months]
        }
        training_data['metadata']['years'] = training_years
        training_data['metadata']['total_months'] = training_months
        training_data['metadata']['note'] = f"Training data: first {training_years} years"

        return training_data

    def get_test_data(self, dataset: Dict, test_year: int = 11) -> List[Dict]:
        """
        Extract test data (year to predict)

        Args:
            dataset: Full dataset
            test_year: Year number to extract (1-based)

        Returns:
            List of monthly data for the test year (empty if the dataset
            does not reach that year)
        """
        start_idx = (test_year - 1) * 12
        end_idx = test_year * 12

        return dataset['historical_data'][start_idx:end_idx]


def main():
    """Generate the full, training and test datasets and print a summary.

    Writes three JSON files under ``data/`` (creating the directory if it
    does not exist): the full 11-year dataset, the first 10 years as
    training data, and year 11 as test data. Then prints month counts and
    year-1 demand/revenue statistics per watch model.
    """
    import os  # local import: only this entry point needs it

    print("Generating Supply Chain Dataset...")
    print("-" * 50)

    # All outputs go to data/; create it up front so a fresh checkout does
    # not fail with FileNotFoundError on the first write.
    os.makedirs('data', exist_ok=True)

    generator = SupplyChainDataGenerator(seed=42)

    # Generate full 11-year dataset
    full_dataset = generator.generate_dataset(years=11)

    # Save full dataset
    generator.save_dataset(full_dataset, 'data/supply_chain_data_full.json')

    # Save training data (10 years)
    training_data = generator.get_training_data(full_dataset, training_years=10)
    generator.save_dataset(training_data, 'data/supply_chain_data_training.json')

    # Save test data (year 11)
    test_data = generator.get_test_data(full_dataset, test_year=11)
    with open('data/supply_chain_data_test.json', 'w') as f:
        json.dump(test_data, f, indent=2)

    print("-" * 50)
    print("Dataset Generation Complete!")
    print(f"Total months: {len(full_dataset['historical_data'])}")
    print(f"Training months: {len(training_data['historical_data'])}")
    print(f"Test months: {len(test_data)}")

    # Print sample statistics
    print("\n" + "=" * 50)
    print("Sample Statistics (Year 1):")
    print("=" * 50)

    year1_data = full_dataset['historical_data'][:12]
    for watch in full_dataset['metadata']['watches']:
        watch_id = watch['id']
        watch_name = watch['name']

        # Select each month's record by watch_id rather than by list
        # position, so the statistics stay correct if ids are not 1..N
        # in list order.
        records = [
            next(w for w in month['watches'] if w['watch_id'] == watch_id)
            for month in year1_data
        ]
        demands = [record['demand'] for record in records]
        revenues = [record['revenue'] for record in records]

        print(f"\n{watch_name}:")
        print(f"  Avg Monthly Demand: {np.mean(demands):.1f} units")
        print(f"  Avg Monthly Revenue: CHF {np.mean(revenues):,.2f}")
        print(f"  Annual Revenue: CHF {np.sum(revenues):,.2f}")


# Run the generator only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()