Add generate_changed_test_data method to simulate external disrupting events (05fd369d) · Commits · Barthelet Thibault / horloml

.gitignore

+1 −0

Original line number	Diff line number	Diff line
		@@ -3,6 +3,7 @@
		/HorloML_EDU_Main/web_app/flask_session
		/supply_chain_sim.egg-info
		/forecast_app/data
		/data
		/venv
		/__pycache__
		*.egg

forecast_app/smart_data_generator.py

+123 −0

Original line number	Diff line number	Diff line
		@@ -439,6 +439,113 @@ class SmartSupplyChainDataGenerator:

		return dataset['historical_data'][start_idx:end_idx]

		def generate_changed_test_data(self, dataset: Dict, disruption_config: List[Dict],
		                               test_year: int = 11,
		                               filepath: str = 'data/data_test_external_events.json') -> Dict:
		    """
		    Generate test data with external disruption events applied.

		    The test year is extracted with ``self.get_test_data``, deep-copied, and
		    walked month by month while a running inventory per watch is carried
		    forward. Demand is scaled in disrupted months; the financials of that
		    month and of every later month of the same watch are recalculated so
		    the inventory change caused by the disruption propagates through the
		    rest of the year.

		    Args:
		        dataset: Full dataset to extract test data from. Must contain
		            ``'historical_data'``, a list of month records each holding a
		            ``'watches'`` list keyed by ``'watch_id'``.
		        disruption_config: List of disruption events, each with:
		            - month_to_change: Month number (1-12) to apply disruption
		            - demand_ratio: Multiplier for demand (e.g., 0.5 halves demand,
		              2.0 doubles it); the scaled demand is truncated to an int
		            - watch_ids: Optional list of watch IDs to affect
		              (default: all watches)
		        test_year: Which year to use as test data (default: 11). Must be
		            at least 2, because the starting inventory is read from the
		            last month of the preceding year.
		        filepath: Where to save the modified test data (JSON).

		    Returns:
		        Dict with two keys: ``'metadata'`` (generation date, generator type,
		        base year, the disruption config and the watch definitions) and
		        ``'test_data'`` (the modified month records). The same dict is
		        written to ``filepath``.

		    Raises:
		        ValueError: If ``test_year`` is smaller than 2.
		        KeyError: If a watch from ``self.watches`` is missing from a month.

		    Example:
		        disruption_config = [
		            {'month_to_change': 3, 'demand_ratio': 0.5},  # Halve demand in March
		            {'month_to_change': 6, 'demand_ratio': 0.3, 'watch_ids': [1, 2]}  # 70% drop in June for watches 1&2
		        ]
		    """
		    # Function-scope imports: only this method needs them.
		    import copy
		    import os

		    # A negative index would silently wrap around to the END of the
		    # dataset and seed the inventory from the wrong month.
		    if test_year < 2:
		        raise ValueError(
		            f"test_year must be >= 2 to have a preceding training month, got {test_year}"
		        )

		    # Work on a deep copy so the caller's dataset is never mutated.
		    modified_test_data = copy.deepcopy(self.get_test_data(dataset, test_year))

		    # Seed the running inventory from the last training month.
		    last_training_idx = (test_year - 1) * 12 - 1
		    seed_records = {
		        record['watch_id']: record
		        for record in dataset['historical_data'][last_training_idx]['watches']
		    }
		    inventory = {
		        watch['id']: seed_records[watch['id']]['inventory_end']
		        for watch in self.watches
		    }

		    # Watches whose inventory has diverged from the base data because of an
		    # earlier disruption; their later months must be recalculated as well.
		    stale_watches = set()

		    for month_data in modified_test_data:
		        month_in_year = month_data['month']
		        disruptions_this_month = [
		            d for d in disruption_config
		            if d['month_to_change'] == month_in_year
		        ]

		        # Match records by id rather than by list position, consistent
		        # with how the seed inventory is looked up above.
		        records_by_id = {
		            record['watch_id']: record for record in month_data['watches']
		        }

		        for watch in self.watches:
		            watch_id = watch['id']
		            watch_data = records_by_id[watch_id]

		            original_demand = watch_data['demand']
		            modified_demand = original_demand

		            # Disruptions targeting the same month/watch stack multiplicatively.
		            for disruption in disruptions_this_month:
		                affected_watches = disruption.get('watch_ids')
		                if affected_watches is None or watch_id in affected_watches:
		                    modified_demand = int(modified_demand * disruption['demand_ratio'])

		            demand_changed = modified_demand != original_demand
		            if demand_changed:
		                stale_watches.add(watch_id)

		            if demand_changed or watch_id in stale_watches:
		                production = watch_data['production']  # Keep same production decision

		                # NOTE(review): the helper is expected to return a dict of
		                # record fields including 'inventory_end' -- confirm.
		                new_financials = self._calculate_costs_and_revenue(
		                    watch, modified_demand, production, inventory[watch_id]
		                )

		                if demand_changed:
		                    # Written before update() so a 'demand' value returned
		                    # by the helper still takes precedence.
		                    watch_data['demand'] = modified_demand

		                watch_data.update(new_financials)
		                watch_data['watch_id'] = watch['id']
		                watch_data['watch_name'] = watch['name']

		                if demand_changed:
		                    watch_data['original_demand'] = original_demand  # Keep track of original
		                    watch_data['disruption_applied'] = True

		            # Carry this month's ending inventory into the next month.
		            inventory[watch_id] = watch_data['inventory_end']

		    output_data = {
		        'metadata': {
		            'generated_date': datetime.now().isoformat(),
		            'generator_type': 'smart_customer_simulation_with_disruptions',
		            'base_year': test_year,
		            'disruptions': disruption_config,
		            'watches': self.watches
		        },
		        'test_data': modified_test_data
		    }

		    # os.path.dirname() returns '' for a bare filename and os.makedirs('')
		    # raises, so only create the directory when there is one.
		    output_dir = os.path.dirname(filepath)
		    if output_dir:
		        os.makedirs(output_dir, exist_ok=True)
		    with open(filepath, 'w') as f:
		        json.dump(output_data, f, indent=2)
		    print(f"Modified test data with disruptions saved to {filepath}")

		    return output_data


		def main():
		"""Generate and save the dataset"""
		@@ -473,6 +580,22 @@ def main():
		with open('data/sim2_supply_chain_data_test.json', 'w') as f:
		json.dump(test_data, f, indent=2)

		# Generate test data with external disruptions (example)
		print("\n" + "-" * 60)
		print("Generating test data with external disruptions...")
		print("-" * 60)

		disruption_config = [
		{'month_to_change': 3, 'demand_ratio': 0.5}, # Halve demand in March (supply chain crisis)
		{'month_to_change': 7, 'demand_ratio': 1.5}, # 50% increase in July (recovery surge)
		]

		disrupted_data = generator.generate_changed_test_data(
		full_dataset,
		disruption_config,
		filepath='data/sim2_data_test_external_events.json'
		)

		print("\n" + "=" * 60)
		print("Dataset Generation Complete!")
		print("=" * 60)