"""Configuration schema for DFM.

This module provides the core configuration dataclasses:
- BaseModelConfig: Base class with shared model structure
- DFMConfig(BaseModelConfig): Linear DFM with EM algorithm parameters
- DDFMConfig(BaseModelConfig): Deep DFM with neural network training parameters
- SeriesConfig: Component configurations

Note: Parameter classes (Params, FitParams) are in config/params.py
Note: Validation functions are in config/utils.py

The configuration hierarchy:
- BaseModelConfig: Shared model structure (series, clock, data handling, scaler)
- DFMConfig: Adds block structure (blocks, factors per block) and EM algorithm parameters (max_iter, threshold, regularization)
- DDFMConfig: Adds neural network parameters (epochs, learning_rate, encoder_layers)

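Blocks are defined as Dict[str, Dict[str, Any]] where each block is a dict with:
- factors: int (number of factors; defaults to 1)
- ar_lag: int (AR lag order, 1 or 2; defaults to 1)
- clock: str (block clock frequency; defaults to the global clock)
- series: List[str] (series_ids loading on the block; if empty or omitted, all series load on it)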

For loading configurations from files (YAML) or other sources,
see the config.adapter module which provides source adapters.
"""

import numpy as np
from typing import List, Optional, Dict, Any, Union
from dataclasses import dataclass, field

try:
    from typing import Protocol
except ImportError:
    from typing_extensions import Protocol

# Default block name when no blocks are specified (generic, compatible with DDFM).
# NOTE(review): not referenced in the visible part of this module; `_infer_blocks`
# hard-codes 'block1' instead - confirm which name downstream code expects.
DEFAULT_BLOCK_NAME = 'Block_0'

# Import validation functions from utils
from .utils import validate_frequency, validate_transformation


# ============================================================================
# Helper Functions for Series/Blocks Parsing (Consolidated)
# ============================================================================

def _parse_series_list(series_data: List[Any], config_path: Optional[str] = None) -> List['SeriesConfig']:
    """Parse series from list format.
    
    Supports multiple formats:
    1. List of strings: ['A001', 'KOBDY10', ...] - loads from config/series/{series_id}.yaml
    2. List of dicts: [{'series_id': 'A001', 'frequency': 'm', ...}, ...]
    3. List of SeriesConfig instances: [SeriesConfig(...), ...]
    
    Parameters
    ----------
    series_data : List[Union[str, Dict, SeriesConfig]]
        List of series configurations (strings, dicts, or SeriesConfig instances)
    config_path : Optional[str]
        Path to config directory (for loading individual series files)
        
    Returns
    -------
    List[SeriesConfig]
        List of SeriesConfig instances
    """
    from pathlib import Path
    import yaml
    
    result = []
    for s in series_data:
        if isinstance(s, str):
            # Load from config/series/{s}.yaml
            if config_path is None:
                raise ValueError(f"config_path is required when series is a string: {s}")
            series_file = Path(config_path) / "series" / f"{s}.yaml"
            if series_file.exists():
                try:
                    with open(series_file, 'r') as f:
                        series_dict = yaml.safe_load(f) or {}
                    # Ensure series_id is set
                    if 'series_id' not in series_dict:
                        series_dict['series_id'] = s
                    result.append(SeriesConfig(**series_dict))
                except Exception as e:
                    # Fallback: create SeriesConfig with defaults
                    result.append(SeriesConfig(series_id=s, frequency='m', transformation='lin'))
            else:
                # Fallback: create SeriesConfig with defaults
                result.append(SeriesConfig(series_id=s, frequency='m', transformation='lin'))
        elif isinstance(s, dict):
            result.append(SeriesConfig(**s))
        else:
            # Assume it's already a SeriesConfig instance
            result.append(s)
    return result


def _parse_blocks_dict(blocks_data: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
    """Parse blocks from dict format.
    
    Parameters
    ----------
    blocks_data : Dict[str, Any]
        Dictionary mapping block names to block configurations
        
    Returns
    -------
    Dict[str, Dict[str, Any]]
        Dictionary mapping block names to block config dicts
        
    Raises
    ------
    ValueError
        If block config is not a dict
    """
    blocks_dict = {}
    for block_name, block_cfg in blocks_data.items():
        if isinstance(block_cfg, dict):
            blocks_dict[block_name] = block_cfg
        else:
            raise ValueError(f"Invalid block config for {block_name}: {block_cfg}. Must be a dict.")
    return blocks_dict


def _infer_blocks(
    series_list: List['SeriesConfig'],
    data: Dict[str, Any]
) -> Dict[str, Dict[str, Any]]:
    """Infer blocks from series when blocks not explicitly provided.
    
    Parameters
    ----------
    series_list : List[SeriesConfig]
        List of series configurations
    data : Dict[str, Any]
        Configuration data (for clock default)
        
    Returns
    -------
    Dict[str, Dict[str, Any]]
        Dictionary mapping block names to block config dicts
        
    Note: Since SeriesConfig no longer has blocks field, this creates a default single block
    that all series load on.
    """
    clock = data.get('clock', 'm')
    # Create default single block that all series load on
    series_ids = [s.series_id if s.series_id else f"series_{i}" for i, s in enumerate(series_list)]
    return {
        'block1': {
            'factors': 1,
            'ar_lag': 1,
            'clock': clock,
            'series': series_ids  # All series load on the default block
        }
    }


def _detect_config_type(data: Dict[str, Any]) -> str:
    """Detect config type (DFM or DDFM) from data dictionary.
    
    This helper function provides a single source of truth for config type detection.
    It checks for DDFM-specific parameters or explicit model_type specification.
    
    Parameters
    ----------
    data : Dict[str, Any]
        Configuration data dictionary
        
    Returns
    -------
    str
        'ddfm' if DDFM config detected, 'dfm' otherwise
        
    Detection Logic:
    - Checks if model_type is 'ddfm' or 'deep'
    - Checks for DDFM-specific parameters:
      - Keys starting with 'ddfm_'
      - Keys: 'encoder_layers', 'epochs', 'learning_rate', 'batch_size'
    - Returns 'ddfm' if any condition is met (unless model_type is explicitly 'dfm')
    """
    model_type = data.get('model_type', '').lower()
    has_ddfm_params = any(
        key.startswith('ddfm_') or 
        key in ['encoder_layers', 'epochs', 'learning_rate', 'batch_size']
        for key in data.keys()
    )
    
    if model_type in ('ddfm', 'deep') or (has_ddfm_params and model_type != 'dfm'):
        return 'ddfm'
    return 'dfm'


@dataclass
class SeriesConfig:
    """Settings describing one time series of the model.

    The class is deliberately generic: it carries no API- or database-specific
    fields. Integrations with such sources belong in adapters in the
    application layer.

    Attributes
    ----------
    frequency : str
        Series frequency code, e.g. 'm' (monthly), 'q' (quarterly),
        'sa' (semi-annual), 'a' (annual). Passed through
        ``validate_frequency`` on construction.
    transformation : str
        Transformation code, e.g. 'lin', 'pch', 'pca'. Passed through
        ``validate_transformation`` on construction.
    series_id : str, optional
        Unique identifier. When left as None, the owning model config
        assigns "series_0", "series_1", ...
    series_name : str, optional
        Human-readable name; copied from series_id when omitted.
    units : str, optional
        Units of measurement. Display-only metadata (used in news
        decomposition output), not used in model estimation.
    release_date : int, optional
        Release timing for pseudo real-time nowcasting.
        - Positive value (1-31): day of month on which the data is released
        - Negative value: days before the end of the previous month
        Example: 25 = released on the 25th of each month,
        -5 = released 5 days before the end of the previous month

    Note: which blocks a series loads on is configured in
    DFMConfig.blocks[block_name]['series'], not here.
    """
    # Required fields (no defaults)
    frequency: str
    transformation: str
    # Optional fields (defaults required, so they follow the required ones)
    series_id: Optional[str] = None  # None -> assigned later as "series_<index>"
    series_name: Optional[str] = None  # Display name; falls back to series_id
    units: Optional[str] = None  # Display-only metadata (used in news.py output)
    release_date: Optional[int] = None  # Release timing for pseudo real-time nowcasting

    def __post_init__(self):
        """Validate the codes and default the display name."""
        # Store whatever the validators return in place of the raw input.
        self.frequency = validate_frequency(self.frequency)
        self.transformation = validate_transformation(self.transformation)
        # A non-empty series_id doubles as the name when none was given.
        if self.series_id and self.series_name is None:
            self.series_name = self.series_id


@dataclass
class BaseModelConfig:
    """Model structure shared by the linear (DFM) and deep (DDFM) configurations.

    Holds what both model families need:
    - Series definitions
    - Clock frequency
    - Data preprocessing settings (missing data handling, scaling)

    Note: Block structure is DFM-specific and lives in DFMConfig; DDFM uses
    num_factors instead of blocks.

    Subclasses (DFMConfig, DDFMConfig) add the model-specific training
    parameters.
    """
    # ========================================================================
    # Model Structure (WHAT - defines the model)
    # ========================================================================
    series: List[SeriesConfig]  # Series specifications (must be non-empty)

    # ========================================================================
    # Shared Data Handling Parameters
    # ========================================================================
    nan_method: int = 2  # Missing data handling method (1-5). Preprocessing step before Kalman Filter-based handling
    nan_k: int = 3  # Spline parameter for NaN interpolation (cubic spline)
    clock: str = 'm'  # Base frequency for nowcasting (global clock): 'd', 'w', 'm', 'q', 'sa', 'a' (defaults to 'm' for monthly)
    scaler: Optional[str] = 'standard'  # Unified scaler type for all series: 'standard', 'robust', 'minmax', 'maxabs', 'quantile', or None (no scaling). Default: 'standard' for unified scaling.

    def __post_init__(self):
        """Run the structural checks common to every model configuration.

        Steps:
        - Reject a configuration without series
        - Validate the global clock and store the validated value
        - Give every series without a series_id the id "series_<index>",
          and every series without a name its series_id

        Raises
        ------
        ValueError
            If no series is configured. Whatever ``validate_frequency``
            raises for the clock also propagates.
        """
        # NOTE(review): FREQUENCY_HIERARCHY is imported but not used in this method.
        from .utils import FREQUENCY_HIERARCHY

        if not self.series:
            raise ValueError(
                "Model configuration must contain at least one series. "
                "Please add series definitions to your configuration."
            )

        # Validate global clock
        self.clock = validate_frequency(self.clock)

        # Fill in missing identifiers and names in place on the series objects.
        for position, entry in enumerate(self.series):
            if entry.series_id is None:
                entry.series_id = f"series_{position}"
            if entry.series_name is None:
                entry.series_name = entry.series_id

    # ========================================================================
    # Helper Methods (snake_case - recommended)
    # ========================================================================

    def get_series_ids(self) -> List[str]:
        """Return the series IDs, substituting "series_<index>" for a None id."""
        ids = []
        for position, entry in enumerate(self.series):
            ids.append(f"series_{position}" if entry.series_id is None else entry.series_id)
        return ids

    def get_series_names(self) -> List[str]:
        """Return the series names, falling back to the id, then "series_<index>"."""
        names = []
        for position, entry in enumerate(self.series):
            if entry.series_name is not None:
                names.append(entry.series_name)
            else:
                names.append(entry.series_id or f"series_{position}")
        return names

    def get_frequencies(self) -> List[str]:
        """Return the frequency code of every series, in series order."""
        return [entry.frequency for entry in self.series]

    def validate_and_report(self) -> Dict[str, Any]:
        """Check the configuration without raising and describe what was found.

        Returns
        -------
        Dict[str, Any]
            Report dictionary with keys:
            - 'valid': bool - Whether configuration is valid
            - 'errors': List[str] - List of error messages
            - 'warnings': List[str] - List of warning messages
            - 'suggestions': List[str] - List of actionable suggestions
        """
        report = {
            'valid': True,
            'errors': [],
            'warnings': [],
            'suggestions': []
        }

        # The only base-level check: at least one series must be configured.
        if self.series:
            return report

        report['valid'] = False
        report['errors'].append("Model configuration must contain at least one series.")
        report['suggestions'].append("Add series definitions to your configuration.")
        return report


@dataclass
class DFMConfig(BaseModelConfig):
    """Linear DFM configuration - block structure and EM algorithm parameters.

    This configuration class extends BaseModelConfig with parameters specific
    to linear Dynamic Factor Models trained using the Expectation-Maximization
    (EM) algorithm.

    DFM uses a block structure where factors are organized into blocks.
    The first block is the global block: every series must load on it.
    Each block lists the series loading on it in ``blocks[name]['series']``;
    an empty or missing list means that all series load on the block.

    The configuration can be built from:
    - Main settings (estimation parameters) from config/default.yaml
    - Series definitions from config/series/default.yaml or CSV
    - Block definitions from config/blocks/default.yaml
    """
    # ========================================================================
    # EM Algorithm Parameters (HOW - controls the algorithm)
    # ========================================================================
    ar_lag: int = 1  # Number of lags in AR transition equation (lookback window). Must be 1 or 2 (maximum supported order is VAR(2))
    threshold: float = 1e-5  # EM convergence threshold
    max_iter: int = 5000  # Maximum EM iterations

    # ========================================================================
    # Block Structure (DFM-specific)
    # ========================================================================
    blocks: Dict[str, Dict[str, Any]] = field(default_factory=dict)  # Block configurations (block_name -> {factors, ar_lag, clock, series, notes})
    block_names: Optional[List[str]] = None  # Optional block names in order. If None, auto-generated as "block1", "block2", etc.
    factors_per_block: List[int] = field(init=False)  # Number of factors per block (derived from blocks)
    # compare=False keeps the cache out of the generated __eq__: ndarray
    # equality is element-wise, so comparing two configs whose caches are
    # populated could otherwise raise ValueError instead of returning a bool.
    _cached_blocks: Optional[np.ndarray] = field(default=None, init=False, repr=False, compare=False)  # Cached blocks array

    # ========================================================================
    # Numerical Stability Parameters (transparent and configurable)
    # ========================================================================
    # AR Coefficient Clipping
    clip_ar_coefficients: bool = True  # Enable AR coefficient clipping for stationarity
    ar_clip_min: float = -0.99  # Minimum AR coefficient (must be > -1 for stationarity)
    ar_clip_max: float = 0.99   # Maximum AR coefficient (must be < 1 for stationarity)
    warn_on_ar_clip: bool = True  # Warn when AR coefficients are clipped (indicates near-unit root)

    # Data Value Clipping
    clip_data_values: bool = True  # Enable clipping of extreme data values
    data_clip_threshold: float = 100.0  # Clip values beyond this many standard deviations
    warn_on_data_clip: bool = True  # Warn when data values are clipped (indicates outliers)

    # Regularization
    use_regularization: bool = True  # Enable regularization for numerical stability
    regularization_scale: float = 1e-5  # Scale factor for ridge regularization (relative to trace, default 1e-5)
    min_eigenvalue: float = 1e-8  # Minimum eigenvalue for positive definite matrices
    max_eigenvalue: float = 1e6   # Maximum eigenvalue cap to prevent explosion
    warn_on_regularization: bool = True  # Warn when regularization is applied

    # Damped Updates
    use_damped_updates: bool = True  # Enable damped updates when likelihood decreases
    damping_factor: float = 0.8  # Damping factor (0.8 = 80% new, 20% old)
    warn_on_damped_update: bool = True  # Warn when damped updates are used

    # Idiosyncratic Component Augmentation
    augment_idio: bool = True  # Enable state augmentation with idiosyncratic components (default: True)
    augment_idio_slow: bool = True  # Enable tent-length chains for slower-frequency series (default: True)
    idio_rho0: float = 0.1  # Initial AR coefficient for idiosyncratic components (default: 0.1)
    idio_min_var: float = 1e-8  # Minimum variance for idiosyncratic innovation covariance (default: 1e-8)

    def __post_init__(self):
        """Validate DFM-specific structure (blocks and series consistency).

        Steps, in order:
        - Run the parent validation (series present, clock valid, series ids
          and names assigned)
        - Require at least one block
        - Fix the block order: derive ``block_names`` (or check the provided
          names against the blocks dict) and reorder ``blocks`` accordingly
        - Derive ``factors_per_block`` from the blocks
        - Validate each block's clock (not faster than the global clock),
          number of factors (>= 1) and AR lag (1 or 2)
        - Fill in the 'series' list of blocks that leave it empty, check that
          every listed series exists and is not faster than its block clock
        - Ensure all series load on the global (first) block

        Raises
        ------
        ValueError
            If any validation check fails, with a descriptive error message
            indicating what needs to be fixed.
        """
        # Call parent validation first; it also assigns missing series ids.
        super().__post_init__()

        # Import frequency hierarchy for validation
        from .utils import FREQUENCY_HIERARCHY

        if not self.blocks:
            raise ValueError(
                "DFM configuration must contain at least one block. "
                "Please add block definitions to your configuration."
            )

        self._resolve_block_names()
        # First entry belongs to the global block.
        self.factors_per_block = [
            self.blocks[name].get('factors', 1) for name in self.block_names
        ]
        self._validate_block_definitions(FREQUENCY_HIERARCHY)
        self._validate_block_series(FREQUENCY_HIERARCHY)

    def _resolve_block_names(self) -> None:
        """Make ``block_names`` and the key order of ``blocks`` agree."""
        if self.block_names is None:
            # No explicit order given: keep the dict order but rename the
            # keys to "block1", "block2", ... (the original keys are dropped).
            generated = [f"block{i + 1}" for i in range(len(self.blocks))]
            self.blocks = dict(zip(generated, self.blocks.values()))
            self.block_names = generated
            return

        if len(self.block_names) != len(self.blocks):
            raise ValueError(
                f"block_names length ({len(self.block_names)}) does not match "
                f"blocks dict size ({len(self.blocks)}). "
                f"Please ensure block_names has the same number of elements as blocks."
            )
        # Reorder blocks dict to match block_names order
        blocks_ordered = {}
        for block_name in self.block_names:
            if block_name not in self.blocks:
                raise ValueError(
                    f"block_name '{block_name}' in block_names is not found in blocks dict. "
                    f"Available block names: {list(self.blocks.keys())}"
                )
            if block_name in blocks_ordered:
                # A repeated name passes the length check above while another
                # block would be silently dropped, so reject it explicitly.
                raise ValueError(
                    f"block_names contains duplicate entry '{block_name}'. "
                    f"Each block name must appear exactly once."
                )
            blocks_ordered[block_name] = self.blocks[block_name]
        self.blocks = blocks_ordered

    def _validate_block_definitions(self, hierarchy: Dict[str, int]) -> None:
        """Check every block's clock, number of factors and AR lag."""
        # Per the comparisons below, a smaller hierarchy value means a faster
        # frequency; frequencies missing from the table are treated as 3.
        global_clock_hierarchy = hierarchy.get(self.clock, 3)

        for block_name, block_cfg in self.blocks.items():
            block_clock = validate_frequency(block_cfg.get('clock', self.clock))
            if hierarchy.get(block_clock, 3) < global_clock_hierarchy:
                raise ValueError(
                    f"Block '{block_name}' has clock '{block_clock}' which is faster than "
                    f"global clock '{self.clock}'. Block clocks must be >= global clock. "
                    f"Suggested fix: change block '{block_name}' clock to '{self.clock}' or slower, "
                    f"or set global clock to '{block_clock}' or faster."
                )

            factors = block_cfg.get('factors', 1)
            ar_lag = block_cfg.get('ar_lag', 1)
            if factors < 1:
                raise ValueError(
                    f"Block '{block_name}' validation failed: must have at least 1 factor, got {factors}. "
                    f"Please set factors >= 1 for block '{block_name}'."
                )
            if ar_lag < 1:
                raise ValueError(
                    f"Block '{block_name}' validation failed: AR lag must be at least 1, got {ar_lag}. "
                    f"Please set ar_lag >= 1 for block '{block_name}'."
                )
            if ar_lag > 2:
                raise ValueError(
                    f"Block '{block_name}' validation failed: AR lag must be at most 2 (maximum supported VAR order is VAR(2)), got {ar_lag}. "
                    f"Please set ar_lag to 1 (VAR(1)) or 2 (VAR(2)) for block '{block_name}'."
                )

    def _validate_block_series(self, hierarchy: Dict[str, int]) -> None:
        """Fill in empty block series lists and check block membership."""
        series_ids = [s.series_id for s in self.series]

        for block_name in self.block_names:
            block_cfg = self.blocks[block_name]
            block_series_list = block_cfg.get('series', [])
            # NOTE(review): the raw clock value is used here, whereas the block
            # definition check passes it through validate_frequency first.
            # If that helper normalises its input, the two can disagree - confirm.
            block_clock = block_cfg.get('clock', self.clock)
            block_clock_hierarchy = hierarchy.get(block_clock, 3)

            # If block.series is empty, all series load on it. Each block gets
            # its own list so editing one block's membership later cannot
            # change another block's.
            if not block_series_list:
                block_series_list = list(series_ids)
                block_cfg['series'] = block_series_list

            # Validate series in block exist and check frequency constraints
            for series_id in block_series_list:
                series_obj = self._find_series(series_id)
                if series_obj is None:
                    raise ValueError(
                        f"Block '{block_name}' references series '{series_id}' in its 'series' list, "
                        f"but no series with series_id='{series_id}' exists in the configuration."
                    )

                if hierarchy.get(series_obj.frequency, 3) < block_clock_hierarchy:
                    valid_freqs_str = self._allowed_frequencies(hierarchy, block_clock_hierarchy)
                    raise ValueError(
                        f"Series '{series_id}' has frequency '{series_obj.frequency}' which is faster than "
                        f"block '{block_name}' clock '{block_clock}'. "
                        f"Series in a block must have frequency <= block clock. "
                        f"Suggested fix: change series frequency to one of [{valid_freqs_str}] "
                        f"(slower or equal to block clock '{block_clock}'), "
                        f"or set block clock to '{series_obj.frequency}' or faster."
                    )

        # Ensure all series load on global block (first block)
        global_block_name = self.block_names[0]
        global_block_series = self.blocks[global_block_name].get('series', [])
        for s in self.series:
            if s.series_id not in global_block_series:
                raise ValueError(
                    f"Series '{s.series_id}' must load on the global block "
                    f"(first block '{global_block_name}'). "
                    f"Please add '{s.series_id}' to block '{global_block_name}'.series."
                )

    def _find_series(self, series_id: Any) -> Optional['SeriesConfig']:
        """Return the first series whose series_id equals ``series_id``, else None."""
        return next((s for s in self.series if s.series_id == series_id), None)

    @staticmethod
    def _allowed_frequencies(hierarchy: Dict[str, int], clock_hierarchy: int) -> str:
        """Return the sorted, comma-separated frequencies not faster than a clock."""
        return ', '.join(sorted(
            freq for freq, level in hierarchy.items() if level >= clock_hierarchy
        ))

    def get_blocks_array(self) -> np.ndarray:
        """Get blocks as numpy array (snake_case - recommended, cached).

        Builds blocks array from blocks dict: blocks_array[i, j] = 1 if series i
        loads on block j. The array is built on first use and the same object
        is returned on later calls.
        """
        if self._cached_blocks is not None:
            return self._cached_blocks

        blocks_array = np.zeros((len(self.series), len(self.block_names)), dtype=int)

        # Build mapping from series_id to series index
        series_id_to_idx = {s.series_id: i for i, s in enumerate(self.series)}

        # For each block, set 1 for series that load on it
        for block_idx, block_name in enumerate(self.block_names):
            block_series_list = self.blocks[block_name].get('series', [])

            # If block.series is empty, all series load on it
            if not block_series_list:
                blocks_array[:, block_idx] = 1
                continue

            # Ids that match no configured series are skipped.
            for series_id in block_series_list:
                if series_id in series_id_to_idx:
                    blocks_array[series_id_to_idx[series_id], block_idx] = 1

        self._cached_blocks = blocks_array
        return self._cached_blocks

    def validate_and_report(self) -> Dict[str, Any]:
        """Validate DFM configuration and return structured report with issues and suggestions.

        This method performs validation checks without raising exceptions, returning
        a structured report that can be used for debugging and user guidance.
        Checks run on top of the parent report: blocks present, series
        frequency vs. block clock, block clock vs. global clock, and
        factors_per_block >= 1.

        Returns
        -------
        Dict[str, Any]
            Report dictionary with keys:
            - 'valid': bool - Whether configuration is valid
            - 'errors': List[str] - List of error messages
            - 'warnings': List[str] - List of warning messages
            - 'suggestions': List[str] - List of actionable suggestions
        """
        from .utils import FREQUENCY_HIERARCHY

        # Get base report from parent
        report = super().validate_and_report()

        # Check for empty blocks
        if not self.blocks:
            report['valid'] = False
            report['errors'].append("DFM configuration must contain at least one block.")
            report['suggestions'].append("Add block definitions to your configuration.")
            return report

        # block_names should be set by __post_init__, but check for safety
        if self.block_names is None:
            report['valid'] = False
            report['errors'].append("block_names is None. This should not happen after __post_init__.")
            return report

        # Check frequency constraints
        global_clock_hierarchy = FREQUENCY_HIERARCHY.get(self.clock, 3)
        for block_name in self.block_names:
            block_cfg = self.blocks[block_name]
            block_series_list = block_cfg.get('series', [])
            block_clock = block_cfg.get('clock', self.clock)
            block_clock_hierarchy = FREQUENCY_HIERARCHY.get(block_clock, 3)

            # If block.series is empty, all series load on it
            if not block_series_list:
                block_series_list = [s.series_id for s in self.series]

            for series_id in block_series_list:
                series_obj = self._find_series(series_id)
                if not series_obj:
                    # Unknown ids are not reported by this check.
                    continue
                if FREQUENCY_HIERARCHY.get(series_obj.frequency, 3) >= block_clock_hierarchy:
                    continue
                valid_freqs_str = self._allowed_frequencies(FREQUENCY_HIERARCHY, block_clock_hierarchy)
                report['valid'] = False
                report['errors'].append(
                    f"Series '{series_id}' has frequency '{series_obj.frequency}' which is faster than "
                    f"block '{block_name}' clock '{block_clock}'."
                )
                report['suggestions'].append(
                    f"For series '{series_id}': change frequency to one of [{valid_freqs_str}], "
                    f"or set block '{block_name}' clock to '{series_obj.frequency}' or faster."
                )

        # Check block clock constraints
        for block_name, block_cfg in self.blocks.items():
            block_clock = block_cfg.get('clock', self.clock)
            if FREQUENCY_HIERARCHY.get(block_clock, 3) < global_clock_hierarchy:
                report['valid'] = False
                report['errors'].append(
                    f"Block '{block_name}' has clock '{block_clock}' which is faster than "
                    f"global clock '{self.clock}'."
                )
                report['suggestions'].append(
                    f"Change block '{block_name}' clock to '{self.clock}' or slower, "
                    f"or set global clock to '{block_clock}' or faster."
                )

        # Check factors_per_block (it can be changed after construction)
        invalid_blocks = [i for i, f in enumerate(self.factors_per_block) if f < 1]
        if invalid_blocks:
            report['valid'] = False
            report['errors'].append(
                f"factors_per_block must contain positive integers (>= 1). "
                f"Invalid values found at block indices {invalid_blocks}."
            )
            report['suggestions'].append(
                f"Set factors_per_block[{invalid_blocks[0]}] to at least 1 for block '{self.block_names[invalid_blocks[0]]}'."
            )

        return report

    # ========================================================================
    # Factory Methods
    # ========================================================================

    @classmethod
    def _extract_base(cls, data: Dict[str, Any]) -> Dict[str, Any]:
        """Extract shared base parameters from config dict.

        Missing keys take the same defaults as the BaseModelConfig fields.
        """
        return {
            'nan_method': data.get('nan_method', 2),
            'nan_k': data.get('nan_k', 3),
            'clock': data.get('clock', 'm'),
            'scaler': data.get('scaler', 'standard'),  # Unified scaler for all series (default: 'standard')
        }

    @classmethod
    def _extract_dfm_params(cls, data: Dict[str, Any]) -> Dict[str, Any]:
        """Extract DFM-specific parameters from config dict.

        Returns the base parameters followed by the EM, numerical stability
        and idiosyncratic-augmentation parameters; missing keys take the
        defaults listed below. Series and block definitions are not included.
        """
        dfm_defaults = {
            'ar_lag': 1,
            'threshold': 1e-5,
            'max_iter': 5000,
            # Numerical stability parameters
            'clip_ar_coefficients': True,
            'ar_clip_min': -0.99,
            'ar_clip_max': 0.99,
            'warn_on_ar_clip': True,
            'clip_data_values': True,
            'data_clip_threshold': 100.0,
            'warn_on_data_clip': True,
            'use_regularization': True,
            'regularization_scale': 1e-5,
            'min_eigenvalue': 1e-8,
            'max_eigenvalue': 1e6,
            'warn_on_regularization': True,
            'use_damped_updates': True,
            'damping_factor': 0.8,
            'warn_on_damped_update': True,
            # Idiosyncratic component augmentation
            'augment_idio': True,
            'augment_idio_slow': True,
            'idio_rho0': 0.1,
            'idio_min_var': 1e-8,
        }
        params = cls._extract_base(data)
        params.update({key: data.get(key, default) for key, default in dfm_defaults.items()})
        return params


@dataclass
class DDFMConfig(BaseModelConfig):
    """Deep Dynamic Factor Model configuration - neural network training parameters.

    This configuration class extends BaseModelConfig with parameters specific
    to Deep Dynamic Factor Models trained using neural networks (autoencoders).

    Note: DDFM does not use block structure. Use num_factors directly to specify
    the number of factors.

    The configuration can be built from:
    - Main settings (training parameters) from config/default.yaml
    - Series definitions from config/series/default.yaml or CSV

    Note: ``from_dict`` and ``from_hydra`` are implemented once, as the
    module-level functions ``_ddfm_from_dict`` and ``_from_hydra``. The
    methods of the same name in this class body only forward to those
    functions so that a single implementation exists.
    """
    # ========================================================================
    # Neural Network Training Parameters
    # ========================================================================
    encoder_layers: Optional[List[int]] = None  # Hidden layer dimensions for encoder (default: [64, 32])
    num_factors: Optional[int] = None  # Number of factors (inferred from config if None)
    activation: str = 'relu'  # Activation function ('tanh', 'relu', 'sigmoid', default: 'relu' to match original DDFM)
    use_batch_norm: bool = True  # Use batch normalization in encoder (default: True)
    learning_rate: float = 0.001  # Learning rate for Adam optimizer (default: 0.001)
    epochs: int = 100  # Number of training epochs (default: 100)
    batch_size: int = 100  # Batch size for training (default: 100 to match original DDFM)
    factor_order: int = 1  # VAR lag order for factor dynamics. Must be 1 or 2 (maximum supported order is VAR(2), default: 1)
    use_idiosyncratic: bool = True  # Model idio components with AR(1) dynamics (default: True)
    min_obs_idio: int = 5  # Minimum observations for idio AR(1) estimation (default: 5)

    # Additional training parameters
    max_iter: int = 200  # Maximum MCMC iterations for iterative factor extraction
    tolerance: float = 0.0005  # Convergence tolerance for MCMC iterations
    disp: int = 10  # Display frequency for training progress
    seed: Optional[int] = None  # Random seed for reproducibility

    # ========================================================================
    # Factory Methods (shared base methods)
    # ========================================================================

    @classmethod
    def _extract_base(cls, data: Dict[str, Any]) -> Dict[str, Any]:
        """Extract shared base parameters from config dict (delegates to DFMConfig)."""
        return DFMConfig._extract_base(data)

    @classmethod
    def _extract_dfm_params(cls, data: Dict[str, Any]) -> Dict[str, Any]:
        """Extract DFM-specific parameters from config dict (delegates to DFMConfig)."""
        return DFMConfig._extract_dfm_params(data)

    @classmethod
    def _extract_ddfm(cls, data: Dict[str, Any]) -> Dict[str, Any]:
        """Extract DDFM-specific parameters from config dict.

        Most keys may be given either directly (``epochs``) or with a
        ``ddfm_`` prefix (``ddfm_epochs``); the direct key is consulted first.
        ``max_iter``, ``tolerance``, ``disp`` and ``seed`` are read from the
        direct key only.

        Parameters
        ----------
        data : Dict[str, Any]
            Raw configuration mapping.

        Returns
        -------
        Dict[str, Any]
            Base parameters extended with the DDFM-specific parameters.
        """
        params = cls._extract_base(data)
        params.update({
            # `or` form: a missing OR falsy direct value (None, '', [], 0)
            # falls through to the ddfm_-prefixed key.
            'encoder_layers': data.get('encoder_layers') or data.get('ddfm_encoder_layers', None),
            'num_factors': data.get('num_factors') or data.get('ddfm_num_factors', None),
            'activation': data.get('activation') or data.get('ddfm_activation', 'relu'),
            # Nested-get form: the direct key wins whenever it is present,
            # so explicit falsy values such as False are honoured.
            'use_batch_norm': data.get('use_batch_norm', data.get('ddfm_use_batch_norm', True)),
            'learning_rate': data.get('learning_rate', data.get('ddfm_learning_rate', 0.001)),
            'epochs': data.get('epochs', data.get('ddfm_epochs', 100)),
            'batch_size': data.get('batch_size', data.get('ddfm_batch_size', 100)),
            'factor_order': data.get('factor_order', data.get('ddfm_factor_order', 1)),
            'use_idiosyncratic': data.get('use_idiosyncratic', data.get('ddfm_use_idiosyncratic', True)),
            'min_obs_idio': data.get('min_obs_idio', data.get('ddfm_min_obs_idio', 5)),
            # Direct keys only (no ddfm_ prefix variant).
            'max_iter': data.get('max_iter', 200),
            'tolerance': data.get('tolerance', 0.0005),
            'disp': data.get('disp', 10),
            'seed': data.get('seed', None),
        })
        return params

    @classmethod
    def _from_hydra_dict(cls, data: Dict[str, Any]) -> Union['DFMConfig', 'DDFMConfig']:
        """Build a config from the Hydra layout, where ``series`` is a dict keyed by series id.

        Parameters
        ----------
        data : Dict[str, Any]
            Configuration mapping. ``data['series']`` is expected to map each
            series id to a dict of series settings; entries whose value is not
            a dict are skipped.

        Returns
        -------
        DFMConfig or DDFMConfig
            The type is chosen by ``_detect_config_type(data)``.
        """
        # Determine config type first
        config_type = _detect_config_type(data)

        # Parse series dict: {series_id: {frequency: ..., ...}}
        series_list = []
        if 'series' in data and isinstance(data['series'], dict):
            for series_id, series_cfg in data['series'].items():
                if not isinstance(series_cfg, dict):
                    continue
                series_list.append(SeriesConfig(
                    series_id=series_id,
                    series_name=series_cfg.get('series_name', series_id),
                    frequency=series_cfg.get('frequency', 'm'),
                    transformation=series_cfg.get('transformation', 'lin'),
                    units=series_cfg.get('units', None),
                    release_date=series_cfg.get('release_date', None)
                ))

        if config_type == 'ddfm':
            # DDFM does not use block structure
            return DDFMConfig(
                series=series_list,
                **DDFMConfig._extract_ddfm(data)
            )

        # DFM: normalise the blocks dict, or fall back to a single default block.
        blocks_dict = data.get('blocks', {})
        blocks_dict_final = {}

        if isinstance(blocks_dict, dict) and blocks_dict:
            for block_name, block_data in blocks_dict.items():
                if isinstance(block_data, dict):
                    blocks_dict_final[block_name] = {
                        'factors': block_data.get('factors', 1),
                        'ar_lag': block_data.get('ar_lag', 1),
                        'clock': block_data.get('clock', 'm'),
                        'series': block_data.get('series', []),
                        'notes': block_data.get('notes', None)
                    }
                else:
                    # Non-dict block entry: use the defaults for every property.
                    blocks_dict_final[block_name] = {'factors': 1, 'ar_lag': 1, 'clock': 'm', 'series': []}
        else:
            # No usable blocks: one default block holding every parsed series.
            # An empty series_list simply yields an empty 'series' list.
            blocks_dict_final['block1'] = {
                'factors': 1,
                'ar_lag': 1,
                'clock': data.get('clock', 'm'),
                'series': [s.series_id or f"series_{i}" for i, s in enumerate(series_list)]
            }

        return DFMConfig(
            series=series_list,
            blocks=blocks_dict_final,
            **DDFMConfig._extract_dfm_params(data)
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any], config_path: Optional[str] = None) -> Union['DFMConfig', 'DDFMConfig']:
        """Create a DDFMConfig or DFMConfig from a dictionary (auto-detects type).

        Forwards to the module-level ``_ddfm_from_dict``, which holds the
        single implementation of this factory.

        Parameters
        ----------
        data : Dict[str, Any]
            Configuration dictionary
        config_path : Optional[str]
            Path to config directory (for loading individual series files when series is a list of strings)
        """
        # Resolved at call time; the function is defined further down the module.
        return _ddfm_from_dict(cls, data, config_path=config_path)

    @classmethod
    def from_hydra(cls, cfg: Any, config_path: Optional[str] = None) -> Union['DFMConfig', 'DDFMConfig']:
        """Create config from a Hydra DictConfig or plain dict.

        Forwards to the module-level ``_from_hydra``, which holds the single
        implementation of this factory. ``config_path`` is passed through
        unchanged.

        Parameters
        ----------
        cfg : DictConfig | dict
            Hydra DictConfig (or dict) that contains the composed configuration.
        config_path : Optional[str]
            Path to config directory (for loading individual series files when series is a list of strings).

        Returns
        -------
        DFMConfig or DDFMConfig
            Configuration instance (type determined automatically).
        """
        # Resolved at call time; the function is defined further down the module.
        return _from_hydra(cls, cfg, config_path=config_path)


# Add factory methods to DFMConfig class
def _dfm_from_dict(cls, data: Dict[str, Any], config_path: Optional[str] = None) -> Union['DFMConfig', 'DDFMConfig']:
    """Create DFMConfig or DDFMConfig from dictionary (auto-detects type).

    The result type depends on the data, not on ``cls``, so this factory
    shares its implementation with ``_ddfm_from_dict``. Sharing keeps both
    entry points in agreement for DDFM-type data: block settings are neither
    parsed nor validated, and on direct instantiation the block-related keys
    are dropped before ``DDFMConfig`` is constructed.

    Parameters
    ----------
    data : Dict[str, Any]
        Dictionary containing configuration
    config_path : Optional[str]
        Path to config directory (for loading individual series files when series is a list of strings)

    Returns
    -------
    DFMConfig or DDFMConfig
        Configuration instance of the detected type.
    """
    # Resolved at call time; _ddfm_from_dict is defined further down the module.
    return _ddfm_from_dict(cls, data, config_path=config_path)

def _from_hydra(cls, cfg: Any, config_path: Optional[str] = None) -> Union['DFMConfig', 'DDFMConfig']:
    """Create config from Hydra DictConfig (auto-detects DFM/DDFM).
    
    Parameters
    ----------
    cfg : DictConfig | dict
        Hydra DictConfig or dict containing configuration
    config_path : Optional[str]
        Path to config directory (for loading individual series files when series is a list of strings)
    """
    try:
        from omegaconf import DictConfig, OmegaConf
        if isinstance(cfg, DictConfig):
            cfg = OmegaConf.to_container(cfg, resolve=True)
    except Exception:
        pass
    if not isinstance(cfg, dict):
        raise TypeError("from_hydra expects a DictConfig or dict.")
    return cls.from_dict(cfg, config_path=config_path)

# Attach the module-level factories to DFMConfig as classmethods, so that
# DFMConfig.from_dict(...) and DFMConfig.from_hydra(...) dispatch to
# _dfm_from_dict and _from_hydra respectively.
DFMConfig.from_dict = classmethod(_dfm_from_dict)
DFMConfig.from_hydra = classmethod(_from_hydra)

# Add factory methods to DDFMConfig class
def _ddfm_from_dict(cls, data: Dict[str, Any], config_path: Optional[str] = None) -> Union['DFMConfig', 'DDFMConfig']:
    """Build a DDFMConfig or DFMConfig from a dictionary, choosing the type from the data.

    Three input layouts are handled, keyed on the ``series`` entry:
    a dict (Hydra layout), a list, or anything else (the dictionary is then
    used directly as constructor keyword arguments).

    Parameters
    ----------
    data : Dict[str, Any]
        Dictionary containing configuration
    config_path : Optional[str]
        Path to config directory (for loading individual series files when series is a list of strings)

    Raises
    ------
    ValueError
        For DFM-type data with list series when ``blocks`` is present but not a dict.
    """
    # Type detection always runs first, before any layout-specific handling.
    is_ddfm = _detect_config_type(data) == 'ddfm'
    series_data = data.get('series')

    # Hydra layout: series keyed by id.
    if isinstance(series_data, dict):
        return DDFMConfig._from_hydra_dict(data)

    # No series list: construct directly from the dictionary.
    if not isinstance(series_data, list):
        if not is_ddfm:
            return DFMConfig(**data)
        # DDFM has no block structure, so block-related keys are dropped.
        block_keys = ['blocks', 'block_names', 'factors_per_block']
        return DDFMConfig(**{key: value for key, value in data.items() if key not in block_keys})

    # List layout: entries are parsed by the helper (config_path is used for string ids).
    series_list = _parse_series_list(series_data, config_path=config_path)

    if is_ddfm:
        return DDFMConfig(series=series_list, **DDFMConfig._extract_ddfm(data))

    # DFM: take explicit blocks when given, otherwise infer them from the series.
    if 'blocks' not in data:
        blocks_dict = _infer_blocks(series_list, data)
    else:
        blocks_data = data['blocks']
        if not isinstance(blocks_data, dict):
            raise ValueError(f"blocks must be a dict, got {type(blocks_data)}")
        blocks_dict = _parse_blocks_dict(blocks_data)

    return DFMConfig(series=series_list, blocks=blocks_dict, **DFMConfig._extract_dfm_params(data))

# Attach the module-level factories to DDFMConfig as classmethods. These
# assignments replace the from_dict / from_hydra attributes created by the
# DDFMConfig class body, so the functions bound here are the ones that run.
DDFMConfig.from_dict = classmethod(_ddfm_from_dict)
DDFMConfig.from_hydra = classmethod(_from_hydra)


