"""
Demographic Analysis Service

This service provides demographic analysis functionality for loan applications
and payment patterns. It handles age calculation, age group categorization,
gender categorization, and payment pattern classification.

Validates: Requirements 2.2, 2.3, 2.4, 2.10, 2.11
"""

from datetime import date, datetime
from decimal import Decimal
from typing import Callable, Dict, Iterable, List, Optional, Tuple

from django.db.models import QuerySet
from django.utils import timezone


class DemographicAnalysisService:
    """
    Service for analyzing loan applications and payments by demographics.

    This service provides methods for:
    - Calculating age from birth date
    - Categorizing borrowers into age groups
    - Categorizing borrowers by gender
    - Classifying payment patterns (on-time vs late)
    - Aggregating per-group application, approval and payment statistics

    All methods are static or class methods; the class holds no instance state.
    """

    # Label used whenever a borrower cannot be placed in a known category
    NOT_SPECIFIED = 'Not Specified'

    # Age group definitions as (min_age, max_age, label); both bounds inclusive
    AGE_GROUPS = [
        (18, 25, '18-25'),
        (26, 35, '26-35'),
        (36, 45, '36-45'),
        (46, 55, '46-55'),
        (56, 65, '56-65'),
        (66, 999, '66+'),
    ]

    # Gender categories, keyed by the stored gender code
    GENDER_CATEGORIES = {
        'M': 'Male',
        'F': 'Female',
        'O': 'Other',
        None: 'Not Specified',
        '': 'Not Specified',
    }

    # Loan statuses that count as an approved (disbursed) application
    APPROVED_STATUSES = ('active', 'paid', 'defaulted', 'rolled_over', 'written_off')

    @staticmethod
    def calculate_age(birth_date: Optional[date], reference_date: Optional[date] = None) -> Optional[int]:
        """
        Calculate age in completed years from a birth date.

        Args:
            birth_date: The date of birth
            reference_date: The date to calculate age as of (defaults to today)

        Returns:
            Age in years, or None if birth_date is None. The result is negative
            when birth_date lies after reference_date.

        Examples:
            >>> calculate_age(date(1990, 1, 1), date(2024, 1, 1))
            34
            >>> calculate_age(None)
            None
        """
        if birth_date is None:
            return None

        if reference_date is None:
            reference_date = date.today()

        age = reference_date.year - birth_date.year

        # Adjust if birthday hasn't occurred yet this year
        if (reference_date.month, reference_date.day) < (birth_date.month, birth_date.day):
            age -= 1

        return age

    @classmethod
    def categorize_age_group(cls, birth_date: Optional[date], reference_date: Optional[date] = None) -> str:
        """
        Categorize a borrower into an age group.

        Args:
            birth_date: The date of birth
            reference_date: The date to calculate age as of (defaults to today)

        Returns:
            Age group string: '18-25', '26-35', '36-45', '46-55', '56-65', '66+',
            or 'Not Specified' when the birth date is missing or the age falls
            outside every range in AGE_GROUPS (for example, under 18).

        Examples:
            >>> categorize_age_group(date(1990, 1, 1), date(2024, 1, 1))
            '26-35'
            >>> categorize_age_group(None)
            'Not Specified'
        """
        age = cls.calculate_age(birth_date, reference_date)

        if age is None:
            return cls.NOT_SPECIFIED

        # Find matching age group
        for min_age, max_age, group_name in cls.AGE_GROUPS:
            if min_age <= age <= max_age:
                return group_name

        # Age is outside every defined range (e.g. below 18)
        return cls.NOT_SPECIFIED

    @classmethod
    def categorize_gender(cls, gender: Optional[str]) -> str:
        """
        Categorize a borrower by gender.

        Args:
            gender: Gender code ('M', 'F', 'O', None, or empty string)

        Returns:
            Gender category: 'Male', 'Female', 'Other', or 'Not Specified'.
            Any code not present in GENDER_CATEGORIES maps to 'Not Specified'.

        Examples:
            >>> categorize_gender('M')
            'Male'
            >>> categorize_gender(None)
            'Not Specified'
            >>> categorize_gender('')
            'Not Specified'
        """
        return cls.GENDER_CATEGORIES.get(gender, cls.NOT_SPECIFIED)

    @staticmethod
    def _to_date(value):
        """Return the calendar date of a datetime; pass any other value through."""
        # datetime is a subclass of date, so it has to be narrowed explicitly.
        # NOTE(review): an aware datetime is reduced with .date() as stored,
        # without converting to the local timezone first - confirm that this
        # matches the business-day definition used for due dates.
        if isinstance(value, datetime):
            return value.date()
        return value

    @staticmethod
    def classify_payment_pattern(loan) -> str:
        """
        Classify a loan's payment pattern as on-time, late or pending.

        A loan is considered on-time if any payment was made on or before the
        due date. If payments exist but all of them fall after the due date,
        it is late. With no payments at all, the loan is late once the due
        date has passed and pending otherwise.

        Args:
            loan: Loan object exposing ``due_date`` (date or datetime) and a
                ``repayments`` manager whose items expose ``payment_date``

        Returns:
            'on-time', 'late', or 'pending'. 'pending' is also returned when
            the loan has no due date, since there is nothing to be late against.

        Examples:
            >>> classify_payment_pattern(loan_with_early_payment)
            'on-time'
            >>> classify_payment_pattern(loan_with_late_payment)
            'late'
        """
        due_date = DemographicAnalysisService._to_date(loan.due_date)
        if due_date is None:
            # Without a due date the comparison below would raise TypeError
            return 'pending'

        # Materialize once: avoids a separate exists() query followed by a
        # second query for the iteration.
        repayments = list(loan.repayments.all())

        if not repayments:
            # No payments made - consider as late if past due date
            if due_date < date.today():
                return 'late'
            # If not yet due, we can't classify
            return 'pending'

        # Check if any payment was made on or before due date
        to_date = DemographicAnalysisService._to_date
        if any(to_date(repayment.payment_date) <= due_date for repayment in repayments):
            return 'on-time'

        # All payments were after due date
        return 'late'

    @staticmethod
    def _empty_stats() -> Dict:
        """Return a fresh, zeroed statistics record for one demographic group."""
        return {
            'total_applications': 0,
            'approved_applications': 0,
            'approval_rate': Decimal('0.00'),
            'average_loan_amount': Decimal('0.00'),
            'on_time_payments': 0,
            'late_payments': 0,
            'on_time_rate': Decimal('0.00'),
        }

    @classmethod
    def _aggregate(cls, loans: Iterable, group_names: Iterable[str],
                   group_of: Callable[[object], str]) -> Dict[str, Dict]:
        """
        Accumulate per-group statistics shared by the analyze_by_* methods.

        Args:
            loans: Iterable of loan objects exposing ``status``,
                ``principal_amount`` and whatever classify_payment_pattern reads
            group_names: Every group label that may appear, in output order
            group_of: Callable mapping a loan to one of ``group_names``

        Returns:
            Dictionary mapping each group name to its statistics record
        """
        results = {name: cls._empty_stats() for name in group_names}
        # Principal totals are kept outside the result records so that no
        # temporary key ever has to be removed from the returned dictionaries.
        amount_sums = {name: Decimal('0.00') for name in results}

        for loan in loans:
            group_name = group_of(loan)
            stats = results[group_name]

            # Count application
            stats['total_applications'] += 1

            # Only approved (disbursed) loans contribute amounts and payments
            if loan.status not in cls.APPROVED_STATUSES:
                continue

            stats['approved_applications'] += 1
            amount_sums[group_name] += loan.principal_amount

            # 'pending' loans are deliberately counted in neither bucket
            payment_pattern = cls.classify_payment_pattern(loan)
            if payment_pattern == 'on-time':
                stats['on_time_payments'] += 1
            elif payment_pattern == 'late':
                stats['late_payments'] += 1

        # Calculate rates and averages
        hundred = Decimal('100')
        for group_name, stats in results.items():
            total = stats['total_applications']
            approved = stats['approved_applications']

            if total > 0:
                stats['approval_rate'] = round(Decimal(approved) / Decimal(total) * hundred, 2)

            if approved > 0:
                stats['average_loan_amount'] = round(
                    amount_sums[group_name] / Decimal(approved), 2
                )

            # Payments are only counted for approved loans, so a positive
            # count here already implies approved > 0.
            total_payments = stats['on_time_payments'] + stats['late_payments']
            if total_payments > 0:
                stats['on_time_rate'] = round(
                    Decimal(stats['on_time_payments']) / Decimal(total_payments) * hundred, 2
                )

        return results

    @classmethod
    def analyze_by_age_group(cls, loans: QuerySet, reference_date: Optional[date] = None) -> Dict[str, Dict]:
        """
        Analyze loans by age group.

        Args:
            loans: QuerySet (or any iterable) of Loan objects
            reference_date: The date to calculate ages as of (defaults to today)

        Returns:
            Dictionary mapping every age group, plus 'Not Specified', to statistics:
            {
                '18-25': {
                    'total_applications': 10,
                    'approved_applications': 8,
                    'approval_rate': Decimal('80.00'),
                    'average_loan_amount': Decimal('50000.00'),
                    'on_time_payments': 6,
                    'late_payments': 2,
                    'on_time_rate': Decimal('75.00')
                },
                ...
            }
            ``average_loan_amount`` is the mean principal of approved loans;
            ``on_time_rate`` is computed over loans classified on-time or late.
        """
        group_names = [group_name for _, _, group_name in cls.AGE_GROUPS]
        group_names.append(cls.NOT_SPECIFIED)

        def age_group_of(loan) -> str:
            # Borrowers without a date_of_birth attribute fall into 'Not Specified'
            birth_date = getattr(loan.borrower, 'date_of_birth', None)
            return cls.categorize_age_group(birth_date, reference_date)

        return cls._aggregate(loans, group_names, age_group_of)

    @classmethod
    def analyze_by_gender(cls, loans: QuerySet) -> Dict[str, Dict]:
        """
        Analyze loans by gender.

        Args:
            loans: QuerySet (or any iterable) of Loan objects

        Returns:
            Dictionary mapping gender categories to statistics:
            {
                'Male': {
                    'total_applications': 50,
                    'approved_applications': 40,
                    'approval_rate': Decimal('80.00'),
                    'average_loan_amount': Decimal('75000.00'),
                    'on_time_payments': 30,
                    'late_payments': 10,
                    'on_time_rate': Decimal('75.00')
                },
                ...
            }
            ``average_loan_amount`` is the mean principal of approved loans;
            ``on_time_rate`` is computed over loans classified on-time or late.
        """
        # Distinct category labels in declaration order:
        # 'Male', 'Female', 'Other', 'Not Specified'
        group_names = list(dict.fromkeys(
            [*cls.GENDER_CATEGORIES.values(), cls.NOT_SPECIFIED]
        ))

        def gender_of(loan) -> str:
            # Borrowers without a gender attribute fall into 'Not Specified'
            return cls.categorize_gender(getattr(loan.borrower, 'gender', None))

        return cls._aggregate(loans, group_names, gender_of)
