"""
Client Growth Analytics Service

This service provides comprehensive client growth and analytics functionality
including acquisition trends, demographic analysis, lifecycle analytics, and
loan officer performance tracking.
"""

from django.db import models
from django.db.models import Count, Sum, Avg, Q, F, Case, When, Value, IntegerField
from django.utils import timezone
from datetime import datetime, timedelta, date
from decimal import Decimal
from typing import Dict, List, Optional, Any, Tuple
import calendar
from collections import defaultdict

from users.models import CustomUser, Branch
from loans.models import Loan, LoanApplication
from utils.models import Notification


class ClientGrowthAnalytics:
    """
    Service class for client growth and analytics
    Provides methods for tracking client acquisition, demographics, lifecycle, and officer performance
    """
    
    def __init__(self):
        self.current_date = timezone.now().date()
    
    def get_acquisition_trends(self, branch_id: Optional[str] = None, period: str = 'monthly', 
                             months: int = 12) -> Dict[str, Any]:
        """
        Get client acquisition trends over time
        
        Args:
            branch_id: Optional branch ID to filter by
            period: 'daily', 'weekly', 'monthly', or 'yearly'
            months: Number of months to look back
            
        Returns:
            Dictionary containing acquisition trend data
        """
        end_date = self.current_date
        start_date = end_date - timedelta(days=months * 30)
        
        # Base queryset for clients
        clients_query = CustomUser.objects.filter(
            role='borrower',
            created_at__date__gte=start_date,
            created_at__date__lte=end_date
        )
        
        if branch_id:
            clients_query = clients_query.filter(branch_id=branch_id)
        
        # Group by period
        if period == 'daily':
            date_format = '%Y-%m-%d'
            date_field = 'created_at__date'
        elif period == 'weekly':
            date_format = '%Y-W%U'
            date_field = 'created_at__week'
        elif period == 'monthly':
            date_format = '%Y-%m'
            date_field = 'created_at__month'
        else:  # yearly
            date_format = '%Y'
            date_field = 'created_at__year'
        
        # Get acquisition data
        acquisition_data = clients_query.extra(
            select={'period': f"DATE_FORMAT(created_at, '{date_format}')"}
        ).values('period').annotate(
            new_clients=Count('id'),
            approved_clients=Count('id', filter=Q(status='active')),
            pending_clients=Count('id', filter=Q(status='pending_approval')),
            rejected_clients=Count('id', filter=Q(status__in=['inactive', 'suspended']))
        ).order_by('period')
        
        # Calculate conversion rates
        trend_data = []
        total_new = 0
        total_approved = 0
        
        for data in acquisition_data:
            total_new += data['new_clients']
            total_approved += data['approved_clients']
            
            conversion_rate = (data['approved_clients'] / data['new_clients'] * 100) if data['new_clients'] > 0 else 0
            
            trend_data.append({
                'period': data['period'],
                'new_clients': data['new_clients'],
                'approved_clients': data['approved_clients'],
                'pending_clients': data['pending_clients'],
                'rejected_clients': data['rejected_clients'],
                'conversion_rate': round(conversion_rate, 2),
                'cumulative_new': total_new,
                'cumulative_approved': total_approved
            })
        
        # Calculate growth rates
        for i in range(1, len(trend_data)):
            prev_new = trend_data[i-1]['new_clients']
            current_new = trend_data[i]['new_clients']
            
            if prev_new > 0:
                growth_rate = ((current_new - prev_new) / prev_new) * 100
                trend_data[i]['growth_rate'] = round(growth_rate, 2)
            else:
                trend_data[i]['growth_rate'] = 0
        
        # Set first period growth rate to 0
        if trend_data:
            trend_data[0]['growth_rate'] = 0
        
        # Calculate summary statistics
        total_clients = clients_query.count()
        avg_monthly_acquisition = total_clients / months if months > 0 else 0
        overall_conversion_rate = (total_approved / total_new * 100) if total_new > 0 else 0
        
        return {
            'trend_data': trend_data,
            'summary': {
                'total_clients': total_clients,
                'total_approved': total_approved,
                'total_pending': clients_query.filter(status='pending_approval').count(),
                'total_rejected': clients_query.filter(status__in=['inactive', 'suspended']).count(),
                'avg_monthly_acquisition': round(avg_monthly_acquisition, 2),
                'overall_conversion_rate': round(overall_conversion_rate, 2),
                'period': period,
                'months_analyzed': months,
                'date_range': {
                    'start': start_date.isoformat(),
                    'end': end_date.isoformat()
                }
            }
        }
    
    def get_demographic_analysis(self, filters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Get client demographic breakdown and analysis
        
        Args:
            filters: Optional filters (branch_id, date_range, status, etc.)
            
        Returns:
            Dictionary containing demographic analysis data
        """
        # Base queryset
        clients_query = CustomUser.objects.filter(role='borrower')
        
        # Apply filters
        if filters:
            if 'branch_id' in filters and filters['branch_id']:
                clients_query = clients_query.filter(branch_id=filters['branch_id'])
            
            if 'status' in filters and filters['status']:
                clients_query = clients_query.filter(status=filters['status'])
            
            if 'date_range' in filters and filters['date_range']:
                start_date = filters['date_range'].get('start')
                end_date = filters['date_range'].get('end')
                if start_date:
                    clients_query = clients_query.filter(created_at__date__gte=start_date)
                if end_date:
                    clients_query = clients_query.filter(created_at__date__lte=end_date)
        
        total_clients = clients_query.count()
        
        # Gender distribution
        gender_data = clients_query.values('gender').annotate(
            count=Count('id')
        ).order_by('gender')
        
        gender_distribution = {}
        for item in gender_data:
            gender_key = item['gender'] or 'Unknown'
            gender_distribution[gender_key] = {
                'count': item['count'],
                'percentage': round((item['count'] / total_clients * 100), 2) if total_clients > 0 else 0
            }
        
        # Age distribution
        current_year = timezone.now().year
        age_ranges = [
            ('18-25', 18, 25),
            ('26-35', 26, 35),
            ('36-45', 36, 45),
            ('46-55', 46, 55),
            ('56-65', 56, 65),
            ('65+', 66, 100)
        ]
        
        age_distribution = {}
        for age_range, min_age, max_age in age_ranges:
            min_birth_year = current_year - max_age
            max_birth_year = current_year - min_age
            
            count = clients_query.filter(
                date_of_birth__year__gte=min_birth_year,
                date_of_birth__year__lte=max_birth_year
            ).count()
            
            age_distribution[age_range] = {
                'count': count,
                'percentage': round((count / total_clients * 100), 2) if total_clients > 0 else 0
            }
        
        # Marital status distribution
        marital_data = clients_query.values('marital_status').annotate(
            count=Count('id')
        ).order_by('marital_status')
        
        marital_distribution = {}
        for item in marital_data:
            marital_key = item['marital_status'] or 'Unknown'
            marital_distribution[marital_key] = {
                'count': item['count'],
                'percentage': round((item['count'] / total_clients * 100), 2) if total_clients > 0 else 0
            }
        
        # Geographic distribution (by county)
        geographic_data = clients_query.values('county').annotate(
            count=Count('id')
        ).order_by('-count')[:10]  # Top 10 counties
        
        geographic_distribution = {}
        for item in geographic_data:
            county_key = item['county'] or 'Unknown'
            geographic_distribution[county_key] = {
                'count': item['count'],
                'percentage': round((item['count'] / total_clients * 100), 2) if total_clients > 0 else 0
            }
        
        # Business type distribution
        business_data = clients_query.values('business_type').annotate(
            count=Count('id')
        ).order_by('-count')[:10]  # Top 10 business types
        
        business_distribution = {}
        for item in business_data:
            business_key = item['business_type'] or 'Unknown'
            business_distribution[business_key] = {
                'count': item['count'],
                'percentage': round((item['count'] / total_clients * 100), 2) if total_clients > 0 else 0
            }
        
        # Income distribution
        income_ranges = [
            ('0-10K', 0, 10000),
            ('10K-25K', 10000, 25000),
            ('25K-50K', 25000, 50000),
            ('50K-100K', 50000, 100000),
            ('100K-250K', 100000, 250000),
            ('250K+', 250000, float('inf'))
        ]
        
        income_distribution = {}
        for income_range, min_income, max_income in income_ranges:
            if max_income == float('inf'):
                count = clients_query.filter(monthly_income__gte=min_income).count()
            else:
                count = clients_query.filter(
                    monthly_income__gte=min_income,
                    monthly_income__lt=max_income
                ).count()
            
            income_distribution[income_range] = {
                'count': count,
                'percentage': round((count / total_clients * 100), 2) if total_clients > 0 else 0
            }
        
        return {
            'total_clients': total_clients,
            'gender_distribution': gender_distribution,
            'age_distribution': age_distribution,
            'marital_distribution': marital_distribution,
            'geographic_distribution': geographic_distribution,
            'business_distribution': business_distribution,
            'income_distribution': income_distribution,
            'analysis_date': timezone.now().isoformat(),
            'filters_applied': filters or {}
        }
    
    def get_lifecycle_analytics(self, client_segment: Optional[str] = None) -> Dict[str, Any]:
        """
        Get client lifecycle analytics and journey analysis
        
        Args:
            client_segment: Optional segment filter ('new', 'active', 'dormant', 'churned')
            
        Returns:
            Dictionary containing lifecycle analytics data
        """
        # Define lifecycle stages
        current_date = timezone.now().date()
        
        # Base queryset
        clients_query = CustomUser.objects.filter(role='borrower')
        
        # Apply segment filter
        if client_segment:
            if client_segment == 'new':
                # Clients registered in last 30 days
                clients_query = clients_query.filter(
                    created_at__date__gte=current_date - timedelta(days=30)
                )
            elif client_segment == 'active':
                # Clients with active loans or recent activity
                clients_query = clients_query.filter(
                    Q(loans__status='active') | 
                    Q(loan_applications__created_at__gte=current_date - timedelta(days=90))
                ).distinct()
            elif client_segment == 'dormant':
                # Clients with no activity in last 90 days
                active_client_ids = CustomUser.objects.filter(
                    Q(loans__status='active') | 
                    Q(loan_applications__created_at__gte=current_date - timedelta(days=90))
                ).values_list('id', flat=True)
                clients_query = clients_query.exclude(id__in=active_client_ids)
            elif client_segment == 'churned':
                # Clients with rejected status or no activity in 180+ days
                clients_query = clients_query.filter(
                    Q(status__in=['inactive', 'suspended']) |
                    Q(created_at__date__lt=current_date - timedelta(days=180))
                )
        
        total_clients = clients_query.count()
        
        # Lifecycle stage analysis
        lifecycle_stages = {
            'registration': clients_query.count(),
            'kyc_completed': clients_query.filter(is_verified=True).count(),
            'first_application': clients_query.filter(loan_applications__isnull=False).distinct().count(),
            'first_approval': clients_query.filter(loans__isnull=False).distinct().count(),
            'active_borrower': clients_query.filter(loans__status='active').distinct().count(),
            'repeat_borrower': clients_query.annotate(
                loan_count=Count('loans')
            ).filter(loan_count__gt=1).count(),
            'defaulted': clients_query.filter(loans__status='defaulted').distinct().count()
        }
        
        # Calculate conversion rates between stages
        conversion_rates = {}
        stages = list(lifecycle_stages.keys())
        for i in range(1, len(stages)):
            prev_stage = stages[i-1]
            current_stage = stages[i]
            
            if lifecycle_stages[prev_stage] > 0:
                rate = (lifecycle_stages[current_stage] / lifecycle_stages[prev_stage]) * 100
                conversion_rates[f"{prev_stage}_to_{current_stage}"] = round(rate, 2)
            else:
                conversion_rates[f"{prev_stage}_to_{current_stage}"] = 0
        
        # Time to conversion analysis
        time_to_first_loan = clients_query.filter(
            loans__isnull=False
        ).annotate(
            days_to_loan=F('loans__created_at') - F('created_at')
        ).aggregate(
            avg_days=Avg('days_to_loan'),
            min_days=models.Min('days_to_loan'),
            max_days=models.Max('days_to_loan')
        )
        
        # Client value analysis
        client_value_data = clients_query.annotate(
            total_borrowed=Sum('loans__principal_amount'),
            total_paid=Sum('loans__amount_paid'),
            loan_count=Count('loans'),
            avg_loan_size=Avg('loans__principal_amount')
        ).aggregate(
            avg_total_borrowed=Avg('total_borrowed'),
            avg_total_paid=Avg('total_paid'),
            avg_loan_count=Avg('loan_count'),
            avg_loan_size=Avg('avg_loan_size')
        )
        
        # Retention analysis
        retention_periods = [30, 60, 90, 180, 365]
        retention_data = {}
        
        for period in retention_periods:
            cutoff_date = current_date - timedelta(days=period)
            
            # Clients who registered before the cutoff
            eligible_clients = clients_query.filter(created_at__date__lte=cutoff_date)
            eligible_count = eligible_clients.count()
            
            if eligible_count > 0:
                # Clients who had activity after the cutoff
                retained_clients = eligible_clients.filter(
                    Q(loans__created_at__date__gt=cutoff_date) |
                    Q(loan_applications__created_at__date__gt=cutoff_date)
                ).distinct().count()
                
                retention_rate = (retained_clients / eligible_count) * 100
                retention_data[f"{period}_days"] = {
                    'eligible_clients': eligible_count,
                    'retained_clients': retained_clients,
                    'retention_rate': round(retention_rate, 2)
                }
            else:
                retention_data[f"{period}_days"] = {
                    'eligible_clients': 0,
                    'retained_clients': 0,
                    'retention_rate': 0
                }
        
        return {
            'total_clients': total_clients,
            'client_segment': client_segment,
            'lifecycle_stages': lifecycle_stages,
            'conversion_rates': conversion_rates,
            'time_to_conversion': {
                'avg_days_to_first_loan': time_to_first_loan['avg_days'].days if time_to_first_loan['avg_days'] else 0,
                'min_days_to_first_loan': time_to_first_loan['min_days'].days if time_to_first_loan['min_days'] else 0,
                'max_days_to_first_loan': time_to_first_loan['max_days'].days if time_to_first_loan['max_days'] else 0
            },
            'client_value': {
                'avg_total_borrowed': float(client_value_data['avg_total_borrowed'] or 0),
                'avg_total_paid': float(client_value_data['avg_total_paid'] or 0),
                'avg_loan_count': float(client_value_data['avg_loan_count'] or 0),
                'avg_loan_size': float(client_value_data['avg_loan_size'] or 0)
            },
            'retention_analysis': retention_data,
            'analysis_date': timezone.now().isoformat()
        }
    
    def get_officer_performance(self, officer_id: Optional[str] = None, period: str = 'monthly',
                              months: int = 6) -> Dict[str, Any]:
        """
        Get loan officer performance metrics and tracking
        
        Args:
            officer_id: Optional specific officer ID, if None returns all officers
            period: 'monthly', 'quarterly', or 'yearly'
            months: Number of months to analyze
            
        Returns:
            Dictionary containing officer performance data
        """
        end_date = timezone.now().date()
        start_date = end_date - timedelta(days=months * 30)
        
        # Base queryset for loan officers
        officers_query = CustomUser.objects.filter(
            role__in=['loan_officer', 'team_leader']
        )
        
        if officer_id:
            officers_query = officers_query.filter(id=officer_id)
        
        performance_data = []
        
        for officer in officers_query:
            # Get officer's portfolio clients
            portfolio_clients = officer.portfolio_clients.filter(
                role='borrower',
                assigned_date__date__gte=start_date if officer.portfolio_clients.filter(assigned_date__isnull=False).exists() else start_date
            )
            
            client_ids = list(portfolio_clients.values_list('id', flat=True))
            
            # Loan statistics
            loans = Loan.objects.filter(
                borrower_id__in=client_ids,
                created_at__date__gte=start_date,
                created_at__date__lte=end_date
            )
            
            applications = LoanApplication.objects.filter(
                borrower_id__in=client_ids,
                created_at__date__gte=start_date,
                created_at__date__lte=end_date
            )
            
            # Calculate performance metrics
            total_clients = portfolio_clients.count()
            total_loans = loans.count()
            active_loans = loans.filter(status='active').count()
            completed_loans = loans.filter(status='paid').count()
            defaulted_loans = loans.filter(status='defaulted').count()
            
            total_disbursed = loans.aggregate(total=Sum('principal_amount'))['total'] or Decimal('0')
            total_collected = loans.aggregate(total=Sum('amount_paid'))['total'] or Decimal('0')
            # Calculate outstanding using outstanding_amount property
            active_loan_objects = loans.filter(status='active')
            total_outstanding = sum(loan.outstanding_amount for loan in active_loan_objects)
            
            # Application metrics
            total_applications = applications.count()
            approved_applications = applications.filter(status='approved').count()
            rejected_applications = applications.filter(status='rejected').count()
            pending_applications = applications.filter(status='pending').count()
            
            # Calculate rates
            collection_rate = (total_collected / total_disbursed * 100) if total_disbursed > 0 else Decimal('0')
            default_rate = (Decimal(str(defaulted_loans)) / Decimal(str(total_loans)) * 100) if total_loans > 0 else Decimal('0')
            approval_rate = (Decimal(str(approved_applications)) / Decimal(str(total_applications)) * 100) if total_applications > 0 else Decimal('0')
            
            # Client acquisition in period
            new_clients = portfolio_clients.filter(
                assigned_date__date__gte=start_date,
                assigned_date__date__lte=end_date
            ).count()
            
            # Average loan size
            avg_loan_size = loans.aggregate(avg=Avg('principal_amount'))['avg'] or Decimal('0')
            
            # Portfolio at risk (PAR) - loans overdue by 30+ days
            overdue_loans = loans.filter(
                status='active',
                due_date__lt=end_date - timedelta(days=30)
            )
            par_amount = overdue_loans.aggregate(
                total=Sum(F('total_amount') - F('amount_paid'))
            )['total'] or Decimal('0')
            par_ratio = (par_amount / total_outstanding * 100) if total_outstanding > 0 else Decimal('0')
            
            # Monthly trend data
            monthly_trends = []
            for i in range(months):
                month_start = end_date.replace(day=1) - timedelta(days=i*30)
                month_end = (month_start + timedelta(days=32)).replace(day=1) - timedelta(days=1)
                
                month_loans = loans.filter(
                    created_at__date__gte=month_start,
                    created_at__date__lte=month_end
                )
                
                month_disbursed = month_loans.aggregate(total=Sum('principal_amount'))['total'] or Decimal('0')
                month_collected = month_loans.aggregate(total=Sum('amount_paid'))['total'] or Decimal('0')
                
                monthly_trends.append({
                    'month': month_start.strftime('%Y-%m'),
                    'loans_disbursed': month_loans.count(),
                    'amount_disbursed': float(month_disbursed),
                    'amount_collected': float(month_collected),
                    'new_clients': portfolio_clients.filter(
                        assigned_date__date__gte=month_start,
                        assigned_date__date__lte=month_end
                    ).count()
                })
            
            monthly_trends.reverse()  # Show oldest to newest
            
            officer_performance = {
                'officer_id': str(officer.id),
                'officer_name': officer.get_full_name(),
                'officer_role': officer.role,
                'branch': officer.branch.name if officer.branch else 'No Branch',
                'portfolio_metrics': {
                    'total_clients': total_clients,
                    'new_clients_period': new_clients,
                    'total_loans': total_loans,
                    'active_loans': active_loans,
                    'completed_loans': completed_loans,
                    'defaulted_loans': defaulted_loans,
                    'total_applications': total_applications,
                    'approved_applications': approved_applications,
                    'rejected_applications': rejected_applications,
                    'pending_applications': pending_applications
                },
                'financial_metrics': {
                    'total_disbursed': float(total_disbursed),
                    'total_collected': float(total_collected),
                    'total_outstanding': float(total_outstanding),
                    'avg_loan_size': float(avg_loan_size),
                    'par_amount': float(par_amount)
                },
                'performance_ratios': {
                    'collection_rate': float(collection_rate),
                    'default_rate': float(default_rate),
                    'approval_rate': float(approval_rate),
                    'par_ratio': float(par_ratio)
                },
                'monthly_trends': monthly_trends,
                'period_analyzed': {
                    'start_date': start_date.isoformat(),
                    'end_date': end_date.isoformat(),
                    'months': months
                }
            }
            
            performance_data.append(officer_performance)
        
        # Calculate comparative metrics if multiple officers
        if len(performance_data) > 1:
            # Calculate averages and rankings
            metrics_for_comparison = [
                'collection_rate', 'default_rate', 'approval_rate', 'par_ratio'
            ]
            
            for metric in metrics_for_comparison:
                values = [officer['performance_ratios'][metric] for officer in performance_data]
                avg_value = sum(values) / len(values) if values else 0
                
                for officer in performance_data:
                    officer['performance_ratios'][f'{metric}_vs_avg'] = officer['performance_ratios'][metric] - avg_value
            
            # Rank officers by key metrics
            performance_data.sort(key=lambda x: x['performance_ratios']['collection_rate'], reverse=True)
            for i, officer in enumerate(performance_data):
                officer['rankings'] = {
                    'collection_rate_rank': i + 1,
                    'total_officers': len(performance_data)
                }
        
        return {
            'officers_analyzed': len(performance_data),
            'period': period,
            'months_analyzed': months,
            'performance_data': performance_data,
            'analysis_date': timezone.now().isoformat(),
            'summary': {
                'total_clients': sum(officer['portfolio_metrics']['total_clients'] for officer in performance_data),
                'total_loans': sum(officer['portfolio_metrics']['total_loans'] for officer in performance_data),
                'total_disbursed': sum(officer['financial_metrics']['total_disbursed'] for officer in performance_data),
                'total_collected': sum(officer['financial_metrics']['total_collected'] for officer in performance_data),
                'avg_collection_rate': sum(officer['performance_ratios']['collection_rate'] for officer in performance_data) / len(performance_data) if performance_data else 0,
                'avg_default_rate': sum(officer['performance_ratios']['default_rate'] for officer in performance_data) / len(performance_data) if performance_data else 0
            }
        }