"""
Input sanitization utilities for loan management system
Sanitizes user inputs to prevent SQL injection, XSS, and other attacks
Requirements: 10.9, 10.10
"""
import html
import logging
import re
import unicodedata

from django.utils.html import escape, strip_tags


class InputSanitizer:
    """
    Sanitizer for user inputs to prevent security vulnerabilities
    Requirements: 10.9, 10.10

    All methods are static. Free-text sanitizers follow the same order:
    normalise the raw text, strip/reject blacklisted patterns, and only
    then HTML-escape. Escaping last matters: the entities produced by
    ``html.escape`` contain ``;``, which the SQL blacklist would otherwise
    strip, leaving broken entities such as ``&amp`` in the output.
    """

    # Patterns for detecting potential SQL injection attempts.
    # NOTE: this is a coarse blacklist; it also matches ordinary prose
    # (e.g. the words "update" or "select"), which is then removed.
    SQL_INJECTION_PATTERNS = [
        r"(\b(SELECT|INSERT|UPDATE|DELETE|DROP|CREATE|ALTER|EXEC|EXECUTE|UNION|DECLARE)\b)",
        r"(--|;|\/\*|\*\/|xp_|sp_)",
        r"(\bOR\b.*=.*|'\s*OR\s*')",
        r"(\bAND\b.*=.*|'\s*AND\s*')",
    ]

    # Patterns for detecting XSS attempts
    XSS_PATTERNS = [
        r"<script[^>]*>.*?</script>",
        r"javascript:",
        r"on\w+\s*=",  # Event handlers like onclick=
        r"<iframe[^>]*>",
        r"<object[^>]*>",
        r"<embed[^>]*>",
    ]

    # Shared logger for reporting suspicious input.
    _logger = logging.getLogger(__name__)

    @staticmethod
    def _strip_patterns(value, patterns, log_message):
        """Remove every match of each pattern from value, logging each hit.

        ``log_message`` is a %-style format with one placeholder; it receives
        the first 100 characters of the value as it was before the removal.
        """
        for pattern in patterns:
            cleaned, hits = re.subn(pattern, '', value, flags=re.IGNORECASE)
            if hits:
                InputSanitizer._logger.warning(log_message, value[:100])
                value = cleaned
        return value

    @staticmethod
    def _matches_any(value, patterns):
        """Return True if any of the patterns occurs in value (case-insensitive)."""
        return any(re.search(pattern, value, re.IGNORECASE) for pattern in patterns)

    @staticmethod
    def _truncate_escaped(value, limit):
        """Cut HTML-escaped text to ``limit`` characters without splitting an entity.

        If the cut lands inside an entity such as ``&amp;``, the dangling
        fragment is dropped, so the result may be slightly shorter than limit.
        """
        if len(value) <= limit:
            return value
        # In escaped text every '&' starts an entity, so a trailing '&...'
        # without its ';' can only be a fragment produced by the cut.
        return re.sub(r'&#?\w{0,4}\Z', '', value[:limit])

    @staticmethod
    def _is_name_char(ch):
        """Return True for characters allowed in a person's name.

        Allowed: letters and combining marks of any script (Unicode
        categories L* and M*), whitespace, hyphen, apostrophe and period.
        """
        return ch in "-'." or ch.isspace() or unicodedata.category(ch)[0] in 'LM'

    @staticmethod
    def sanitize_string(value, max_length=None, allow_special_chars=True):
        """
        Sanitize a string input

        Blacklisted SQL/XSS patterns are removed from the raw text (and
        logged), then the result is HTML-escaped.

        Args:
            value: String to sanitize (any object is converted with str())
            max_length: Maximum allowed length of the returned (escaped)
                string; None or 0 means no limit
            allow_special_chars: Whether to allow special characters; when
                False only alphanumerics, whitespace and ``-_.,@`` are kept

        Returns:
            str: Sanitized string ('' when value is None)
        """
        if value is None:
            return ''

        # Convert to string and drop null bytes
        value = str(value).strip().replace('\x00', '')

        # Remove SQL injection and XSS patterns from the RAW text.
        # Requirement: 10.9
        value = InputSanitizer._strip_patterns(
            value,
            InputSanitizer.SQL_INJECTION_PATTERNS,
            'Potential SQL injection attempt detected: %r',
        )
        value = InputSanitizer._strip_patterns(
            value,
            InputSanitizer.XSS_PATTERNS,
            'Potential XSS attempt detected: %r',
        )

        # Limit special characters if requested (before escaping, so that
        # no entity residue such as "amp" is left behind)
        if not allow_special_chars:
            value = re.sub(r'[^a-zA-Z0-9\s\-_.,@]', '', value)

        # Escape HTML last so the generated entities stay intact.
        # Requirement: 10.9
        value = html.escape(value)

        # Enforce maximum length
        if max_length:
            value = InputSanitizer._truncate_escaped(value, max_length)

        return value

    @staticmethod
    def sanitize_name(name):
        """
        Sanitize a person's name (borrower name, etc.)
        Allows letters (of any script), spaces, hyphens, apostrophes, periods
        Requirement: 10.10

        Args:
            name: Name to sanitize

        Returns:
            str: Sanitized name, whitespace-collapsed, at most 100 characters
                 ('' for any falsy input)
        """
        if not name:
            return ''

        # NFC composes "e" + combining accent into a single letter so the
        # same name always yields the same stored form.
        name = unicodedata.normalize('NFC', str(name).strip())

        # Keep only name characters; this also drops null bytes, digits,
        # markup and other punctuation.
        # Requirement: 10.10
        name = ''.join(ch for ch in name if InputSanitizer._is_name_char(ch))

        # Collapse whitespace runs and trim edges exposed by the filtering
        name = re.sub(r'\s+', ' ', name).strip()

        # Limit length
        return name[:100].rstrip()

    @staticmethod
    def sanitize_loan_number(loan_number):
        """
        Sanitize a loan number
        Allows only alphanumeric and hyphens
        Requirement: 10.10

        Args:
            loan_number: Loan number to sanitize

        Returns:
            str: Sanitized loan number, at most 50 characters
                 ('' for any falsy input)
        """
        if not loan_number:
            return ''

        # The character filter below also removes null bytes.
        # Requirement: 10.10
        loan_number = re.sub(r'[^a-zA-Z0-9\-]', '', str(loan_number).strip())

        # Limit length
        return loan_number[:50]

    @staticmethod
    def sanitize_phone_number(phone):
        """
        Sanitize a phone number
        Allows only digits, spaces, hyphens, plus sign, and parentheses

        Args:
            phone: Phone number to sanitize

        Returns:
            str: Sanitized phone number, at most 20 characters
                 ('' for any falsy input)
        """
        if not phone:
            return ''

        # The character filter below also removes null bytes.
        phone = re.sub(r'[^0-9\s\-+()]', '', str(phone).strip())

        # Limit length
        return phone[:20]

    @staticmethod
    def sanitize_email(email):
        """
        Sanitize an email address

        Lower-cases the address and removes every character outside
        ``a-z 0-9 @ . _ - +``. The format itself is NOT validated.

        Args:
            email: Email to sanitize

        Returns:
            str: Sanitized email, at most 254 characters
                 ('' for any falsy input)
        """
        if not email:
            return ''

        # The character filter below also removes null bytes.
        email = re.sub(r'[^a-z0-9@._\-+]', '', str(email).strip().lower())

        # Limit length (RFC 5321)
        return email[:254]

    @staticmethod
    def sanitize_text_area(text):
        """
        Sanitize text area input (like reason, notes, etc.)
        Removes dangerous HTML/JS but preserves line breaks

        Args:
            text: Text to sanitize

        Returns:
            str: Sanitized, HTML-escaped text, at most 5000 characters
                 ('' for any falsy input)
        """
        if not text:
            return ''

        text = str(text).strip().replace('\x00', '')

        # Strip all HTML tags
        text = strip_tags(text)

        # Remove SQL injection patterns from the raw text
        text = InputSanitizer._strip_patterns(
            text,
            InputSanitizer.SQL_INJECTION_PATTERNS,
            'Potential SQL injection in text area: %r',
        )

        # Escape last so the generated entities keep their ';'
        text = html.escape(text)

        # Limit length
        return InputSanitizer._truncate_escaped(text, 5000)

    @staticmethod
    def sanitize_query_param(param):
        """
        Sanitize a query parameter from URL

        Unlike sanitize_string, a suspicious parameter is rejected as a
        whole instead of being cleaned. The check runs on the raw text, so
        harmless values containing quotes or ampersands are not rejected
        merely because their escaped form contains ';'.

        Args:
            param: Query parameter to sanitize

        Returns:
            str: HTML-escaped parameter, at most 200 characters; '' when the
                 input is falsy or matches an SQL injection / XSS pattern
        """
        if not param:
            return ''

        param = str(param).strip().replace('\x00', '')

        # Check for injection attempts
        if InputSanitizer._matches_any(param, InputSanitizer.SQL_INJECTION_PATTERNS):
            InputSanitizer._logger.warning(
                'Potential SQL injection in query param: %r', param[:100]
            )
            return ''  # Return empty string for suspicious params

        if InputSanitizer._matches_any(param, InputSanitizer.XSS_PATTERNS):
            InputSanitizer._logger.warning(
                'Potential XSS in query param: %r', param[:100]
            )
            return ''  # Return empty string for suspicious params

        # Escape HTML, then limit length
        return InputSanitizer._truncate_escaped(html.escape(param), 200)

    @staticmethod
    def sanitize_form_data(data, field_types=None):
        """
        Sanitize all fields in a form data dictionary

        Args:
            data: Dictionary of form data
            field_types: Dictionary mapping field names to types
                        ('name', 'loan_number', 'phone', 'email', 'text', 'string');
                        missing or unknown types are treated as 'string'

        Returns:
            dict: Sanitized form data ({} when data is empty or None)
        """
        if not data:
            return {}

        if field_types is None:
            field_types = {}

        # Field type -> sanitizer; anything else falls back to sanitize_string
        sanitizers = {
            'name': InputSanitizer.sanitize_name,
            'loan_number': InputSanitizer.sanitize_loan_number,
            'phone': InputSanitizer.sanitize_phone_number,
            'email': InputSanitizer.sanitize_email,
            'text': InputSanitizer.sanitize_text_area,
        }

        sanitized = {}
        for key, value in data.items():
            field_type = field_types.get(key, 'string')
            sanitize = sanitizers.get(field_type, InputSanitizer.sanitize_string)
            sanitized[key] = sanitize(value)

        return sanitized


class SQLSafeQueryBuilder:
    """
    Helper class for building SQL-safe queries using Django ORM
    Ensures all queries use parameterized statements
    Requirement: 10.9
    """

    # Fields that safe_order_by accepts (prevents SQL injection via ORDER BY)
    _ORDERABLE_FIELDS = (
        'id', 'created_at', 'updated_at', 'loan_number', 'borrower__first_name',
        'borrower__last_name', 'disbursement_date', 'due_date', 'principal_amount',
        'total_amount', 'amount_paid', 'status', 'application_date',
    )

    @staticmethod
    def build_filter_q(field_name, value, lookup='exact'):
        """
        Build a Q object for filtering with sanitized input

        String values are passed through InputSanitizer.sanitize_query_param;
        values of any other type are used unchanged. field_name and lookup
        are used as given.

        Args:
            field_name: Name of the field to filter
            value: Value to filter by (sanitized when it is a str)
            lookup: Django lookup type (exact, icontains, gte, etc.)

        Returns:
            Q: Django Q object for safe filtering
        """
        from django.db.models import Q

        cleaned = (
            InputSanitizer.sanitize_query_param(value)
            if isinstance(value, str)
            else value
        )

        # 'exact' is Django's implicit lookup, so no suffix is appended for it
        keyword = field_name if lookup == 'exact' else f'{field_name}__{lookup}'

        # The ORM parameterizes the value when the query is executed
        return Q(**{keyword: cleaned})

    @staticmethod
    def safe_order_by(queryset, field_name, descending=False):
        """
        Safely apply ordering to a queryset

        A field name that is not whitelisted is logged as a warning and
        replaced by 'created_at'.

        Args:
            queryset: Django queryset
            field_name: Field name to order by
            descending: Whether to order descending

        Returns:
            QuerySet: Ordered queryset
        """
        if field_name in SQLSafeQueryBuilder._ORDERABLE_FIELDS:
            target = field_name
        else:
            # Report the rejected field, then fall back to a safe default
            import logging
            logging.getLogger(__name__).warning(
                f'Attempt to order by non-whitelisted field: {field_name}'
            )
            target = 'created_at'

        return queryset.order_by(f'-{target}' if descending else target)
