"""Tests for the PDF Scanner module."""

import tempfile
from pathlib import Path

import pytest
from hypothesis import given, settings, strategies as st

from statement_processor.extraction.pdf_scanner import PDFScanner


# **Feature: credit-card-statement-processor, Property 1: PDF Scanner Returns Only PDF Files**
# **Validates: Requirements 1.1**
class TestPDFScannerProperty:
    """Property-based tests for PDFScanner."""

    @given(
        pdf_count=st.integers(min_value=0, max_value=10),
        other_count=st.integers(min_value=0, max_value=10),
        other_extensions=st.lists(
            st.sampled_from([".txt", ".doc", ".xlsx", ".jpg", ".png", ".csv", ".xml"]),
            min_size=0,
            max_size=10,
        ),
    )
    # Every example performs real filesystem I/O (up to 20 file creations plus
    # a directory scan), so its wall-clock time depends on the host. Disable
    # the per-example deadline to avoid spurious DeadlineExceeded failures on
    # slow or heavily loaded machines.
    @settings(max_examples=100, deadline=None)
    def test_scanner_returns_only_pdf_files(
        self, pdf_count: int, other_count: int, other_extensions: list
    ) -> None:
        """Property 1: PDF Scanner Returns Only PDF Files.

        For any directory containing a mix of file types, the PDF scanner SHALL
        return only files with .pdf extension, and the count of returned files
        SHALL equal the count of actual PDF files in the directory.

        Args:
            pdf_count: Number of ``file_<i>.pdf`` files to create.
            other_count: Upper bound on the number of non-PDF files to create.
            other_extensions: Extensions for the non-PDF files; only the first
                ``other_count`` entries are used, so the number of non-PDF
                files is ``min(other_count, len(other_extensions))``.
        """
        with tempfile.TemporaryDirectory() as tmpdir:
            tmp_path = Path(tmpdir)

            # Create PDF files, remembering their names for the final check.
            pdf_names = [f"file_{i}.pdf" for i in range(pdf_count)]
            for name in pdf_names:
                (tmp_path / name).touch()

            # Create non-PDF files with various extensions
            for i, ext in enumerate(other_extensions[:other_count]):
                (tmp_path / f"other_{i}{ext}").touch()

            # Scan the directory
            scanner = PDFScanner(str(tmp_path))
            result = scanner.scan()

            # Property: All returned files must have .pdf extension
            for pdf_path in result:
                assert pdf_path.suffix.lower() == ".pdf", (
                    f"Scanner returned non-PDF file: {pdf_path}"
                )

            # Property: Count must equal actual PDF count
            assert len(result) == pdf_count, (
                f"Expected {pdf_count} PDFs, got {len(result)}"
            )

            # The suffix and count checks alone would still pass if the scanner
            # returned the same PDF twice while missing another one, so also
            # compare the returned names against exactly what was created.
            found_names = sorted(pdf_path.name for pdf_path in result)
            assert found_names == sorted(pdf_names), (
                f"Expected PDFs {sorted(pdf_names)}, got {found_names}"
            )


class TestPDFScannerEdgeCases:
    """Example-based tests covering boundary conditions of PDFScanner."""

    def test_empty_directory(self):
        """A directory with no entries yields an empty scan result."""
        with tempfile.TemporaryDirectory() as empty_dir:
            found = PDFScanner(empty_dir).scan()
            assert found == []

    def test_non_existent_directory(self):
        """Scanning a path that does not exist yields an empty result."""
        missing = "/non/existent/path/that/does/not/exist"
        found = PDFScanner(missing).scan()
        assert found == []

    def test_nested_directories_not_scanned(self):
        """PDFs inside sub-directories are ignored (scan is non-recursive)."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)

            # One PDF directly in the scanned directory ...
            root.joinpath("root.pdf").touch()

            # ... and one a level below, which must not be reported.
            subdir = root.joinpath("nested")
            subdir.mkdir()
            subdir.joinpath("nested.pdf").touch()

            found = PDFScanner(str(root)).scan()

            # Only the top-level file is expected back.
            assert len(found) == 1
            assert found[0].name == "root.pdf"

    def test_validate_directory_exists(self):
        """validate_directory reports True when the directory is present."""
        with tempfile.TemporaryDirectory() as existing_dir:
            outcome = PDFScanner(existing_dir).validate_directory()
            assert outcome is True

    def test_validate_directory_not_exists(self):
        """validate_directory reports False when the directory is absent."""
        outcome = PDFScanner("/non/existent/path").validate_directory()
        assert outcome is False

    def test_case_insensitive_pdf_extension(self):
        """Files ending in .pdf, .PDF and .Pdf are all picked up."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)

            # Same extension spelled in lower, upper and mixed case.
            for filename in ("lower.pdf", "upper.PDF", "mixed.Pdf"):
                root.joinpath(filename).touch()

            found = PDFScanner(str(root)).scan()

            assert len(found) == 3
