import pandas as pd
import pytest
from io import StringIO
from spotpython.utils.compare import check_identical_columns_and_rows, check_identical_columns_and_rows_with_tol

def test_check_exact_identical_columns_and_rows():
    """Exercise the exact-match check on frames with and without a repeated column.

    The first frame carries columns "A" and "B" holding the same values and is
    expected to come back with only "A" and "C". The second frame has no
    repeated column and is expected to keep both "X" and "Y".
    """
    # "A" and "B" are value-for-value identical; "C" is different.
    with_repeat = pd.DataFrame({"A": [1, 2, 3], "B": [1, 2, 3], "C": [4, 5, 6]})
    # Every column here is distinct.
    all_distinct = pd.DataFrame({"X": [7, 8, 9], "Y": [10, 11, 12]})

    deduplicated = check_identical_columns_and_rows(with_repeat, "Test DataFrame 1")
    remaining = list(deduplicated.columns)
    assert remaining == ["A", "C"], "Failed to remove duplicate columns accurately"

    untouched = check_identical_columns_and_rows(all_distinct, "Test DataFrame 2")
    remaining = list(untouched.columns)
    assert remaining == ["X", "Y"], "Incorrectly removed columns when there were none to remove"

def test_check_identical_columns_and_rows_with_tol():
    """Exercise the tolerance-based check at a loose and a tight tolerance.

    Columns "A" and "B" differ by about 0.01 in every row. With
    tolerance=0.05 the result is expected to keep only "A" and "C"; with
    tolerance=0.001 all three columns are expected to remain.
    """
    frame = pd.DataFrame(
        {
            "A": [1.00, 2.01, 3.00],
            "B": [1.01, 2.00, 3.01],
            "C": [4.00, 5.00, 6.00],
        }
    )

    # Each entry: (tolerance, expected surviving columns, failure message).
    # The same frame object is checked for every entry, in this order.
    scenarios = [
        (0.05, ["A", "C"], "Failed to remove near-duplicate columns accurately"),
        (0.001, ["A", "B", "C"], "Incorrectly removed columns when they are not near duplicates"),
    ]
    for tol, expected_columns, failure_message in scenarios:
        outcome = check_identical_columns_and_rows_with_tol(frame, "Test DataFrame 1", tolerance=tol)
        assert list(outcome.columns) == expected_columns, failure_message

def test_remove_flag_behavior():
    """Exercise both checks with ``remove=True`` passed explicitly.

    The exact check is expected to reduce columns "A", "B", "C" to "A", "C".
    The tolerance check (tolerance=0.05) is expected to reduce "X", "Y", "Z"
    to "X", "Z", where "Y" differs from "X" by at most 0.01 per row.
    """
    # Exact case: "B" repeats "A" value for value.
    exact_data = {
        "A": [1, 2, 3],
        "B": [1, 2, 3],
        "C": [4, 5, 6],
    }
    exact_outcome = check_identical_columns_and_rows(
        pd.DataFrame(exact_data), "Test DataFrame 1", remove=True
    )
    assert list(exact_outcome.columns) == ["A", "C"], "Failed to remove exact duplicate columns"

    # Near-duplicate case: "Y" is "X" with small offsets in the first and last row.
    near_data = {
        "X": [1, 2, 3],
        "Y": [1.01, 2, 3.01],
        "Z": [4, 5, 6],
    }
    near_outcome = check_identical_columns_and_rows_with_tol(
        pd.DataFrame(near_data), "Test DataFrame 2", tolerance=0.05, remove=True
    )
    assert list(near_outcome.columns) == ["X", "Z"], "Failed to remove near-duplicate columns within tolerance"

# Allow running this module directly as a script, in addition to via pytest.
if __name__ == "__main__":
    import sys

    # A bare ``pytest.main()`` takes its arguments from ``sys.argv[1:]``; with
    # none given, pytest falls back to its default collection (configured
    # testpaths or the current directory) rather than this file, and the
    # returned status is dropped. Target this module explicitly, forward any
    # extra command-line options, and exit with pytest's status so failures
    # are visible to the caller.
    sys.exit(pytest.main([__file__, *sys.argv[1:]]))