import logging
import os
import re
import shutil
import tempfile
from datetime import datetime
from typing import List, Dict, Any, Optional

from git import Repo

class EnhancedGitAnalyzer:
    """Clone a git repository into a temporary directory and extract commit data.

    Intended to be used as a context manager: entering clones ``repo_url``
    into a fresh temporary directory, exiting closes the repository and
    removes that directory.

    Date filtering notes:
        * ``start_date`` / ``end_date`` are parsed with ``"%Y-%m-%d"`` and are
          therefore naive datetimes at 00:00 of the given day.
        * Commit timestamps are converted with ``datetime.fromtimestamp``
          (naive, local time of the machine running the analysis).
        * Because ``end_date`` is midnight at the *start* of that day, commits
          made later on the ``end_date`` day are excluded.
    """

    # scp-like SSH remote: git@host:owner/repo(.git)
    _SSH_URL_RE = re.compile(r'git@([^:]+):([^/]+/.+?)(?:\.git)?/?$')
    # HTTP(S) remote: https://[userinfo@]host/owner/.../repo(.git)
    # The optional userinfo part is matched but not captured so credentials
    # embedded in the clone URL never end up in generated links.
    _HTTP_URL_RE = re.compile(
        r'https?://(?:[^@/]+@)?([^/]+)/([^/]+/.+?)(?:\.git)?/?$'
    )

    def __init__(self, repo_url: str, start_date: Optional[str] = None,
                 end_date: Optional[str] = None):
        """Store the repository URL and parse the optional date bounds.

        Args:
            repo_url: URL (or path) passed to ``Repo.clone_from``.
            start_date: Optional lower bound, formatted ``YYYY-MM-DD``.
            end_date: Optional upper bound, formatted ``YYYY-MM-DD``.

        Raises:
            ValueError: If a given date does not match ``YYYY-MM-DD``.
        """
        self.repo_url = repo_url
        self.start_date = datetime.strptime(start_date, "%Y-%m-%d") if start_date else None
        self.end_date = datetime.strptime(end_date, "%Y-%m-%d") if end_date else None
        self.temp_dir = None
        self.repo = None

    def __enter__(self) -> "EnhancedGitAnalyzer":
        """Clone the repository into a new temporary directory."""
        self.temp_dir = tempfile.mkdtemp()
        try:
            self.repo = Repo.clone_from(self.repo_url, self.temp_dir)
        except BaseException:
            # __exit__ is not invoked when __enter__ raises, so the directory
            # must be removed here or it would be leaked.
            shutil.rmtree(self.temp_dir, ignore_errors=True)
            self.temp_dir = None
            raise
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the repository and delete the temporary clone."""
        if self.repo is not None:
            # Release git's file handles before deleting the working copy.
            self.repo.close()
            self.repo = None
        if self.temp_dir:
            shutil.rmtree(self.temp_dir)
            self.temp_dir = None

    def extract_all_commits(self) -> List[Dict[str, Any]]:
        """Extract all commits from the repository within the date range.

        Returns:
            One dict per commit with the keys ``hash``, ``author``, ``email``,
            ``date`` (ISO string), ``message``, ``files_changed``,
            ``insertions``, ``deletions`` and ``file_changes`` (a list of
            per-file dicts with ``file_path``, ``change_type``, ``patch``,
            ``insertions`` and ``deletions``).

        Raises:
            RuntimeError: If the repository has not been cloned yet.
        """
        if self.repo is None:
            raise RuntimeError(
                "Repository is not cloned; use EnhancedGitAnalyzer as a context manager"
            )

        commits = []
        for commit in self.repo.iter_commits():
            commit_date = datetime.fromtimestamp(commit.committed_date)

            # Apply date filters if specified
            if self.start_date and commit_date < self.start_date:
                continue
            if self.end_date and commit_date > self.end_date:
                continue

            # Evaluate the stats once per commit and reuse them everywhere.
            stats = commit.stats

            try:
                file_changes = self._collect_file_changes(commit, stats.files)
            except Exception:
                # Best effort: keep the commit, but make the failure visible
                # and fall back to the name/line-count information in stats.
                logging.getLogger(__name__).warning(
                    "Could not extract diff for commit %s; falling back to stats",
                    commit.hexsha,
                    exc_info=True,
                )
                file_changes = self._changes_from_stats(stats.files, 'M')

            commits.append({
                'hash': commit.hexsha,
                'author': commit.author.name,
                'email': commit.author.email,
                'date': commit_date.isoformat(),
                'message': commit.message,
                'files_changed': list(stats.files),
                'insertions': stats.total['insertions'],
                'deletions': stats.total['deletions'],
                'file_changes': file_changes,
            })

        return commits

    def _collect_file_changes(self, commit: Any,
                              stats_files: Dict[Any, Dict[str, int]]) -> List[Dict[str, Any]]:
        """Return the per-file change records for ``commit``."""
        if not commit.parents:
            # Initial commit: there is no parent to diff against, so only the
            # file names and line counts from the stats are reported.
            return self._changes_from_stats(stats_files, 'A')

        # Diff parent -> commit so that a file introduced by the commit is an
        # addition; create_patch=True is what makes the patch text available.
        # For merge commits only the first parent is considered.
        diff_index = commit.parents[0].diff(commit, create_patch=True)
        return [self._describe_change(change, stats_files) for change in diff_index]

    @staticmethod
    def _describe_change(change: Any,
                         stats_files: Dict[Any, Dict[str, int]]) -> Dict[str, Any]:
        """Build the record for a single diff entry."""
        raw_patch = change.diff
        if isinstance(raw_patch, bytes):
            raw_patch = raw_patch.decode('utf-8', errors='ignore')

        # Prefer the path on the commit side of the diff.
        file_path = change.b_path or change.a_path

        # Line counts come from the commit stats, keyed by path. Entries the
        # stats list under a different key (e.g. renames) report 0 here.
        file_stats = stats_files.get(file_path, {})

        return {
            'file_path': file_path,
            'change_type': EnhancedGitAnalyzer._change_type(change),
            'patch': raw_patch or None,
            'insertions': file_stats.get('insertions', 0),
            'deletions': file_stats.get('deletions', 0),
        }

    @staticmethod
    def _change_type(change: Any) -> str:
        """Return a one-letter change type (A, D, R, C or M) for a diff entry."""
        # change_type may be unset when the diff was produced in patch format,
        # so derive it from the entry's flags in that case.
        explicit = getattr(change, 'change_type', None)
        if explicit:
            return explicit
        if getattr(change, 'new_file', False):
            return 'A'
        if getattr(change, 'deleted_file', False):
            return 'D'
        if getattr(change, 'renamed_file', False):
            return 'R'
        if getattr(change, 'copied_file', False):
            return 'C'
        return 'M'

    @staticmethod
    def _changes_from_stats(stats_files: Dict[Any, Dict[str, int]],
                            change_type: str) -> List[Dict[str, Any]]:
        """Build patch-less change records from a commit's per-file stats."""
        return [
            {
                'file_path': file_path,
                'change_type': change_type,
                'patch': None,
                'insertions': file_stats.get('insertions', 0),
                'deletions': file_stats.get('deletions', 0),
            }
            for file_path, file_stats in stats_files.items()
        ]

    def get_repository_name(self) -> str:
        """Get the repository name from the URL.

        The name is the last path component of ``repo_url`` with a trailing
        ``.git`` suffix (and any trailing slash) removed.
        """
        name = os.path.basename(self.repo_url.rstrip('/'))
        # Strip only a trailing ".git"; names such as "my.github.io" contain
        # ".git" in the middle and must be left intact.
        if name.endswith('.git'):
            name = name[:-len('.git')]
        return name

    def get_commit_url(self, commit_hash: str) -> str:
        """Generate the web URL for a commit given its hash.

        Supports GitHub/GitLab style remotes in ``git@host:owner/repo`` and
        ``http(s)://host/owner/repo`` form, with or without a ``.git`` suffix
        and with nested groups in the path. For any other URL shape the hash
        itself is returned unchanged.
        """
        if self.repo_url.startswith('git@'):
            pattern = self._SSH_URL_RE
        elif self.repo_url.startswith('http'):
            pattern = self._HTTP_URL_RE
        else:
            pattern = None

        match = pattern.match(self.repo_url) if pattern else None
        if match:
            host, repo_path = match.groups()
            return f"https://{host}/{repo_path}/commit/{commit_hash}"

        # Fallback: just return the hash
        return commit_hash