"""Flashscore HTML parsers"""
import logging
import re
from datetime import datetime
from typing import List, Optional

from bs4 import BeautifulSoup

from app.data_sources.base import MatchData, LeagueData, TeamData

logger = logging.getLogger(__name__)


class FlashscoreParser:
    """Parser for Flashscore HTML"""
    
    @staticmethod
    def parse_leagues(html: str) -> List[LeagueData]:
        """Parse leagues from HTML"""
        # This is a simplified parser - Flashscore structure may vary
        # In production, you'd need to inspect the actual HTML structure
        leagues = []
        soup = BeautifulSoup(html, "html.parser")
        
        # Example: find league links (adjust selectors based on actual structure)
        league_links = soup.find_all("a", href=re.compile(r"/football/.*/.*/"))
        
        for link in league_links[:50]:  # Limit for MVP
            href = link.get("href", "")
            name = link.get_text(strip=True)
            
            if name and href:
                # Extract the country slug and league ID from the URL
                # (expected pattern: /football/<country>/<league>/)
                match = re.search(r"/football/([^/]+)/([^/]+)/", href)
                if match:
                    leagues.append(LeagueData(
                        source_id=match.group(2),
                        name=name,
                        country=match.group(1)
                    ))
        
        return leagues
    
    @staticmethod
    def parse_matches(html: str, league_source_id: str) -> List[MatchData]:
        """Parse matches from HTML"""
        matches = []
        soup = BeautifulSoup(html, "html.parser")
        
        # Simplified parser: adjust the selectors below to the actual Flashscore markup.
        # Flashscore renders match lists with JavaScript, so the HTML may need to be
        # fetched with a headless browser such as Playwright (see the usage sketch at
        # the bottom of this module).
        
        # Example: find match rows
        match_rows = soup.find_all("div", class_=re.compile(r".*match.*", re.I))
        
        for row in match_rows:
            try:
                # Extract match data (adjust selectors)
                home_team_elem = row.find("span", class_=re.compile(r".*home.*", re.I))
                away_team_elem = row.find("span", class_=re.compile(r".*away.*", re.I))
                date_elem = row.find("span", class_=re.compile(r".*date.*", re.I))
                score_elem = row.find("span", class_=re.compile(r".*score.*", re.I))
                
                if not home_team_elem or not away_team_elem:
                    continue
                
                home_team = home_team_elem.get_text(strip=True)
                away_team = away_team_elem.get_text(strip=True)
                
                # Extract match ID from data attributes or href
                match_id = row.get("data-id") or row.get("id", "")
                if not match_id:
                    # Try to extract from link
                    link = row.find("a")
                    if link:
                        href = link.get("href", "")
                        match = re.search(r"/match/([^/]+)", href)
                        if match:
                            match_id = match.group(1)
                
                # Parse the kick-off date; fall back to "now" when the row has no
                # usable timestamp
                match_date = datetime.now()
                if date_elem:
                    date_str = date_elem.get_text(strip=True)
                    try:
                        # Assumed format "dd.mm.YYYY HH:MM"; adjust to the real markup
                        match_date = datetime.strptime(date_str, "%d.%m.%Y %H:%M")
                    except ValueError:
                        logger.debug(f"Unrecognised date format: {date_str!r}")
                
                # Parse score
                home_score = None
                away_score = None
                status = "scheduled"
                
                if score_elem:
                    score_text = score_elem.get_text(strip=True)
                    if ":" in score_text:
                        try:
                            home_part, away_part = score_text.split(":", 1)
                            home_score = int(home_part.strip())
                            away_score = int(away_part.strip())
                            status = "finished"
                        except ValueError:
                            logger.debug(f"Unrecognised score format: {score_text!r}")
                
                matches.append(MatchData(
                    source_id=match_id or f"{home_team}_{away_team}",
                    league_source_id=league_source_id,
                    home_team_name=home_team,
                    away_team_name=away_team,
                    match_date=match_date,
                    status=status,
                    home_score=home_score,
                    away_score=away_score,
                    source_url=f"https://www.flashscore.com/match/{match_id}" if match_id else None
                ))
            except Exception as e:
                logger.warning(f"Error parsing match row: {e}")
                continue
        
        return matches
    
    @staticmethod
    def parse_match_details(html: str, match_source_id: str) -> Optional[MatchData]:
        """Parse detailed match information from a single match page.

        Placeholder: the detail page carries richer data than the list view
        (events, lineups, statistics), but its selectors have not been mapped
        yet, so this currently returns None.
        """
        # Parsing would follow the same pattern as parse_matches, applied to
        # the single-match page markup, and return a more complete MatchData.
        return None
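

# --- Usage sketch (illustrative only) ----------------------------------------
# A minimal sketch of how this parser might be driven. The URL, the Playwright
# fetch step, and the league id "premier-league" are assumptions for the example,
# not part of the parser above. Flashscore pages are rendered client-side, so a
# headless browser (Playwright here) is one way to obtain HTML that actually
# contains the match rows before passing it to FlashscoreParser.parse_matches.
if __name__ == "__main__":
    from playwright.sync_api import sync_playwright

    url = "https://www.flashscore.com/football/england/premier-league/"  # hypothetical target
    with sync_playwright() as p:
        browser = p.chromium.launch()
        page = browser.new_page()
        page.goto(url)
        rendered_html = page.content()  # HTML after JavaScript has run
        browser.close()

    for parsed_match in FlashscoreParser.parse_matches(rendered_html, "premier-league"):
        print(parsed_match.home_team_name, "vs", parsed_match.away_team_name, parsed_match.status)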

