# -*- coding: utf-8 -*-
"""
Stream Scraper for BoomerStreamer
Handles scraping and resolving video streams from hellspy.to
Based on Stremio Hellspy addon approach
"""

import re
import urllib.parse
import urllib.request
import urllib.error
import json
import http.cookiejar
import xbmc
import xbmcaddon

ADDON = xbmcaddon.Addon()


class SimpleSession:
    """Simple session class using urllib with cookie support"""

    def __init__(self):
        self.cookies = http.cookiejar.CookieJar()
        self.opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.cookies))
        self.headers = {}

    def update_headers(self, headers):
        self.headers.update(headers)

    def get(self, url, params=None, timeout=15, allow_redirects=True):
        if params:
            url = url + '?' + urllib.parse.urlencode(params)
        req = urllib.request.Request(url, headers=self.headers)
        try:
            response = self.opener.open(req, timeout=timeout)
            return SimpleResponse(response)
        except urllib.error.HTTPError as e:
            return SimpleResponse(e)

    def post(self, url, data=None, headers=None, timeout=15, allow_redirects=True):
        post_headers = self.headers.copy()
        if headers:
            post_headers.update(headers)
        post_data = urllib.parse.urlencode(data).encode('utf-8') if data else None
        req = urllib.request.Request(url, data=post_data, headers=post_headers)
        try:
            response = self.opener.open(req, timeout=timeout)
            return SimpleResponse(response)
        except urllib.error.HTTPError as e:
            return SimpleResponse(e)

    def head(self, url, allow_redirects=True, timeout=10):
        req = urllib.request.Request(url, headers=self.headers, method='HEAD')
        try:
            response = self.opener.open(req, timeout=timeout)
            return SimpleResponse(response)
        except urllib.error.HTTPError as e:
            return SimpleResponse(e)

    def get_cookies_dict(self):
        return {c.name: c.value for c in self.cookies}


class SimpleResponse:
    """Small wrapper exposing a requests-like view of a urllib response.

    Works with any object that offers some of ``geturl()``, ``code``,
    ``headers``, ``read()`` and ``close()``; missing pieces fall back to
    neutral defaults.

    Attributes:
        url: final URL reported by the response, or '' when unavailable.
        ok: True for status codes 200-399.
        status_code: numeric HTTP status, or 0 when unavailable.
        headers: plain dict copy of the response headers (keys keep the
            spelling the response object reports).
    """

    def __init__(self, response):
        self._response = response
        has_code = hasattr(response, 'code')
        self.url = response.geturl() if hasattr(response, 'geturl') else ''
        self.ok = 200 <= response.code < 400 if has_code else False
        self.status_code = response.code if has_code else 0
        raw_headers = getattr(response, 'headers', None)
        # A missing or None headers attribute both mean "no headers".
        self.headers = dict(raw_headers) if raw_headers is not None else {}
        self._text = None

    @property
    def text(self):
        """Body decoded as UTF-8 (invalid bytes replaced); '' if it cannot be read.

        The body is read once and cached; the underlying response is closed
        afterwards so it is not left open.
        """
        if self._text is None:
            try:
                self._text = self._response.read().decode('utf-8', errors='replace')
            except Exception:
                # Best effort: an unreadable body is reported as empty text.
                self._text = ''
            finally:
                self.close()
        return self._text

    def close(self):
        """Close the underlying response if it supports closing."""
        closer = getattr(self._response, 'close', None)
        if callable(closer):
            try:
                closer()
            except OSError:
                # Nothing useful can be done if closing itself fails.
                pass

    def raise_for_status(self):
        """Raise urllib.error.HTTPError when the status is outside 200-399."""
        if not self.ok:
            raise urllib.error.HTTPError(self.url, self.status_code, '', self.headers, None)


class StreamScraper:
    """Front end that queries every registered scraper and merges the results."""

    def __init__(self):
        # Browser-like default headers shared by all scrapers of this instance.
        browser_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'cs-CZ,cs;q=0.9,en;q=0.8',
            'Connection': 'keep-alive',
        }
        self.session = SimpleSession()
        self.session.update_headers(browser_headers)

        # Registered source scrapers; all of them share the session above.
        self.scrapers = [HellspyScraper(self.session)]

    def search_streams(self, title, year='', imdb_id='', media_type='movie', season=None, episode=None):
        """Collect streams from all enabled scrapers, best quality first.

        A scraper that raises is logged and skipped; it never aborts the
        whole search.

        Returns:
            list of stream dicts ordered by their 'quality' value
            (4K/2160p, 1080p, 720p, 480p, SD, then everything else).
        """
        request = {
            'title': title,
            'year': year,
            'imdb_id': imdb_id,
            'media_type': media_type,
            'season': season,
            'episode': episode,
        }
        collected = []

        for scraper in self.scrapers:
            try:
                if scraper.is_enabled():
                    found = scraper.search(**request)
                    if found:
                        collected.extend(found)
            except Exception as e:
                xbmc.log(f"[BoomerStreamer] Scraper {scraper.name} error: {str(e)}", xbmc.LOGERROR)

        # Lower rank sorts first; missing or unrecognised labels share the last rank.
        quality_order = {'4K': 0, '2160p': 0, '1080p': 1, '720p': 2, '480p': 3, 'SD': 4, 'Unknown': 5}

        def rank(stream):
            label = stream.get('quality', 'Unknown')
            return quality_order.get(label, 5)

        return sorted(collected, key=rank)


class BaseScraper:
    """Base class for all scrapers: shared session plus text helpers.

    Subclasses override ``name``, ``base_url`` and ``search()``.
    """

    name = "Base Scraper"
    base_url = ""

    # Quality markers, checked in priority order against upper-cased text.
    # Letter markers must not touch another letter/digit (so "Birthday" or
    # "Doomsday" do not count as HD/SD); resolution numbers must not be part
    # of a longer number but may follow a letter (e.g. "1920x1080").
    _QUALITY_RULES = (
        ('4K', re.compile(r'(?<![^\W_])(?:4K|UHD)(?![^\W_])|(?<!\d)2160[PI]?(?![^\W_])')),
        ('1080p', re.compile(r'(?<!\d)1080[PI]?(?![^\W_])|(?<![^\W_])(?:FULL[\s._-]?HD|FHD)(?![^\W_])')),
        ('720p', re.compile(r'(?<!\d)720[PI]?(?![^\W_])|(?<![^\W_])HD(?:TV|RIP)?(?![^\W_])')),
        ('480p', re.compile(r'(?<!\d)480[PI]?(?![^\W_])')),
        ('SD', re.compile(r'(?<!\d)360[PI]?(?![^\W_])|(?<![^\W_])SD(?![^\W_])')),
    )

    # Language markers, checked in priority order against lower-cased text.
    # A marker must not be glued to other letters (so "revenant" is not "en"
    # and "desk" is not "sk"); digits and punctuation count as separators.
    _LANGUAGE_RULES = (
        ('CZ', re.compile(r'(?<![^\W\d_])(?:cz|cze|czech\w*|česk\w*|cesk\w*)(?![^\W\d_])')),
        ('SK', re.compile(r'(?<![^\W\d_])(?:sk|svk|sloven\w*)(?![^\W\d_])')),
        ('EN', re.compile(r'(?<![^\W\d_])(?:en|eng|english|angl\w*)(?![^\W\d_])')),
    )

    def __init__(self, session):
        # HTTP session used for all page fetches of this scraper.
        self.session = session

    def is_enabled(self):
        """Return True when the scraper should be used (always, by default)."""
        return True

    def search(self, title, year='', imdb_id='', media_type='movie', season=None, episode=None):
        """Return a list of stream dicts; must be implemented by subclasses."""
        raise NotImplementedError

    def _clean_title(self, title):
        """Strip punctuation from *title* and collapse whitespace.

        Word characters, whitespace and '-' are kept; everything else is
        removed.
        """
        title = re.sub(r'[^\w\s\-]', '', title)
        title = re.sub(r'\s+', ' ', title)
        return title.strip()

    def _get_page(self, url, params=None, allow_redirects=True):
        """Fetch *url* and return its text, or None on any error.

        HTTP error statuses and exceptions are logged and reported as None.
        """
        try:
            xbmc.log(f"[BoomerStreamer] Fetching: {url}", xbmc.LOGDEBUG)
            response = self.session.get(url, params=params, timeout=15, allow_redirects=allow_redirects)
            response.raise_for_status()
            return response.text
        except Exception as e:
            xbmc.log(f"[BoomerStreamer] Error fetching {url}: {str(e)}", xbmc.LOGERROR)
            return None

    def _extract_quality(self, text):
        """Return the quality label found in *text*.

        Returns:
            One of '4K', '1080p', '720p', '480p', 'SD', or 'Unknown' when no
            marker is present (or *text* is empty).
        """
        if not text:
            return 'Unknown'
        text = text.upper()
        for label, rule in self._QUALITY_RULES:
            if rule.search(text):
                return label
        return 'Unknown'

    def _extract_language(self, text):
        """Return 'CZ', 'SK' or 'EN' for the first language marker in priority order, else ''."""
        if not text:
            return ''
        text = text.lower()
        for label, rule in self._LANGUAGE_RULES:
            if rule.search(text):
                return label
        return ''


class HellspyScraper(BaseScraper):
    """
    Scraper for hellspy.to
    Based on analysis of Stremio Hellspy addon
    """
    
    name = "Hellspy"
    base_url = "https://hellspy.to"
    login_url = "https://hellspy.to/login"
    
    def __init__(self, session):
        """Create the scraper on top of the shared HTTP *session*."""
        # Set to True by login() once a sign-in has succeeded, so later
        # calls can return early.
        self.logged_in = False
        super().__init__(session)
    
    def is_enabled(self):
        """Return the 'enable_hellspy' add-on setting; True if it cannot be read."""
        try:
            return ADDON.getSettingBool('enable_hellspy')
        except Exception:
            # Setting missing or unreadable: keep the scraper on by default.
            # (Not a bare except, so KeyboardInterrupt/SystemExit still propagate.)
            return True
    
    def login(self):
        """Log in to hellspy.to with the credentials from the add-on settings.

        No account is built into the code: when 'hellspy_email' or
        'hellspy_password' is empty the login is skipped and the scraper
        keeps working anonymously.

        Returns:
            bool: True when the session already was or now appears to be
            logged in; False when credentials are missing or rejected, the
            server answered with an error status, or an exception occurred.
        """
        if self.logged_in:
            return True

        try:
            email, password = self._read_credentials()
            if not email or not password:
                # Credentials must come from the user's own settings; an
                # account shipped in source code is exposed to everyone.
                xbmc.log("[BoomerStreamer] Hellspy credentials not configured, skipping login", xbmc.LOGWARNING)
                return False

            # The account name is deliberately kept out of the log.
            xbmc.log("[BoomerStreamer] Logging in to Hellspy", xbmc.LOGINFO)

            login_data = {
                'email': email,
                'password': password,
                'remember': '1',
            }

            # Fetch the login form first: it may carry a CSRF token and it
            # lets the server set its initial cookies.
            csrf_token = self._find_csrf_token(self._get_page(self.login_url))
            if csrf_token:
                xbmc.log("[BoomerStreamer] Found CSRF token", xbmc.LOGDEBUG)
                login_data['_token'] = csrf_token

            headers = {
                'Content-Type': 'application/x-www-form-urlencoded',
                'Referer': self.login_url,
                'Origin': self.base_url,
            }

            response = self.session.post(
                self.login_url,
                data=login_data,
                headers=headers,
                timeout=15,
                allow_redirects=True
            )

            if not response.ok:
                xbmc.log(f"[BoomerStreamer] Login failed - HTTP {response.status_code}", xbmc.LOGERROR)
                return False

            cookies_count = len(self.session.cookies)
            xbmc.log(f"[BoomerStreamer] Got {cookies_count} cookies after login", xbmc.LOGINFO)
            for cookie in self.session.cookies:
                # Names only: cookie values are session secrets.
                xbmc.log(f"[BoomerStreamer] Cookie: {cookie.name}", xbmc.LOGDEBUG)

            response_text = response.text.lower()
            if 'logout' in response_text or 'odhlásit' in response_text or 'profil' in response_text:
                self.logged_in = True
                xbmc.log("[BoomerStreamer] Successfully logged in to Hellspy", xbmc.LOGINFO)
                return True
            if 'špatné heslo' in response_text or 'wrong password' in response_text or 'invalid' in response_text:
                xbmc.log("[BoomerStreamer] Login failed - wrong credentials", xbmc.LOGERROR)
                return False

            # Neither a success nor a failure marker: treat as success.
            self.logged_in = True
            xbmc.log("[BoomerStreamer] Login appears successful (no error detected)", xbmc.LOGINFO)
            return True

        except Exception as e:
            xbmc.log(f"[BoomerStreamer] Login error: {str(e)}", xbmc.LOGERROR)
            return False

    def _read_credentials(self):
        """Return (email, password) from the add-on settings, or ('', '') if unreadable."""
        try:
            return ADDON.getSetting('hellspy_email'), ADDON.getSetting('hellspy_password')
        except Exception:
            return '', ''

    def _find_csrf_token(self, login_page):
        """Return the CSRF token found in the login form HTML, or '' if none."""
        if not login_page:
            return ''
        csrf_patterns = [
            r'name=["\']_token["\'][^>]*value=["\']([^"\']+)["\']',
            r'name=["\']csrf["\'][^>]*value=["\']([^"\']+)["\']',
            r'name=["\']csrf_token["\'][^>]*value=["\']([^"\']+)["\']',
            r'value=["\']([^"\']+)["\'][^>]*name=["\']_token["\']',
        ]
        for pattern in csrf_patterns:
            match = re.search(pattern, login_page, re.IGNORECASE)
            if match:
                return match.group(1)
        return ''
    
    def search(self, title, year='', imdb_id='', media_type='movie', season=None, episode=None):
        """Find streams for a movie or episode on hellspy.to.

        Several query spellings are tried in turn and the results of the
        first one that yields anything are returned. Errors are logged and
        produce an empty list.
        """
        # The login outcome is ignored here; the search runs either way.
        self.login()

        try:
            search_query = self._clean_title(title)

            if media_type == 'episode' and season and episode:
                # Common episode notations: S01E02, 1x02, s01e02.
                queries = [
                    f"{search_query} S{int(season):02d}E{int(episode):02d}",
                    f"{search_query} {int(season)}x{int(episode):02d}",
                    f"{search_query} s{int(season):02d}e{int(episode):02d}",
                ]
            elif year:
                # Movies: the more specific "title year" query goes first.
                queries = [
                    f"{search_query} {year}",
                    search_query,
                ]
            else:
                queries = [search_query]

            for query in queries:
                xbmc.log(f"[BoomerStreamer] Hellspy searching: {query}", xbmc.LOGINFO)

                found_streams = self._search_hellspy(query, title, year)
                if found_streams:
                    # First productive query wins; later ones are not tried.
                    return list(found_streams)

        except Exception as e:
            xbmc.log(f"[BoomerStreamer] Hellspy search error: {str(e)}", xbmc.LOGERROR)

        return []
    
    def _search_hellspy(self, query, original_title, year=''):
        """Run one search query on hellspy.to and resolve the result pages.

        Args:
            query: search text as typed by a user (spaces allowed).
            original_title: title passed on to _get_video_stream().
            year: accepted for call compatibility; not used here.

        Returns:
            list of stream dicts for at most the first 15 distinct result
            links, in the order they were found; empty on any error.
        """
        streams = []

        try:
            # Search URL format: https://hellspy.to/?query=search+terms
            # quote_plus turns spaces into '+' and percent-encodes everything
            # else that is not URL-safe (e.g. Czech diacritics), which urllib
            # requires.
            search_url = f"{self.base_url}/?query={urllib.parse.quote_plus(query)}"

            xbmc.log(f"[BoomerStreamer] Search URL: {search_url}", xbmc.LOGINFO)

            html = self._get_page(search_url)

            if not html:
                xbmc.log("[BoomerStreamer] No HTML returned from hellspy search", xbmc.LOGWARNING)
                return streams

            xbmc.log(f"[BoomerStreamer] Got HTML response, length: {len(html)}", xbmc.LOGDEBUG)

            # Result links look like /video/HASH/ID (relative or absolute);
            # /soubor/ and /file/ links are accepted as well.
            video_patterns = [
                r'href=["\']([^"\']*?/video/[^"\']+)["\']',
                r'href=["\']([^"\']*?/soubor/[^"\']+)["\']',
                r'href=["\']([^"\']*?/file/[^"\']+)["\']',
            ]

            # A dict de-duplicates while keeping first-seen order, so the
            # 15-result cut below keeps the earliest links found.
            found_urls = {}
            for pattern in video_patterns:
                matches = re.findall(pattern, html, re.IGNORECASE)
                xbmc.log(f"[BoomerStreamer] Pattern {pattern[:30]}... found {len(matches)} matches", xbmc.LOGDEBUG)
                for url in matches:
                    found_urls.setdefault(url, None)

            xbmc.log(f"[BoomerStreamer] Found {len(found_urls)} video URLs", xbmc.LOGINFO)

            for url in list(found_urls)[:15]:  # Limit to 15 results
                try:
                    # Make URL absolute
                    if not url.startswith('http'):
                        if url.startswith('//'):
                            url = 'https:' + url
                        elif url.startswith('/'):
                            url = self.base_url + url
                        else:
                            url = self.base_url + '/' + url

                    xbmc.log(f"[BoomerStreamer] Processing video URL: {url}", xbmc.LOGDEBUG)

                    stream_info = self._get_video_stream(url, original_title)
                    if stream_info:
                        streams.append(stream_info)

                except Exception as e:
                    # One broken result must not abort the remaining ones.
                    xbmc.log(f"[BoomerStreamer] Error processing URL {url}: {str(e)}", xbmc.LOGDEBUG)
                    continue

        except Exception as e:
            xbmc.log(f"[BoomerStreamer] Hellspy search error: {str(e)}", xbmc.LOGERROR)

        return streams
    
    def _get_video_stream(self, video_url, original_title=''):
        """Build a stream dict from a hellspy video page.

        Args:
            video_url: absolute URL of the video page.
            original_title: accepted for call compatibility; not used here.

        Returns:
            dict with keys 'url', 'quality', 'source', 'size', 'language',
            'filename', 'page_url' and 'cookies', or None when the page
            cannot be fetched or processing fails.
        """
        try:
            html = self._get_page(video_url)
            if not html:
                return None

            # Title/filename: prefer <h1>, fall back to <title>.
            filename = ''
            title_match = re.search(r'<h1[^>]*>([^<]+)</h1>', html)
            if not title_match:
                title_match = re.search(r'<title>([^<]+)</title>', html)
            if title_match:
                filename = title_match.group(1).strip()
                # Drop a trailing "- Hellspy..." / "| Hellspy..." site suffix.
                filename = re.sub(r'\s*[-|]\s*[Hh]ellspy.*$', '', filename)

            # File size: labelled and tag-delimited values first; the
            # generic "first GB / first MB anywhere" patterns are only a
            # last resort because they can hit unrelated numbers on the page.
            size = ''
            size_patterns = [
                r'Velikost[:\s]*(\d+(?:[.,]\d+)?\s*(?:GB|MB))',
                r'Size[:\s]*(\d+(?:[.,]\d+)?\s*(?:GB|MB))',
                r'>(\d+(?:[.,]\d+)?\s*(?:GB|MB))<',
                r'(\d+(?:[.,]\d+)?\s*GB)',
                r'(\d+(?:[.,]\d+)?\s*MB)',
            ]
            for pattern in size_patterns:
                size_match = re.search(pattern, html, re.IGNORECASE)
                if size_match:
                    size = size_match.group(1).upper().replace(',', '.').strip()
                    break

            # Quality and language: the filename is the most reliable source;
            # the head of the page is consulted only when it says nothing.
            page_head = html[:5000]
            quality = self._extract_quality(filename)
            if quality == 'Unknown':
                quality = self._extract_quality(page_head)
            language = self._extract_language(filename) or self._extract_language(page_head)

            # _extract_stream_url() already returns a cleaned URL; only the
            # page-URL fallback still needs cleaning. Kodi might be able to
            # handle the page URL itself.
            stream_url = self._extract_stream_url(html, video_url)
            if not stream_url:
                stream_url = self._clean_url(video_url)

            xbmc.log(f"[BoomerStreamer] Stream: {filename[:50]} | Quality: {quality} | Size: {size} | Lang: {language}", xbmc.LOGINFO)

            # Session cookies are passed along so the player can authenticate.
            cookies_dict = self.session.get_cookies_dict()
            cookies_str = '; '.join([f'{k}={v}' for k, v in cookies_dict.items()])

            if cookies_str:
                xbmc.log(f"[BoomerStreamer] Got {len(cookies_dict)} cookies", xbmc.LOGINFO)
            else:
                xbmc.log("[BoomerStreamer] WARNING: No cookies!", xbmc.LOGWARNING)

            return {
                'url': stream_url,
                'quality': quality,
                'source': self.name,
                'size': size,
                'language': language,
                'filename': filename,
                'page_url': video_url,
                'cookies': cookies_str,
            }

        except Exception as e:
            xbmc.log(f"[BoomerStreamer] Error getting video stream: {str(e)}", xbmc.LOGERROR)
            return None
    
    def _clean_url(self, url):
        """Clean URL from escape sequences"""
        if not url:
            return url
        # Decode unicode escapes like \u0026 -> &
        try:
            url = url.encode().decode('unicode_escape')
        except:
            pass
        # Decode HTML entities
        url = url.replace('&amp;', '&')
        url = url.replace('&#38;', '&')
        url = url.replace('&#x26;', '&')
        return url
    
    def _extract_stream_url(self, html, base_url):
        """Extract actual stream URL from HTML"""
        
        # Pattern priority: direct video > player source > download link > iframe
        
        # 1. Direct video file URLs in the page
        video_url_patterns = [
            r'(https?://[^"\'\s<>]+\.(?:mp4|mkv|avi|m3u8)(?:\?[^"\'\s<>]*)?)',
        ]
        
        for pattern in video_url_patterns:
            matches = re.findall(pattern, html, re.IGNORECASE)
            for url in matches:
                # Skip thumbnails, posters, etc.
                if any(skip in url.lower() for skip in ['thumb', 'poster', 'preview', 'banner', 'logo']):
                    continue
                return self._clean_url(url)
        
        # 2. JavaScript player source
        js_patterns = [
            r'file\s*[=:]\s*["\']([^"\']+)["\']',
            r'source\s*[=:]\s*["\']([^"\']+)["\']',
            r'video_url\s*[=:]\s*["\']([^"\']+)["\']',
            r'src\s*[=:]\s*["\']([^"\']+\.(?:mp4|m3u8|mkv))["\']',
            r'"file"\s*:\s*"([^"]+)"',
            r'"src"\s*:\s*"([^"]+\.(?:mp4|m3u8))"',
        ]
        
        for pattern in js_patterns:
            match = re.search(pattern, html, re.IGNORECASE)
            if match:
                url = match.group(1)
                if not url.startswith('http'):
                    url = urllib.parse.urljoin(base_url, url)
                return self._clean_url(url)
        
        # 3. Download/Stream button
        button_patterns = [
            r'<a[^>]+href=["\']([^"\']+)["\'][^>]*>\s*(?:Stáhnout|Download|Stream|Přehrát|Play)[^<]*</a>',
            r'<a[^>]+class=["\'][^"\']*(?:download|stream|play|btn-primary)[^"\']*["\'][^>]+href=["\']([^"\']+)["\']',
            r'<button[^>]+data-url=["\']([^"\']+)["\']',
        ]
        
        for pattern in button_patterns:
            match = re.search(pattern, html, re.IGNORECASE)
            if match:
                url = match.group(1)
                if not url.startswith('http'):
                    url = urllib.parse.urljoin(base_url, url)
                return self._clean_url(url)
        
        # 4. Iframe player
        iframe_match = re.search(r'<iframe[^>]+src=["\']([^"\']+)["\']', html, re.IGNORECASE)
        if iframe_match:
            url = iframe_match.group(1)
            if any(x in url.lower() for x in ['player', 'embed', 'video', 'stream']):
                if not url.startswith('http'):
                    url = 'https:' + url if url.startswith('//') else urllib.parse.urljoin(base_url, url)
                return self._clean_url(url)
        
        return None
    
    def _find_embedded_player(self, html):
        """Find video streams from embedded players in the page"""
        streams = []
        
        try:
            # Look for video.js, JW Player, or similar
            player_patterns = [
                r'sources\s*:\s*\[([^\]]+)\]',
                r'playlist\s*:\s*\[([^\]]+)\]',
                r'"sources"\s*:\s*\[([^\]]+)\]',
            ]
            
            for pattern in player_patterns:
                match = re.search(pattern, html, re.IGNORECASE | re.DOTALL)
                if match:
                    sources = match.group(1)
                    urls = re.findall(r'["\']?(https?://[^"\'<>\s]+\.(?:mp4|m3u8|mpd))["\']?', sources)
                    for url in urls:
                        streams.append({
                            'url': url,
                            'quality': self._extract_quality(url),
                            'source': self.name + ' (Player)',
                            'size': '',
                            'language': '',
                        })
            
            # HTML5 video tag
            video_matches = re.findall(r'<source[^>]+src=["\']([^"\']+)["\']', html, re.IGNORECASE)
            for url in video_matches:
                if not url.startswith('http'):
                    url = urllib.parse.urljoin(self.base_url, url)
                streams.append({
                    'url': url,
                    'quality': self._extract_quality(url),
                    'source': self.name + ' (Video)',
                    'size': '',
                    'language': '',
                })
        
        except Exception as e:
            xbmc.log(f"[BoomerStreamer] Error finding embedded player: {str(e)}", xbmc.LOGDEBUG)
        
        return streams


class ResolverManager:
    """Resolves page or redirect URLs of hosting services to direct stream URLs."""

    # Path extensions treated as directly playable media.
    _VIDEO_EXTENSIONS = ('.mp4', '.mkv', '.avi', '.m3u8', '.mpd', '.webm', '.flv')

    def __init__(self):
        # Own session (separate cookie jar) with browser-like headers.
        self.session = SimpleSession()
        self.session.update_headers({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': '*/*',
            'Accept-Language': 'cs-CZ,cs;q=0.9,en;q=0.8',
        })

    def resolve(self, url):
        """Resolve *url* to a direct stream URL.

        Returns:
            None for an empty *url*; otherwise the resolved URL, or the
            original URL when it is already direct, cannot be resolved, or
            resolving fails (the player may still cope with redirects).
        """
        if not url:
            return None

        try:
            xbmc.log(f"[BoomerStreamer] Resolving: {url}", xbmc.LOGDEBUG)

            if self._is_direct_video(url):
                return url

            resolved = self._resolve_generic(url)
            if resolved:
                return resolved

            return url

        except Exception as e:
            xbmc.log(f"[BoomerStreamer] Resolver error for {url}: {str(e)}", xbmc.LOGERROR)
            return url  # Return original URL as fallback

    def _is_direct_video(self, url):
        """Return True when the path of *url* ends with a known video extension.

        Only the path is inspected, so query strings, fragments and host
        names ending in e.g. '.avi' do not influence the result.
        """
        if not url:
            return False
        try:
            path = urllib.parse.urlsplit(url).path
        except ValueError:
            # Malformed URL: fall back to cutting off the query string.
            path = url.split('?')[0]
        return path.lower().endswith(self._VIDEO_EXTENSIONS)

    @staticmethod
    def _header(headers, name, default=''):
        """Look up *name* in a plain header dict, ignoring case."""
        wanted = name.lower()
        for key, value in headers.items():
            if key.lower() == wanted:
                return value
        return default

    def _resolve_generic(self, url):
        """Follow redirects and scan the page for a video URL.

        Returns:
            The first URL that looks like a video, or None when nothing is
            found or an error occurs.
        """
        try:
            # A HEAD request reveals redirects without touching the body.
            try:
                head_response = self.session.head(url, allow_redirects=True, timeout=10)
                if self._is_direct_video(head_response.url):
                    return head_response.url
            except Exception as e:
                # HEAD is only a shortcut; fall through to GET.
                xbmc.log(f"[BoomerStreamer] HEAD request failed for {url}: {str(e)}", xbmc.LOGDEBUG)

            response = self.session.get(url, allow_redirects=True, timeout=15)

            if self._is_direct_video(response.url):
                return response.url

            # Header names arrive as e.g. 'Content-Type', so the lookup must
            # ignore case. Checking before response.text also keeps a media
            # body from being read into memory.
            content_type = self._header(response.headers, 'content-type').lower()
            if any(vt in content_type for vt in ['video/', 'application/octet-stream']):
                return response.url

            patterns = [
                # Direct file URLs
                r'(https?://[^"\'\s<>]+\.(?:mp4|m3u8|mpd)(?:\?[^"\'\s<>]*)?)',
                # JavaScript player sources
                r'file\s*[=:]\s*["\']([^"\']+)["\']',
                r'src\s*[=:]\s*["\']([^"\']+\.(?:mp4|m3u8|mpd))["\']',
                r'source\s*[=:]\s*["\']([^"\']+)["\']',
                r'video_url\s*[=:]\s*["\']([^"\']+)["\']',
                r'stream_url\s*[=:]\s*["\']([^"\']+)["\']',
                # HTML5 video
                r'<source[^>]+src=["\']([^"\']+\.(?:mp4|m3u8|mpd))["\']',
                r'<video[^>]+src=["\']([^"\']+)["\']',
            ]

            page_text = response.text
            for pattern in patterns:
                for video_url in re.findall(pattern, page_text, re.IGNORECASE):
                    # Skip data URIs and blobs
                    if video_url.startswith(('data:', 'blob:')):
                        continue
                    if not video_url.startswith('http'):
                        video_url = urllib.parse.urljoin(url, video_url)
                    # Verify it looks like a video URL
                    if any(ext in video_url.lower() for ext in ['.mp4', '.m3u8', '.mpd', '.mkv', '.avi']):
                        return video_url

            return None

        except Exception as e:
            xbmc.log(f"[BoomerStreamer] Generic resolver error: {str(e)}", xbmc.LOGERROR)
            return None
