# ytplayer/YTProcessing.py

import asyncio
import io
import os
import re
import shutil
import tempfile
import time
import zipfile

import aiohttp
import yt_dlp

from _config import *


class YTVideoInfo:
    def __init__(self, link: str):
        self.link = link

    def _fetch_sync(self) -> dict:
        with cache_lock:
            if self.link in info_cache:
                return info_cache[self.link]

        with yt_dlp.YoutubeDL({'quiet': True, 'no_warnings': True}) as ydl:
            info = ydl.extract_info(self.link, download=False)

        with cache_lock:
            info_cache[self.link] = info

        return info

    async def fetch(self) -> dict:
        loop = asyncio.get_event_loop()
        return await loop.run_in_executor(executor, self._fetch_sync)

    def _validate_sync(self) -> tuple[bool, str]:
        try:
            info = self._fetch_sync()
            if not info:
                return False, 'Could not fetch video info.'
            return True, ''
        except yt_dlp.utils.DownloadError as e:
            msg = str(e)
            if 'Private video'   in msg: return False, 'This video is private.'
            if 'age'             in msg.lower(): return False, 'This video is age-restricted.'
            if 'unavailable'     in msg.lower(): return False, 'This video is unavailable.'
            return False, 'Could not load this video.'

    async def validate(self) -> tuple[bool, str]:
        loop = asyncio.get_event_loop()
        return await loop.run_in_executor(executor, self._validate_sync)

    def get_formats(self, info: dict) -> list[dict]:
        seen, formats = set(), []
        for f in reversed(info['formats']):
            height = f.get('height')
            fps    = f.get('fps')
            ext    = f.get('ext', '')
            if not height or f.get('vcodec', 'none') == 'none':
                continue
            key = (height, fps)
            if key in seen:
                continue
            seen.add(key)
            size      = f.get('filesize') or f.get('filesize_approx')
            size_str  = None
            if size:
                if size >= 1_073_741_824:
                    size_str = f"{size / 1_073_741_824:.1f} GB"
                elif size >= 1_048_576:
                    size_str = f"{size / 1_048_576:.1f} MB"
                else:
                    size_str = f"{size / 1024:.0f} KB"

            audio_size = 0
            for af in info['formats']:
                if (af.get('vcodec') == 'none'
                        and af.get('acodec') != 'none'
                        and af.get('ext') == 'm4a'):
                    audio_size = af.get('filesize') or af.get('filesize_approx') or 0
                    break

            total_size = (size or 0) + audio_size

            label = f"{height}p"
            if fps and fps > 30:
                label += f" {int(fps)}fps"
            label += f" ({ext.upper()})"
            if size_str:
                label += f" ~ {size_str}"

            formats.append({
                'id':         f['format_id'],
                'label':      label,
                'height':     height,
                'ext':        ext,
                'fps':        fps,
                'vcodec':     f.get('vcodec', ''),
                'acodec':     f.get('acodec', ''),
                'filesize':   total_size or None,
            })

        return sorted(formats, key=lambda x: x['height'], reverse=True)

    def get_subtitles(self, info: dict) -> list[dict]:
        subtitles = []
        for lang, tracks in info.get('subtitles', {}).items():
            if any(t.get('ext') == 'vtt' for t in tracks):
                subtitles.append({'lang': lang, 'label': f"{lang} (manual)", 'auto': '0'})
        for lang, tracks in info.get('automatic_captions', {}).items():
            if any(t.get('ext') == 'vtt' for t in tracks):
                subtitles.append({'lang': lang, 'label': f"{lang} (auto)", 'auto': '1'})
        return subtitles

    def get_subtitle_vtt_url(self, info: dict, lang: str, auto: bool) -> str | None:
        source = info.get('automatic_captions' if auto else 'subtitles', {})
        tracks = source.get(lang, [])
        return next((t['url'] for t in tracks if t.get('ext') == 'vtt'), None)

    def summary(self, info: dict) -> dict:
        raw_date = info.get('upload_date', '')
        n        = info.get('view_count', 0)

        def fmt_views(n):
            if not n: return 'N/A'
            if n >= 1_000_000: return f"{n/1_000_000:.1f}M"
            if n >= 1_000:     return f"{n/1_000:.1f}K"
            return str(n)

        def fmt_duration(s):
            if not s: return 'N/A'
            h, m, sec = s // 3600, (s % 3600) // 60, s % 60
            return f"{h}:{m:02}:{sec:02}" if h else f"{m}:{sec:02}"

        return {
            'title':       info.get('title', 'Video'),
            'uploader':    info.get('uploader', 'N/A'),
            'views':       fmt_views(n),
            'duration':    fmt_duration(info.get('duration')),
            'upload_date': f"{raw_date[:4]}-{raw_date[4:6]}-{raw_date[6:]}" if raw_date else 'N/A',
            'description': info.get('description', 'No description.'),
        }


class YTVideoStream:
    def __init__(self, link: str, format_id: str = 'best'):
        self.link      = link
        self.format_id = format_id

    def _get_urls_sync(self) -> tuple[str, str | None]:
        ydl_opts = {
            'format': f'{self.format_id}+bestaudio[ext=m4a]/bestvideo+bestaudio',
            'quiet':  True,
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(self.link, download=False)

        if 'requested_formats' in info and len(info['requested_formats']) == 2:
            return (info['requested_formats'][0]['url'],
                    info['requested_formats'][1]['url'])
        return info['url'], None

    async def _get_urls(self) -> tuple[str, str | None]:
        loop = asyncio.get_event_loop()
        return await loop.run_in_executor(executor, self._get_urls_sync)

    async def generate(
        self,
        prebuffer:  bool = True,
        stop_event: asyncio.Event | None = None,
    ):
        """
        Yields MP4 chunks.
        stop_event — when set, cleanly terminates FFmpeg and stops yielding.
        This allows the server to cancel an in-progress stream when the
        client reconnects with a new format_id.
        """
        video_url, audio_url = await self._get_urls()

        if audio_url:
            process = await asyncio.create_subprocess_exec(
                'ffmpeg',
                '-i', video_url,
                '-i', audio_url,
                '-c:v', 'copy',
                '-c:a', 'aac',
                '-b:a', '192k',
                '-g',   '60',
                '-f',   'mp4',
                '-movflags', 'frag_keyframe+empty_moov+faststart',
                '-frag_duration', '2000000',
                'pipe:1',
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.DEVNULL,
            )

            try:
                if prebuffer:
                    pre_buffer, buffered_mb = [], 0
                    while buffered_mb < PRE_BUFFER_MB:
                        if stop_event and stop_event.is_set():
                            return
                        try:
                            chunk = await asyncio.wait_for(
                                process.stdout.read(CHUNK_SIZE),
                                timeout=15.0
                            )
                        except asyncio.TimeoutError:
                            yield b''
                            continue
                        if not chunk:
                            for c in pre_buffer: yield c
                            return
                        pre_buffer.append(chunk)
                        buffered_mb += len(chunk) / (1024 * 1024)
                    for c in pre_buffer:
                        yield c
                else:
                    first = True
                    while first:
                        if stop_event and stop_event.is_set():
                            return
                        try:
                            chunk = await asyncio.wait_for(
                                process.stdout.read(CHUNK_SIZE),
                                timeout=15.0
                            )
                            if chunk:
                                yield chunk
                                first = False
                            else:
                                return
                        except asyncio.TimeoutError:
                            yield b''
                            continue

                while True:
                    if stop_event and stop_event.is_set():
                        return
                    try:
                        chunk = await asyncio.wait_for(
                            process.stdout.read(CHUNK_SIZE),
                            timeout=30.0
                        )
                    except asyncio.TimeoutError:
                        yield b''
                        continue
                    if not chunk:
                        break
                    yield chunk

            finally:
                try:
                    process.kill()
                    await process.wait()
                except Exception:
                    pass

        else:
            async with aiohttp.ClientSession() as session:
                async with session.get(video_url) as r:
                    async for chunk in r.content.iter_chunked(CHUNK_SIZE):
                        if stop_event and stop_event.is_set():
                            return
                        yield chunk


class YTVideoDownloader:
    """Download one video to a temporary directory as an MP4 file.

    ``executor`` and ``CHUNK_SIZE`` are not defined in this module; they are
    presumably provided by the ``_config`` wildcard import.
    """

    def __init__(self, link: str, format_id: str = 'best'):
        self.link      = link
        self.format_id = format_id

    @staticmethod
    def _progress_percent(status: dict) -> float:
        """Return the download percentage of a yt-dlp progress dict, capped at 99.

        The numeric ``downloaded_bytes`` / ``total_bytes`` (or
        ``total_bytes_estimate``) fields are preferred. ``_percent_str`` is
        only a fallback because it is display text: it may be wrapped in ANSI
        colour codes, which are stripped before parsing.

        Raises:
            ValueError: if only ``_percent_str`` is available and it is not
                a number.
        """
        done  = status.get('downloaded_bytes')
        total = status.get('total_bytes') or status.get('total_bytes_estimate')
        if done is not None and total:
            pct = done / total * 100
        else:
            raw = re.sub(r'\x1b\[[0-9;]*m', '', status.get('_percent_str', '0%'))
            pct = float(raw.strip().replace('%', ''))
        return min(pct, 99.0)

    def _download_sync(self, progress_callback=None) -> tuple[str, str]:
        """Download and remux the video (blocking).

        Args:
            progress_callback: optional callable receiving a percentage
                (at most 99) while downloading and when a file finishes.

        Returns:
            ``(mp4_path, tmp_dir)``; the caller owns ``tmp_dir`` and must
            delete it.

        Raises:
            RuntimeError: if yt-dlp fails (the original error is chained).
            FileNotFoundError: if no ``.mp4`` file exists afterwards.
            In both cases the temporary directory is removed first.
        """
        tmp_dir  = tempfile.mkdtemp()
        out_tmpl = os.path.join(tmp_dir, 'video.%(ext)s')

        ydl_opts = {
            'format': (
                f'{self.format_id}[ext=mp4]+bestaudio[ext=m4a]'
                f'/bestvideo[ext=mp4]+bestaudio[ext=m4a]'
                f'/best[ext=mp4]/best'
            ),
            'outtmpl':             out_tmpl,
            'merge_output_format': 'mp4',
            'quiet':               True,
            'no_warnings':         True,

            'concurrent_fragment_downloads': 4,

            'retries':          5,
            'fragment_retries': 5,

            'socket_timeout': 30,

            'postprocessors': [
                {
                    'key':            'FFmpegVideoRemuxer',
                    'preferedformat': 'mp4',
                },
            ],
            'postprocessor_args': {
                'ffmpeg': [
                    '-c:v', 'copy',
                    '-c:a', 'aac',
                    '-b:a', '192k',
                    '-movflags', '+faststart',
                ]
            },
        }

        if progress_callback:
            def hook(d):
                if d['status'] == 'downloading':
                    try:
                        pct = self._progress_percent(d)
                    except (TypeError, ValueError):
                        return  # no usable percentage in this update
                    try:
                        progress_callback(pct)
                    except Exception:
                        # Progress reporting is best-effort: a failing
                        # callback must not abort the download itself.
                        pass
                elif d['status'] == 'finished':
                    progress_callback(99)

            ydl_opts['progress_hooks'] = [hook]

        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([self.link])
        except Exception as e:
            shutil.rmtree(tmp_dir, ignore_errors=True)
            raise RuntimeError(f'yt-dlp failed: {e}') from e

        mp4_files = [
            f for f in os.listdir(tmp_dir)
            if f.endswith('.mp4')
        ]
        if not mp4_files:
            shutil.rmtree(tmp_dir, ignore_errors=True)
            raise FileNotFoundError('No MP4 found after download.')

        return os.path.join(tmp_dir, mp4_files[0]), tmp_dir

    @staticmethod
    def _discard_late_download(future) -> None:
        """Delete the temp directory of a download nobody is waiting for."""
        # Reading exception() also marks a failure as handled.
        if future.cancelled() or future.exception() is not None:
            return
        _, tmp_dir = future.result()
        shutil.rmtree(tmp_dir, ignore_errors=True)

    async def download(self, progress_callback=None) -> tuple[str, str]:
        """Run ``_download_sync`` on ``executor`` with a 10-minute limit.

        Returns:
            ``(mp4_path, tmp_dir)`` as produced by ``_download_sync``.

        Raises:
            TimeoutError: if the download takes longer than 600 seconds.
        """
        loop = asyncio.get_running_loop()
        job = loop.run_in_executor(executor, self._download_sync, progress_callback)
        try:
            # The worker thread cannot be interrupted, so the job is shielded
            # and kept referenced: if the caller stops waiting, the cleanup
            # callback removes whatever the thread eventually produces
            # instead of leaking the temporary directory.
            return await asyncio.wait_for(asyncio.shield(job), timeout=600)
        except asyncio.TimeoutError as e:
            job.add_done_callback(self._discard_late_download)
            raise TimeoutError('Download timed out after 10 minutes.') from e
        except asyncio.CancelledError:
            job.add_done_callback(self._discard_late_download)
            raise

    async def stream_file(self, filepath: str):
        """Return ``(async_chunk_generator, file_size)`` for ``filepath``.

        The generator yields the file in ``CHUNK_SIZE`` pieces using ordinary
        blocking reads.
        """
        file_size = os.path.getsize(filepath)

        async def generator():
            with open(filepath, 'rb') as f:
                while chunk := f.read(CHUNK_SIZE):
                    yield chunk

        return generator(), file_size


class YTPlaylist:
    def __init__(self, link: str):
        self.link = link

    def _fetch_sync(self) -> dict:
        ydl_opts = {
            'quiet':              True,
            'no_warnings':        True,
            'extract_flat':       False,
            'ignoreerrors':       True,
            'socket_timeout':     30,
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            return ydl.extract_info(self.link, download=False)

    def _fetch_flat_sync(self) -> dict:
        ydl_opts = {
            'quiet':          True,
            'no_warnings':    True,
            'extract_flat':   True,
            'ignoreerrors':   True,
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            return ydl.extract_info(self.link, download=False)

    async def fetch_flat(self) -> dict:
        loop = asyncio.get_event_loop()
        return await loop.run_in_executor(executor, self._fetch_flat_sync)

    async def fetch(self) -> dict:
        loop = asyncio.get_event_loop()
        return await loop.run_in_executor(executor, self._fetch_sync)

    def get_entries(self, info: dict) -> list[dict]:
        entries = []
        for i, entry in enumerate(info.get('entries', []), 1):
            if not entry:
                continue
            entries.append({
                'index':    i,
                'id':       entry.get('id', ''),
                'title':    entry.get('title', f'Video {i}'),
                'url':      entry.get('url') or f"https://www.youtube.com/watch?v={entry.get('id')}",
                'duration': entry.get('duration'),
                'uploader': entry.get('uploader', ''),
                'thumbnail': entry.get('thumbnail', ''),
            })
        return entries

    def summary(self, info: dict) -> dict:
        entries = self.get_entries(info)
        return {
            'title':       info.get('title', 'Playlist'),
            'uploader':    info.get('uploader') or info.get('channel', 'N/A'),
            'video_count': len(entries),
            'entries':     entries,
        }

async def generate_zip(self, format_id: str = 'best', progress_callback=None):
    info    = await self.fetch_flat()
    entries = self.get_entries(info)
    total   = len(entries)

    zip_buffer = io.BytesIO()

    for i, entry in enumerate(entries, 1):
        video_url_yt = entry['url']
        safe_title   = "".join(
            c for c in entry['title']
            if c.isascii() and (c.isalnum() or c in ' ._-')
        )[:60].strip() or f'video_{i}'

        if progress_callback:
            progress_callback(i, total, entry['title'])

        try:
            info_obj = YTVideoInfo(video_url_yt)
            vid_info = await info_obj.fetch()

            requested = vid_info.get('requested_formats', [])
            if len(requested) == 2:
                vid_url = requested[0]['url']
                aud_url = requested[1]['url']
            else:
                vid_url = vid_info.get('url')
                aud_url = None
        except Exception as e:
            print(f'Skipping {entry["title"]}: {e}')
            continue

        video_bytes = io.BytesIO()

        if aud_url:
            process = await asyncio.create_subprocess_exec(
                'ffmpeg',
                '-i', vid_url,
                '-i', aud_url,
                '-c:v', 'copy',
                '-c:a', 'aac',
                '-b:a', '192k',
                '-f',   'mp4',
                '-movflags', 'frag_keyframe+empty_moov+faststart',
                'pipe:1',
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.DEVNULL,
            )
            try:
                while True:
                    chunk = await process.stdout.read(CHUNK_SIZE)
                    if not chunk:
                        break
                    video_bytes.write(chunk)
            finally:
                try:
                    process.kill()
                    await process.wait()
                except Exception:
                    pass
        else:
            async with aiohttp.ClientSession() as session:
                async with session.get(vid_url) as r:
                    async for chunk in r.content.iter_chunked(CHUNK_SIZE):
                        video_bytes.write(chunk)

        video_bytes.seek(0)
        filename = f'{i:02d}. {safe_title}.mp4'
        with zipfile.ZipFile(zip_buffer, 'a', zipfile.ZIP_STORED) as zf:
            zf.writestr(filename, video_bytes.read())

        zip_buffer.seek(0)
        while True:
            chunk = zip_buffer.read(CHUNK_SIZE)
            if not chunk:
                break
            yield chunk

        zip_buffer.seek(0)
        zip_buffer.truncate(0)

    zip_buffer.seek(0)
    remainder = zip_buffer.read()
    if remainder:
        yield remainder