# Python · 506 lines · 18 KiB
import yt_dlp
|
|
import aiohttp
|
|
import asyncio
|
|
from _config import *
|
|
import zipfile
|
|
import io
|
|
|
|
|
|
class YTVideoInfo:
|
|
def __init__(self, link: str):
|
|
self.link = link
|
|
|
|
def _fetch_sync(self) -> dict:
|
|
with cache_lock:
|
|
if self.link in info_cache:
|
|
return info_cache[self.link]
|
|
|
|
with yt_dlp.YoutubeDL({'quiet': True, 'no_warnings': True}) as ydl:
|
|
info = ydl.extract_info(self.link, download=False)
|
|
|
|
with cache_lock:
|
|
info_cache[self.link] = info
|
|
|
|
return info
|
|
|
|
async def fetch(self) -> dict:
|
|
loop = asyncio.get_event_loop()
|
|
return await loop.run_in_executor(executor, self._fetch_sync)
|
|
|
|
def _validate_sync(self) -> tuple[bool, str]:
|
|
try:
|
|
info = self._fetch_sync()
|
|
if not info:
|
|
return False, 'Could not fetch video info.'
|
|
return True, ''
|
|
except yt_dlp.utils.DownloadError as e:
|
|
msg = str(e)
|
|
if 'Private video' in msg: return False, 'This video is private.'
|
|
if 'age' in msg.lower(): return False, 'This video is age-restricted.'
|
|
if 'unavailable' in msg.lower(): return False, 'This video is unavailable.'
|
|
return False, 'Could not load this video.'
|
|
|
|
async def validate(self) -> tuple[bool, str]:
|
|
loop = asyncio.get_event_loop()
|
|
return await loop.run_in_executor(executor, self._validate_sync)
|
|
|
|
def get_formats(self, info: dict) -> list[dict]:
|
|
seen, formats = set(), []
|
|
for f in reversed(info['formats']):
|
|
height = f.get('height')
|
|
fps = f.get('fps')
|
|
ext = f.get('ext', '')
|
|
if not height or f.get('vcodec', 'none') == 'none':
|
|
continue
|
|
key = (height, fps)
|
|
if key in seen:
|
|
continue
|
|
seen.add(key)
|
|
size = f.get('filesize') or f.get('filesize_approx')
|
|
size_str = None
|
|
if size:
|
|
if size >= 1_073_741_824:
|
|
size_str = f"{size / 1_073_741_824:.1f} GB"
|
|
elif size >= 1_048_576:
|
|
size_str = f"{size / 1_048_576:.1f} MB"
|
|
else:
|
|
size_str = f"{size / 1024:.0f} KB"
|
|
|
|
audio_size = 0
|
|
for af in info['formats']:
|
|
if (af.get('vcodec') == 'none'
|
|
and af.get('acodec') != 'none'
|
|
and af.get('ext') == 'm4a'):
|
|
audio_size = af.get('filesize') or af.get('filesize_approx') or 0
|
|
break
|
|
|
|
total_size = (size or 0) + audio_size
|
|
|
|
label = f"{height}p"
|
|
if fps and fps > 30:
|
|
label += f" {int(fps)}fps"
|
|
label += f" ({ext.upper()})"
|
|
if size_str:
|
|
label += f" ~ {size_str}"
|
|
|
|
formats.append({
|
|
'id': f['format_id'],
|
|
'label': label,
|
|
'height': height,
|
|
'ext': ext,
|
|
'fps': fps,
|
|
'vcodec': f.get('vcodec', ''),
|
|
'acodec': f.get('acodec', ''),
|
|
'filesize': total_size or None,
|
|
})
|
|
|
|
return sorted(formats, key=lambda x: x['height'], reverse=True)
|
|
|
|
def get_subtitles(self, info: dict) -> list[dict]:
|
|
subtitles = []
|
|
for lang, tracks in info.get('subtitles', {}).items():
|
|
if any(t.get('ext') == 'vtt' for t in tracks):
|
|
subtitles.append({'lang': lang, 'label': f"{lang} (manual)", 'auto': '0'})
|
|
for lang, tracks in info.get('automatic_captions', {}).items():
|
|
if any(t.get('ext') == 'vtt' for t in tracks):
|
|
subtitles.append({'lang': lang, 'label': f"{lang} (auto)", 'auto': '1'})
|
|
return subtitles
|
|
|
|
def get_subtitle_vtt_url(self, info: dict, lang: str, auto: bool) -> str | None:
|
|
source = info.get('automatic_captions' if auto else 'subtitles', {})
|
|
tracks = source.get(lang, [])
|
|
return next((t['url'] for t in tracks if t.get('ext') == 'vtt'), None)
|
|
|
|
def summary(self, info: dict) -> dict:
|
|
raw_date = info.get('upload_date', '')
|
|
n = info.get('view_count', 0)
|
|
|
|
def fmt_views(n):
|
|
if not n: return 'N/A'
|
|
if n >= 1_000_000: return f"{n/1_000_000:.1f}M"
|
|
if n >= 1_000: return f"{n/1_000:.1f}K"
|
|
return str(n)
|
|
|
|
def fmt_duration(s):
|
|
if not s: return 'N/A'
|
|
h, m, sec = s // 3600, (s % 3600) // 60, s % 60
|
|
return f"{h}:{m:02}:{sec:02}" if h else f"{m}:{sec:02}"
|
|
|
|
return {
|
|
'title': info.get('title', 'Video'),
|
|
'uploader': info.get('uploader', 'N/A'),
|
|
'views': fmt_views(n),
|
|
'duration': fmt_duration(info.get('duration')),
|
|
'upload_date': f"{raw_date[:4]}-{raw_date[4:6]}-{raw_date[6:]}" if raw_date else 'N/A',
|
|
'description': info.get('description', 'No description.'),
|
|
}
|
|
|
|
|
|
|
|
class YTVideoStream:
|
|
def __init__(self, link: str, format_id: str = 'best'):
|
|
self.link = link
|
|
self.format_id = format_id
|
|
|
|
def _get_urls_sync(self) -> tuple[str, str | None]:
|
|
ydl_opts = {
|
|
'format': f'{self.format_id}+bestaudio[ext=m4a]/bestvideo+bestaudio',
|
|
'quiet': True,
|
|
}
|
|
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
|
info = ydl.extract_info(self.link, download=False)
|
|
|
|
if 'requested_formats' in info and len(info['requested_formats']) == 2:
|
|
return (info['requested_formats'][0]['url'],
|
|
info['requested_formats'][1]['url'])
|
|
return info['url'], None
|
|
|
|
async def _get_urls(self) -> tuple[str, str | None]:
|
|
loop = asyncio.get_event_loop()
|
|
return await loop.run_in_executor(executor, self._get_urls_sync)
|
|
|
|
async def generate(
|
|
self,
|
|
prebuffer: bool = True,
|
|
stop_event: asyncio.Event | None = None,
|
|
):
|
|
"""
|
|
Yields MP4 chunks.
|
|
stop_event — when set, cleanly terminates FFmpeg and stops yielding.
|
|
This allows the server to cancel an in-progress stream when the
|
|
client reconnects with a new format_id.
|
|
"""
|
|
video_url, audio_url = await self._get_urls()
|
|
|
|
if audio_url:
|
|
process = await asyncio.create_subprocess_exec(
|
|
'ffmpeg',
|
|
'-i', video_url,
|
|
'-i', audio_url,
|
|
'-c:v', 'copy',
|
|
'-c:a', 'aac',
|
|
'-b:a', '192k',
|
|
'-g', '60',
|
|
'-f', 'mp4',
|
|
'-movflags', 'frag_keyframe+empty_moov+faststart',
|
|
'-frag_duration', '2000000',
|
|
'pipe:1',
|
|
stdout=asyncio.subprocess.PIPE,
|
|
stderr=asyncio.subprocess.DEVNULL,
|
|
)
|
|
|
|
try:
|
|
if prebuffer:
|
|
pre_buffer, buffered_mb = [], 0
|
|
while buffered_mb < PRE_BUFFER_MB:
|
|
if stop_event and stop_event.is_set():
|
|
return
|
|
try:
|
|
chunk = await asyncio.wait_for(
|
|
process.stdout.read(CHUNK_SIZE),
|
|
timeout=15.0
|
|
)
|
|
except asyncio.TimeoutError:
|
|
yield b''
|
|
continue
|
|
if not chunk:
|
|
for c in pre_buffer: yield c
|
|
return
|
|
pre_buffer.append(chunk)
|
|
buffered_mb += len(chunk) / (1024 * 1024)
|
|
for c in pre_buffer:
|
|
yield c
|
|
else:
|
|
first = True
|
|
while first:
|
|
if stop_event and stop_event.is_set():
|
|
return
|
|
try:
|
|
chunk = await asyncio.wait_for(
|
|
process.stdout.read(CHUNK_SIZE),
|
|
timeout=15.0
|
|
)
|
|
if chunk:
|
|
yield chunk
|
|
first = False
|
|
else:
|
|
return
|
|
except asyncio.TimeoutError:
|
|
yield b''
|
|
continue
|
|
|
|
while True:
|
|
if stop_event and stop_event.is_set():
|
|
return
|
|
try:
|
|
chunk = await asyncio.wait_for(
|
|
process.stdout.read(CHUNK_SIZE),
|
|
timeout=30.0
|
|
)
|
|
except asyncio.TimeoutError:
|
|
yield b''
|
|
continue
|
|
if not chunk:
|
|
break
|
|
yield chunk
|
|
|
|
finally:
|
|
try:
|
|
process.kill()
|
|
await process.wait()
|
|
except Exception:
|
|
pass
|
|
|
|
else:
|
|
async with aiohttp.ClientSession() as session:
|
|
async with session.get(video_url) as r:
|
|
async for chunk in r.content.iter_chunked(CHUNK_SIZE):
|
|
if stop_event and stop_event.is_set():
|
|
return
|
|
yield chunk
|
|
|
|
|
|
class YTVideoDownloader:
|
|
def __init__(self, link: str, format_id: str = 'best'):
|
|
self.link = link
|
|
self.format_id = format_id
|
|
|
|
def _download_sync(self, progress_callback=None) -> tuple[str, str]:
|
|
tmp_dir = tempfile.mkdtemp()
|
|
out_tmpl = os.path.join(tmp_dir, 'video.%(ext)s')
|
|
|
|
ydl_opts = {
|
|
'format': (
|
|
f'{self.format_id}[ext=mp4]+bestaudio[ext=m4a]'
|
|
f'/bestvideo[ext=mp4]+bestaudio[ext=m4a]'
|
|
f'/best[ext=mp4]/best'
|
|
),
|
|
'outtmpl': out_tmpl,
|
|
'merge_output_format': 'mp4',
|
|
'quiet': True,
|
|
'no_warnings': True,
|
|
|
|
'concurrent_fragment_downloads': 4,
|
|
|
|
'retries': 5,
|
|
'fragment_retries': 5,
|
|
|
|
'socket_timeout': 30,
|
|
|
|
'postprocessors': [
|
|
{
|
|
'key': 'FFmpegVideoRemuxer',
|
|
'preferedformat': 'mp4',
|
|
},
|
|
],
|
|
'postprocessor_args': {
|
|
'ffmpeg': [
|
|
'-c:v', 'copy',
|
|
'-c:a', 'aac',
|
|
'-b:a', '192k',
|
|
'-movflags', '+faststart',
|
|
]
|
|
},
|
|
}
|
|
|
|
if progress_callback:
|
|
def hook(d):
|
|
if d['status'] == 'downloading':
|
|
raw = d.get('_percent_str', '0%').strip().replace('%', '')
|
|
try:
|
|
pct = min(float(raw), 99.0)
|
|
progress_callback(pct)
|
|
except Exception:
|
|
pass
|
|
elif d['status'] == 'finished':
|
|
progress_callback(99)
|
|
|
|
ydl_opts['progress_hooks'] = [hook]
|
|
|
|
try:
|
|
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
|
ydl.download([self.link])
|
|
except Exception as e:
|
|
shutil.rmtree(tmp_dir, ignore_errors=True)
|
|
raise RuntimeError(f'yt-dlp failed: {e}')
|
|
|
|
mp4_files = [
|
|
f for f in os.listdir(tmp_dir)
|
|
if f.endswith('.mp4')
|
|
]
|
|
if not mp4_files:
|
|
shutil.rmtree(tmp_dir, ignore_errors=True)
|
|
raise FileNotFoundError('No MP4 found after download.')
|
|
|
|
return os.path.join(tmp_dir, mp4_files[0]), tmp_dir
|
|
|
|
async def download(self, progress_callback=None) -> tuple[str, str]:
|
|
loop = asyncio.get_event_loop()
|
|
try:
|
|
return await asyncio.wait_for(
|
|
loop.run_in_executor(
|
|
executor,
|
|
lambda: self._download_sync(progress_callback)
|
|
),
|
|
timeout=600
|
|
)
|
|
except asyncio.TimeoutError:
|
|
raise TimeoutError('Download timed out after 10 minutes.')
|
|
|
|
async def stream_file(self, filepath: str):
|
|
file_size = os.path.getsize(filepath)
|
|
|
|
async def generator():
|
|
with open(filepath, 'rb') as f:
|
|
while True:
|
|
chunk = f.read(CHUNK_SIZE)
|
|
if not chunk:
|
|
break
|
|
yield chunk
|
|
|
|
return generator(), file_size
|
|
|
|
|
|
class YTPlaylist:
    """Playlist metadata and ZIP export built on yt-dlp.

    ``executor`` and ``CHUNK_SIZE`` are not defined in this class; they are
    expected to come from the ``_config`` star import (TODO confirm).
    """

    class _ZipSink:
        """Write-only, non-seekable byte sink for incremental ZIP output.

        It deliberately offers no ``tell``/``seek`` so that ``zipfile``
        treats it as an unseekable stream and never tries to go back and
        patch bytes that have already been handed to the client.
        """

        def __init__(self):
            self._parts = []

        def write(self, data) -> int:
            self._parts.append(bytes(data))
            return len(data)

        def flush(self) -> None:
            pass

        def drain(self) -> list:
            """Return everything written since the last drain and forget it."""
            parts, self._parts = self._parts, []
            return parts

    def __init__(self, link: str):
        self.link = link

    def _extract(self, **overrides) -> dict:
        """Run a metadata-only yt-dlp extraction with the shared base options."""
        ydl_opts = {
            'quiet': True,
            'no_warnings': True,
            'ignoreerrors': True,
            **overrides,
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            return ydl.extract_info(self.link, download=False)

    def _fetch_sync(self) -> dict:
        """Extract the playlist with every entry fully resolved (blocking)."""
        return self._extract(extract_flat=False, socket_timeout=30)

    def _fetch_flat_sync(self) -> dict:
        """Extract the playlist with ``extract_flat`` enabled (blocking)."""
        return self._extract(extract_flat=True)

    async def fetch_flat(self) -> dict:
        """Run :meth:`_fetch_flat_sync` on ``executor``."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(executor, self._fetch_flat_sync)

    async def fetch(self) -> dict:
        """Run :meth:`_fetch_sync` on ``executor``."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(executor, self._fetch_sync)

    def get_entries(self, info: dict) -> list[dict]:
        """Normalise the playlist entries of an info dict.

        Falsy entries are skipped, but ``index`` still reflects the 1-based
        position in the original list. A missing/None ``info`` or
        ``entries`` yields ``[]``.
        """
        entries = []
        for i, entry in enumerate((info or {}).get('entries') or [], 1):
            if not entry:
                continue
            entries.append({
                'index': i,
                'id': entry.get('id', ''),
                'title': entry.get('title', f'Video {i}'),
                'url': entry.get('url') or f"https://www.youtube.com/watch?v={entry.get('id')}",
                'duration': entry.get('duration'),
                'uploader': entry.get('uploader', ''),
                'thumbnail': entry.get('thumbnail', ''),
            })
        return entries

    def summary(self, info: dict) -> dict:
        """Return title, uploader, entry count and the normalised entries."""
        entries = self.get_entries(info)
        return {
            'title': info.get('title', 'Playlist'),
            'uploader': info.get('uploader') or info.get('channel', 'N/A'),
            'video_count': len(entries),
            'entries': entries,
        }

    @staticmethod
    def _zip_member_name(index: int, title) -> str:
        """Build the archive file name: ``NN. <ascii-safe title>.mp4``."""
        safe_title = "".join(
            c for c in (title or '')
            if c.isascii() and (c.isalnum() or c in ' ._-')
        )[:60].strip() or f'video_{index}'
        return f'{index:02d}. {safe_title}.mp4'

    @staticmethod
    async def _download_media(vid_url: str, aud_url) -> bytes:
        """Return one video as MP4 bytes.

        With a separate audio URL both inputs are muxed through FFmpeg;
        otherwise ``vid_url`` is downloaded as-is.
        """
        chunks = []
        if aud_url:
            process = await asyncio.create_subprocess_exec(
                'ffmpeg',
                '-i', vid_url,
                '-i', aud_url,
                '-c:v', 'copy',
                '-c:a', 'aac',
                '-b:a', '192k',
                '-f', 'mp4',
                '-movflags', 'frag_keyframe+empty_moov+faststart',
                'pipe:1',
                # Keep FFmpeg from reading the server's stdin.
                stdin=asyncio.subprocess.DEVNULL,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.DEVNULL,
            )
            try:
                while chunk := await process.stdout.read(CHUNK_SIZE):
                    chunks.append(chunk)
            finally:
                if process.returncode is None:
                    try:
                        process.kill()
                    except ProcessLookupError:
                        # The process exited between the check and the kill.
                        pass
                await process.wait()
        else:
            # aiohttp's default total timeout (5 minutes) would abort large
            # downloads; only bound the gap between reads instead.
            timeout = aiohttp.ClientTimeout(total=None, sock_read=30)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.get(vid_url) as r:
                    # Do not store an HTTP error body as an .mp4 file.
                    r.raise_for_status()
                    async for chunk in r.content.iter_chunked(CHUNK_SIZE):
                        chunks.append(chunk)
        return b''.join(chunks)

    @staticmethod
    def _slices(parts):
        """Yield the given byte strings cut into pieces of at most CHUNK_SIZE."""
        for part in parts:
            for start in range(0, len(part), CHUNK_SIZE):
                yield part[start:start + CHUNK_SIZE]

    async def generate_zip(self, format_id: str = 'best', progress_callback=None):
        """Yield the bytes of one ZIP archive containing every playlist video.

        A single ``ZipFile`` stays open for the whole run and writes into a
        non-seekable sink that is drained after each video. Re-opening the
        archive on an emptied buffer for every entry would emit a series of
        independent one-file archives, of which a reader sees only the
        last. The central directory is sent once, at the very end.

        Each video is held in memory while it is downloaded. Entries whose
        info or media cannot be fetched are skipped with a printed notice.
        An empty playlist still produces a valid (empty) archive.

        Args:
            format_id: accepted for interface compatibility.
                NOTE(review): it is not used when resolving the media URLs;
                the default yt-dlp selection of ``YTVideoInfo`` applies —
                confirm whether callers expect it to be honoured.
            progress_callback: optional callable invoked as
                ``progress_callback(position, total, title)`` before each
                video is processed.
        """
        info = await self.fetch_flat()
        entries = self.get_entries(info)
        total = len(entries)

        sink = self._ZipSink()
        with zipfile.ZipFile(sink, 'w', zipfile.ZIP_STORED) as zf:
            for i, entry in enumerate(entries, 1):
                if progress_callback:
                    progress_callback(i, total, entry['title'])

                try:
                    vid_info = await YTVideoInfo(entry['url']).fetch()
                    requested = vid_info.get('requested_formats', [])
                    if len(requested) == 2:
                        vid_url = requested[0]['url']
                        aud_url = requested[1]['url']
                    else:
                        vid_url = vid_info.get('url')
                        aud_url = None
                    if not vid_url:
                        raise ValueError('no media URL found')
                except Exception as e:
                    print(f'Skipping {entry["title"]}: {e}')
                    continue

                try:
                    payload = await self._download_media(vid_url, aud_url)
                except aiohttp.ClientError as e:
                    print(f'Skipping {entry["title"]}: {e}')
                    continue

                zf.writestr(self._zip_member_name(i, entry['title']), payload)
                del payload  # release the video before fetching the next one

                for piece in self._slices(sink.drain()):
                    yield piece

        # Closing the archive wrote the central directory into the sink.
        for piece in self._slices(sink.drain()):
            yield piece