diff --git a/test/test_downloader_external.py b/test/test_downloader_external.py index 62f7d45d49..9db485af9b 100644 --- a/test/test_downloader_external.py +++ b/test/test_downloader_external.py @@ -8,8 +8,15 @@ import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import http.cookiejar +import http.server +import ipaddress +import pytest +import json +import tempfile +import threading from test.helper import FakeYDL +from yt_dlp.networking.common import HTTPHeaderDict from yt_dlp.downloader.external import ( Aria2cFD, AxelFD, @@ -75,34 +82,113 @@ class TestWgetFD(unittest.TestCase): def test_make_cmd(self): with FakeYDL() as ydl: downloader = WgetFD(ydl, {}) - self.assertNotIn('--load-cookies', downloader._make_cmd('test', TEST_INFO)) - # Test cookiejar tempfile arg is added ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE)) - self.assertIn('--load-cookies', downloader._make_cmd('test', TEST_INFO)) + assert '--load-cookies' in downloader._make_cmd('test', TEST_INFO) -class TestCurlFD(unittest.TestCase): - def test_make_cmd(self): +class HTTPTestHandler(http.server.BaseHTTPRequestHandler): + def do_GET(self, /): + if self.path.startswith('/redirect'): + target = self.headers.get('X-Redirect-Location') + if not target: + self.send_error(500) + return + self.send_response(301) + self.send_header('Location', target) + self.end_headers() + + elif self.path == '/headers': + self.send_response(200) + self.end_headers() + self.wfile.write(json.dumps(list(self.headers.items())).encode()) + +class HTTPTestServer(http.server.HTTPServer): + @property + def address(self, /): + return ipaddress.ip_address(self.server_address[0]) + + @property + def uri(self, /): + addr, port, *_ = self.server_address + if ':' in addr: + addr = f'[{addr}]' + return f'http://{addr}:{port}' + + def __enter__(self, /): + result = super().__enter__() + thread = threading.Thread(target=self.serve_forever) + thread.start() + return result + + def __exit__(self, /, *exc): + self.shutdown() + return super().__exit__(*exc) + + +class TestDownloaderCookieBehavior: + @pytest.mark.parametrize('downloader_cls', [ + pytest.param(CurlFD, marks=pytest.mark.skipif(not CurlFD.available() or CurlFD._curl_version < CurlFD._MIN_VERSION_FOR_STDIN_COOKIES, reason='curl unavailable or too old')), + pytest.param(WgetFD, marks=pytest.mark.skipif(not WgetFD.available(), reason='wget unavailable')), + pytest.param(Aria2cFD, marks=pytest.mark.skipif(not Aria2cFD.available(), reason='aria2c unavailable')), + ]) + def test_cookie_behavior(self, /, downloader_cls): with FakeYDL() as ydl: - downloader = CurlFD(ydl, {}) - self.assertNotIn('--cookie', downloader._make_cmd('test', TEST_INFO)) - # Test cookie header is added - ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE)) - self.assertIn('--cookie', downloader._make_cmd('test', TEST_INFO)) - self.assertIn('test=ytdlp', downloader._make_cmd('test', TEST_INFO)) + downloader = downloader_cls(ydl, {}) + + with HTTPTestServer(('localhost', 0), HTTPTestHandler) as server_a: + second_addr = server_a.address + 1 + if not second_addr.is_loopback: + second_addr = server_a.address - 1 + assert second_addr.is_loopback, f'failed to find derived loopback address for {server_a.address}' + + ydl.cookiejar.set_cookie(http.cookiejar.Cookie( + 1, + 'c', + 'test', + server_a.server_address[1], + True, + str(server_a.address), + True, + False, + '/', + False, + False, + 0, + True, + None, + None, + {}, + )) + + with tempfile.NamedTemporaryFile(delete=False) as file: + file.close() + assert downloader.real_download(file.name, {'url': f'{server_a.uri}/headers'}), 'Expected download (/headers) to succeed' + + with open(file.name, 'rb') as f: + data = HTTPHeaderDict(json.load(f)) + assert 'c=test' in data.get('Cookie', '').split(';'), 'Expected cookie to be set in initial request' + + with HTTPTestServer((str(second_addr), 0), HTTPTestHandler) as server_b: + assert downloader.real_download(file.name, { + 'url': f'{server_a.uri}/redirect', + 'http_headers': { + 'X-Redirect-Location': f'{server_b.uri}/headers', + }, + }), 'Expected download (/redirect) to succeed' + + with open(file.name, 'rb') as f: + data = HTTPHeaderDict(json.load(f)) + + assert data.get('Cookie') is None, 'Expected cookie to be unset in redirected request' class TestAria2cFD(unittest.TestCase): def test_make_cmd(self): with FakeYDL() as ydl: downloader = Aria2cFD(ydl, {}) - downloader._make_cmd('test', TEST_INFO) - self.assertFalse(hasattr(downloader, '_cookies_tempfile')) - - # Test cookiejar tempfile arg is added ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE)) cmd = downloader._make_cmd('test', TEST_INFO) - self.assertIn(f'--load-cookies={downloader._cookies_tempfile}', cmd) + assert f'--load-cookies={downloader._cookies_tempfile}' in cmd @unittest.skipUnless(FFmpegFD.available(), 'ffmpeg not found') diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 79c10485c6..1e9e63c13c 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -1,5 +1,6 @@ import enum import functools +import io import json import os import re @@ -16,6 +17,7 @@ from ..utils import ( Popen, RetryManager, _configuration_args, + _get_exe_version_output, check_executable, classproperty, cli_bool_option, @@ -26,6 +28,7 @@ from ..utils import ( find_available_port, remove_end, traverse_obj, + version_tuple, ) @@ -136,7 +139,9 @@ class ExternalFD(FragmentFD): self._cookies_tempfile = tmp_cookies.name self.to_screen(f'[download] Writing temporary cookies file to "{self._cookies_tempfile}"') # real_download resets _cookies_tempfile; if it's None then save() will write to cookiejar.filename - self.ydl.cookiejar.save(self._cookies_tempfile) + self.ydl.cookiejar.save(self._cookies_tempfile, True, True) + with open(self.ydl.cookiejar.filename or self._cookies_tempfile, "r") as file: + print("cookies", repr(file.read())) return self.ydl.cookiejar.filename or self._cookies_tempfile def _call_downloader(self, tmpfilename, info_dict): @@ -195,12 +200,39 @@ class ExternalFD(FragmentFD): class CurlFD(ExternalFD): AVAILABLE_OPT = '-V' _CAPTURE_STDERR = False # curl writes the progress to stderr + _MIN_VERSION_FOR_STDIN_COOKIES = (7, 59) + + @classmethod + def available(cls, path=None): + if path is None: + path = 'curl' + output = _get_exe_version_output(path, ['-V']) + if not output: + return False + parts = output.split(' ', maxsplit=2) + if len(parts) < 3: + return False + + cls.exe = path + cls._curl_version = version_tuple(parts[1]) + return path def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed'] - cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url']) - if cookie_header: - cmd += ['--cookie', cookie_header] + + if self._curl_version >= self._MIN_VERSION_FOR_STDIN_COOKIES: + # Supports `--cookies -` + cmd += ['--cookie', '-'] + elif os.path.islink('/dev/fd/0'): + cmd += ['--cookie', '/dev/fd/0'] + else: + cookies_file = self._write_cookies() + if '=' in cookies_file: + # XXX: what to raise here? + raise RuntimeError('curl version too old or temp directory contains `=`; please use another downloader or update curl') + assert cookies_file != '-' + cmd += ['--cookie', cookies_file] + if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): cmd += ['--header', f'{key}: {val}'] @@ -222,6 +254,16 @@ class CurlFD(ExternalFD): cmd += ['--', info_dict['url']] return cmd + def _call_process(self, cmd, info_dict): + if self._curl_version > self._MIN_VERSION_FOR_STDIN_COOKIES or os.path.islink('/dev/fd/0'): + # Supports `--cookies -` or reading from device file as `--cookies /dev/fd/0` + buffer = io.StringIO() + self.ydl.cookiejar._really_save(buffer, True, True) + return Popen.run(cmd, text=True, input=buffer.getvalue()) + + # Cookies already passed via cookiesfile + return Popen.run(cmd, text=True) + class AxelFD(ExternalFD): AVAILABLE_OPT = '-V' @@ -244,8 +286,7 @@ class WgetFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-O', tmpfilename, '-nv', '--compression=auto'] - if self.ydl.cookiejar.get_cookie_header(info_dict['url']): - cmd += ['--load-cookies', self._write_cookies()] + cmd += ['--load-cookies', self._write_cookies()] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): cmd += ['--header', f'{key}: {val}'] @@ -301,8 +342,7 @@ class Aria2cFD(ExternalFD): else: cmd += ['--min-split-size', '1M'] - if self.ydl.cookiejar.get_cookie_header(info_dict['url']): - cmd += [f'--load-cookies={self._write_cookies()}'] + cmd += [f'--load-cookies={self._write_cookies()}'] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): cmd += ['--header', f'{key}: {val}'] diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index b4b19f0603..56847961ef 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -915,10 +915,12 @@ class Popen(subprocess.Popen): self.wait(timeout=timeout) @classmethod - def run(cls, *args, timeout=None, **kwargs): + def run(cls, *args, timeout=None, input=None, **kwargs): + if input is not None and kwargs.get('stdin') is None: + kwargs['stdin'] = subprocess.PIPE with cls(*args, **kwargs) as proc: default = '' if proc.__text_mode else b'' - stdout, stderr = proc.communicate_or_kill(timeout=timeout) + stdout, stderr = proc.communicate_or_kill(input=input, timeout=timeout) return stdout or default, stderr or default, proc.returncode