diff --git a/README.md b/README.md index 3b4d82e4aa..6941f4684d 100644 --- a/README.md +++ b/README.md @@ -2272,8 +2272,6 @@ with yt_dlp.YoutubeDL(ydl_opts) as ydl: * **Multi-threaded fragment downloads**: Download multiple fragments of m3u8/mpd videos in parallel. Use `--concurrent-fragments` (`-N`) option to set the number of threads used -* **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH(mpd) and HLS(m3u8) formats - * **New and fixed extractors**: Many new extractors have been added and a lot of existing ones have been fixed. See the [changelog](Changelog.md) or the [list of supported sites](supportedsites.md) * **New MSOs**: Philo, Spectrum, SlingTV, Cablevision, RCN etc. @@ -2328,7 +2326,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu * When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this * `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi` * yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior -* ~~yt-dlp tries to parse the external downloader outputs into the standard progress output if possible (Currently implemented: [aria2c](https://github.com/yt-dlp/yt-dlp/issues/5931)). You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is~~ +* (Not currently implemented) ~~yt-dlp tries to parse the external downloader outputs into the standard progress output if possible. You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is~~ * yt-dlp versions from 2021.09.01 to 2022.11.11 (inclusive) applied `--match-filters` to nested playlists. This was an unintentional side-effect of [8f18ac](https://github.com/yt-dlp/yt-dlp/commit/8f18aca8717bb0dd49054555af8d386e5eda3a88) and is fixed in [d7b460](https://github.com/yt-dlp/yt-dlp/commit/d7b460d0e5fc710950582baed2e3fc616ed98a80). Use `--compat-options playlist-match-filter` to revert this * yt-dlp versions from 2021.11.10 to 2023.06.21 (inclusive) estimated `filesize_approx` values for fragmented/manifest formats. This was added for convenience in [f2fe69](https://github.com/yt-dlp/yt-dlp/commit/f2fe69c7b0d208bdb1f6292b4ae92bc1e1a7444a), but was reverted in [0dff8e](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) due to the potentially extreme inaccuracy of the estimated values. Use `--compat-options manifest-filesize-approx` to keep extracting the estimated values * yt-dlp uses modern http client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to prefer the legacy http handler (`urllib`) to be used for standard http requests. diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 1e9e63c13c..37a42af91a 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -1,17 +1,14 @@ import enum import functools import io -import json import os import re import subprocess import sys import tempfile import time -import uuid from .fragment import FragmentFD -from ..networking import Request from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor from ..utils import ( Popen, @@ -25,7 +22,6 @@ from ..utils import ( cli_valueless_option, determine_ext, encodeArgument, - find_available_port, remove_end, traverse_obj, version_tuple, @@ -309,38 +305,17 @@ class WgetFD(ExternalFD): class Aria2cFD(ExternalFD): AVAILABLE_OPT = '-v' - SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'dash_frag_urls', 'm3u8_frag_urls') - - @staticmethod - def supports_manifest(manifest): - UNSUPPORTED_FEATURES = [ - r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [1] - # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 - ] - check_results = (not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES) - return all(check_results) + SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps') @staticmethod def _aria2c_filename(fn): return fn if os.path.isabs(fn) else f'.{os.path.sep}{fn}' - def _call_downloader(self, tmpfilename, info_dict): - # FIXME: Disabled due to https://github.com/yt-dlp/yt-dlp/issues/5931 - if False and 'no-external-downloader-progress' not in self.params.get('compat_opts', []): - info_dict['__rpc'] = { - 'port': find_available_port() or 19190, - 'secret': str(uuid.uuid4()), - } - return super()._call_downloader(tmpfilename, info_dict) - def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-c', '--no-conf', '--console-log-level=warn', '--summary-interval=0', '--download-result=hide', - '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16'] - if 'fragments' in info_dict: - cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true'] - else: - cmd += ['--min-split-size', '1M'] + '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16', + '--min-split-size', '1M'] cmd += [f'--load-cookies={self._write_cookies()}'] if info_dict.get('http_headers') is not None: @@ -354,12 +329,6 @@ class Aria2cFD(ExternalFD): cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=') cmd += self._configuration_args() - if '__rpc' in info_dict: - cmd += [ - '--enable-rpc', - f'--rpc-listen-port={info_dict["__rpc"]["port"]}', - f'--rpc-secret={info_dict["__rpc"]["secret"]}'] - # aria2c strips out spaces from the beginning/end of filenames and paths. # We work around this issue by adding a "./" to the beginning of the # filename and relative path, and adding a "/" at the end of the path. @@ -369,106 +338,17 @@ class Aria2cFD(ExternalFD): dn = os.path.dirname(tmpfilename) if dn: cmd += ['--dir', self._aria2c_filename(dn) + os.path.sep] - if 'fragments' not in info_dict: - cmd += ['--out', self._aria2c_filename(os.path.basename(tmpfilename))] - cmd += ['--auto-file-renaming=false'] - if 'fragments' in info_dict: - cmd += ['--uri-selector=inorder'] - url_list_file = f'{tmpfilename}.frag.urls' - url_list = [] - for frag_index, fragment in enumerate(info_dict['fragments']): - fragment_filename = f'{os.path.basename(tmpfilename)}-Frag{frag_index}' - url_list.append('{}\n\tout={}'.format(fragment['url'], self._aria2c_filename(fragment_filename))) - stream, _ = self.sanitize_open(url_list_file, 'wb') - stream.write('\n'.join(url_list).encode()) - stream.close() - cmd += ['-i', self._aria2c_filename(url_list_file)] - else: - cmd += ['--', info_dict['url']] + cmd += [ + '--out', + self._aria2c_filename(os.path.basename(tmpfilename)), + '--auto-file-renaming=false', + '--', + info_dict['url'], + ] + return cmd - def aria2c_rpc(self, rpc_port, rpc_secret, method, params=()): - # Does not actually need to be UUID, just unique - sanitycheck = str(uuid.uuid4()) - d = json.dumps({ - 'jsonrpc': '2.0', - 'id': sanitycheck, - 'method': method, - 'params': [f'token:{rpc_secret}', *params], - }).encode() - request = Request( - f'http://localhost:{rpc_port}/jsonrpc', - data=d, headers={ - 'Content-Type': 'application/json', - 'Content-Length': f'{len(d)}', - }, proxies={'all': None}) - with self.ydl.urlopen(request) as r: - resp = json.load(r) - assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server' - return resp['result'] - - def _call_process(self, cmd, info_dict): - if '__rpc' not in info_dict: - return super()._call_process(cmd, info_dict) - - send_rpc = functools.partial(self.aria2c_rpc, info_dict['__rpc']['port'], info_dict['__rpc']['secret']) - started = time.time() - - fragmented = 'fragments' in info_dict - frag_count = len(info_dict['fragments']) if fragmented else 1 - status = { - 'filename': info_dict.get('_filename'), - 'status': 'downloading', - 'elapsed': 0, - 'downloaded_bytes': 0, - 'fragment_count': frag_count if fragmented else None, - 'fragment_index': 0 if fragmented else None, - } - self._hook_progress(status, info_dict) - - def get_stat(key, *obj, average=False): - val = tuple(filter(None, map(float, traverse_obj(obj, (..., ..., key))))) or [0] - return sum(val) / (len(val) if average else 1) - - with Popen(cmd, text=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) as p: - # Add a small sleep so that RPC client can receive response, - # or the connection stalls infinitely - time.sleep(0.2) - retval = p.poll() - while retval is None: - # We don't use tellStatus as we won't know the GID without reading stdout - # Ref: https://aria2.github.io/manual/en/html/aria2c.html#aria2.tellActive - active = send_rpc('aria2.tellActive') - completed = send_rpc('aria2.tellStopped', [0, frag_count]) - - downloaded = get_stat('totalLength', completed) + get_stat('completedLength', active) - speed = get_stat('downloadSpeed', active) - total = frag_count * get_stat('totalLength', active, completed, average=True) - if total < downloaded: - total = None - - status.update({ - 'downloaded_bytes': int(downloaded), - 'speed': speed, - 'total_bytes': None if fragmented else total, - 'total_bytes_estimate': total, - 'eta': (total - downloaded) / (speed or 1), - 'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None, - 'elapsed': time.time() - started, - }) - self._hook_progress(status, info_dict) - - if not active and len(completed) >= frag_count: - send_rpc('aria2.shutdown') - retval = p.wait() - break - - time.sleep(0.1) - retval = p.poll() - - return '', p.stderr.read(), retval - class HttpieFD(ExternalFD): AVAILABLE_OPT = '--version'