mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-04-02 15:12:45 +00:00
Compare commits
4 Commits
d108ca10b9
...
319a2bda83
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
319a2bda83 | ||
|
|
2204cee6d8 | ||
|
|
071ad7dfa0 | ||
|
|
0d8898c3f4 |
@ -1864,13 +1864,13 @@ The following extractors use this feature:
|
||||
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/`next` endpoint request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details
|
||||
* `webpage_skip`: Skip extraction of embedded webpage data. One or both of `player_response`, `initial_data`. These options are for testing purposes and don't skip any network requests
|
||||
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
|
||||
* `player_js_variant`: The player javascript variant to use for n/sig deciphering. The known variants are: `main`, `tcc`, `tce`, `es5`, `es6`, `tv`, `tv_es6`, `phone`. The default is `tv`, and the others are for debugging purposes. You can use `actual` to go with what is prescribed by the site
|
||||
* `player_js_variant`: The player javascript variant to use for n/sig deciphering. The known variants are: `main`, `tcc`, `tce`, `es5`, `es6`, `es6_tcc`, `es6_tce`, `tv`, `tv_es6`, `phone`, `house`. The default is `tv`, and the others are for debugging purposes. You can use `actual` to go with what is prescribed by the site
|
||||
* `player_js_version`: The player javascript version to use for n/sig deciphering, in the format of `signature_timestamp@hash` (e.g. `20348@0004de42`). The default is to use what is prescribed by the site, and can be selected with `actual`
|
||||
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
|
||||
* `max_comments`: Limit the number of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread,max-depth`. Default is `all,all,all,all,all`
|
||||
* A `max-depth` value of `1` will discard all replies, regardless of the `max-replies` or `max-replies-per-thread` values given
|
||||
* E.g. `all,all,1000,10,2` will get a maximum of 1000 replies total, with up to 10 replies per thread, and only 2 levels of depth (i.e. top-level comments plus their immediate replies). `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
|
||||
* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8), `missing_pot` (include formats that require a PO Token but are missing one)
|
||||
* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash, live adaptive https, and post-live m3u8), `missing_pot` (include formats that require a PO Token but are missing one)
|
||||
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
|
||||
* `innertube_key`: Innertube API key to use for all API requests. By default, no API key is used
|
||||
* `raise_incomplete_data`: Raise an error instead of reporting a warning when `Incomplete Data Received` is encountered
|
||||
|
||||
@ -33,9 +33,12 @@ class Variant(enum.Enum):
|
||||
tce = 'player_ias_tce.vflset/en_US/base.js'
|
||||
es5 = 'player_es5.vflset/en_US/base.js'
|
||||
es6 = 'player_es6.vflset/en_US/base.js'
|
||||
es6_tcc = 'player_es6_tcc.vflset/en_US/base.js'
|
||||
es6_tce = 'player_es6_tce.vflset/en_US/base.js'
|
||||
tv = 'tv-player-ias.vflset/tv-player-ias.js'
|
||||
tv_es6 = 'tv-player-es6.vflset/tv-player-es6.js'
|
||||
phone = 'player-plasma-ias-phone-en_US.vflset/base.js'
|
||||
house = 'house_brand_player.vflset/en_US/base.js'
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
|
||||
@ -9,13 +9,13 @@ from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
smuggle_url,
|
||||
traverse_obj,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class OdnoklassnikiIE(InfoExtractor):
|
||||
@ -264,9 +264,7 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
note='Downloading desktop webpage',
|
||||
headers={'Referer': smuggled['referrer']} if smuggled.get('referrer') else {})
|
||||
|
||||
error = self._search_regex(
|
||||
r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
|
||||
webpage, 'error', default=None)
|
||||
error = traverse_obj(webpage, {find_element(cls='vp_video_stub_txt')})
|
||||
# Direct link from boosty
|
||||
if (error == 'The author of this video has not been found or is blocked'
|
||||
and not smuggled.get('referrer') and mode == 'videoembed'):
|
||||
|
||||
@ -131,11 +131,16 @@ class TwitterBaseIE(InfoExtractor):
|
||||
video_id, headers=headers, query=query, expected_status=allowed_status,
|
||||
note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')
|
||||
|
||||
if result.get('errors'):
|
||||
errors = ', '.join(set(traverse_obj(result, ('errors', ..., 'message', {str}))))
|
||||
if errors and 'not authorized' in errors:
|
||||
self.raise_login_required(remove_end(errors, '.'))
|
||||
raise ExtractorError(f'Error(s) while querying API: {errors or "Unknown error"}')
|
||||
if errors := traverse_obj(result, ('errors', ..., {dict})):
|
||||
error_msg = ', '.join(set(traverse_obj(errors, (..., 'message', {str}))))
|
||||
# An error with the message 'Dependency: Unspecified' is a false positive
|
||||
# See https://github.com/yt-dlp/yt-dlp/issues/15963
|
||||
if len(errors) == 1 and 'dependency: unspecified' in error_msg.lower():
|
||||
self.write_debug(f'Ignoring error message: "{error_msg}"')
|
||||
elif 'not authorized' in error_msg.lower():
|
||||
self.raise_login_required(remove_end(error_msg, '.'))
|
||||
else:
|
||||
raise ExtractorError(f'Error(s) while querying API: {error_msg or "Unknown error"}')
|
||||
|
||||
return result
|
||||
|
||||
@ -1078,7 +1083,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
|
||||
elif typename == 'TweetUnavailable':
|
||||
reason = result.get('reason')
|
||||
if reason == 'NsfwLoggedOut':
|
||||
if reason in ('NsfwLoggedOut', 'NsfwViewerHasNoStatedAge'):
|
||||
self.raise_login_required('NSFW tweet requires authentication')
|
||||
elif reason == 'Protected':
|
||||
self.raise_login_required('You are not authorized to view this protected tweet')
|
||||
|
||||
@ -139,11 +139,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
]
|
||||
_RETURN_TYPE = 'video' # XXX: How to handle multifeed?
|
||||
|
||||
_PLAYER_INFO_RE = (
|
||||
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/(?:tv-)?player',
|
||||
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
|
||||
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
|
||||
)
|
||||
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'srt', 'vtt')
|
||||
_DEFAULT_CLIENTS = ('android_vr', 'web', 'web_safari')
|
||||
_DEFAULT_JSLESS_CLIENTS = ('android_vr',)
|
||||
@ -1886,10 +1881,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'tce': 'player_ias_tce.vflset/en_US/base.js',
|
||||
'es5': 'player_es5.vflset/en_US/base.js',
|
||||
'es6': 'player_es6.vflset/en_US/base.js',
|
||||
'es6_tcc': 'player_es6_tcc.vflset/en_US/base.js',
|
||||
'es6_tce': 'player_es6_tce.vflset/en_US/base.js',
|
||||
'tv': 'tv-player-ias.vflset/tv-player-ias.js',
|
||||
'tv_es6': 'tv-player-es6.vflset/tv-player-es6.js',
|
||||
'phone': 'player-plasma-ias-phone-en_US.vflset/base.js',
|
||||
'tablet': 'player-plasma-ias-tablet-en_US.vflset/base.js', # Dead since 19712d96 (2025.11.06)
|
||||
'house': 'house_brand_player.vflset/en_US/base.js', # Used by Google Drive
|
||||
}
|
||||
_INVERSE_PLAYER_JS_VARIANT_MAP = {v: k for k, v in _PLAYER_JS_VARIANT_MAP.items()}
|
||||
|
||||
@ -2179,13 +2176,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
@classmethod
|
||||
def _extract_player_info(cls, player_url):
|
||||
for player_re in cls._PLAYER_INFO_RE:
|
||||
id_m = re.search(player_re, player_url)
|
||||
if id_m:
|
||||
break
|
||||
else:
|
||||
if m := re.search(r'/s/player/(?P<id>[a-fA-F0-9]{8,})/', player_url):
|
||||
return m.group('id')
|
||||
raise ExtractorError(f'Cannot identify player {player_url!r}')
|
||||
return id_m.group('id')
|
||||
|
||||
def _load_player(self, video_id, player_url, fatal=True):
|
||||
player_js_key = self._player_js_cache_key(player_url)
|
||||
@ -3219,6 +3212,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
])
|
||||
skip_player_js = 'js' in self._configuration_arg('player_skip')
|
||||
format_types = self._configuration_arg('formats')
|
||||
skip_bad_formats = 'incomplete' not in format_types
|
||||
all_formats = 'duplicate' in format_types
|
||||
if self._configuration_arg('include_duplicate_formats'):
|
||||
all_formats = True
|
||||
@ -3464,7 +3458,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
https_fmts = []
|
||||
|
||||
for fmt_stream in streaming_formats:
|
||||
if fmt_stream.get('targetDurationSec'):
|
||||
# Live adaptive https formats are not supported: skip unless extractor-arg given
|
||||
if fmt_stream.get('targetDurationSec') and skip_bad_formats:
|
||||
continue
|
||||
|
||||
# FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
|
||||
@ -3576,7 +3571,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
yield from process_https_formats()
|
||||
|
||||
needs_live_processing = self._needs_live_processing(live_status, duration)
|
||||
skip_bad_formats = 'incomplete' not in format_types
|
||||
|
||||
skip_manifests = set(self._configuration_arg('skip'))
|
||||
if (needs_live_processing == 'is_live' # These will be filtered out by YoutubeDL anyway
|
||||
@ -4086,16 +4080,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
needs_live_processing = self._needs_live_processing(live_status, duration)
|
||||
|
||||
def is_bad_format(fmt):
|
||||
if needs_live_processing and not fmt.get('is_from_start'):
|
||||
return True
|
||||
elif (live_status == 'is_live' and needs_live_processing != 'is_live'
|
||||
and fmt.get('protocol') == 'http_dash_segments'):
|
||||
return True
|
||||
def adjust_incomplete_format(fmt, note_suffix='(Last 2 hours)', pref_adjustment=-10):
|
||||
fmt['preference'] = (fmt.get('preference') or -1) + pref_adjustment
|
||||
fmt['format_note'] = join_nonempty(fmt.get('format_note'), note_suffix, delim=' ')
|
||||
|
||||
for fmt in filter(is_bad_format, formats):
|
||||
fmt['preference'] = (fmt.get('preference') or -1) - 10
|
||||
fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ')
|
||||
# Adjust preference and format note for incomplete live/post-live formats
|
||||
if live_status in ('is_live', 'post_live'):
|
||||
for fmt in formats:
|
||||
protocol = fmt.get('protocol')
|
||||
# Currently, protocol isn't set for adaptive https formats, but this could change
|
||||
is_adaptive = protocol in (None, 'http', 'https')
|
||||
if live_status == 'post_live' and is_adaptive:
|
||||
# Post-live adaptive formats cause HttpFD to raise "Did not get any data blocks"
|
||||
# These formats are *only* useful to external applications, so we can hide them
|
||||
# Set their preference <= -1000 so that FormatSorter flags them as 'hidden'
|
||||
adjust_incomplete_format(fmt, note_suffix='(ended)', pref_adjustment=-5000)
|
||||
# Is it live with --live-from-start? Or is it post-live and its duration is >2hrs?
|
||||
elif needs_live_processing:
|
||||
if not fmt.get('is_from_start'):
|
||||
# Post-live m3u8 formats for >2hr streams
|
||||
adjust_incomplete_format(fmt)
|
||||
elif live_status == 'is_live':
|
||||
if protocol == 'http_dash_segments':
|
||||
# Live DASH formats without --live-from-start
|
||||
adjust_incomplete_format(fmt)
|
||||
elif is_adaptive:
|
||||
# Incomplete live adaptive https formats
|
||||
adjust_incomplete_format(fmt, note_suffix='(incomplete)', pref_adjustment=-20)
|
||||
|
||||
if needs_live_processing:
|
||||
self._prepare_live_from_start_formats(
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user