mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-04-29 12:05:55 +00:00
Compare commits
4 Commits
fa35eb27ea
...
7af6d81f35
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7af6d81f35 | ||
|
|
a56217f9f6 | ||
|
|
afc44022d0 | ||
|
|
1d2f0edaf9 |
@ -560,6 +560,13 @@ class FFmpegFD(ExternalFD):
|
|||||||
elif isinstance(conn, str):
|
elif isinstance(conn, str):
|
||||||
args += ['-rtmp_conn', conn]
|
args += ['-rtmp_conn', conn]
|
||||||
|
|
||||||
|
elif protocol == 'http_dash_segments' and info_dict.get('is_live'):
|
||||||
|
# ffmpeg may try to read past the latest available segments for
|
||||||
|
# live DASH streams unless we pass `-re`. In modern ffmpeg, this
|
||||||
|
# is an alias of `-readrate 1`, but `-readrate` was not added
|
||||||
|
# until ffmpeg 5.0, so we must stick to using `-re`
|
||||||
|
args += ['-re']
|
||||||
|
|
||||||
url = fmt['url']
|
url = fmt['url']
|
||||||
if self.params.get('enable_file_urls') and url.startswith('file:'):
|
if self.params.get('enable_file_urls') and url.startswith('file:'):
|
||||||
# The default protocol_whitelist is 'file,crypto,data' when reading local m3u8 URLs,
|
# The default protocol_whitelist is 'file,crypto,data' when reading local m3u8 URLs,
|
||||||
|
|||||||
@ -340,8 +340,9 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
|||||||
thumbnails=self._extract_thumbnails(view_model, (
|
thumbnails=self._extract_thumbnails(view_model, (
|
||||||
'contentImage', *thumb_keys, 'thumbnailViewModel', 'image'), final_key='sources'),
|
'contentImage', *thumb_keys, 'thumbnailViewModel', 'image'), final_key='sources'),
|
||||||
duration=traverse_obj(view_model, (
|
duration=traverse_obj(view_model, (
|
||||||
'contentImage', 'thumbnailViewModel', 'overlays', ..., 'thumbnailOverlayBadgeViewModel',
|
'contentImage', 'thumbnailViewModel', 'overlays', ...,
|
||||||
'thumbnailBadges', ..., 'thumbnailBadgeViewModel', 'text', {parse_duration}, any)),
|
(('thumbnailBottomOverlayViewModel', 'badges'), ('thumbnailOverlayBadgeViewModel', 'thumbnailBadges')),
|
||||||
|
..., 'thumbnailBadgeViewModel', 'text', {parse_duration}, any)),
|
||||||
timestamp=(traverse_obj(view_model, (
|
timestamp=(traverse_obj(view_model, (
|
||||||
'metadata', 'lockupMetadataViewModel', 'metadata', 'contentMetadataViewModel', 'metadataRows',
|
'metadata', 'lockupMetadataViewModel', 'metadata', 'contentMetadataViewModel', 'metadataRows',
|
||||||
..., 'metadataParts', ..., 'text', 'content', {lambda t: self._parse_time_text(t, report_failure=False)}, any))
|
..., 'metadataParts', ..., 'text', 'content', {lambda t: self._parse_time_text(t, report_failure=False)}, any))
|
||||||
|
|||||||
@ -1556,6 +1556,46 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
'params': {'skip_download': True},
|
'params': {'skip_download': True},
|
||||||
|
}, {
|
||||||
|
# Youtube Music Auto-generated description with dot in artist name
|
||||||
|
'url': 'https://music.youtube.com/watch?v=DbCvuSGfR3Y',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'DbCvuSGfR3Y',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Back Around',
|
||||||
|
'artists': ['half·alive'],
|
||||||
|
'track': 'Back Around',
|
||||||
|
'album': 'Conditions Of A Punk',
|
||||||
|
'release_date': '20221202',
|
||||||
|
'release_year': 2021,
|
||||||
|
'alt_title': 'Back Around',
|
||||||
|
'description': 'md5:bfc0e2b3cc903a608d8a85a13cb50f95',
|
||||||
|
'media_type': 'video',
|
||||||
|
'uploader': 'half•alive',
|
||||||
|
'channel': 'half•alive',
|
||||||
|
'channel_id': 'UCYQrYophdVI3nVDPOnXyIng',
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UCYQrYophdVI3nVDPOnXyIng',
|
||||||
|
'channel_is_verified': True,
|
||||||
|
'channel_follower_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'age_limit': 0,
|
||||||
|
'duration': 223,
|
||||||
|
'thumbnail': 'https://i.ytimg.com/vi_webp/DbCvuSGfR3Y/maxresdefault.webp',
|
||||||
|
'heatmap': 'count:100',
|
||||||
|
'categories': ['Music'],
|
||||||
|
'tags': ['half·alive', 'Conditions Of A Punk', 'Back Around'],
|
||||||
|
'creators': ['half·alive'],
|
||||||
|
'timestamp': 1669889281,
|
||||||
|
'upload_date': '20221201',
|
||||||
|
'playable_in_embed': True,
|
||||||
|
'availability': 'public',
|
||||||
|
'live_status': 'not_live',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
_WEBPAGE_TESTS = [{
|
_WEBPAGE_TESTS = [{
|
||||||
# <object>
|
# <object>
|
||||||
@ -3023,8 +3063,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
def _extract_formats_and_subtitles(self, video_id, player_responses, player_url, live_status, duration):
|
def _extract_formats_and_subtitles(self, video_id, player_responses, player_url, live_status, duration):
|
||||||
CHUNK_SIZE = 10 << 20
|
CHUNK_SIZE = 10 << 20
|
||||||
PREFERRED_LANG_VALUE = 10
|
ORIGINAL_LANG_VALUE = 10
|
||||||
original_language = None
|
DEFAULT_LANG_VALUE = 5
|
||||||
|
language_map = {
|
||||||
|
ORIGINAL_LANG_VALUE: None,
|
||||||
|
DEFAULT_LANG_VALUE: None,
|
||||||
|
}
|
||||||
itags, stream_ids = collections.defaultdict(set), []
|
itags, stream_ids = collections.defaultdict(set), []
|
||||||
itag_qualities, res_qualities = {}, {0: None}
|
itag_qualities, res_qualities = {}, {0: None}
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
@ -3064,6 +3108,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
# For handling potential pre-playback required waiting period
|
# For handling potential pre-playback required waiting period
|
||||||
playback_wait = int_or_none(self._configuration_arg('playback_wait', [None])[0], default=6)
|
playback_wait = int_or_none(self._configuration_arg('playback_wait', [None])[0], default=6)
|
||||||
|
|
||||||
|
def get_language_code_and_preference(fmt_stream):
|
||||||
|
audio_track = fmt_stream.get('audioTrack') or {}
|
||||||
|
display_name = audio_track.get('displayName') or ''
|
||||||
|
language_code = audio_track.get('id', '').split('.')[0] or None
|
||||||
|
if 'descriptive' in display_name.lower():
|
||||||
|
return join_nonempty(language_code, 'desc'), -10
|
||||||
|
if 'original' in display_name.lower():
|
||||||
|
if language_code and not language_map.get(ORIGINAL_LANG_VALUE):
|
||||||
|
language_map[ORIGINAL_LANG_VALUE] = language_code
|
||||||
|
return language_code, ORIGINAL_LANG_VALUE
|
||||||
|
if audio_track.get('audioIsDefault'):
|
||||||
|
if language_code and not language_map.get(DEFAULT_LANG_VALUE):
|
||||||
|
language_map[DEFAULT_LANG_VALUE] = language_code
|
||||||
|
return language_code, DEFAULT_LANG_VALUE
|
||||||
|
return language_code, -1
|
||||||
|
|
||||||
for pr in player_responses:
|
for pr in player_responses:
|
||||||
streaming_data = traverse_obj(pr, 'streamingData')
|
streaming_data = traverse_obj(pr, 'streamingData')
|
||||||
if not streaming_data:
|
if not streaming_data:
|
||||||
@ -3079,7 +3139,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
return str_or_none(fmt_stream.get('itag')), traverse_obj(fmt_stream, 'audioTrack', 'id'), fmt_stream.get('isDrc')
|
return str_or_none(fmt_stream.get('itag')), traverse_obj(fmt_stream, 'audioTrack', 'id'), fmt_stream.get('isDrc')
|
||||||
|
|
||||||
def process_format_stream(fmt_stream, proto, missing_pot):
|
def process_format_stream(fmt_stream, proto, missing_pot):
|
||||||
nonlocal original_language
|
|
||||||
itag = str_or_none(fmt_stream.get('itag'))
|
itag = str_or_none(fmt_stream.get('itag'))
|
||||||
audio_track = fmt_stream.get('audioTrack') or {}
|
audio_track = fmt_stream.get('audioTrack') or {}
|
||||||
quality = fmt_stream.get('quality')
|
quality = fmt_stream.get('quality')
|
||||||
@ -3096,13 +3155,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
if height:
|
if height:
|
||||||
res_qualities[height] = quality
|
res_qualities[height] = quality
|
||||||
|
|
||||||
display_name = audio_track.get('displayName') or ''
|
language_code, language_preference = get_language_code_and_preference(fmt_stream)
|
||||||
is_original = 'original' in display_name.lower()
|
|
||||||
is_descriptive = 'descriptive' in display_name.lower()
|
|
||||||
is_default = audio_track.get('audioIsDefault')
|
|
||||||
language_code = audio_track.get('id', '').split('.')[0]
|
|
||||||
if language_code and (is_original or (is_default and not original_language)):
|
|
||||||
original_language = language_code
|
|
||||||
|
|
||||||
has_drm = bool(fmt_stream.get('drmFamilies'))
|
has_drm = bool(fmt_stream.get('drmFamilies'))
|
||||||
|
|
||||||
@ -3138,7 +3191,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'filesize': int_or_none(fmt_stream.get('contentLength')),
|
'filesize': int_or_none(fmt_stream.get('contentLength')),
|
||||||
'format_id': f'{itag}{"-drc" if fmt_stream.get("isDrc") else ""}',
|
'format_id': f'{itag}{"-drc" if fmt_stream.get("isDrc") else ""}',
|
||||||
'format_note': join_nonempty(
|
'format_note': join_nonempty(
|
||||||
join_nonempty(display_name, is_default and ' (default)', delim=''),
|
join_nonempty(audio_track.get('displayName'), audio_track.get('audioIsDefault') and '(default)', delim=' '),
|
||||||
name, fmt_stream.get('isDrc') and 'DRC',
|
name, fmt_stream.get('isDrc') and 'DRC',
|
||||||
try_get(fmt_stream, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
|
try_get(fmt_stream, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
|
||||||
try_get(fmt_stream, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
|
try_get(fmt_stream, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
|
||||||
@ -3155,8 +3208,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
'filesize_approx': filesize_from_tbr(tbr, format_duration),
|
'filesize_approx': filesize_from_tbr(tbr, format_duration),
|
||||||
'width': int_or_none(fmt_stream.get('width')),
|
'width': int_or_none(fmt_stream.get('width')),
|
||||||
'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
|
'language': language_code,
|
||||||
'language_preference': PREFERRED_LANG_VALUE if is_original else 5 if is_default else -10 if is_descriptive else -1,
|
'language_preference': language_preference,
|
||||||
# Strictly de-prioritize damaged and 3gp formats
|
# Strictly de-prioritize damaged and 3gp formats
|
||||||
'preference': -10 if is_damaged else -2 if itag == '17' else None,
|
'preference': -10 if is_damaged else -2 if itag == '17' else None,
|
||||||
}
|
}
|
||||||
@ -3206,6 +3259,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
fmt_url = fmt_stream.get('url')
|
fmt_url = fmt_stream.get('url')
|
||||||
encrypted_sig, sc = None, None
|
encrypted_sig, sc = None, None
|
||||||
if not fmt_url:
|
if not fmt_url:
|
||||||
|
# We still need to register original/default language information
|
||||||
|
# See: https://github.com/yt-dlp/yt-dlp/issues/14883
|
||||||
|
get_language_code_and_preference(fmt_stream)
|
||||||
sc = urllib.parse.parse_qs(fmt_stream.get('signatureCipher'))
|
sc = urllib.parse.parse_qs(fmt_stream.get('signatureCipher'))
|
||||||
fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
|
fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
|
||||||
encrypted_sig = try_get(sc, lambda x: x['s'][0])
|
encrypted_sig = try_get(sc, lambda x: x['s'][0])
|
||||||
@ -3391,9 +3447,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
elif itag:
|
elif itag:
|
||||||
f['format_id'] = itag
|
f['format_id'] = itag
|
||||||
|
|
||||||
if original_language and f.get('language') == original_language:
|
lang_code = f.get('language')
|
||||||
|
if lang_code and lang_code == language_map[ORIGINAL_LANG_VALUE]:
|
||||||
|
f['format_note'] = join_nonempty(f.get('format_note'), '(original)', delim=' ')
|
||||||
|
f['language_preference'] = ORIGINAL_LANG_VALUE
|
||||||
|
elif lang_code and lang_code == language_map[DEFAULT_LANG_VALUE]:
|
||||||
f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ')
|
f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ')
|
||||||
f['language_preference'] = PREFERRED_LANG_VALUE
|
f['language_preference'] = DEFAULT_LANG_VALUE
|
||||||
|
|
||||||
if itag in ('616', '235'):
|
if itag in ('616', '235'):
|
||||||
f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
|
f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
|
||||||
@ -3988,20 +4048,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
# Youtube Music Auto-generated description
|
# Youtube Music Auto-generated description
|
||||||
if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):
|
if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):
|
||||||
# XXX: Causes catastrophic backtracking if description has "·"
|
|
||||||
# E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI
|
|
||||||
# Simulating atomic groups: (?P<a>[^xy]+)x => (?=(?P<a>[^xy]+))(?P=a)x
|
|
||||||
# reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2
|
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'''(?xs)
|
r'''(?xs)
|
||||||
(?=(?P<track>[^\n·]+))(?P=track)·
|
(?:\n|^)(?P<track>[^\n·]+)\ ·\ (?P<artist>[^\n]+)\n+
|
||||||
(?=(?P<artist>[^\n]+))(?P=artist)\n+
|
(?P<album>[^\n]+)\n+
|
||||||
(?=(?P<album>[^\n]+))(?P=album)\n
|
(?:℗\s*(?P<release_year>\d{4}))?
|
||||||
(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
|
(?:.+?\nReleased\ on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
|
||||||
(?:.+?Released\ on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
|
(?:.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+)\n)?
|
||||||
(.+?\nArtist\s*:\s*
|
.+\nAuto-generated\ by\ YouTube\.\s*$
|
||||||
(?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n
|
|
||||||
)?.+\nAuto-generated\ by\ YouTube\.\s*$
|
|
||||||
''', video_description)
|
''', video_description)
|
||||||
if mobj:
|
if mobj:
|
||||||
release_year = mobj.group('release_year')
|
release_year = mobj.group('release_year')
|
||||||
@ -4013,7 +4067,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
info.update({
|
info.update({
|
||||||
'album': mobj.group('album'.strip()),
|
'album': mobj.group('album'.strip()),
|
||||||
'artists': ([a] if (a := mobj.group('clean_artist'))
|
'artists': ([a] if (a := mobj.group('clean_artist'))
|
||||||
else [a.strip() for a in mobj.group('artist').split('·')]),
|
else [a.strip() for a in mobj.group('artist').split(' · ')]),
|
||||||
'track': mobj.group('track').strip(),
|
'track': mobj.group('track').strip(),
|
||||||
'release_date': release_date,
|
'release_date': release_date,
|
||||||
'release_year': int_or_none(release_year),
|
'release_year': int_or_none(release_year),
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user