[ie/youtube:tab] Fix metadata extraction (#16976)

Authored by: bashonly
This commit is contained in:
bashonly 2026-06-21 17:49:35 -05:00 committed by GitHub
parent 98036ccd4f
commit d6c411bcd0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 70 additions and 60 deletions

View File

@ -1152,6 +1152,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _get_count(self, data, *path_list):
count_text = self._get_text(data, *path_list) or ''
if count_text.lower().startswith('no '):
return 0
count = parse_count(count_text)
if count is None:
count = str_to_int(

View File

@ -752,8 +752,15 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
})
channel_badges = self._extract_badges(traverse_obj(data, ('header', ..., 'badges'), get_all=False))
if self._has_badge(channel_badges, BadgeType.VERIFIED):
if (
self._has_badge(channel_badges, BadgeType.VERIFIED)
or any(image_name in traverse_obj(page_header_view_model, (
'title', 'dynamicTextViewModel', 'text', 'attachmentRuns', ..., 'element', 'type',
'imageType', 'image', 'sources', ..., 'clientResource', 'imageName', {str},
)) for image_name in ('CHECK_CIRCLE_FILLED', 'AUDIO_BADGE'))
):
info['channel_is_verified'] = True
# Playlist stats is a text runs array containing [video count, view count, last updated].
# Either "last updated" alone, or both "view count" and "last updated", may be missing.
playlist_stats = get_first(
@ -855,13 +862,13 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
def _extract_availability(self, data):
"""
Gets the availability of a given playlist/tab.
Note: Unless YouTube tells us explicitly, we do not assume it is public
@param data: response
"""
sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
sidebar_renderer = self._extract_sidebar_info_renderer(data, ('playlistSidebarPrimaryInfoRenderer', {dict})) or {}
playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer', {dict})) or {}
player_header_privacy = playlist_header_renderer.get('privacy')
# NB: Badges are currently not extracted from playlists/tabs
badges = self._extract_badges(traverse_obj(sidebar_renderer, 'badges'))
# Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
@ -871,15 +878,10 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
expected_type=str)
microformats_is_unlisted = traverse_obj(
data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)
microformat_data_renderer = traverse_obj(data, ('microformat', 'microformatDataRenderer', {dict}))
return (
'public' if (
self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
or player_header_privacy == 'PUBLIC'
or privacy_setting_icon == 'PRIVACY_PUBLIC')
else self._availability(
if availability := self._availability(
# Private playlists are only viewable by the creator
is_private=(
self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
or player_header_privacy == 'PRIVATE' if player_header_privacy is not None
@ -888,10 +890,23 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
or player_header_privacy == 'UNLISTED' if player_header_privacy is not None
else privacy_setting_icon == 'PRIVACY_UNLISTED' if privacy_setting_icon is not None
else microformats_is_unlisted if microformats_is_unlisted is not None else None),
else traverse_obj(microformat_data_renderer, ('unlisted', {bool}))),
# It is unlikely that members-only or premium-only playlists/tabs exist
needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
needs_auth=False))
needs_auth=False,
):
return availability
if (
self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
or player_header_privacy == 'PUBLIC'
or privacy_setting_icon == 'PRIVACY_PUBLIC'
or traverse_obj(microformat_data_renderer, ('noindex', {bool})) is False
):
return 'public'
return None
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
@ -1080,6 +1095,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'tags': 'count:19',
'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
'channel_follower_count': int,
'availability': 'public',
},
}, {
'note': 'playlists, multipage, different order',
@ -1089,6 +1105,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
'title': 'Igor DS: ИИ, Наука и Творчество - Playlists',
'description': r're:(?s)Добро пожаловать! Здесь сложные технологии встречаются.+\n$',
'uploader': 'Igor DS: ИИ, Наука и Творчество ',
'uploader_id': '@IgorDataScience',
'uploader_url': 'https://www.youtube.com/@IgorDataScience',
'tags': 'count:19',
@ -1096,9 +1113,9 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel': 'Igor DS: ИИ, Наука и Творчество ',
'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
'channel_follower_count': int,
'availability': 'public',
},
}, {
# TODO: fix channel_is_verified extraction
'note': 'playlists, series',
'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
'playlist_mincount': 5,
@ -1115,6 +1132,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'tags': ['Mathematics'],
'channel_follower_count': int,
'channel_is_verified': True,
'availability': 'public',
},
}, {
'note': 'playlists, singlepage',
@ -1132,12 +1150,12 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'tags': 'count:12',
'channel': 'ThirstForScience',
'channel_follower_count': int,
'availability': 'public',
},
}, {
'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
'only_matching': True,
}, {
# TODO: fix availability and view_count extraction
'note': 'basic, single video playlist',
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlSLRHmI1qNm0wjyVNWw1pCU',
'info_dict': {
@ -1172,6 +1190,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@sergeym.6173',
'uploader_id': '@sergeym.6173',
'uploader': 'Sergey M.',
'view_count': int,
},
'playlist_count': 0,
}, {
@ -1189,6 +1208,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_url': 'https://www.youtube.com/channel/UCTwECeGqMZee77BjdoYtI2Q',
'channel_follower_count': int,
'tags': ['creative commons', 'remix', 'culture', 'nonprofit'],
'availability': 'public',
},
'playlist_mincount': 6,
}, {
@ -1206,6 +1226,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_url': 'https://www.youtube.com/channel/UCTwECeGqMZee77BjdoYtI2Q',
'channel_follower_count': int,
'tags': ['creative commons', 'remix', 'culture', 'nonprofit'],
'availability': 'public',
},
'playlist_mincount': 239,
}, {
@ -1223,6 +1244,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_url': 'https://www.youtube.com/channel/UCTwECeGqMZee77BjdoYtI2Q',
'channel_follower_count': int,
'tags': ['creative commons', 'remix', 'culture', 'nonprofit'],
'availability': 'public',
},
'playlist_mincount': 239,
}, {
@ -1240,6 +1262,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_url': 'https://www.youtube.com/channel/UCTwECeGqMZee77BjdoYtI2Q',
'channel_follower_count': int,
'tags': ['creative commons', 'remix', 'culture', 'nonprofit'],
'availability': 'public',
},
'playlist_mincount': 20,
}, {
@ -1257,10 +1280,11 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_url': 'https://www.youtube.com/channel/UCtS3BcCw-tITPFYSvkbP0Bg',
'channel_follower_count': int,
'tags': 'count:17',
'availability': 'public',
'channel_is_verified': True,
},
'playlist_mincount': 145,
}, {
# TODO: fix channel_is_verified extraction
'note': 'Search tab',
'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
'playlist_mincount': 40,
@ -1277,6 +1301,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_id': '@3blue1brown',
'uploader': '3Blue1Brown',
'channel_is_verified': True,
'availability': 'public',
},
}, {
'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
@ -1288,7 +1313,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
'only_matching': True,
}, {
# TODO: fix availability extraction
'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
'info_dict': {
@ -1308,7 +1332,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
},
'playlist_count': 96,
}, {
# TODO: fix availability extraction
'note': 'Large playlist',
'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
'info_dict': {
@ -1333,7 +1356,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
'only_matching': True,
}, {
# TODO: fix availability extraction
# The 'note' below is outdated: there is no longer a "Load more" button
'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
@ -1354,7 +1376,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
},
'playlist_mincount': 10,
}, {
# TODO: fix availability extraction
'note': 'Playlist with "show unavailable videos" button',
'url': 'https://www.youtube.com/playlist?list=PLYwq8WOe86_xGmR7FrcJq8Sb7VW8K3Tt2',
'info_dict': {
@ -1375,7 +1396,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 150,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
}, {
# TODO: fix availability extraction
'note': 'Playlist with unavailable videos in page 7',
'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
'info_dict': {
@ -1396,7 +1416,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 1000,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
}, {
# TODO: fix availability extraction
'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
'info_dict': {
@ -1447,7 +1466,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, {
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
'info_dict': {
'id': 'ubIX-TwVqZI', # This will keep changing
'id': '8GbcigMs1_c', # This will keep changing
'ext': 'mp4',
'title': str,
'upload_date': r're:\d{8}',
@ -1481,20 +1500,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'expected_warnings': ['Ignoring subtitle tracks found in '],
}, {
'url': 'https://www.youtube.com/user/TheYoungTurks/live',
'info_dict': {
'id': 'a48o2S1cPoo',
'ext': 'mp4',
'title': 'The Young Turks - Live Main Show',
'upload_date': '20150715',
'license': 'Standard YouTube License',
'description': 'md5:438179573adcdff3c97ebb1ee632b891',
'categories': ['News & Politics'],
'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
'like_count': int,
},
'params': {
'skip_download': True,
},
'only_matching': True,
}, {
'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
@ -1561,7 +1566,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
'only_matching': True,
}, {
# TODO: fix availability extraction
'note': 'VLPL, should redirect to playlist?list=PL...',
'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
'info_dict': {
@ -1757,13 +1761,13 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@coletdjnz',
'uploader_id': '@coletdjnz',
'uploader': 'cole-dlp-test-acc',
'availability': 'public',
},
'playlist_mincount': 1,
'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
'expected_warnings': ['Preferring "ja"'],
}, {
# XXX: this should really check flat playlist entries, but the test suite doesn't support that
# TODO: fix availability extraction
'note': 'preferred lang set with playlist with translated video titles',
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
'info_dict': {
@ -1786,11 +1790,11 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, {
# shorts audio pivot for 2GtVksBMYFM.
'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
# TODO: fix extraction
'info_dict': {
'id': 'sfv_audio_pivot',
'title': 'sfv_audio_pivot',
'tags': [],
'availability': 'public',
},
'playlist_mincount': 50,
@ -1802,6 +1806,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
'tags': [],
'availability': 'public',
},
'playlist_mincount': 20,
}, {
@ -1811,6 +1816,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'id': 'UCQvWX73GQygcwXOTSf_VDVg',
'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
'tags': [],
'availability': 'public',
},
'playlist_mincount': 8,
}, {
@ -1820,11 +1826,11 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'id': 'UCQvWX73GQygcwXOTSf_VDVg',
'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
'tags': [],
'availability': 'public',
},
'playlist_mincount': 8,
}, {
# Should get three playlists for videos, shorts and streams tabs
# TODO: fix channel_is_verified extraction
'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
'info_dict': {
'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
@ -1839,11 +1845,11 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader': 'Polka Ch. 尾丸ポルカ',
'uploader_id': '@OmaruPolka',
'channel_is_verified': True,
'availability': 'public',
},
'playlist_count': 3,
}, {
# Shorts tab with channel with handle
# TODO: fix channel_is_verified extraction
'url': 'https://www.youtube.com/@NotJustBikes/shorts',
'info_dict': {
'id': 'UC0intLFzLaudFG-xAvUEO-A',
@ -1858,6 +1864,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader': 'Not Just Bikes',
'uploader_id': '@NotJustBikes',
'channel_is_verified': True,
'availability': 'public',
},
'playlist_mincount': 10,
}, {
@ -1875,6 +1882,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@中村悠一のあそびば',
'uploader_id': '@中村悠一のあそびば',
'uploader': '中村悠一',
'availability': 'public',
},
'playlist_mincount': 60,
}, {
@ -1938,6 +1946,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@coletdjnz',
'uploader_id': '@coletdjnz',
'uploader': 'cole-dlp-test-acc',
'availability': 'public',
},
'playlist': [{
'info_dict': {
@ -1959,6 +1968,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'id': 'UCQvWX73GQygcwXOTSf_VDVg',
'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO: should be Minecraft - Live or Minecraft - Topic - Live
'tags': [],
'availability': 'public',
},
'playlist': [{
'info_dict': {
@ -2018,7 +2028,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'params': {'extract_flat': True},
'skip': 'channels tab removed',
}, {
# TODO: fix channel_is_verified extraction
'url': 'https://www.youtube.com/@3blue1brown/about',
'info_dict': {
'id': '@3blue1brown',
@ -2033,6 +2042,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_id': '@3blue1brown',
'uploader': '3Blue1Brown',
'channel_is_verified': True,
'availability': 'public',
},
'playlist_count': 0,
}, {
@ -2050,11 +2060,11 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'tags': [],
'channel': '99% Invisible',
'uploader_id': '@99percentinvisiblepodcast',
'availability': 'public',
},
'playlist_mincount': 5,
}, {
# Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
# TODO: fix channel_is_verified extraction
'url': 'https://www.youtube.com/@AHimitsu/releases',
'info_dict': {
'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
@ -2069,13 +2079,13 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
'channel_follower_count': int,
'channel_is_verified': True,
'availability': 'public',
},
'playlist_mincount': 10,
}, {
# Playlist with only shorts, shown as reel renderers
# FIXME: future: YouTube currently doesn't give a continuation for this,
# but may do so in the future.
# TODO: fix availability extraction
'url': 'https://www.youtube.com/playlist?list=UUxqPAgubo4coVn9Lx1FuKcg',
'info_dict': {
'id': 'UUxqPAgubo4coVn9Lx1FuKcg',
@ -2095,7 +2105,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 100,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
}, {
# TODO: fix channel_is_verified extraction
'note': 'Tags containing spaces',
'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
'playlist_count': 3,
@ -2111,6 +2120,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader': 'Markiplier',
'channel_url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
'channel_is_verified': True,
'availability': 'public',
'tags': ['markiplier', 'comedy', 'gaming', 'funny videos', 'funny moments',
'sketch comedy', 'laughing', 'lets play', 'challenge videos', 'hilarious',
'challenges', 'sketches', 'scary games', 'funny games', 'rage games',
@ -2133,6 +2143,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader': 'sbcitygov',
'channel_url': 'https://www.youtube.com/channel/UCH6-qfQwlUgz9SAf05jvc_w',
'tags': [],
'availability': 'public',
},
}]
@ -2352,7 +2363,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
)
IE_NAME = 'youtube:playlist'
_TESTS = [{
# TODO: fix availability extraction
'note': 'issue #673',
'url': 'PLBB231211A4F62143',
'info_dict': {
@ -2380,7 +2390,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
'playlist_count': 2,
'skip': 'This playlist is private',
}, {
# TODO: fix availability extraction
'note': 'embedded',
'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
'playlist_count': 4,
@ -2401,7 +2410,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
},
'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden', 'Retrying', 'Giving up'],
}, {
# TODO: fix availability extraction
'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
'playlist_mincount': 455,
'info_dict': {