Compare commits

...

4 Commits

Author SHA1 Message Date
bashonly
404bd889d0
[ie/weibo] Support more URLs and --no-playlist (#14035)
Authored by: bashonly
2025-08-16 23:02:04 +00:00
bashonly
edf55e8184
[ie/tiktok:user] Avoid infinite loop during extraction (#14032)
Closes #14031
Authored by: bashonly
2025-08-16 22:57:14 +00:00
bashonly
8a8861d538
[ie/youtube:tab] Fix playlists tab extraction (#14030)
Closes #14028
Authored by: bashonly
2025-08-16 22:55:21 +00:00
sepro
70f5669951
Warn against using -f mp4 (#13915)
Authored by: seproDev
2025-08-17 00:35:46 +02:00
4 changed files with 159 additions and 57 deletions

View File

@ -500,6 +500,14 @@ def validate_options(opts):
'To let yt-dlp download and merge the best available formats, simply do not pass any format selection',
'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning')))
# Common mistake: -f mp4
if opts.format == 'mp4':
warnings.append('.\n '.join((
'"-f mp4" selects the best pre-merged mp4 format which is often not what\'s intended',
'Pre-merged mp4 formats are not available from all sites, or may only be available in lower quality',
'To prioritize the best h264 video and aac audio in an mp4 container, use "-t mp4" instead',
'If you know what you are doing and want a pre-merged mp4 format, use "-f b[ext=mp4]" instead to suppress this warning')))
# --(postprocessor/downloader)-args without name
def report_args_compat(name, value, key1, key2=None, where=None):
if key1 in value and key2 not in value:

View File

@ -65,7 +65,7 @@ class TikTokBaseIE(InfoExtractor):
@functools.cached_property
def _DEVICE_ID(self):
return self._KNOWN_DEVICE_ID or str(random.randint(7250000000000000000, 7351147085025500000))
return self._KNOWN_DEVICE_ID or str(random.randint(7250000000000000000, 7325099899999994577))
@functools.cached_property
def _API_HOSTNAME(self):
@ -942,7 +942,6 @@ class TikTokUserIE(TikTokBaseIE):
'id': 'MS4wLjABAAAAM3R2BtjzVT-uAtstkl2iugMzC6AtnpkojJbjiOdDDrdsTiTR75-8lyWJCY5VvDrZ',
},
}]
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:115.0) Gecko/20100101 Firefox/115.0'
_API_BASE_URL = 'https://www.tiktok.com/api/creator/item_list/'
def _build_web_query(self, sec_uid, cursor):
@ -986,9 +985,23 @@ class TikTokUserIE(TikTokBaseIE):
cursor = int(time.time() * 1E3)
for page in itertools.count(1):
response = self._download_json(
self._API_BASE_URL, display_id, f'Downloading page {page}',
query=self._build_web_query(sec_uid, cursor), headers={'User-Agent': self._USER_AGENT})
for retry in self.RetryManager():
response = self._download_json(
self._API_BASE_URL, display_id, f'Downloading page {page}',
query=self._build_web_query(sec_uid, cursor))
# Avoid infinite loop caused by bad device_id
# See: https://github.com/yt-dlp/yt-dlp/issues/14031
current_batch = sorted(traverse_obj(response, ('itemList', ..., 'id', {str})))
if current_batch and current_batch == sorted(seen_ids):
message = 'TikTok API keeps sending the same page'
if self._KNOWN_DEVICE_ID:
raise ExtractorError(
f'{message}. Try again with a different device_id', expected=True)
# The user didn't pass a device_id so we can reset it and retry
del self._DEVICE_ID
retry.error = ExtractorError(
f'{message}. Taking measures to avoid an infinite loop', expected=True)
for video in traverse_obj(response, ('itemList', lambda _, v: v['id'])):
video_id = video['id']
@ -1008,42 +1021,52 @@ class TikTokUserIE(TikTokBaseIE):
cursor = old_cursor - 7 * 86_400_000
# In case 'hasMorePrevious' is wrong, break if we have gone back before TikTok existed
if cursor < 1472706000000 or not traverse_obj(response, 'hasMorePrevious'):
break
return
def _get_sec_uid(self, user_url, user_name, msg):
# User directly passed sec_uid via prefix URL, bypassing our private account detection
if not user_name and not seen_ids:
self.raise_login_required(
'This user\'s account is likely private. Log into an account that has access')
def _extract_sec_uid_from_embed(self, user_name):
webpage = self._download_webpage(
user_url, user_name, fatal=False, headers={'User-Agent': 'Mozilla/5.0'},
note=f'Downloading {msg} webpage', errnote=f'Unable to download {msg} webpage') or ''
return (traverse_obj(self._get_universal_data(webpage, user_name),
('webapp.user-detail', 'userInfo', 'user', 'secUid', {str}))
or traverse_obj(self._get_sigi_state(webpage, user_name),
('LiveRoom', 'liveRoomUserInfo', 'user', 'secUid', {str}),
('UserModule', 'users', ..., 'secUid', {str}, any)))
f'https://www.tiktok.com/embed/@{user_name}', user_name,
'Downloading user embed page', errnote=False, fatal=False)
if not webpage:
self.report_warning('This user\'s account is either private or has embedding disabled')
return None
data = traverse_obj(self._search_json(
r'<script[^>]+\bid=[\'"]__FRONTITY_CONNECT_STATE__[\'"][^>]*>',
webpage, 'data', user_name, default={}),
('source', 'data', f'/embed/@{user_name}', {dict}))
for aweme_id in traverse_obj(data, ('videoList', ..., 'id', {str})):
webpage_url = self._create_url(user_name, aweme_id)
video_data, _ = self._extract_web_data_and_status(webpage_url, aweme_id, fatal=False)
sec_uid = self._parse_aweme_video_web(
video_data, webpage_url, aweme_id, extract_flat=True).get('channel_id')
if sec_uid:
return sec_uid
return None
def _real_extract(self, url):
user_name, sec_uid = self._match_id(url), None
if mobj := re.fullmatch(r'MS4wLjABAAAA[\w-]{64}', user_name):
user_name, sec_uid = None, mobj.group(0)
else:
sec_uid = (self._get_sec_uid(self._UPLOADER_URL_FORMAT % user_name, user_name, 'user')
or self._get_sec_uid(self._UPLOADER_URL_FORMAT % f'{user_name}/live', user_name, 'live'))
if not sec_uid:
webpage = self._download_webpage(
f'https://www.tiktok.com/embed/@{user_name}', user_name,
note='Downloading user embed page', fatal=False) or ''
data = traverse_obj(self._search_json(
r'<script[^>]+\bid=[\'"]__FRONTITY_CONNECT_STATE__[\'"][^>]*>',
webpage, 'data', user_name, default={}),
('source', 'data', f'/embed/@{user_name}', {dict}))
for aweme_id in traverse_obj(data, ('videoList', ..., 'id', {str})):
webpage_url = self._create_url(user_name, aweme_id)
video_data, _ = self._extract_web_data_and_status(webpage_url, aweme_id, fatal=False)
sec_uid = self._parse_aweme_video_web(
video_data, webpage_url, aweme_id, extract_flat=True).get('channel_id')
if sec_uid:
break
self._UPLOADER_URL_FORMAT % user_name, user_name,
'Downloading user webpage', 'Unable to download user webpage',
fatal=False, headers={'User-Agent': 'Mozilla/5.0'}) or ''
detail = traverse_obj(
self._get_universal_data(webpage, user_name), ('webapp.user-detail', {dict})) or {}
if detail.get('statusCode') == 10222:
self.raise_login_required(
'This user\'s account is private. Log into an account that has access')
sec_uid = traverse_obj(detail, (
'userInfo', 'user', 'secUid', {str})) or self._extract_sec_uid_from_embed(user_name)
if not sec_uid:
raise ExtractorError(

View File

@ -8,6 +8,7 @@ from ..utils import (
int_or_none,
make_archive_id,
mimetype2ext,
parse_qs,
parse_resolution,
str_or_none,
strip_jsonp,
@ -209,7 +210,11 @@ class WeiboIE(WeiboBaseIE):
class WeiboVideoIE(WeiboBaseIE):
_VALID_URL = r'https?://(?:www\.)?weibo\.com/tv/show/(?P<id>\d+:(?:[\da-f]{32}|\d{16,}))'
_VIDEO_ID_RE = r'\d+:(?:[\da-f]{32}|\d{16,})'
_VALID_URL = [
fr'https?://(?:www\.)?weibo\.com/tv/show/(?P<id>{_VIDEO_ID_RE})',
fr'https?://video\.weibo\.com/show/?\?(?:[^#]+&)?fid=(?P<id>{_VIDEO_ID_RE})',
]
_TESTS = [{
'url': 'https://weibo.com/tv/show/1034:4797699866951785?from=old_pc_videoshow',
'info_dict': {
@ -252,6 +257,28 @@ class WeiboVideoIE(WeiboBaseIE):
'upload_date': '20171226',
'_old_archive_ids': ['weibomobile 4189191225395228'],
},
}, {
'url': 'https://video.weibo.com/show?fid=1034:4967272104787984',
'info_dict': {
'id': '4967273022359838',
'ext': 'mp4',
'display_id': 'Nse4S9TTU',
'title': '#张婧仪[超话]#📸#婧仪的相册集#  早收工的一天,小张@张婧仪 变身可可爱爱小导游来次说走就走的泉州City Walk[举手]',
'alt_title': '#张婧仪[超话]#📸#婧仪的相册集# \n早收工的一天,小张@张婧仪 变身可可爱爱小导游来次说走就走的泉州City Walk[举手]',
'description': '#张婧仪[超话]#📸#婧仪的相册集# \n早收工的一天,小张@张婧仪 变身可可爱爱小导游来次说走就走的泉州City Walk[举手] http://t.cn/A6WTpbEu \u200B\u200B\u200B',
'uploader': '张婧仪工作室',
'uploader_id': '7610808848',
'uploader_url': 'https://weibo.com/u/7610808848',
'view_count': int,
'like_count': int,
'repost_count': int,
'duration': 85,
'thumbnail': 'https://wx2.sinaimg.cn/orj480/008j4b3qly1hjsce01gnqj30u00gvwf8.jpg',
'tags': ['婧仪的相册集'],
'timestamp': 1699773545,
'upload_date': '20231112',
'_old_archive_ids': ['weibomobile 4967273022359838'],
},
}]
def _real_extract(self, url):
@ -275,6 +302,38 @@ class WeiboUserIE(WeiboBaseIE):
'uploader': '萧影殿下',
},
'playlist_mincount': 195,
}, {
'url': 'https://weibo.com/u/7610808848?tabtype=newVideo&layerid=4967273022359838',
'info_dict': {
'id': '7610808848',
'title': '张婧仪工作室的视频',
'description': '张婧仪工作室的全部视频',
'uploader': '张婧仪工作室',
},
'playlist_mincount': 61,
}, {
'url': 'https://weibo.com/u/7610808848?tabtype=newVideo&layerid=4967273022359838',
'info_dict': {
'id': '4967273022359838',
'ext': 'mp4',
'display_id': 'Nse4S9TTU',
'title': '#张婧仪[超话]#📸#婧仪的相册集#  早收工的一天,小张@张婧仪 变身可可爱爱小导游来次说走就走的泉州City Walk[举手]',
'alt_title': '#张婧仪[超话]#📸#婧仪的相册集# \n早收工的一天,小张@张婧仪 变身可可爱爱小导游来次说走就走的泉州City Walk[举手]',
'description': '#张婧仪[超话]#📸#婧仪的相册集# \n早收工的一天,小张@张婧仪 变身可可爱爱小导游来次说走就走的泉州City Walk[举手] http://t.cn/A6WTpbEu \u200B\u200B\u200B',
'uploader': '张婧仪工作室',
'uploader_id': '7610808848',
'uploader_url': 'https://weibo.com/u/7610808848',
'view_count': int,
'like_count': int,
'repost_count': int,
'duration': 85,
'thumbnail': 'https://wx2.sinaimg.cn/orj480/008j4b3qly1hjsce01gnqj30u00gvwf8.jpg',
'tags': ['婧仪的相册集'],
'timestamp': 1699773545,
'upload_date': '20231112',
'_old_archive_ids': ['weibomobile 4967273022359838'],
},
'params': {'noplaylist': True},
}]
def _fetch_page(self, uid, cursor=0, page=1):
@ -295,6 +354,11 @@ class WeiboUserIE(WeiboBaseIE):
def _real_extract(self, url):
uid = self._match_id(url)
params = {k: v[-1] for k, v in parse_qs(url).items()}
video_id = params.get('layerid') if params.get('tabtype') == 'newVideo' else None
if not self._yes_playlist(uid, video_id):
return self.url_result(f'https://weibo.com/{uid}/{video_id}', WeiboIE, video_id)
first_page = self._fetch_page(uid)
uploader = traverse_obj(first_page, ('list', ..., 'user', 'screen_name', {str}), get_all=False)
metainfo = {

View File

@ -566,6 +566,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
'gridContinuation': (self._grid_entries, None),
'itemSectionContinuation': (self._post_thread_continuation_entries, None),
'sectionListContinuation': (extract_entries, None), # for feeds
'lockupViewModel': (self._grid_entries, 'items'), # for playlists tab
}
continuation_items = traverse_obj(response, (
@ -1026,7 +1027,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'info_dict': {
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
'title': 'Igor Kleiner - Playlists',
'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
'description': r're:(?s)Добро пожаловать на мой канал! Здесь вы найдете видео .{504}/a1/50b/10a$',
'uploader': 'Igor Kleiner ',
'uploader_id': '@IgorDataScience',
'uploader_url': 'https://www.youtube.com/@IgorDataScience',
@ -1043,7 +1044,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'info_dict': {
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
'title': 'Igor Kleiner - Playlists',
'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
'description': r're:(?s)Добро пожаловать на мой канал! Здесь вы найдете видео .{504}/a1/50b/10a$',
'uploader': 'Igor Kleiner ',
'uploader_id': '@IgorDataScience',
'uploader_url': 'https://www.youtube.com/@IgorDataScience',
@ -1093,7 +1094,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
'only_matching': True,
}, {
# TODO: fix availability extraction
# TODO: fix availability and view_count extraction
'note': 'basic, single video playlist',
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlSLRHmI1qNm0wjyVNWw1pCU',
'info_dict': {
@ -1215,6 +1216,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader': 'lex will',
},
'playlist_mincount': 18,
'skip': 'This Community isn\'t available',
}, {
# TODO: fix channel_is_verified extraction
'note': 'Search tab',
@ -1399,7 +1401,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, {
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
'info_dict': {
'id': 'YDvsBbKfLPA', # This will keep changing
'id': 'VFGoUmo74wE', # This will keep changing
'ext': 'mp4',
'title': str,
'upload_date': r're:\d{8}',
@ -1578,6 +1580,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_count': 50,
'expected_warnings': ['YouTube Music is not directly supported'],
}, {
# TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
'note': 'unlisted single video playlist',
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQLfIN0MMgp0wVV6MP3bM4_',
'info_dict': {
@ -1597,19 +1600,19 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
},
'playlist': [{
'info_dict': {
'title': 'youtube-dl test video "\'/\\ä↭𝕐',
'id': 'BaW_jenozKc',
'title': 'Big Buck Bunny 60fps 4K - Official Blender Foundation Short Film',
'id': 'aqz-KE-bpKQ',
'_type': 'url',
'ie_key': 'Youtube',
'duration': 10,
'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
'channel_url': 'https://www.youtube.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
'duration': 635,
'channel_id': 'UCSMOQeBJ2RAnuFungnQOxLg',
'channel_url': 'https://www.youtube.com/channel/UCSMOQeBJ2RAnuFungnQOxLg',
'view_count': int,
'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',
'channel': 'Philipp Hagemeister',
'uploader_id': '@PhilippHagemeister',
'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
'uploader': 'Philipp Hagemeister',
'url': 'https://www.youtube.com/watch?v=aqz-KE-bpKQ',
'channel': 'Blender',
'uploader_id': '@BlenderOfficial',
'uploader_url': 'https://www.youtube.com/@BlenderOfficial',
'uploader': 'Blender',
},
}],
'playlist_count': 1,
@ -1675,7 +1678,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
'only_matching': True,
}, {
# TODO: fix metadata extraction
'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
'info_dict': {
@ -1694,6 +1696,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader': 'pukkandan',
},
'playlist_mincount': 2,
'skip': 'https://github.com/yt-dlp/yt-dlp/issues/13690',
}, {
'note': 'translated tab name',
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
@ -1801,7 +1804,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'title': 'Not Just Bikes - Shorts',
'tags': 'count:10',
'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
'description': 'md5:1d9fc1bad7f13a487299d1fe1712e031',
'description': 'md5:295758591d0d43d8594277be54584da7',
'channel_follower_count': int,
'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
'channel': 'Not Just Bikes',
@ -1822,7 +1825,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
'channel': '中村悠一',
'channel_follower_count': int,
'description': 'md5:e8fd705073a594f27d6d6d020da560dc',
'description': 'md5:76b312b48a26c3b0e4d90e2dfc1b417d',
'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
'uploader_id': '@Yuichi-Nakamura',
'uploader': '中村悠一',
@ -1865,12 +1868,13 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'tags': [],
},
'playlist_mincount': 30,
'skip': 'The channel/playlist does not exist and the URL redirected to youtube.com home page',
}, {
# Trending Gaming Tab. tab id is empty
'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
'info_dict': {
'id': 'trending',
'title': 'trending - Gaming',
'title': 'trending',
'tags': [],
},
'playlist_mincount': 30,
@ -2018,7 +2022,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader': 'A Himitsu',
'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A',
'tags': 'count:12',
'description': 'I make music',
'description': 'Music producer, sometimes.',
'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
'channel_follower_count': int,
'channel_is_verified': True,
@ -2304,19 +2308,20 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
)
IE_NAME = 'youtube:playlist'
_TESTS = [{
# TODO: fix availability extraction
'note': 'issue #673',
'url': 'PLBB231211A4F62143',
'info_dict': {
'title': '[OLD]Team Fortress 2 (Class-based LP)',
'title': 'Team Fortress 2 [2010 Version]',
'id': 'PLBB231211A4F62143',
'uploader': 'Wickman',
'uploader_id': '@WickmanVT',
'uploader': 'Wickman Wish',
'uploader_id': '@WickmanWish',
'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
'view_count': int,
'uploader_url': 'https://www.youtube.com/@WickmanVT',
'uploader_url': 'https://www.youtube.com/@WickmanWish',
'modified_date': r're:\d{8}',
'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
'channel': 'Wickman',
'channel': 'Wickman Wish',
'tags': [],
'channel_url': 'https://www.youtube.com/channel/UCKSpbfbl5kRQpTdL7kMc-1Q',
'availability': 'public',
@ -2331,6 +2336,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
'playlist_count': 2,
'skip': 'This playlist is private',
}, {
# TODO: fix availability extraction
'note': 'embedded',
'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
'playlist_count': 4,
@ -2351,6 +2357,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
},
'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden', 'Retrying', 'Giving up'],
}, {
# TODO: fix availability extraction
'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
'playlist_mincount': 455,
'info_dict': {