Compare commits

..

No commits in common. "404bd889d0e0b62ad72b7281e3fefdc0497080b3" and "6ae3543d5a1feea0c546571fd2782b024c108eac" have entirely different histories.

4 changed files with 57 additions and 159 deletions

View File

@ -500,14 +500,6 @@ def validate_options(opts):
'To let yt-dlp download and merge the best available formats, simply do not pass any format selection',
'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning')))
# Common mistake: -f mp4
if opts.format == 'mp4':
warnings.append('.\n '.join((
'"-f mp4" selects the best pre-merged mp4 format which is often not what\'s intended',
'Pre-merged mp4 formats are not available from all sites, or may only be available in lower quality',
'To prioritize the best h264 video and aac audio in an mp4 container, use "-t mp4" instead',
'If you know what you are doing and want a pre-merged mp4 format, use "-f b[ext=mp4]" instead to suppress this warning')))
# --(postprocessor/downloader)-args without name
def report_args_compat(name, value, key1, key2=None, where=None):
if key1 in value and key2 not in value:

View File

@ -65,7 +65,7 @@ class TikTokBaseIE(InfoExtractor):
@functools.cached_property
def _DEVICE_ID(self):
return self._KNOWN_DEVICE_ID or str(random.randint(7250000000000000000, 7325099899999994577))
return self._KNOWN_DEVICE_ID or str(random.randint(7250000000000000000, 7351147085025500000))
@functools.cached_property
def _API_HOSTNAME(self):
@ -942,6 +942,7 @@ class TikTokUserIE(TikTokBaseIE):
'id': 'MS4wLjABAAAAM3R2BtjzVT-uAtstkl2iugMzC6AtnpkojJbjiOdDDrdsTiTR75-8lyWJCY5VvDrZ',
},
}]
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:115.0) Gecko/20100101 Firefox/115.0'
_API_BASE_URL = 'https://www.tiktok.com/api/creator/item_list/'
def _build_web_query(self, sec_uid, cursor):
@ -985,23 +986,9 @@ class TikTokUserIE(TikTokBaseIE):
cursor = int(time.time() * 1E3)
for page in itertools.count(1):
for retry in self.RetryManager():
response = self._download_json(
self._API_BASE_URL, display_id, f'Downloading page {page}',
query=self._build_web_query(sec_uid, cursor))
# Avoid infinite loop caused by bad device_id
# See: https://github.com/yt-dlp/yt-dlp/issues/14031
current_batch = sorted(traverse_obj(response, ('itemList', ..., 'id', {str})))
if current_batch and current_batch == sorted(seen_ids):
message = 'TikTok API keeps sending the same page'
if self._KNOWN_DEVICE_ID:
raise ExtractorError(
f'{message}. Try again with a different device_id', expected=True)
# The user didn't pass a device_id so we can reset it and retry
del self._DEVICE_ID
retry.error = ExtractorError(
f'{message}. Taking measures to avoid an infinite loop', expected=True)
response = self._download_json(
self._API_BASE_URL, display_id, f'Downloading page {page}',
query=self._build_web_query(sec_uid, cursor), headers={'User-Agent': self._USER_AGENT})
for video in traverse_obj(response, ('itemList', lambda _, v: v['id'])):
video_id = video['id']
@ -1021,52 +1008,42 @@ class TikTokUserIE(TikTokBaseIE):
cursor = old_cursor - 7 * 86_400_000
# In case 'hasMorePrevious' is wrong, break if we have gone back before TikTok existed
if cursor < 1472706000000 or not traverse_obj(response, 'hasMorePrevious'):
return
break
# User directly passed sec_uid via prefix URL, bypassing our private account detection
if not user_name and not seen_ids:
self.raise_login_required(
'This user\'s account is likely private. Log into an account that has access')
def _extract_sec_uid_from_embed(self, user_name):
def _get_sec_uid(self, user_url, user_name, msg):
webpage = self._download_webpage(
f'https://www.tiktok.com/embed/@{user_name}', user_name,
'Downloading user embed page', errnote=False, fatal=False)
if not webpage:
self.report_warning('This user\'s account is either private or has embedding disabled')
return None
data = traverse_obj(self._search_json(
r'<script[^>]+\bid=[\'"]__FRONTITY_CONNECT_STATE__[\'"][^>]*>',
webpage, 'data', user_name, default={}),
('source', 'data', f'/embed/@{user_name}', {dict}))
for aweme_id in traverse_obj(data, ('videoList', ..., 'id', {str})):
webpage_url = self._create_url(user_name, aweme_id)
video_data, _ = self._extract_web_data_and_status(webpage_url, aweme_id, fatal=False)
sec_uid = self._parse_aweme_video_web(
video_data, webpage_url, aweme_id, extract_flat=True).get('channel_id')
if sec_uid:
return sec_uid
return None
user_url, user_name, fatal=False, headers={'User-Agent': 'Mozilla/5.0'},
note=f'Downloading {msg} webpage', errnote=f'Unable to download {msg} webpage') or ''
return (traverse_obj(self._get_universal_data(webpage, user_name),
('webapp.user-detail', 'userInfo', 'user', 'secUid', {str}))
or traverse_obj(self._get_sigi_state(webpage, user_name),
('LiveRoom', 'liveRoomUserInfo', 'user', 'secUid', {str}),
('UserModule', 'users', ..., 'secUid', {str}, any)))
def _real_extract(self, url):
user_name, sec_uid = self._match_id(url), None
if mobj := re.fullmatch(r'MS4wLjABAAAA[\w-]{64}', user_name):
user_name, sec_uid = None, mobj.group(0)
else:
sec_uid = (self._get_sec_uid(self._UPLOADER_URL_FORMAT % user_name, user_name, 'user')
or self._get_sec_uid(self._UPLOADER_URL_FORMAT % f'{user_name}/live', user_name, 'live'))
if not sec_uid:
webpage = self._download_webpage(
self._UPLOADER_URL_FORMAT % user_name, user_name,
'Downloading user webpage', 'Unable to download user webpage',
fatal=False, headers={'User-Agent': 'Mozilla/5.0'}) or ''
detail = traverse_obj(
self._get_universal_data(webpage, user_name), ('webapp.user-detail', {dict})) or {}
if detail.get('statusCode') == 10222:
self.raise_login_required(
'This user\'s account is private. Log into an account that has access')
sec_uid = traverse_obj(detail, (
'userInfo', 'user', 'secUid', {str})) or self._extract_sec_uid_from_embed(user_name)
f'https://www.tiktok.com/embed/@{user_name}', user_name,
note='Downloading user embed page', fatal=False) or ''
data = traverse_obj(self._search_json(
r'<script[^>]+\bid=[\'"]__FRONTITY_CONNECT_STATE__[\'"][^>]*>',
webpage, 'data', user_name, default={}),
('source', 'data', f'/embed/@{user_name}', {dict}))
for aweme_id in traverse_obj(data, ('videoList', ..., 'id', {str})):
webpage_url = self._create_url(user_name, aweme_id)
video_data, _ = self._extract_web_data_and_status(webpage_url, aweme_id, fatal=False)
sec_uid = self._parse_aweme_video_web(
video_data, webpage_url, aweme_id, extract_flat=True).get('channel_id')
if sec_uid:
break
if not sec_uid:
raise ExtractorError(

View File

@ -8,7 +8,6 @@ from ..utils import (
int_or_none,
make_archive_id,
mimetype2ext,
parse_qs,
parse_resolution,
str_or_none,
strip_jsonp,
@ -210,11 +209,7 @@ class WeiboIE(WeiboBaseIE):
class WeiboVideoIE(WeiboBaseIE):
_VIDEO_ID_RE = r'\d+:(?:[\da-f]{32}|\d{16,})'
_VALID_URL = [
fr'https?://(?:www\.)?weibo\.com/tv/show/(?P<id>{_VIDEO_ID_RE})',
fr'https?://video\.weibo\.com/show/?\?(?:[^#]+&)?fid=(?P<id>{_VIDEO_ID_RE})',
]
_VALID_URL = r'https?://(?:www\.)?weibo\.com/tv/show/(?P<id>\d+:(?:[\da-f]{32}|\d{16,}))'
_TESTS = [{
'url': 'https://weibo.com/tv/show/1034:4797699866951785?from=old_pc_videoshow',
'info_dict': {
@ -257,28 +252,6 @@ class WeiboVideoIE(WeiboBaseIE):
'upload_date': '20171226',
'_old_archive_ids': ['weibomobile 4189191225395228'],
},
}, {
'url': 'https://video.weibo.com/show?fid=1034:4967272104787984',
'info_dict': {
'id': '4967273022359838',
'ext': 'mp4',
'display_id': 'Nse4S9TTU',
'title': '#张婧仪[超话]#📸#婧仪的相册集#  早收工的一天,小张@张婧仪 变身可可爱爱小导游来次说走就走的泉州City Walk[举手]',
'alt_title': '#张婧仪[超话]#📸#婧仪的相册集# \n早收工的一天,小张@张婧仪 变身可可爱爱小导游来次说走就走的泉州City Walk[举手]',
'description': '#张婧仪[超话]#📸#婧仪的相册集# \n早收工的一天,小张@张婧仪 变身可可爱爱小导游来次说走就走的泉州City Walk[举手] http://t.cn/A6WTpbEu \u200B\u200B\u200B',
'uploader': '张婧仪工作室',
'uploader_id': '7610808848',
'uploader_url': 'https://weibo.com/u/7610808848',
'view_count': int,
'like_count': int,
'repost_count': int,
'duration': 85,
'thumbnail': 'https://wx2.sinaimg.cn/orj480/008j4b3qly1hjsce01gnqj30u00gvwf8.jpg',
'tags': ['婧仪的相册集'],
'timestamp': 1699773545,
'upload_date': '20231112',
'_old_archive_ids': ['weibomobile 4967273022359838'],
},
}]
def _real_extract(self, url):
@ -302,38 +275,6 @@ class WeiboUserIE(WeiboBaseIE):
'uploader': '萧影殿下',
},
'playlist_mincount': 195,
}, {
'url': 'https://weibo.com/u/7610808848?tabtype=newVideo&layerid=4967273022359838',
'info_dict': {
'id': '7610808848',
'title': '张婧仪工作室的视频',
'description': '张婧仪工作室的全部视频',
'uploader': '张婧仪工作室',
},
'playlist_mincount': 61,
}, {
'url': 'https://weibo.com/u/7610808848?tabtype=newVideo&layerid=4967273022359838',
'info_dict': {
'id': '4967273022359838',
'ext': 'mp4',
'display_id': 'Nse4S9TTU',
'title': '#张婧仪[超话]#📸#婧仪的相册集#  早收工的一天,小张@张婧仪 变身可可爱爱小导游来次说走就走的泉州City Walk[举手]',
'alt_title': '#张婧仪[超话]#📸#婧仪的相册集# \n早收工的一天,小张@张婧仪 变身可可爱爱小导游来次说走就走的泉州City Walk[举手]',
'description': '#张婧仪[超话]#📸#婧仪的相册集# \n早收工的一天,小张@张婧仪 变身可可爱爱小导游来次说走就走的泉州City Walk[举手] http://t.cn/A6WTpbEu \u200B\u200B\u200B',
'uploader': '张婧仪工作室',
'uploader_id': '7610808848',
'uploader_url': 'https://weibo.com/u/7610808848',
'view_count': int,
'like_count': int,
'repost_count': int,
'duration': 85,
'thumbnail': 'https://wx2.sinaimg.cn/orj480/008j4b3qly1hjsce01gnqj30u00gvwf8.jpg',
'tags': ['婧仪的相册集'],
'timestamp': 1699773545,
'upload_date': '20231112',
'_old_archive_ids': ['weibomobile 4967273022359838'],
},
'params': {'noplaylist': True},
}]
def _fetch_page(self, uid, cursor=0, page=1):
@ -354,11 +295,6 @@ class WeiboUserIE(WeiboBaseIE):
def _real_extract(self, url):
uid = self._match_id(url)
params = {k: v[-1] for k, v in parse_qs(url).items()}
video_id = params.get('layerid') if params.get('tabtype') == 'newVideo' else None
if not self._yes_playlist(uid, video_id):
return self.url_result(f'https://weibo.com/{uid}/{video_id}', WeiboIE, video_id)
first_page = self._fetch_page(uid)
uploader = traverse_obj(first_page, ('list', ..., 'user', 'screen_name', {str}), get_all=False)
metainfo = {

View File

@ -566,7 +566,6 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
'gridContinuation': (self._grid_entries, None),
'itemSectionContinuation': (self._post_thread_continuation_entries, None),
'sectionListContinuation': (extract_entries, None), # for feeds
'lockupViewModel': (self._grid_entries, 'items'), # for playlists tab
}
continuation_items = traverse_obj(response, (
@ -1027,7 +1026,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'info_dict': {
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
'title': 'Igor Kleiner - Playlists',
'description': r're:(?s)Добро пожаловать на мой канал! Здесь вы найдете видео .{504}/a1/50b/10a$',
'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
'uploader': 'Igor Kleiner ',
'uploader_id': '@IgorDataScience',
'uploader_url': 'https://www.youtube.com/@IgorDataScience',
@ -1044,7 +1043,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'info_dict': {
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
'title': 'Igor Kleiner - Playlists',
'description': r're:(?s)Добро пожаловать на мой канал! Здесь вы найдете видео .{504}/a1/50b/10a$',
'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
'uploader': 'Igor Kleiner ',
'uploader_id': '@IgorDataScience',
'uploader_url': 'https://www.youtube.com/@IgorDataScience',
@ -1094,7 +1093,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
'only_matching': True,
}, {
# TODO: fix availability and view_count extraction
# TODO: fix availability extraction
'note': 'basic, single video playlist',
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlSLRHmI1qNm0wjyVNWw1pCU',
'info_dict': {
@ -1216,7 +1215,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader': 'lex will',
},
'playlist_mincount': 18,
'skip': 'This Community isn\'t available',
}, {
# TODO: fix channel_is_verified extraction
'note': 'Search tab',
@ -1401,7 +1399,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, {
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
'info_dict': {
'id': 'VFGoUmo74wE', # This will keep changing
'id': 'YDvsBbKfLPA', # This will keep changing
'ext': 'mp4',
'title': str,
'upload_date': r're:\d{8}',
@ -1580,7 +1578,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_count': 50,
'expected_warnings': ['YouTube Music is not directly supported'],
}, {
# TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
'note': 'unlisted single video playlist',
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQLfIN0MMgp0wVV6MP3bM4_',
'info_dict': {
@ -1600,19 +1597,19 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
},
'playlist': [{
'info_dict': {
'title': 'Big Buck Bunny 60fps 4K - Official Blender Foundation Short Film',
'id': 'aqz-KE-bpKQ',
'title': 'youtube-dl test video "\'/\\ä↭𝕐',
'id': 'BaW_jenozKc',
'_type': 'url',
'ie_key': 'Youtube',
'duration': 635,
'channel_id': 'UCSMOQeBJ2RAnuFungnQOxLg',
'channel_url': 'https://www.youtube.com/channel/UCSMOQeBJ2RAnuFungnQOxLg',
'duration': 10,
'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
'channel_url': 'https://www.youtube.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
'view_count': int,
'url': 'https://www.youtube.com/watch?v=aqz-KE-bpKQ',
'channel': 'Blender',
'uploader_id': '@BlenderOfficial',
'uploader_url': 'https://www.youtube.com/@BlenderOfficial',
'uploader': 'Blender',
'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',
'channel': 'Philipp Hagemeister',
'uploader_id': '@PhilippHagemeister',
'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
'uploader': 'Philipp Hagemeister',
},
}],
'playlist_count': 1,
@ -1678,6 +1675,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
'only_matching': True,
}, {
# TODO: fix metadata extraction
'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
'info_dict': {
@ -1696,7 +1694,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader': 'pukkandan',
},
'playlist_mincount': 2,
'skip': 'https://github.com/yt-dlp/yt-dlp/issues/13690',
}, {
'note': 'translated tab name',
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
@ -1804,7 +1801,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'title': 'Not Just Bikes - Shorts',
'tags': 'count:10',
'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
'description': 'md5:295758591d0d43d8594277be54584da7',
'description': 'md5:1d9fc1bad7f13a487299d1fe1712e031',
'channel_follower_count': int,
'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
'channel': 'Not Just Bikes',
@ -1825,7 +1822,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
'channel': '中村悠一',
'channel_follower_count': int,
'description': 'md5:76b312b48a26c3b0e4d90e2dfc1b417d',
'description': 'md5:e8fd705073a594f27d6d6d020da560dc',
'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
'uploader_id': '@Yuichi-Nakamura',
'uploader': '中村悠一',
@ -1868,13 +1865,12 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'tags': [],
},
'playlist_mincount': 30,
'skip': 'The channel/playlist does not exist and the URL redirected to youtube.com home page',
}, {
# Trending Gaming Tab. tab id is empty
'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
'info_dict': {
'id': 'trending',
'title': 'trending',
'title': 'trending - Gaming',
'tags': [],
},
'playlist_mincount': 30,
@ -2022,7 +2018,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader': 'A Himitsu',
'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A',
'tags': 'count:12',
'description': 'Music producer, sometimes.',
'description': 'I make music',
'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
'channel_follower_count': int,
'channel_is_verified': True,
@ -2308,20 +2304,19 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
)
IE_NAME = 'youtube:playlist'
_TESTS = [{
# TODO: fix availability extraction
'note': 'issue #673',
'url': 'PLBB231211A4F62143',
'info_dict': {
'title': 'Team Fortress 2 [2010 Version]',
'title': '[OLD]Team Fortress 2 (Class-based LP)',
'id': 'PLBB231211A4F62143',
'uploader': 'Wickman Wish',
'uploader_id': '@WickmanWish',
'uploader': 'Wickman',
'uploader_id': '@WickmanVT',
'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
'view_count': int,
'uploader_url': 'https://www.youtube.com/@WickmanWish',
'uploader_url': 'https://www.youtube.com/@WickmanVT',
'modified_date': r're:\d{8}',
'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
'channel': 'Wickman Wish',
'channel': 'Wickman',
'tags': [],
'channel_url': 'https://www.youtube.com/channel/UCKSpbfbl5kRQpTdL7kMc-1Q',
'availability': 'public',
@ -2336,7 +2331,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
'playlist_count': 2,
'skip': 'This playlist is private',
}, {
# TODO: fix availability extraction
'note': 'embedded',
'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
'playlist_count': 4,
@ -2357,7 +2351,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
},
'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden', 'Retrying', 'Giving up'],
}, {
# TODO: fix availability extraction
'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
'playlist_mincount': 455,
'info_dict': {