2026-06-19 01:04:42 +00:00
4 changed files with 57 additions and 159 deletions
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -500,14 +500,6 @@ def validate_options(opts):
            'To let yt-dlp download and merge the best available formats, simply do not pass any format selection',
            'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning')))

-    # Common mistake: -f mp4
-    if opts.format == 'mp4':
-        warnings.append('.\n         '.join((
-            '"-f mp4" selects the best pre-merged mp4 format which is often not what\'s intended',
-            'Pre-merged mp4 formats are not available from all sites, or may only be available in lower quality',
-            'To prioritize the best h264 video and aac audio in an mp4 container, use "-t mp4" instead',
-            'If you know what you are doing and want a pre-merged mp4 format, use "-f b[ext=mp4]" instead to suppress this warning')))
-
    # --(postprocessor/downloader)-args without name
    def report_args_compat(name, value, key1, key2=None, where=None):
        if key1 in value and key2 not in value:
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -65,7 +65,7 @@ class TikTokBaseIE(InfoExtractor):

    @functools.cached_property
    def _DEVICE_ID(self):
-        return self._KNOWN_DEVICE_ID or str(random.randint(7250000000000000000, 7325099899999994577))
+        return self._KNOWN_DEVICE_ID or str(random.randint(7250000000000000000, 7351147085025500000))

    @functools.cached_property
    def _API_HOSTNAME(self):
@@ -942,6 +942,7 @@ class TikTokUserIE(TikTokBaseIE):
            'id': 'MS4wLjABAAAAM3R2BtjzVT-uAtstkl2iugMzC6AtnpkojJbjiOdDDrdsTiTR75-8lyWJCY5VvDrZ',
        },
    }]
+    _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:115.0) Gecko/20100101 Firefox/115.0'
    _API_BASE_URL = 'https://www.tiktok.com/api/creator/item_list/'

    def _build_web_query(self, sec_uid, cursor):
@@ -985,23 +986,9 @@ class TikTokUserIE(TikTokBaseIE):

        cursor = int(time.time() * 1E3)
        for page in itertools.count(1):
-            for retry in self.RetryManager():
-                response = self._download_json(
-                    self._API_BASE_URL, display_id, f'Downloading page {page}',
-                    query=self._build_web_query(sec_uid, cursor))
-
-                # Avoid infinite loop caused by bad device_id
-                # See: https://github.com/yt-dlp/yt-dlp/issues/14031
-                current_batch = sorted(traverse_obj(response, ('itemList', ..., 'id', {str})))
-                if current_batch and current_batch == sorted(seen_ids):
-                    message = 'TikTok API keeps sending the same page'
-                    if self._KNOWN_DEVICE_ID:
-                        raise ExtractorError(
-                            f'{message}. Try again with a different device_id', expected=True)
-                    # The user didn't pass a device_id so we can reset it and retry
-                    del self._DEVICE_ID
-                    retry.error = ExtractorError(
-                        f'{message}. Taking measures to avoid an infinite loop', expected=True)
+            response = self._download_json(
+                self._API_BASE_URL, display_id, f'Downloading page {page}',
+                query=self._build_web_query(sec_uid, cursor), headers={'User-Agent': self._USER_AGENT})

            for video in traverse_obj(response, ('itemList', lambda _, v: v['id'])):
                video_id = video['id']
@@ -1021,52 +1008,42 @@ class TikTokUserIE(TikTokBaseIE):
                cursor = old_cursor - 7 * 86_400_000
            # In case 'hasMorePrevious' is wrong, break if we have gone back before TikTok existed
            if cursor < 1472706000000 or not traverse_obj(response, 'hasMorePrevious'):
-                return
+                break

-            # User directly passed sec_uid via prefix URL, bypassing our private account detection
-            if not user_name and not seen_ids:
-                self.raise_login_required(
-                    'This user\'s account is likely private. Log into an account that has access')
-
-    def _extract_sec_uid_from_embed(self, user_name):
+    def _get_sec_uid(self, user_url, user_name, msg):
        webpage = self._download_webpage(
-            f'https://www.tiktok.com/embed/@{user_name}', user_name,
-            'Downloading user embed page', errnote=False, fatal=False)
-        if not webpage:
-            self.report_warning('This user\'s account is either private or has embedding disabled')
-            return None
-
-        data = traverse_obj(self._search_json(
-            r'<script[^>]+\bid=[\'"]__FRONTITY_CONNECT_STATE__[\'"][^>]*>',
-            webpage, 'data', user_name, default={}),
-            ('source', 'data', f'/embed/@{user_name}', {dict}))
-
-        for aweme_id in traverse_obj(data, ('videoList', ..., 'id', {str})):
-            webpage_url = self._create_url(user_name, aweme_id)
-            video_data, _ = self._extract_web_data_and_status(webpage_url, aweme_id, fatal=False)
-            sec_uid = self._parse_aweme_video_web(
-                video_data, webpage_url, aweme_id, extract_flat=True).get('channel_id')
-            if sec_uid:
-                return sec_uid
-
-        return None
+            user_url, user_name, fatal=False, headers={'User-Agent': 'Mozilla/5.0'},
+            note=f'Downloading {msg} webpage', errnote=f'Unable to download {msg} webpage') or ''
+        return (traverse_obj(self._get_universal_data(webpage, user_name),
+                             ('webapp.user-detail', 'userInfo', 'user', 'secUid', {str}))
+                or traverse_obj(self._get_sigi_state(webpage, user_name),
+                                ('LiveRoom', 'liveRoomUserInfo', 'user', 'secUid', {str}),
+                                ('UserModule', 'users', ..., 'secUid', {str}, any)))

    def _real_extract(self, url):
        user_name, sec_uid = self._match_id(url), None
        if mobj := re.fullmatch(r'MS4wLjABAAAA[\w-]{64}', user_name):
            user_name, sec_uid = None, mobj.group(0)
        else:
+            sec_uid = (self._get_sec_uid(self._UPLOADER_URL_FORMAT % user_name, user_name, 'user')
+                       or self._get_sec_uid(self._UPLOADER_URL_FORMAT % f'{user_name}/live', user_name, 'live'))
+
+        if not sec_uid:
            webpage = self._download_webpage(
-                self._UPLOADER_URL_FORMAT % user_name, user_name,
-                'Downloading user webpage', 'Unable to download user webpage',
-                fatal=False, headers={'User-Agent': 'Mozilla/5.0'}) or ''
-            detail = traverse_obj(
-                self._get_universal_data(webpage, user_name), ('webapp.user-detail', {dict})) or {}
-            if detail.get('statusCode') == 10222:
-                self.raise_login_required(
-                    'This user\'s account is private. Log into an account that has access')
-            sec_uid = traverse_obj(detail, (
-                'userInfo', 'user', 'secUid', {str})) or self._extract_sec_uid_from_embed(user_name)
+                f'https://www.tiktok.com/embed/@{user_name}', user_name,
+                note='Downloading user embed page', fatal=False) or ''
+            data = traverse_obj(self._search_json(
+                r'<script[^>]+\bid=[\'"]__FRONTITY_CONNECT_STATE__[\'"][^>]*>',
+                webpage, 'data', user_name, default={}),
+                ('source', 'data', f'/embed/@{user_name}', {dict}))
+
+            for aweme_id in traverse_obj(data, ('videoList', ..., 'id', {str})):
+                webpage_url = self._create_url(user_name, aweme_id)
+                video_data, _ = self._extract_web_data_and_status(webpage_url, aweme_id, fatal=False)
+                sec_uid = self._parse_aweme_video_web(
+                    video_data, webpage_url, aweme_id, extract_flat=True).get('channel_id')
+                if sec_uid:
+                    break

        if not sec_uid:
            raise ExtractorError(
--- a/yt_dlp/extractor/weibo.py
+++ b/yt_dlp/extractor/weibo.py
@@ -8,7 +8,6 @@ from ..utils import (
    int_or_none,
    make_archive_id,
    mimetype2ext,
-    parse_qs,
    parse_resolution,
    str_or_none,
    strip_jsonp,
@@ -210,11 +209,7 @@ class WeiboIE(WeiboBaseIE):


 class WeiboVideoIE(WeiboBaseIE):
-    _VIDEO_ID_RE = r'\d+:(?:[\da-f]{32}|\d{16,})'
-    _VALID_URL = [
-        fr'https?://(?:www\.)?weibo\.com/tv/show/(?P<id>{_VIDEO_ID_RE})',
-        fr'https?://video\.weibo\.com/show/?\?(?:[^#]+&)?fid=(?P<id>{_VIDEO_ID_RE})',
-    ]
+    _VALID_URL = r'https?://(?:www\.)?weibo\.com/tv/show/(?P<id>\d+:(?:[\da-f]{32}|\d{16,}))'
    _TESTS = [{
        'url': 'https://weibo.com/tv/show/1034:4797699866951785?from=old_pc_videoshow',
        'info_dict': {
@@ -257,28 +252,6 @@ class WeiboVideoIE(WeiboBaseIE):
            'upload_date': '20171226',
            '_old_archive_ids': ['weibomobile 4189191225395228'],
        },
-    }, {
-        'url': 'https://video.weibo.com/show?fid=1034:4967272104787984',
-        'info_dict': {
-            'id': '4967273022359838',
-            'ext': 'mp4',
-            'display_id': 'Nse4S9TTU',
-            'title': '#张婧仪[超话]#📸#婧仪的相册集#  早收工的一天，小张@张婧仪 变身可可爱爱小导游，来次说走就走的泉州City Walk[举手]',
-            'alt_title': '#张婧仪[超话]#📸#婧仪的相册集# \n早收工的一天，小张@张婧仪 变身可可爱爱小导游，来次说走就走的泉州City Walk[举手]',
-            'description': '#张婧仪[超话]#📸#婧仪的相册集# \n早收工的一天，小张@张婧仪 变身可可爱爱小导游，来次说走就走的泉州City Walk[举手] http://t.cn/A6WTpbEu \u200B\u200B\u200B',
-            'uploader': '张婧仪工作室',
-            'uploader_id': '7610808848',
-            'uploader_url': 'https://weibo.com/u/7610808848',
-            'view_count': int,
-            'like_count': int,
-            'repost_count': int,
-            'duration': 85,
-            'thumbnail': 'https://wx2.sinaimg.cn/orj480/008j4b3qly1hjsce01gnqj30u00gvwf8.jpg',
-            'tags': ['婧仪的相册集'],
-            'timestamp': 1699773545,
-            'upload_date': '20231112',
-            '_old_archive_ids': ['weibomobile 4967273022359838'],
-        },
    }]

    def _real_extract(self, url):
@@ -302,38 +275,6 @@ class WeiboUserIE(WeiboBaseIE):
            'uploader': '萧影殿下',
        },
        'playlist_mincount': 195,
-    }, {
-        'url': 'https://weibo.com/u/7610808848?tabtype=newVideo&layerid=4967273022359838',
-        'info_dict': {
-            'id': '7610808848',
-            'title': '张婧仪工作室的视频',
-            'description': '张婧仪工作室的全部视频',
-            'uploader': '张婧仪工作室',
-        },
-        'playlist_mincount': 61,
-    }, {
-        'url': 'https://weibo.com/u/7610808848?tabtype=newVideo&layerid=4967273022359838',
-        'info_dict': {
-            'id': '4967273022359838',
-            'ext': 'mp4',
-            'display_id': 'Nse4S9TTU',
-            'title': '#张婧仪[超话]#📸#婧仪的相册集#  早收工的一天，小张@张婧仪 变身可可爱爱小导游，来次说走就走的泉州City Walk[举手]',
-            'alt_title': '#张婧仪[超话]#📸#婧仪的相册集# \n早收工的一天，小张@张婧仪 变身可可爱爱小导游，来次说走就走的泉州City Walk[举手]',
-            'description': '#张婧仪[超话]#📸#婧仪的相册集# \n早收工的一天，小张@张婧仪 变身可可爱爱小导游，来次说走就走的泉州City Walk[举手] http://t.cn/A6WTpbEu \u200B\u200B\u200B',
-            'uploader': '张婧仪工作室',
-            'uploader_id': '7610808848',
-            'uploader_url': 'https://weibo.com/u/7610808848',
-            'view_count': int,
-            'like_count': int,
-            'repost_count': int,
-            'duration': 85,
-            'thumbnail': 'https://wx2.sinaimg.cn/orj480/008j4b3qly1hjsce01gnqj30u00gvwf8.jpg',
-            'tags': ['婧仪的相册集'],
-            'timestamp': 1699773545,
-            'upload_date': '20231112',
-            '_old_archive_ids': ['weibomobile 4967273022359838'],
-        },
-        'params': {'noplaylist': True},
    }]

    def _fetch_page(self, uid, cursor=0, page=1):
@@ -354,11 +295,6 @@ class WeiboUserIE(WeiboBaseIE):

    def _real_extract(self, url):
        uid = self._match_id(url)
-        params = {k: v[-1] for k, v in parse_qs(url).items()}
-        video_id = params.get('layerid') if params.get('tabtype') == 'newVideo' else None
-        if not self._yes_playlist(uid, video_id):
-            return self.url_result(f'https://weibo.com/{uid}/{video_id}', WeiboIE, video_id)
-
        first_page = self._fetch_page(uid)
        uploader = traverse_obj(first_page, ('list', ..., 'user', 'screen_name', {str}), get_all=False)
        metainfo = {
--- a/yt_dlp/extractor/youtube/_tab.py
+++ b/yt_dlp/extractor/youtube/_tab.py
@@ -566,7 +566,6 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
                'gridContinuation': (self._grid_entries, None),
                'itemSectionContinuation': (self._post_thread_continuation_entries, None),
                'sectionListContinuation': (extract_entries, None),  # for feeds
-                'lockupViewModel': (self._grid_entries, 'items'),  # for playlists tab
            }

            continuation_items = traverse_obj(response, (
@@ -1027,7 +1026,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
        'info_dict': {
            'id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'title': 'Igor Kleiner  - Playlists',
-            'description': r're:(?s)Добро пожаловать на мой канал! Здесь вы найдете видео .{504}/a1/50b/10a$',
+            'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
            'uploader': 'Igor Kleiner ',
            'uploader_id': '@IgorDataScience',
            'uploader_url': 'https://www.youtube.com/@IgorDataScience',
@@ -1044,7 +1043,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
        'info_dict': {
            'id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'title': 'Igor Kleiner  - Playlists',
-            'description': r're:(?s)Добро пожаловать на мой канал! Здесь вы найдете видео .{504}/a1/50b/10a$',
+            'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
            'uploader': 'Igor Kleiner ',
            'uploader_id': '@IgorDataScience',
            'uploader_url': 'https://www.youtube.com/@IgorDataScience',
@@ -1094,7 +1093,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
        'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
        'only_matching': True,
    }, {
-        # TODO: fix availability and view_count extraction
+        # TODO: fix availability extraction
        'note': 'basic, single video playlist',
        'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlSLRHmI1qNm0wjyVNWw1pCU',
        'info_dict': {
@@ -1216,7 +1215,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
            'uploader': 'lex will',
        },
        'playlist_mincount': 18,
-        'skip': 'This Community isn\'t available',
    }, {
        # TODO: fix channel_is_verified extraction
        'note': 'Search tab',
@@ -1401,7 +1399,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
    }, {
        'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
        'info_dict': {
-            'id': 'VFGoUmo74wE',  # This will keep changing
+            'id': 'YDvsBbKfLPA',  # This will keep changing
            'ext': 'mp4',
            'title': str,
            'upload_date': r're:\d{8}',
@@ -1580,7 +1578,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
        'playlist_count': 50,
        'expected_warnings': ['YouTube Music is not directly supported'],
    }, {
-        # TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
        'note': 'unlisted single video playlist',
        'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQLfIN0MMgp0wVV6MP3bM4_',
        'info_dict': {
@@ -1600,19 +1597,19 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
        },
        'playlist': [{
            'info_dict': {
-                'title': 'Big Buck Bunny 60fps 4K - Official Blender Foundation Short Film',
-                'id': 'aqz-KE-bpKQ',
+                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
+                'id': 'BaW_jenozKc',
                '_type': 'url',
                'ie_key': 'Youtube',
-                'duration': 635,
-                'channel_id': 'UCSMOQeBJ2RAnuFungnQOxLg',
-                'channel_url': 'https://www.youtube.com/channel/UCSMOQeBJ2RAnuFungnQOxLg',
+                'duration': 10,
+                'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
+                'channel_url': 'https://www.youtube.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
                'view_count': int,
-                'url': 'https://www.youtube.com/watch?v=aqz-KE-bpKQ',
-                'channel': 'Blender',
-                'uploader_id': '@BlenderOfficial',
-                'uploader_url': 'https://www.youtube.com/@BlenderOfficial',
-                'uploader': 'Blender',
+                'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',
+                'channel': 'Philipp Hagemeister',
+                'uploader_id': '@PhilippHagemeister',
+                'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
+                'uploader': 'Philipp Hagemeister',
            },
        }],
        'playlist_count': 1,
@@ -1678,6 +1675,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
        'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
        'only_matching': True,
    }, {
+        # TODO: fix metadata extraction
        'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
        'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
        'info_dict': {
@@ -1696,7 +1694,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
            'uploader': 'pukkandan',
        },
        'playlist_mincount': 2,
-        'skip': 'https://github.com/yt-dlp/yt-dlp/issues/13690',
    }, {
        'note': 'translated tab name',
        'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
@@ -1804,7 +1801,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
            'title': 'Not Just Bikes - Shorts',
            'tags': 'count:10',
            'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
-            'description': 'md5:295758591d0d43d8594277be54584da7',
+            'description': 'md5:1d9fc1bad7f13a487299d1fe1712e031',
            'channel_follower_count': int,
            'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
            'channel': 'Not Just Bikes',
@@ -1825,7 +1822,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
            'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
            'channel': '中村悠一',
            'channel_follower_count': int,
-            'description': 'md5:76b312b48a26c3b0e4d90e2dfc1b417d',
+            'description': 'md5:e8fd705073a594f27d6d6d020da560dc',
            'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
            'uploader_id': '@Yuichi-Nakamura',
            'uploader': '中村悠一',
@@ -1868,13 +1865,12 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
            'tags': [],
        },
        'playlist_mincount': 30,
-        'skip': 'The channel/playlist does not exist and the URL redirected to youtube.com home page',
    }, {
        # Trending Gaming Tab. tab id is empty
        'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
        'info_dict': {
            'id': 'trending',
-            'title': 'trending',
+            'title': 'trending - Gaming',
            'tags': [],
        },
        'playlist_mincount': 30,
@@ -2022,7 +2018,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
            'uploader': 'A Himitsu',
            'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A',
            'tags': 'count:12',
-            'description': 'Music producer, sometimes.',
+            'description': 'I make music',
            'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
            'channel_follower_count': int,
            'channel_is_verified': True,
@@ -2308,20 +2304,19 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    )
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
-        # TODO: fix availability extraction
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
-            'title': 'Team Fortress 2 [2010 Version]',
+            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
-            'uploader': 'Wickman Wish',
-            'uploader_id': '@WickmanWish',
+            'uploader': 'Wickman',
+            'uploader_id': '@WickmanVT',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
-            'uploader_url': 'https://www.youtube.com/@WickmanWish',
+            'uploader_url': 'https://www.youtube.com/@WickmanVT',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
-            'channel': 'Wickman Wish',
+            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/channel/UCKSpbfbl5kRQpTdL7kMc-1Q',
            'availability': 'public',
@@ -2336,7 +2331,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
-        # TODO: fix availability extraction
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
@@ -2357,7 +2351,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
        },
        'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden', 'Retrying', 'Giving up'],
    }, {
-        # TODO: fix availability extraction
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {