[ie/twitter:broadcast] Support events URLs (#13248)

Closes #12989 Authored by: doe1080
[ie/podchaser] Fix extractor (#13271)
2026-06-20 17:54:50 +00:00 · 2025-05-23 19:25:56 +00:00 · 2025-05-23 17:42:24 +00:00 · 2025-05-23 17:29:55 +00:00 · 2025-05-23 12:58:53 +00:00 · 2025-05-23 12:53:36 +00:00
6 changed files with 222 additions and 32 deletions
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -2147,6 +2147,7 @@ from .toggle import (
 from .toggo import ToggoIE
 from .tonline import TOnlineIE
 from .toongoggles import ToonGogglesIE
+from .toutiao import ToutiaoIE
 from .toutv import TouTvIE
 from .toypics import (
    ToypicsIE,
--- a/yt_dlp/extractor/patreon.py
+++ b/yt_dlp/extractor/patreon.py
@@ -340,8 +340,9 @@ class PatreonIE(PatreonBaseIE):
                    'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
                }))

-        # all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, Vimeo
-        headers = {'referer': url}
+        # Must be all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, and Vimeo.
+        # patreon.com URLs redirect to www.patreon.com; this matters when requesting mux.com m3u8s
+        headers = {'referer': 'https://www.patreon.com/'}

        # handle Vimeo embeds
        if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
@@ -352,7 +353,7 @@ class PatreonIE(PatreonBaseIE):
                    v_url, video_id, 'Checking Vimeo embed URL', headers=headers,
                    fatal=False, errnote=False, expected_status=429):  # 429 is TLS fingerprint rejection
                entries.append(self.url_result(
-                    VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
+                    VimeoIE._smuggle_referrer(v_url, headers['referer']),
                    VimeoIE, url_transparent=True))

        embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
--- a/yt_dlp/extractor/podchaser.py
+++ b/yt_dlp/extractor/podchaser.py
@@ -5,11 +5,13 @@ from .common import InfoExtractor
 from ..utils import (
    OnDemandPagedList,
    float_or_none,
+    int_or_none,
+    orderedSet,
    str_or_none,
-    str_to_int,
-    traverse_obj,
    unified_timestamp,
+    url_or_none,
 )
+from ..utils.traversal import require, traverse_obj


 class PodchaserIE(InfoExtractor):
@@ -21,24 +23,25 @@ class PodchaserIE(InfoExtractor):
            'id': '104365585',
            'title': 'Ep. 285 – freeze me off',
            'description': 'cam ahn',
-            'thumbnail': r're:^https?://.*\.jpg$',
+            'thumbnail': r're:https?://.+/.+\.jpg',
            'ext': 'mp3',
-            'categories': ['Comedy'],
+            'categories': ['Comedy', 'News', 'Politics', 'Arts'],
            'tags': ['comedy', 'dark humor'],
-            'series': 'Cum Town',
+            'series': 'The Adam Friedland Show Podcast',
            'duration': 3708,
            'timestamp': 1636531259,
            'upload_date': '20211110',
            'average_rating': 4.0,
+            'series_id': '36924',
        },
    }, {
        'url': 'https://www.podchaser.com/podcasts/the-bone-zone-28853',
        'info_dict': {
            'id': '28853',
            'title': 'The Bone Zone',
-            'description': 'Podcast by The Bone Zone',
+            'description': r're:The official home of the Bone Zone podcast.+',
        },
-        'playlist_count': 275,
+        'playlist_mincount': 275,
    }, {
        'url': 'https://www.podchaser.com/podcasts/sean-carrolls-mindscape-scienc-699349/episodes',
        'info_dict': {
@@ -51,19 +54,33 @@ class PodchaserIE(InfoExtractor):

    @staticmethod
    def _parse_episode(episode, podcast):
-        return {
-            'id': str(episode.get('id')),
-            'title': episode.get('title'),
-            'description': episode.get('description'),
-            'url': episode.get('audio_url'),
-            'thumbnail': episode.get('image_url'),
-            'duration': str_to_int(episode.get('length')),
-            'timestamp': unified_timestamp(episode.get('air_date')),
-            'average_rating': float_or_none(episode.get('rating')),
-            'categories': list(set(traverse_obj(podcast, (('summary', None), 'categories', ..., 'text')))),
-            'tags': traverse_obj(podcast, ('tags', ..., 'text')),
-            'series': podcast.get('title'),
-        }
+        info = traverse_obj(episode, {
+            'id': ('id', {int}, {str_or_none}, {require('episode ID')}),
+            'title': ('title', {str}),
+            'description': ('description', {str}),
+            'url': ('audio_url', {url_or_none}),
+            'thumbnail': ('image_url', {url_or_none}),
+            'duration': ('length', {int_or_none}),
+            'timestamp': ('air_date', {unified_timestamp}),
+            'average_rating': ('rating', {float_or_none}),
+        })
+        info.update(traverse_obj(podcast, {
+            'series': ('title', {str}),
+            'series_id': ('id', {int}, {str_or_none}),
+            'categories': (('summary', None), 'categories', ..., 'text', {str}, filter, all, {orderedSet}),
+            'tags': ('tags', ..., 'text', {str}),
+        }))
+        info['vcodec'] = 'none'
+
+        if info.get('series_id'):
+            podcast_slug = traverse_obj(podcast, ('slug', {str})) or 'podcast'
+            episode_slug = traverse_obj(episode, ('slug', {str})) or 'episode'
+            info['webpage_url'] = '/'.join((
+                'https://www.podchaser.com/podcasts',
+                '-'.join((podcast_slug[:30].rstrip('-'), info['series_id'])),
+                '-'.join((episode_slug[:30].rstrip('-'), info['id']))))
+
+        return info

    def _call_api(self, path, *args, **kwargs):
        return self._download_json(f'https://api.podchaser.com/{path}', *args, **kwargs)
@@ -93,5 +110,5 @@ class PodchaserIE(InfoExtractor):
                OnDemandPagedList(functools.partial(self._fetch_page, podcast_id, podcast), self._PAGE_SIZE),
                str_or_none(podcast.get('id')), podcast.get('title'), podcast.get('description'))

-        episode = self._call_api(f'episodes/{episode_id}', episode_id)
+        episode = self._call_api(f'podcasts/{podcast_id}/episodes/{episode_id}/player_ids', episode_id)
        return self._parse_episode(episode, podcast)
--- a/yt_dlp/extractor/toutiao.py
+++ b/yt_dlp/extractor/toutiao.py
@@ -0,0 +1,121 @@
+import json
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    int_or_none,
+    str_or_none,
+    try_call,
+    url_or_none,
+)
+from ..utils.traversal import find_element, traverse_obj
+
+
+class ToutiaoIE(InfoExtractor):
+    IE_NAME = 'toutiao'
+    IE_DESC = '今日头条'
+
+    _VALID_URL = r'https?://www\.toutiao\.com/video/(?P<id>\d+)/?(?:[?#]|$)'
+    _TESTS = [{
+        'url': 'https://www.toutiao.com/video/7505382061495176511/',
+        'info_dict': {
+            'id': '7505382061495176511',
+            'ext': 'mp4',
+            'title': '新疆多地现不明飞行物，目击者称和月亮一样亮，几秒内突然加速消失，气象部门回应',
+            'comment_count': int,
+            'duration': 9.753,
+            'like_count': int,
+            'release_date': '20250517',
+            'release_timestamp': 1747483344,
+            'thumbnail': r're:https?://p\d+-sign\.toutiaoimg\.com/.+$',
+            'uploader': '极目新闻',
+            'uploader_id': 'MS4wLjABAAAAeateBb9Su8I3MJOZozmvyzWktmba5LMlliRDz1KffnM',
+            'view_count': int,
+        },
+    }, {
+        'url': 'https://www.toutiao.com/video/7479446610359878153/',
+        'info_dict': {
+            'id': '7479446610359878153',
+            'ext': 'mp4',
+            'title': '小伙竟然利用两块磁铁制作成磁力减震器，简直太有创意了！',
+            'comment_count': int,
+            'duration': 118.374,
+            'like_count': int,
+            'release_date': '20250308',
+            'release_timestamp': 1741444368,
+            'thumbnail': r're:https?://p\d+-sign\.toutiaoimg\.com/.+$',
+            'uploader': '小莉创意发明',
+            'uploader_id': 'MS4wLjABAAAA4f7d4mwtApALtHIiq-QM20dwXqe32NUz0DeWF7wbHKw',
+            'view_count': int,
+        },
+    }]
+
+    def _real_initialize(self):
+        if self._get_cookies('https://www.toutiao.com').get('ttwid'):
+            return
+
+        urlh = self._request_webpage(
+            'https://ttwid.bytedance.com/ttwid/union/register/', None,
+            'Fetching ttwid', 'Unable to fetch ttwid', headers={
+                'Content-Type': 'application/json',
+            }, data=json.dumps({
+                'aid': 24,
+                'needFid': False,
+                'region': 'cn',
+                'service': 'www.toutiao.com',
+                'union': True,
+            }).encode(),
+        )
+
+        if ttwid := try_call(lambda: self._get_cookies(urlh.url)['ttwid'].value):
+            self._set_cookie('.toutiao.com', 'ttwid', ttwid)
+            return
+
+        self.raise_login_required()
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        video_data = traverse_obj(webpage, (
+            {find_element(tag='script', id='RENDER_DATA')},
+            {urllib.parse.unquote}, {json.loads}, 'data', 'initialVideo',
+        ))
+
+        formats = []
+        for video in traverse_obj(video_data, (
+            'videoPlayInfo', 'video_list', lambda _, v: v['main_url'],
+        )):
+            formats.append({
+                'url': video['main_url'],
+                **traverse_obj(video, ('video_meta', {
+                    'acodec': ('audio_profile', {str}),
+                    'asr': ('audio_sample_rate', {int_or_none}),
+                    'audio_channels': ('audio_channels', {float_or_none}, {int_or_none}),
+                    'ext': ('vtype', {str}),
+                    'filesize': ('size', {int_or_none}),
+                    'format_id': ('definition', {str}),
+                    'fps': ('fps', {int_or_none}),
+                    'height': ('vheight', {int_or_none}),
+                    'tbr': ('real_bitrate', {float_or_none(scale=1000)}),
+                    'vcodec': ('codec_type', {str}),
+                    'width': ('vwidth', {int_or_none}),
+                })),
+            })
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            **traverse_obj(video_data, {
+                'comment_count': ('commentCount', {int_or_none}),
+                'duration': ('videoPlayInfo', 'video_duration', {float_or_none}),
+                'like_count': ('repinCount', {int_or_none}),
+                'release_timestamp': ('publishTime', {int_or_none}),
+                'thumbnail': (('poster', 'coverUrl'), {url_or_none}, any),
+                'title': ('title', {str}),
+                'uploader': ('userInfo', 'name', {str}),
+                'uploader_id': ('userInfo', 'userId', {str_or_none}),
+                'view_count': ('playCount', {int_or_none}),
+                'webpage_url': ('detailUrl', {url_or_none}),
+            }),
+        }
--- a/yt_dlp/extractor/twitcasting.py
+++ b/yt_dlp/extractor/twitcasting.py
@@ -1,4 +1,5 @@
 import base64
+import hashlib
 import itertools
 import re

@@ -16,6 +17,7 @@ from ..utils import (
    str_to_int,
    try_get,
    unified_timestamp,
+    update_url_query,
    url_or_none,
    urlencode_postdata,
    urljoin,
@@ -171,6 +173,10 @@ class TwitCastingIE(InfoExtractor):
                    'player': 'pc_web',
                })

+            password_params = {
+                'word': hashlib.md5(video_password.encode()).hexdigest(),
+            } if video_password else None
+
            formats = []
            # low: 640x360, medium: 1280x720, high: 1920x1080
            qq = qualities(['low', 'medium', 'high'])
@@ -178,7 +184,7 @@ class TwitCastingIE(InfoExtractor):
                'tc-hls', 'streams', {dict.items}, lambda _, v: url_or_none(v[1]),
            )):
                formats.append({
-                    'url': m3u8_url,
+                    'url': update_url_query(m3u8_url, password_params),
                    'format_id': f'hls-{quality}',
                    'ext': 'mp4',
                    'quality': qq(quality),
@@ -192,7 +198,7 @@ class TwitCastingIE(InfoExtractor):
                    'llfmp4', 'streams', {dict.items}, lambda _, v: url_or_none(v[1]),
                )):
                    formats.append({
-                        'url': ws_url,
+                        'url': update_url_query(ws_url, password_params),
                        'format_id': f'ws-{mode}',
                        'ext': 'mp4',
                        'quality': qq(mode),
--- a/yt_dlp/extractor/twitter.py
+++ b/yt_dlp/extractor/twitter.py
@@ -20,7 +20,6 @@ from ..utils import (
    remove_end,
    str_or_none,
    strip_or_none,
-    traverse_obj,
    truncate_string,
    try_call,
    try_get,
@@ -29,6 +28,7 @@ from ..utils import (
    url_or_none,
    xpath_text,
 )
+from ..utils.traversal import require, traverse_obj


 class TwitterBaseIE(InfoExtractor):
@@ -1596,8 +1596,8 @@ class TwitterAmplifyIE(TwitterBaseIE):

 class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    IE_NAME = 'twitter:broadcast'
-    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

+    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?P<type>broadcasts|events)/(?P<id>\w+)'
    _TESTS = [{
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
@@ -1605,6 +1605,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
+            'display_id': '1yNGaQLWpejGj',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': 'andreamsahouri',
            'uploader_url': 'https://twitter.com/andreamsahouri',
@@ -1612,6 +1613,8 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
            'upload_date': '20200601',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
+            'concurrent_view_count': int,
+            'live_status': 'was_live',
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
@@ -1619,6 +1622,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
            'id': '1ZkKzeyrPbaxv',
            'ext': 'mp4',
            'title': 'Starship | SN10 | High-Altitude Flight Test',
+            'display_id': '1ZkKzeyrPbaxv',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
@@ -1626,6 +1630,8 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
            'upload_date': '20210303',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
+            'concurrent_view_count': int,
+            'live_status': 'was_live',
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
@@ -1633,6 +1639,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
            'id': '1OyKAVQrgzwGb',
            'ext': 'mp4',
            'title': 'Starship Flight Test',
+            'display_id': '1OyKAVQrgzwGb',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
@@ -1640,21 +1647,58 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
            'upload_date': '20230420',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
+            'concurrent_view_count': int,
+            'live_status': 'was_live',
+        },
+    }, {
+        'url': 'https://x.com/i/events/1910629646300762112',
+        'info_dict': {
+            'id': '1LyxBWDRNqyKN',
+            'ext': 'mp4',
+            'title': '#ガンニバル ウォッチパーティー',
+            'concurrent_view_count': int,
+            'display_id': '1910629646300762112',
+            'live_status': 'was_live',
+            'release_date': '20250423',
+            'release_timestamp': 1745409000,
+            'tags': ['ガンニバル'],
+            'thumbnail': r're:https?://[^?#]+\.jpg\?token=',
+            'timestamp': 1745403328,
+            'upload_date': '20250423',
+            'uploader': 'ディズニープラス公式',
+            'uploader_id': 'DisneyPlusJP',
+            'uploader_url': 'https://twitter.com/DisneyPlusJP',
+            'view_count': int,
        },
    }]

    def _real_extract(self, url):
-        broadcast_id = self._match_id(url)
+        broadcast_type, display_id = self._match_valid_url(url).group('type', 'id')
+
+        if broadcast_type == 'events':
+            timeline = self._call_api(
+                f'live_event/1/{display_id}/timeline.json', display_id)
+            broadcast_id = traverse_obj(timeline, (
+                'twitter_objects', 'broadcasts', ..., ('id', 'broadcast_id'),
+                {str}, any, {require('broadcast ID')}))
+        else:
+            broadcast_id = display_id
+
        broadcast = self._call_api(
            'broadcasts/show.json', broadcast_id,
            {'ids': broadcast_id})['broadcasts'][broadcast_id]
        if not broadcast:
            raise ExtractorError('Broadcast no longer exists', expected=True)
        info = self._parse_broadcast_data(broadcast, broadcast_id)
-        info['title'] = broadcast.get('status') or info.get('title')
-        info['uploader_id'] = broadcast.get('twitter_username') or info.get('uploader_id')
-        info['uploader_url'] = format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None)
+        info.update({
+            'display_id': display_id,
+            'title': broadcast.get('status') or info.get('title'),
+            'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
+            'uploader_url': format_field(
+                broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
+        })
        if info['live_status'] == 'is_upcoming':
+            self.raise_no_formats('This live broadcast has not yet started', expected=True)
            return info

        media_key = broadcast['media_key']
Author	SHA1	Message	Date
doe1080	7794374de8	[ie/twitter:broadcast] Support events URLs (#13248) Closes #12989 Authored by: doe1080	2025-05-23 19:25:56 +00:00
bashonly	538eb30567	[ie/podchaser] Fix extractor (#13271) Closes #13269 Authored by: bashonly	2025-05-23 17:42:24 +00:00
doe1080	f8051e3a61	[ie/toutiao] Add extractor (#13246) Closes #12125 Authored by: doe1080	2025-05-23 17:29:55 +00:00
bashonly	52f9729c9a	[ie/twitcasting] Fix password-protected livestream support (#13097) Closes #13096 Authored by: bashonly	2025-05-23 12:58:53 +00:00
bashonly	1a8a03ea8d	[ie/patreon] Fix referer header used for embeds (#13276) Fix e0d6c0822930f6e63f574d46d946a58b73ecd10c Closes #13263 Authored by: bashonly	2025-05-23 12:53:36 +00:00