[ie/mixlr] Add extractors (#13561)

Authored by: ShockedPlot7560, seproDev Co-authored-by: sepro <sepro@sepr0.com>
[ie] Rework _search_nextjs_v13_data helper (#13711)
2026-06-21 18:24:47 +00:00 · 2025-07-13 01:35:51 +02:00 · 2025-07-12 23:12:05 +00:00 · 2025-07-12 22:12:46 +00:00 · 2025-07-12 21:56:11 +02:00 · 2025-07-12 19:23:22 +00:00
10 changed files with 580 additions and 110 deletions
--- a/README.md
+++ b/README.md
@ -1901,6 +1901,10 @@ The following extractors use this feature:
 #### tver
 * `backend`: Backend API to use for extraction - one of `streaks` (default) or `brightcove` (deprecated)
 #### vimeo
 * `client`: Client to extract video data from. One of `android` (default), `ios` or `web`. The `ios` client only works with previously cached OAuth tokens. The `web` client only works when authenticated with credentials or account cookies
 * `original_format_policy`: Policy for when to try extracting original formats. One of `always`, `never`, or `auto`. The default `auto` policy tries to avoid exceeding the API rate-limit by only making an extra request when Vimeo publicizes the video's downloadability
 **Note**: These options may be changed/removed in the future without concern for backward compatibility
 <!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@ -1959,6 +1959,37 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
        with self.assertWarns(DeprecationWarning):
            self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {})
    def test_search_nextjs_v13_data(self):
        # Fixture of `self.__next_f.push(...)` script segments with several payload types.
        # Only the segments pushed with payload type 1 are expected to contribute to the result:
        #  - the type 2 segment carries "do_not_add_this" and must be ignored
        #  - the type 3 segment is base64 for `33:["$","$L32",null,{"decoded":"success"}]`
        #    and must not be decoded, so no '32' key may appear in the result
        # Within the type 1 rows, props attached to plain elements ("div") and components
        # with empty props ("$L17") must not show up either
        HTML = R'''
            <script>(self.__next_f=self.__next_f||[]).push([0])</script>
            <script>self.__next_f.push([2,"0:[\"$\",\"$L0\",null,{\"do_not_add_this\":\"fail\"}]\n"])</script>
            <script>self.__next_f.push([1,"1:I[46975,[],\"HTTPAccessFallbackBoundary\"]\n2:I[32630,[\"8183\",\"static/chunks/8183-768193f6a9e33cdd.js\"]]\n"])</script>
            <script nonce="abc123">self.__next_f.push([1,"e:[false,[\"$\",\"div\",null,{\"children\":[\"$\",\"$L18\",null,{\"foo\":\"bar\"}]}],false]\n    "])</script>
            <script>self.__next_f.push([1,"2a:[[\"$\",\"div\",null,{\"className\":\"flex flex-col\",\"children\":[]}],[\"$\",\"$L16\",null,{\"meta\":{\"dateCreated\":1730489700,\"uuid\":\"40cac41d-8d29-4ef5-aa11-75047b9f0907\"}}]]\n"])</script>
            <script>self.__next_f.push([1,"df:[\"$undefined\",[\"$\",\"div\",null,{\"children\":[\"$\",\"$L17\",null,{}],\"do_not_include_this_field\":\"fail\"}],[\"$\",\"div\",null,{\"children\":[[\"$\",\"$L19\",null,{\"duplicated_field_name\":{\"x\":1}}],[\"$\",\"$L20\",null,{\"duplicated_field_name\":{\"y\":2}}]]}],\"$undefined\"]\n"])</script>
            <script>self.__next_f.push([3,"MzM6WyIkIiwiJEwzMiIsbnVsbCx7ImRlY29kZWQiOiJzdWNjZXNzIn1d"])</script>
            '''
        # Keys are the component references with the leading "$L" removed; identical prop
        # names under different components ('19' and '20') must be kept apart
        EXPECTED = {
            '18': {
                'foo': 'bar',
            },
            '16': {
                'meta': {
                    'dateCreated': 1730489700,
                    'uuid': '40cac41d-8d29-4ef5-aa11-75047b9f0907',
                },
            },
            '19': {
                'duplicated_field_name': {'x': 1},
            },
            '20': {
                'duplicated_field_name': {'y': 2},
            },
        }
        self.assertEqual(self.ie._search_nextjs_v13_data(HTML, None), EXPECTED)
        # In non-fatal mode, an empty page or a missing (None) page both give an empty dict
        self.assertEqual(self.ie._search_nextjs_v13_data('', None, fatal=False), {})
        self.assertEqual(self.ie._search_nextjs_v13_data(None, None, fatal=False), {})
    def test_search_nuxt_json(self):
        HTML_TMPL = '<script data-ssr="true" id="__NUXT_DATA__" type="application/json">[{}]</script>'
        VALID_DATA = '''
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -309,6 +309,7 @@ from .brilliantpala import (
    BrilliantpalaClassesIE,
    BrilliantpalaElearnIE,
 )
 from .btvplus import BTVPlusIE
 from .bundesliga import BundesligaIE
 from .bundestag import BundestagIE
 from .bunnycdn import BunnyCdnIE
@ -1168,6 +1169,10 @@ from .mixcloud import (
    MixcloudPlaylistIE,
    MixcloudUserIE,
 )
 from .mixlr import (
    MixlrIE,
    MixlrRecoringIE,
 )
 from .mlb import (
    MLBIE,
    MLBTVIE,
--- a/yt_dlp/extractor/btvplus.py
+++ b/yt_dlp/extractor/btvplus.py
@ -0,0 +1,73 @@
 from .common import InfoExtractor
 from ..utils import (
    bug_reports_message,
    clean_html,
    get_element_by_class,
    js_to_json,
    mimetype2ext,
    strip_or_none,
    url_or_none,
    urljoin,
 )
 from ..utils.traversal import traverse_obj
 class BTVPlusIE(InfoExtractor):
    # Extractor for btvplus.bg product pages (shows, series and news items).
    # The numeric product ID in the URL is used as the video ID
    _VALID_URL = r'https?://(?:www\.)?btvplus\.bg/produkt/(?:predavaniya|seriali|novini)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://btvplus.bg/produkt/predavaniya/67271/btv-reporterite/btv-reporterite-12-07-2025-g',
        'info_dict': {
            'ext': 'mp4',
            'id': '67271',
            'title': 'bTV Репортерите - 12.07.2025 г.',
            'thumbnail': 'https://cdn.btv.bg/media/images/940x529/Jul2025/2113606319.jpg',
        },
    }, {
        'url': 'https://btvplus.bg/produkt/seriali/66942/sezon-2/plen-sezon-2-epizod-55',
        'info_dict': {
            'ext': 'mp4',
            'id': '66942',
            'title': 'Плен - сезон 2, епизод 55',
            'thumbnail': 'https://cdn.btv.bg/media/images/940x529/Jun2025/2113595104.jpg',
        },
    }, {
        'url': 'https://btvplus.bg/produkt/novini/67270/btv-novinite-centralna-emisija-12-07-2025',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # The page assigns the (possibly relative) player endpoint to a JS variable
        config_url = urljoin('https://btvplus.bg', self._search_regex(
            r'var\s+videoUrl\s*=\s*[\'"]([^\'"]+)[\'"]',
            webpage, 'player URL'))
        # The endpoint answers with JSON whose 'config' value is searched below
        # for the arguments of a videojs(...) call
        player_config = self._download_json(config_url, video_id)['config']
        videojs_data = self._search_json(
            r'videojs\(["\'][^"\']+["\'],', player_config, 'videojs data',
            video_id, transform_source=js_to_json)
        formats, subtitles = [], {}
        # Only consider sources that carry a usable URL
        sources = traverse_obj(videojs_data, ('sources', lambda _, v: url_or_none(v['src'])))
        for source in sources:
            ext = mimetype2ext(source.get('type'))
            if ext != 'm3u8':
                # Anything but HLS is unexpected; warn and move on to the next source
                self.report_warning(f'Unknown format type {ext}{bug_reports_message()}')
                continue
            hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(
                source['src'], video_id, 'mp4', m3u8_id='hls', fatal=False)
            formats.extend(hls_formats)
            self._merge_subtitles(hls_subtitles, target=subtitles)
        # Prefer the OpenGraph title; fall back to the on-page product title element
        title = strip_or_none(self._og_search_title(webpage, default=None))
        if not title:
            title = clean_html(get_element_by_class('product-title', webpage))
        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'description': self._og_search_description(webpage, default=None),
        }
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@ -1783,6 +1783,59 @@ class InfoExtractor:
            r'<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>', webpage, 'next.js data',
            video_id, end_pattern='</script>', fatal=fatal, default=default, **kw)
    def _search_nextjs_v13_data(self, webpage, video_id, fatal=True):
        """Parses Next.js app router flight data that was introduced in Next.js v13

        The webpage is searched for `<script>self.__next_f.push([type, chunk])</script>`
        segments. The chunks of all data segments (type 1) are concatenated and split into
        rows of the form `<hex id>:<body>`. The result is a flat dict that contains:
          - for every `["$", "$L<hex>", <any>, {props}]` element found in a JSON array row:
            `<hex>` mapped to its non-empty props (with the "children" prop removed;
            children are searched recursively instead)
          - for every row whose body is a JSON object: the row's hex ID mapped to that object

        @param webpage      The HTML of the page to search
        @param video_id     ID passed on to the JSON parser and used in debug messages
        @param fatal        Whether failing to parse a flight segment as JSON should raise.
                            If false, a non-string webpage results in an empty dict
        @returns            A dict of the collected hydration data (may be empty)
        """
        nextjs_data = {}
        if not fatal and not isinstance(webpage, str):
            return nextjs_data

        def flatten(flight_data):
            if not isinstance(flight_data, list):
                return
            if len(flight_data) == 4 and flight_data[0] == '$':
                _, name, _, data = flight_data
                if not isinstance(data, dict):
                    return
                # Nested elements are handled by the recursion below, not stored with the parent
                children = data.pop('children', None)
                if data and isinstance(name, str) and re.fullmatch(r'\$L[0-9a-f]+', name):
                    # It is useful hydration JSON data
                    nextjs_data[name[2:]] = data
                flatten(children)
                return
            for f in flight_data:
                flatten(f)

        flight_chunks = []
        # The pattern for the surrounding JS/tag should be strict as it's a hardcoded string in the next.js source
        # Ref: https://github.com/vercel/next.js/blob/5a4a08fdc/packages/next/src/server/app-render/use-flight-response.tsx#L189
        for flight_segment in re.findall(r'<script\b[^>]*>self\.__next_f\.push\((\[.+?\])\)</script>', webpage):
            segment = self._parse_json(flight_segment, video_id, fatal=fatal, errnote=None if fatal else False)
            # Some earlier versions of next.js "optimized" away this array structure; this is unsupported
            # Ref: https://github.com/vercel/next.js/commit/0123a9d5c9a9a77a86f135b7ae30b46ca986d761
            if not isinstance(segment, list) or len(segment) != 2:
                self.write_debug(
                    f'{video_id}: Unsupported next.js flight data structure detected', only_once=True)
                continue
            # Only use the relevant payload type (1 == data)
            # Ref: https://github.com/vercel/next.js/blob/5a4a08fdc/packages/next/src/server/app-render/use-flight-response.tsx#L11-L14
            payload_type, chunk = segment
            if payload_type != 1:
                continue
            # A data payload that is not a string cannot be flight text;
            # skip it instead of failing on the concatenation
            if not isinstance(chunk, str):
                self.write_debug(
                    f'{video_id}: Unsupported next.js flight data structure detected', only_once=True)
                continue
            flight_chunks.append(chunk)

        # Rows may span several pushed segments, so split only after joining all chunks
        for f in ''.join(flight_chunks).splitlines():
            prefix, _, body = f.lstrip().partition(':')
            if not re.fullmatch(r'[0-9a-f]+', prefix):
                continue
            # The body still isn't guaranteed to be valid JSON, so parsing should always be non-fatal
            if body.startswith('[') and body.endswith(']'):
                flatten(self._parse_json(body, video_id, fatal=False, errnote=False))
            elif body.startswith('{') and body.endswith('}'):
                data = self._parse_json(body, video_id, fatal=False, errnote=False)
                if data is not None:
                    nextjs_data[prefix] = data
        return nextjs_data
    def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
        """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
        rectx = re.escape(context_name)
--- a/yt_dlp/extractor/francetv.py
+++ b/yt_dlp/extractor/francetv.py
@ -1,4 +1,3 @@
 import json
 import re
 import urllib.parse
@ -19,7 +18,11 @@ from ..utils import (
    unsmuggle_url,
    url_or_none,
 )
-from ..utils.traversal import find_element, traverse_obj
+from ..utils.traversal import (
    find_element,
    get_first,
    traverse_obj,
 )
 class FranceTVBaseInfoExtractor(InfoExtractor):
@ -258,7 +261,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
    _TESTS = [{
        'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
        'info_dict': {
-            'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',  # old: c5bda21d-2c6f-4470-8849-3d8327adb2ba'
+            'id': 'b2cf9fd8-e971-4757-8651-848f2772df61',  # old: ec217ecc-0733-48cf-ac06-af1347b849d1
            'ext': 'mp4',
            'title': '13h15, le dimanche... - Les mystères de Jésus',
            'timestamp': 1502623500,
@ -269,7 +272,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
        'params': {
            'skip_download': True,
        },
-        'add_ie': [FranceTVIE.ie_key()],
+        'skip': 'Unfortunately, this video is no longer available',
    }, {
        # geo-restricted
        'url': 'https://www.france.tv/enfants/six-huit-ans/foot2rue/saison-1/3066387-duel-au-vieux-port.html',
@ -287,7 +290,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 1441,
        },
-        'skip': 'No longer available',
+        'skip': 'Unfortunately, this video is no longer available',
    }, {
        # geo-restricted livestream (workflow == 'token-akamai')
        'url': 'https://www.france.tv/france-4/direct.html',
@ -308,6 +311,19 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
            'live_status': 'is_live',
        },
        'params': {'skip_download': 'livestream'},
    }, {
        # Not geo-restricted
        'url': 'https://www.france.tv/france-2/la-maison-des-maternelles/5574051-nous-sommes-amis-et-nous-avons-fait-un-enfant-ensemble.html',
        'info_dict': {
            'id': 'b448bfe4-9fe7-11ee-97d8-2ba3426fa3df',
            'ext': 'mp4',
            'title': 'Nous sommes amis et nous avons fait un enfant ensemble - Émission du jeudi 21 décembre 2023',
            'duration': 1065,
            'thumbnail': r're:https?://.+/.+\.jpg',
            'timestamp': 1703147921,
            'upload_date': '20231221',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # france3
        'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',
@ -342,30 +358,16 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
        'only_matching': True,
    }]
    # XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.goplay
    def _find_json(self, s):
        return self._search_json(
            r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        nextjs_data = self._search_nextjs_v13_data(webpage, display_id)
-        nextjs_data = traverse_obj(
+        if get_first(nextjs_data, ('isLive', {bool})):
            re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
            (..., {json.loads}, ..., {self._find_json}, ..., 'children', ..., ..., 'children', ..., ..., 'children'))
        if traverse_obj(nextjs_data, (..., ..., 'children', ..., 'isLive', {bool}, any)):
            # For livestreams we need the id of the stream instead of the currently airing episode id
-            video_id = traverse_obj(nextjs_data, (
+            video_id = get_first(nextjs_data, ('options', 'id', {str}))
                ..., ..., 'children', ..., 'children', ..., 'children', ..., 'children', ..., ...,
                'children', ..., ..., 'children', ..., ..., 'children', (..., (..., ...)),
                'options', 'id', {str}, any))
        else:
-            video_id = traverse_obj(nextjs_data, (
+            video_id = get_first(nextjs_data, ('video', ('playerReplayId', 'siId'), {str}))
                ..., ..., ..., 'children',
                lambda _, v: v['video']['url'] == urllib.parse.urlparse(url).path,
                'video', ('playerReplayId', 'siId'), {str}, any))
        if not video_id:
            raise ExtractorError('Unable to extract video ID')
--- a/yt_dlp/extractor/goplay.py
+++ b/yt_dlp/extractor/goplay.py
@ -5,16 +5,11 @@ import hashlib
 import hmac
 import json
 import os
 import re
 import urllib.parse
 from .common import InfoExtractor
-from ..utils import (
+from ..utils import ExtractorError, int_or_none
-    ExtractorError,
+from ..utils.traversal import get_first, traverse_obj
    int_or_none,
    remove_end,
    traverse_obj,
 )
 class GoPlayIE(InfoExtractor):
@ -27,10 +22,10 @@ class GoPlayIE(InfoExtractor):
        'info_dict': {
            'id': '2baa4560-87a0-421b-bffc-359914e3c387',
            'ext': 'mp4',
-            'title': 'S22 - Aflevering 1',
+            'title': 'De Slimste Mens ter Wereld - S22 - Aflevering 1',
            'description': r're:In aflevering 1 nemen Daan Alferink, Tess Elst en Xander De Rycke .{66}',
            'series': 'De Slimste Mens ter Wereld',
-            'episode': 'Episode 1',
+            'episode': 'Wordt aangekondigd',
            'season_number': 22,
            'episode_number': 1,
            'season': 'Season 22',
@ -52,7 +47,7 @@ class GoPlayIE(InfoExtractor):
        'info_dict': {
            'id': 'ecb79672-92b9-4cd9-a0d7-e2f0250681ee',
            'ext': 'mp4',
-            'title': 'S11 - Aflevering 1',
+            'title': 'De Mol - S11 - Aflevering 1',
            'description': r're:Tien kandidaten beginnen aan hun verovering van Amerika en ontmoeten .{102}',
            'episode': 'Episode 1',
            'series': 'De Mol',
@ -75,21 +70,13 @@ class GoPlayIE(InfoExtractor):
        if not self._id_token:
            raise self.raise_login_required(method='password')
    # XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.francetv
    def _find_json(self, s):
        return self._search_json(
            r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
-        nextjs_data = traverse_obj(
+        nextjs_data = self._search_nextjs_v13_data(webpage, display_id)
-            re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
+        meta = get_first(nextjs_data, (
-            (..., {json.loads}, ..., {self._find_json}, ...))
+            lambda k, v: k in ('video', 'meta') and v['path'] == urllib.parse.urlparse(url).path))
        meta = traverse_obj(nextjs_data, (
            ..., ..., 'children', ..., ..., 'children',
            lambda _, v: v['video']['path'] == urllib.parse.urlparse(url).path, 'video', any))
        video_id = meta['uuid']
        info_dict = traverse_obj(meta, {
@ -98,19 +85,18 @@ class GoPlayIE(InfoExtractor):
        })
        if traverse_obj(meta, ('program', 'subtype')) != 'movie':
-            for season_data in traverse_obj(nextjs_data, (..., 'children', ..., 'playlists', ...)):
+            for season_data in traverse_obj(nextjs_data, (..., 'playlists', ..., {dict})):
-                episode_data = traverse_obj(
+                episode_data = traverse_obj(season_data, ('videos', lambda _, v: v['videoId'] == video_id, any))
                    season_data, ('videos', lambda _, v: v['videoId'] == video_id, any))
                if not episode_data:
                    continue
-                episode_title = traverse_obj(
+                season_number = traverse_obj(season_data, ('season', {int_or_none}))
                    episode_data, 'contextualTitle', 'episodeTitle', expected_type=str)
                info_dict.update({
-                    'title': episode_title or info_dict.get('title'),
+                    'episode': traverse_obj(episode_data, ('episodeTitle', {str})),
                    'series': remove_end(info_dict.get('title'), f' - {episode_title}'),
                    'season_number': traverse_obj(season_data, ('season', {int_or_none})),
                    'episode_number': traverse_obj(episode_data, ('episodeNumber', {int_or_none})),
                    'season_number': season_number,
                    'series': self._search_regex(
                        fr'^(.+)? - S{season_number} - ', info_dict.get('title'), 'series', default=None),
                })
                break
--- a/yt_dlp/extractor/mixlr.py
+++ b/yt_dlp/extractor/mixlr.py
@ -0,0 +1,134 @@
 from .common import InfoExtractor
 from ..networking import HEADRequest
 from ..utils import int_or_none, parse_iso8601, url_or_none, urlhandle_detect_ext
 from ..utils.traversal import traverse_obj
 class MixlrIE(InfoExtractor):
    # Extractor for Mixlr event pages (<username>.mixlr.com/events/<id>).
    # All data comes from the Mixlr v3 channels API
    _VALID_URL = r'https?://(?:www\.)?(?P<username>[\w-]+)\.mixlr\.com/events/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://suncity-104-9fm.mixlr.com/events/4387115',
        'info_dict': {
            'id': '4387115',
            'ext': 'mp3',
            'title': r're:SUNCITY 104.9FM\'s live audio \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
            'uploader': 'suncity-104-9fm',
            'like_count': int,
            'thumbnail': r're:https://imagecdn\.mixlr\.com/cdn-cgi/image/[^/?#]+/cd5b34d05fa2cee72d80477724a2f02e.png',
            'timestamp': 1751943773,
            'upload_date': '20250708',
            'release_timestamp': 1751943764,
            'release_date': '20250708',
            'live_status': 'is_live',
        },
    }, {
        'url': 'https://brcountdown.mixlr.com/events/4395480',
        'info_dict': {
            'id': '4395480',
            'ext': 'aac',
            'title': r're:Beats Revolution Countdown Episodio 461 \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
            'description': 'md5:5cacd089723f7add3f266bd588315bb3',
            'uploader': 'brcountdown',
            'like_count': int,
            'thumbnail': r're:https://imagecdn\.mixlr\.com/cdn-cgi/image/[^/?#]+/c48727a59f690b87a55d47d123ba0d6d.jpg',
            'timestamp': 1752354007,
            'upload_date': '20250712',
            'release_timestamp': 1752354000,
            'release_date': '20250712',
            'live_status': 'is_live',
        },
    }, {
        'url': 'https://www.brcountdown.mixlr.com/events/4395480',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        username, event_id = mobj.group('username'), mobj.group('id')
        broadcast_info = self._download_json(
            f'https://api.mixlr.com/v3/channels/{username}/events/{event_id}', event_id)
        # Attributes of the first "included" entry and of the event itself
        broadcast_path = ('included', 0, 'attributes')
        event_path = ('data', 'attributes')
        stream_url = traverse_obj(
            broadcast_info, (*broadcast_path, 'progressive_stream_url', {url_or_none}))
        # Probe the stream with a HEAD request; it only counts as a format on a 200 response
        urlh = None
        if stream_url:
            urlh = self._request_webpage(
                HEADRequest(stream_url), event_id, fatal=False, note='Checking stream')
        formats = []
        if urlh and urlh.status == 200:
            ext = urlhandle_detect_ext(urlh)
            if ext == 'octet-stream':
                self.report_warning(
                    'The server did not return a valid file extension for the stream URL. '
                    'Assuming an mp3 stream; postprocessing may fail if this is incorrect')
                ext = 'mp3'
            formats.append({
                'url': stream_url,
                'ext': ext,
                'vcodec': 'none',
            })
        starts_at = traverse_obj(broadcast_info, (*event_path, 'starts_at', {str}))
        # Without a usable stream, a known start time is reported as the reason
        if starts_at and not formats:
            self.raise_no_formats(f'This event will start at {starts_at}', expected=True)
        broadcast_meta = traverse_obj(broadcast_info, (*broadcast_path, {
            'title': ('title', {str}),
            'timestamp': ('started_at', {parse_iso8601}),
            'concurrent_view_count': ('concurrent_view_count', {int_or_none}),
            'like_count': ('heart_count', {int_or_none}),
            'is_live': ('live', {bool}),
        }))
        event_meta = traverse_obj(broadcast_info, (*event_path, {
            'title': ('title', {str}),
            'description': ('description', {str}),
            'timestamp': ('started_at', {parse_iso8601}),
            'concurrent_view_count': ('concurrent_view_count', {int_or_none}),
            'like_count': ('heart_count', {int_or_none}),
            'thumbnail': ('artwork_url', {url_or_none}),
            'uploader_id': ('broadcaster_id', {str}),
        }))
        # Later entries win: event-level metadata overrides the broadcast-level metadata
        return {
            'id': event_id,
            'uploader': username,
            'formats': formats,
            'release_timestamp': parse_iso8601(starts_at),
            **broadcast_meta,
            **event_meta,
        }
 class MixlrRecoringIE(InfoExtractor):
    # Extractor for Mixlr recording pages (<username>.mixlr.com/recordings/<id>).
    # All data comes from the Mixlr v3 channels API
    _VALID_URL = r'https?://(?:www\.)?(?P<username>[\w-]+)\.mixlr\.com/recordings/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://biblewayng.mixlr.com/recordings/2375193',
        'info_dict': {
            'id': '2375193',
            'ext': 'mp3',
            'title': "God's Jewels and Their Resting Place Bro. Adeniji",
            'description': 'Preached February 21, 2024 in the evening',
            'uploader_id': '8659190',
            'duration': 10968,
            'thumbnail': r're:https://imagecdn\.mixlr\.com/cdn-cgi/image/[^/?#]+/ceca120ef707f642abeea6e29cd74238.jpg',
            'timestamp': 1708544542,
            'upload_date': '20240221',
        },
    }]
    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        username, recording_id = mobj.group('username'), mobj.group('id')
        api_url = f'https://api.mixlr.com/v3/channels/{username}/recordings/{recording_id}'
        recording_info = self._download_json(api_url, recording_id)
        # Both the media URL and its container format are read from the recording's attributes
        recording_meta = traverse_obj(recording_info, ('data', 'attributes', {
            'ext': ('file_format', {str}),
            'url': ('url', {url_or_none}),
            'title': ('title', {str}),
            'description': ('description', {str}),
            'timestamp': ('created_at', {parse_iso8601}),
            'duration': ('duration', {int_or_none}),
            'thumbnail': ('artwork_url', {url_or_none}),
            'uploader_id': ('user_id', {str}),
        }))
        return {
            'id': recording_id,
            **recording_meta,
        }
--- a/yt_dlp/extractor/ninenow.py
+++ b/yt_dlp/extractor/ninenow.py
@ -1,6 +1,3 @@
 import json
 import re
 from .brightcove import BrightcoveNewIE
 from .common import InfoExtractor
 from ..utils import (
@ -11,7 +8,12 @@ from ..utils import (
    str_or_none,
    url_or_none,
 )
-from ..utils.traversal import require, traverse_obj, value
+from ..utils.traversal import (
    get_first,
    require,
    traverse_obj,
    value,
 )
 class NineNowIE(InfoExtractor):
@ -101,20 +103,11 @@ class NineNowIE(InfoExtractor):
    }]
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4460760524001/default_default/index.html?videoId={}'
    # XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.francetv and yt_dlp.extractor.goplay
    def _find_json(self, s):
        return self._search_json(
            r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
    def _real_extract(self, url):
        display_id, video_type = self._match_valid_url(url).group('id', 'type')
        webpage = self._download_webpage(url, display_id)
-        common_data = traverse_obj(
+        common_data = get_first(self._search_nextjs_v13_data(webpage, display_id), ('payload', {dict}))
            re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
            (..., {json.loads}, ..., {self._find_json},
             lambda _, v: v['payload'][video_type]['slug'] == display_id,
             'payload', any, {require('video data')}))
        if traverse_obj(common_data, (video_type, 'video', 'drm', {bool})):
            self.report_drm(display_id)
--- a/yt_dlp/extractor/vimeo.py
+++ b/yt_dlp/extractor/vimeo.py
@ -21,6 +21,7 @@ from ..utils import (
    js_to_json,
    jwt_decode_hs256,
    merge_dicts,
    mimetype2ext,
    parse_filesize,
    parse_iso8601,
    parse_qs,
@ -28,9 +29,11 @@ from ..utils import (
    smuggle_url,
    str_or_none,
    traverse_obj,
    try_call,
    try_get,
    unified_timestamp,
    unsmuggle_url,
    url_basename,
    url_or_none,
    urlencode_postdata,
    urlhandle_detect_ext,
@ -45,14 +48,56 @@ class VimeoBaseInfoExtractor(InfoExtractor):
    _REFERER_HINT = (
        'Cannot download embed-only video without embedding URL. Please call yt-dlp '
        'with the URL of the page that embeds this video.')
-    _IOS_CLIENT_AUTH = 'MTMxNzViY2Y0NDE0YTQ5YzhjZTc0YmU0NjVjNDQxYzNkYWVjOWRlOTpHKzRvMmgzVUh4UkxjdU5FRW80cDNDbDhDWGR5dVJLNUJZZ055dHBHTTB4V1VzaG41bEx1a2hiN0NWYWNUcldSSW53dzRUdFRYZlJEZmFoTTArOTBUZkJHS3R4V2llYU04Qnl1bERSWWxUdXRidjNqR2J4SHFpVmtFSUcyRktuQw=='
+
-    _IOS_CLIENT_HEADERS = {
+    _DEFAULT_CLIENT = 'android'
    _CLIENT_HEADERS = {
        'Accept': 'application/vnd.vimeo.*+json; version=3.4.10',
        'Accept-Language': 'en',
        'User-Agent': 'Vimeo/11.10.0 (com.vimeo; build:250424.164813.0; iOS 18.4.1) Alamofire/5.9.0 VimeoNetworking/5.0.0',
    }
-    _IOS_OAUTH_CACHE_KEY = 'oauth-token-ios'
+    _CLIENT_CONFIGS = {
-    _ios_oauth_token = None
+        'android': {
            'CACHE_KEY': 'oauth-token-android',
            'CACHE_ONLY': False,
            'VIEWER_JWT': False,
            'REQUIRES_AUTH': False,
            'AUTH': 'NzRmYTg5YjgxMWExY2JiNzUwZDg1MjhkMTYzZjQ4YWYyOGEyZGJlMTp4OGx2NFd3QnNvY1lkamI2UVZsdjdDYlNwSDUrdm50YzdNNThvWDcwN1JrenJGZC9tR1lReUNlRjRSVklZeWhYZVpRS0tBcU9YYzRoTGY2Z1dlVkJFYkdJc0dMRHpoZWFZbU0reDRqZ1dkZ1diZmdIdGUrNUM5RVBySlM0VG1qcw==',
            'USER_AGENT': 'com.vimeo.android.videoapp (OnePlus, ONEPLUS A6003, OnePlus, Android 14/34 Version 11.8.1) Kotlin VimeoNetworking/3.12.0',
            'VIDEOS_FIELDS': (
                'uri', 'name', 'description', 'type', 'link', 'player_embed_url', 'duration', 'width',
                'language', 'height', 'embed', 'created_time', 'modified_time', 'release_time', 'content_rating',
                'content_rating_class', 'rating_mod_locked', 'license', 'privacy', 'pictures', 'tags', 'stats',
                'categories', 'uploader', 'metadata', 'user', 'files', 'download', 'app', 'play', 'status',
                'resource_key', 'badge', 'upload', 'transcode', 'is_playable', 'has_audio',
            ),
        },
        'ios': {
            'CACHE_KEY': 'oauth-token-ios',
            'CACHE_ONLY': True,
            'VIEWER_JWT': False,
            'REQUIRES_AUTH': False,
            'AUTH': 'MTMxNzViY2Y0NDE0YTQ5YzhjZTc0YmU0NjVjNDQxYzNkYWVjOWRlOTpHKzRvMmgzVUh4UkxjdU5FRW80cDNDbDhDWGR5dVJLNUJZZ055dHBHTTB4V1VzaG41bEx1a2hiN0NWYWNUcldSSW53dzRUdFRYZlJEZmFoTTArOTBUZkJHS3R4V2llYU04Qnl1bERSWWxUdXRidjNqR2J4SHFpVmtFSUcyRktuQw==',
            'USER_AGENT': 'Vimeo/11.10.0 (com.vimeo; build:250424.164813.0; iOS 18.4.1) Alamofire/5.9.0 VimeoNetworking/5.0.0',
            'VIDEOS_FIELDS': (
                'uri', 'name', 'description', 'type', 'link', 'player_embed_url', 'duration',
                'width', 'language', 'height', 'embed', 'created_time', 'modified_time', 'release_time',
                'content_rating', 'content_rating_class', 'rating_mod_locked', 'license', 'config_url',
                'embed_player_config_url', 'privacy', 'pictures', 'tags', 'stats', 'categories', 'uploader',
                'metadata', 'user', 'files', 'download', 'app', 'play', 'status', 'resource_key', 'badge',
                'upload', 'transcode', 'is_playable', 'has_audio',
            ),
        },
        'web': {
            'VIEWER_JWT': True,
            'REQUIRES_AUTH': True,
            'USER_AGENT': None,
            'VIDEOS_FIELDS': (
                'config_url', 'created_time', 'description', 'license',
                'metadata.connections.comments.total', 'metadata.connections.likes.total',
                'release_time', 'stats.plays',
            ),
        },
    }
    _oauth_tokens = {}
    _viewer_info = None
    @staticmethod
@ -105,8 +150,8 @@ class VimeoBaseInfoExtractor(InfoExtractor):
            raise ExtractorError('Unable to log in')
    def _real_initialize(self):
-        if self._LOGIN_REQUIRED and not self._get_cookies('https://vimeo.com').get('vuid'):
+        if self._LOGIN_REQUIRED and not self._get_cookies('https://vimeo.com').get('vimeo'):
-            self._raise_login_required()
+            self.raise_login_required()
    def _get_video_password(self):
        password = self.get_param('videopassword')
@ -277,52 +322,88 @@ class VimeoBaseInfoExtractor(InfoExtractor):
            '_format_sort_fields': ('quality', 'res', 'fps', 'hdr:12', 'source'),
        }
-    def _fetch_oauth_token(self):
+    def _fetch_oauth_token(self, client):
-        if not self._ios_oauth_token:
+        client_config = self._CLIENT_CONFIGS[client]
            self._ios_oauth_token = self.cache.load(self._NETRC_MACHINE, self._IOS_OAUTH_CACHE_KEY)
-        if not self._ios_oauth_token:
+        if client_config['VIEWER_JWT']:
-            self._ios_oauth_token = self._download_json(
+            return f'jwt {self._fetch_viewer_info()["jwt"]}'
        cache_key = client_config['CACHE_KEY']
        if not self._oauth_tokens.get(cache_key):
            self._oauth_tokens[cache_key] = self.cache.load(self._NETRC_MACHINE, cache_key)
        if not self._oauth_tokens.get(cache_key):
            if client_config['CACHE_ONLY']:
                raise ExtractorError(
                    f'The {client} client is unable to fetch new OAuth tokens '
                    f'and is only intended for use with previously cached tokens', expected=True)
            self._oauth_tokens[cache_key] = self._download_json(
                'https://api.vimeo.com/oauth/authorize/client', None,
-                'Fetching OAuth token', 'Failed to fetch OAuth token',
+                f'Fetching {client} OAuth token', f'Failed to fetch {client} OAuth token',
                headers={
-                    'Authorization': f'Basic {self._IOS_CLIENT_AUTH}',
+                    'Authorization': f'Basic {client_config["AUTH"]}',
-                    **self._IOS_CLIENT_HEADERS,
+                    'User-Agent': client_config['USER_AGENT'],
                    **self._CLIENT_HEADERS,
                }, data=urlencode_postdata({
                    'grant_type': 'client_credentials',
-                    'scope': 'private public create edit delete interact upload purchased stats',
+                    'scope': 'private public create edit delete interact upload purchased stats video_files',
                }, quote_via=urllib.parse.quote))['access_token']
-            self.cache.store(self._NETRC_MACHINE, self._IOS_OAUTH_CACHE_KEY, self._ios_oauth_token)
+            self.cache.store(self._NETRC_MACHINE, cache_key, self._oauth_tokens[cache_key])
-        return self._ios_oauth_token
+        return f'Bearer {self._oauth_tokens[cache_key]}'
    def _call_videos_api(self, video_id, unlisted_hash=None, path=None, *, force_client=None, query=None, **kwargs):
        client = force_client or self._configuration_arg('client', [self._DEFAULT_CLIENT], ie_key=VimeoIE)[0]
        if client not in self._CLIENT_CONFIGS:
            raise ExtractorError(
                f'Unsupported API client "{client}" requested. '
                f'Supported clients are: {", ".join(self._CLIENT_CONFIGS)}', expected=True)
        client_config = self._CLIENT_CONFIGS[client]
        if client_config['REQUIRES_AUTH'] and not self._get_cookies('https://vimeo.com').get('vimeo'):
            self.raise_login_required(f'The {client} client requires authentication')
    def _call_videos_api(self, video_id, unlisted_hash=None, **kwargs):
        return self._download_json(
-            join_nonempty(f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':'),
+            join_nonempty(
-            video_id, 'Downloading API JSON', headers={
+                'https://api.vimeo.com/videos',
-                'Authorization': f'Bearer {self._fetch_oauth_token()}',
+                join_nonempty(video_id, unlisted_hash, delim=':'),
-                **self._IOS_CLIENT_HEADERS,
+                path, delim='/'),
-            }, query={
+            video_id, f'Downloading {client} API JSON', f'Unable to download {client} API JSON',
-                'fields': ','.join((
+            headers=filter_dict({
-                    'config_url', 'embed_player_config_url', 'player_embed_url', 'download', 'play',
+                'Authorization': self._fetch_oauth_token(client),
-                    'files', 'description', 'license', 'release_time', 'created_time', 'stats.plays',
+                'User-Agent': client_config['USER_AGENT'],
-                    'metadata.connections.comments.total', 'metadata.connections.likes.total')),
+                **self._CLIENT_HEADERS,
            }), query={
                'fields': ','.join(client_config['VIDEOS_FIELDS']),
                **(query or {}),
            }, **kwargs)
-    def _extract_original_format(self, url, video_id, unlisted_hash=None, api_data=None):
+    def _extract_original_format(self, url, video_id, unlisted_hash=None):
        # Original/source formats are only available when logged in
        if not self._get_cookies('https://vimeo.com/').get('vimeo'):
-            return
+            return None
-        query = {'action': 'load_download_config'}
+        policy = self._configuration_arg('original_format_policy', ['auto'], ie_key=VimeoIE)[0]
-        if unlisted_hash:
+        if policy == 'never':
-            query['unlisted_hash'] = unlisted_hash
+            return None
-        download_data = self._download_json(
+
-            url, video_id, 'Loading download config JSON', fatal=False,
+        try:
-            query=query, headers={'X-Requested-With': 'XMLHttpRequest'},
+            download_data = self._download_json(
-            expected_status=(403, 404)) or {}
+                url, video_id, 'Loading download config JSON', query=filter_dict({
-        source_file = download_data.get('source_file')
+                    'action': 'load_download_config',
-        download_url = try_get(source_file, lambda x: x['download_url'])
+                    'unlisted_hash': unlisted_hash,
                }), headers={
                    'Accept': 'application/json',
                    'X-Requested-With': 'XMLHttpRequest',
                })
        except ExtractorError as error:
            self.write_debug(f'Unable to load download config JSON: {error.cause}')
            download_data = None
        source_file = traverse_obj(download_data, ('source_file', {dict})) or {}
        download_url = traverse_obj(source_file, ('download_url', {url_or_none}))
        if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
            source_name = source_file.get('public_name', 'Original')
            if self._is_valid_url(download_url, video_id, f'{source_name} video'):
@ -340,8 +421,27 @@ class VimeoBaseInfoExtractor(InfoExtractor):
                    'quality': 1,
                }
-        original_response = api_data or self._call_videos_api(
+        # Most web client API requests are subject to rate-limiting (429) when logged-in.
-            video_id, unlisted_hash, fatal=False, expected_status=(403, 404))
+        # Requesting only the 'privacy' field is NOT rate-limited,
        # so first we should check if video even has 'download' formats available
        try:
            privacy_info = self._call_videos_api(
                video_id, unlisted_hash, force_client='web', query={'fields': 'privacy'})
        except ExtractorError as error:
            self.write_debug(f'Unable to download privacy info: {error.cause}')
            return None
        if not traverse_obj(privacy_info, ('privacy', 'download', {bool})):
            msg = f'{video_id}: Vimeo says this video is not downloadable'
            if policy != 'always':
                self.write_debug(
                    f'{msg}, so yt-dlp is not attempting to extract the original/source format. '
                    f'To try anyways, use --extractor-args "vimeo:original_format_policy=always"')
                return None
            self.write_debug(f'{msg}; attempting to extract original/source format anyways')
        original_response = self._call_videos_api(
            video_id, unlisted_hash, force_client='web', query={'fields': 'download'}, fatal=False)
        for download_data in traverse_obj(original_response, ('download', ..., {dict})):
            download_url = download_data.get('link')
            if not download_url or download_data.get('quality') != 'source':
@ -919,6 +1019,92 @@ class VimeoIE(VimeoBaseInfoExtractor):
            raise ExtractorError('Wrong video password', expected=True)
        return checked
    def _get_subtitles(self, video_id, unlisted_hash):
        """Request the video's 'texttracks' from the videos API and group them by language.

        Returns a dict that maps each track's 'language' value to a list of
        subtitle entries ('url', 'ext', 'name'). The API call is made with
        fatal=False, so a failed request simply contributes no tracks.
        """
        requested_fields = (
            'active', 'display_language', 'id', 'language', 'link', 'name', 'type', 'uri',
        )
        response = self._call_videos_api(
            video_id, unlisted_hash, path='texttracks', fatal=False,
            query={
                'include_transcript': 'true',
                'fields': ','.join(requested_fields),
            })

        tracks_by_language = {}
        # Only tracks whose 'link' passes url_or_none are turned into subtitle entries
        for track in traverse_obj(response, ('data', lambda _, v: url_or_none(v['link']))):
            entry = {
                'url': track['link'],
                'ext': 'vtt',
                'name': track.get('display_language'),
            }
            tracks_by_language.setdefault(track.get('language'), []).append(entry)
        return tracks_by_language
    def _parse_api_response(self, video, video_id, unlisted_hash=None):
        """Build an info dict (metadata, formats, subtitles) from a videos API response.

        `video` is the API response object, `video_id` is used for the 'id'
        field and for the manifest requests, and `unlisted_hash` is forwarded
        to the subtitle extraction.
        """
        formats = []
        subtitles = {}
        visited_urls = set()
        duration = traverse_obj(video, ('duration', {int_or_none}))

        # Collect file entries from the 'play', 'files' and 'download' branches,
        # keeping only those whose 'link' passes url_or_none
        candidates = traverse_obj(video, (
            (('play', (None, 'progressive')), 'files', 'download'), lambda _, v: url_or_none(v['link']),
        ))
        for entry in candidates:
            link = entry['link']
            # The same URL may be listed by more than one branch; handle it once
            if link in visited_urls:
                continue
            visited_urls.add(link)

            quality = entry.get('quality')
            ext = determine_ext(link)
            is_hls = quality == 'hls' or ext == 'm3u8'
            is_dash = quality == 'dash' or ext == 'mpd'

            if not is_hls and not is_dash:
                # Neither an HLS nor a DASH manifest: describe the file as a single format
                http_fmt = traverse_obj(entry, {
                    'ext': ('type', {mimetype2ext(default='mp4')}),
                    'vcodec': ('codec', {str.lower}),
                    'width': ('width', {int_or_none}),
                    'height': ('height', {int_or_none}),
                    'filesize': ('size', {int_or_none}),
                    'fps': ('fps', {int_or_none}),
                })
                http_fmt['url'] = link
                http_fmt['format_id'] = join_nonempty(
                    'http', traverse_obj(entry, 'public_name', 'rendition'), quality)
                # Bitrate derived from filesize and duration; try_call guards the computation
                http_fmt['tbr'] = try_call(lambda: http_fmt['filesize'] * 8 / duration / 1024)
                formats.append(http_fmt)
                continue

            # HLS takes precedence when an entry matches both manifest checks
            if is_hls:
                manifest_fmts, manifest_subs = self._extract_m3u8_formats_and_subtitles(
                    link, video_id, 'mp4', m3u8_id='hls', fatal=False)
            else:
                manifest_fmts, manifest_subs = self._extract_mpd_formats_and_subtitles(
                    link, video_id, mpd_id='dash', fatal=False)
                for dash_fmt in manifest_fmts:
                    # Keep the first two '-'-separated parts of the ID and append the integer tbr
                    id_parts = dash_fmt['format_id'].split('-', 2)[:2]
                    dash_fmt['format_id'] = join_nonempty(
                        *id_parts, int_or_none(dash_fmt.get('tbr')))
            formats.extend(manifest_fmts)
            self._merge_subtitles(manifest_subs, target=subtitles)

        # Only request text tracks when the response reports a non-zero total
        text_track_total = traverse_obj(
            video, ('metadata', 'connections', 'texttracks', 'total', {int}))
        if text_track_total:
            self._merge_subtitles(self.extract_subtitles(video_id, unlisted_hash), target=subtitles)

        metadata = traverse_obj(video, {
            'title': ('name', {str}),
            'uploader': ('user', 'name', {str}),
            'uploader_id': ('user', 'link', {url_basename}),
            'uploader_url': ('user', 'link', {url_or_none}),
            'release_timestamp': ('live', 'scheduled_start_time', {int_or_none}),
            'thumbnails': ('pictures', 'sizes', lambda _, v: url_or_none(v['link']), {
                'url': 'link',
                'width': ('width', {int_or_none}),
                'height': ('height', {int_or_none}),
            }),
        })
        # Translate the API's live state into the info-dict 'live_status' value
        live_status_by_state = {
            'streaming': 'is_live',
            'done': 'was_live',
        }
        live_state = traverse_obj(video, ('live', 'status', {str}))
        return {
            **metadata,
            'id': video_id,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
            'live_status': live_status_by_state.get(live_state),
        }
    def _extract_from_api(self, video_id, unlisted_hash=None):
        for retry in (False, True):
            try:
@ -934,10 +1120,13 @@ class VimeoIE(VimeoBaseInfoExtractor):
                    continue
                raise
-        info = self._parse_config(self._download_json(
+        if config_url := traverse_obj(video, ('config_url', {url_or_none})):
-            video['config_url'], video_id), video_id)
+            info = self._parse_config(self._download_json(config_url, video_id), video_id)
        else:
            info = self._parse_api_response(video, video_id, unlisted_hash)
        source_format = self._extract_original_format(
-            f'https://vimeo.com/{video_id}', video_id, unlisted_hash, api_data=video)
+            f'https://vimeo.com/{video_id}', video_id, unlisted_hash)
        if source_format:
            info['formats'].append(source_format)
Author	SHA1	Message	Date
ShockedPlot7560	0f33950c77	[ie/mixlr] Add extractors (#13561 ) Authored by: ShockedPlot7560, seproDev Co-authored-by: sepro <sepro@sepr0.com>	2025-07-13 01:35:51 +02:00
bashonly	b5fea53f20	[ie] Rework `_search_nextjs_v13_data` helper (#13711 ) Fix 5245231e4a39ecd5595d4337d46d85e150e2430a Authored by: bashonly	2025-07-12 23:12:05 +00:00
bashonly	5245231e4a	[ie] Add `_search_nextjs_v13_data` helper (#13398 ) * Fixes FranceTVSiteIE livestream extraction * Fixes GoPlayIE metadata extraction Authored by: bashonly	2025-07-12 22:12:46 +00:00
Lyuben Ivanov	3ae61e0f31	[ie/BTVPlus] Add extractor (#13541 ) Authored by: bubo	2025-07-12 21:56:11 +02:00
bashonly	a5d697f62d	[ie/vimeo] Fix extractor (#13692 ) Closes #13180, Closes #13689 Authored by: bashonly	2025-07-12 19:23:22 +00:00