[ie/ERRArhiiv] Add extractor (#15667)

Closes #15663
Authored by: rdamas
[ie/franceinfo] Fix extraction (#15704)
2026-06-13 14:24:31 +00:00 · 2026-01-27 16:53:38 +00:00 · 2026-01-27 15:40:47 +00:00 · 2026-01-27 14:09:16 +00:00 · 2026-01-27 14:08:22 +00:00 · 2026-01-27 12:52:49 +00:00
11 changed files with 509 additions and 94 deletions
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -564,7 +564,10 @@ from .eroprofile import (
    EroProfileAlbumIE,
    EroProfileIE,
 )
-from .err import ERRJupiterIE
+from .err import (
+    ERRArhiivIE,
+    ERRJupiterIE,
+)
 from .ertgr import (
    ERTFlixCodenameIE,
    ERTFlixIE,
@@ -2360,7 +2363,11 @@ from .voicy import (
    VoicyChannelIE,
    VoicyIE,
 )
-from .volejtv import VolejTVIE
+from .volejtv import (
+    VolejTVCategoryPlaylistIE,
+    VolejTVClubPlaylistIE,
+    VolejTVIE,
+)
 from .voxmedia import (
    VoxMediaIE,
    VoxMediaVolumeIE,
--- a/yt_dlp/extractor/boosty.py
+++ b/yt_dlp/extractor/boosty.py
@@ -21,21 +21,44 @@ class BoostyIE(InfoExtractor):
        'url': 'https://boosty.to/kuplinov/posts/e55d050c-e3bb-4873-a7db-ac7a49b40c38',
        'info_dict': {
            'id': 'd7473824-352e-48e2-ae53-d4aa39459968',
-            'title': 'phasma_3',
+            'title': 'Бан? А! Бан! (Phasmophobia)',
+            'alt_title': 'Бан? А! Бан! (Phasmophobia)',
            'channel': 'Kuplinov',
            'channel_id': '7958701',
            'timestamp': 1655031975,
            'upload_date': '20220612',
            'release_timestamp': 1655049000,
            'release_date': '20220612',
-            'modified_timestamp': 1668680993,
-            'modified_date': '20221117',
+            'modified_timestamp': 1743328648,
+            'modified_date': '20250330',
            'tags': ['куплинов', 'phasmophobia'],
            'like_count': int,
            'ext': 'mp4',
            'duration': 105,
            'view_count': int,
-            'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
+            'thumbnail': r're:^https://iv\.okcdn\.ru/videoPreview\?',
+        },
+    }, {
+        # single ok_video with truncated title
+        'url': 'https://boosty.to/kuplinov/posts/cc09b7f9-121e-40b8-9392-4a075ef2ce53',
+        'info_dict': {
+            'id': 'fb5ea762-6303-4557-9a17-157947326810',
+            'title': 'Какая там активность была? Не слышу! Повтори еще пару раз! (Phas',
+            'alt_title': 'Какая там активность была? Не слышу! Повтори еще пару раз! (Phasmophobia)',
+            'channel': 'Kuplinov',
+            'channel_id': '7958701',
+            'timestamp': 1655031930,
+            'upload_date': '20220612',
+            'release_timestamp': 1655048400,
+            'release_date': '20220612',
+            'modified_timestamp': 1743328616,
+            'modified_date': '20250330',
+            'tags': ['куплинов', 'phasmophobia'],
+            'like_count': int,
+            'ext': 'mp4',
+            'duration': 39,
+            'view_count': int,
+            'thumbnail': r're:^https://iv\.okcdn\.ru/videoPreview\?',
        },
    }, {
        # multiple ok_video
@@ -109,36 +132,41 @@ class BoostyIE(InfoExtractor):
                'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
            },
        }],
+        'skip': 'post has been deleted',
    }, {
        # single external video (youtube)
-        'url': 'https://boosty.to/denischuzhoy/posts/6094a487-bcec-4cf8-a453-43313b463c38',
+        'url': 'https://boosty.to/futuremusicproduction/posts/32a8cae2-3252-49da-b285-0e014bc6e565',
        'info_dict': {
-            'id': 'EXelTnve5lY',
-            'title': 'Послание Президента Федеральному Собранию | Класс народа',
-            'upload_date': '20210425',
-            'channel': 'Денис Чужой',
-            'tags': 'count:10',
+            'id': '-37FW_YQ3B4',
+            'title': 'Afro | Deep House FREE FLP',
+            'media_type': 'video',
+            'upload_date': '20250829',
+            'timestamp': 1756466005,
+            'channel': 'Future Music Production',
+            'tags': 'count:0',
            'like_count': int,
-            'ext': 'mp4',
-            'duration': 816,
+            'ext': 'm4a',
+            'duration': 170,
            'view_count': int,
            'thumbnail': r're:^https://i\.ytimg\.com/',
            'age_limit': 0,
            'availability': 'public',
            'categories': list,
            'channel_follower_count': int,
-            'channel_id': 'UCCzVNbWZfYpBfyofCCUD_0w',
-            'channel_is_verified': bool,
+            'channel_id': 'UCKVYrFBYmci1e-T8NeHw2qg',
            'channel_url': r're:^https://www\.youtube\.com/',
            'comment_count': int,
            'description': str,
-            'heatmap': 'count:100',
            'live_status': str,
            'playable_in_embed': bool,
            'uploader': str,
            'uploader_id': str,
            'uploader_url': r're:^https://www\.youtube\.com/',
        },
+        'expected_warnings': [
+            'Remote components challenge solver script',
+            'n challenge solving failed',
+        ],
    }]

    _MP4_TYPES = ('tiny', 'lowest', 'low', 'medium', 'high', 'full_hd', 'quad_hd', 'ultra_hd')
@@ -207,13 +235,14 @@ class BoostyIE(InfoExtractor):
                video_id = item.get('id') or post_id
                entries.append({
                    'id': video_id,
+                    'alt_title': post_title,
                    'formats': self._extract_formats(item.get('playerUrls'), video_id),
                    **common_metadata,
                    **traverse_obj(item, {
                        'title': ('title', {str}),
                        'duration': ('duration', {int_or_none}),
                        'view_count': ('viewsCounter', {int_or_none}),
-                        'thumbnail': (('previewUrl', 'defaultPreview'), {url_or_none}),
+                        'thumbnail': (('preview', 'defaultPreview'), {url_or_none}),
                    }, get_all=False)})

        if not entries and not post.get('hasAccess'):
--- a/yt_dlp/extractor/dailymotion.py
+++ b/yt_dlp/extractor/dailymotion.py
@@ -366,8 +366,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):

    @staticmethod
    def _generate_blockbuster_headers():
-        # Randomize our HTTP header fingerprint to bust the HTTP Error 403 block
-        # See https://github.com/yt-dlp/yt-dlp/issues/15526
+        """Randomize our HTTP header fingerprint to bust the HTTP Error 403 block"""

        def random_letters(minimum, maximum):
            # Omit vowels so we don't generate valid header names like 'authorization', etc
@@ -378,6 +377,43 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
            for _ in range(random.randint(2, 8))
        }

+    def _extract_dailymotion_m3u8_formats_and_subtitles(self, media_url, video_id, live=False):
+        """See https://github.com/yt-dlp/yt-dlp/issues/15526"""
+
+        ERROR_NOTE = 'Unable to download m3u8 information'
+        last_error = None
+
+        for note, kwargs in (
+            ('Downloading m3u8 information', {}),
+            ('Retrying m3u8 download with randomized headers', {
+                'headers': self._generate_blockbuster_headers(),
+            }),
+            ('Retrying m3u8 download with Chrome impersonation', {
+                'impersonate': 'chrome',
+                'require_impersonation': True,
+            }),
+            ('Retrying m3u8 download with Firefox impersonation', {
+                'impersonate': 'firefox',
+                'require_impersonation': True,
+            }),
+        ):
+            try:
+                m3u8_doc = self._download_webpage(media_url, video_id, note, ERROR_NOTE, **kwargs)
+                break
+            except ExtractorError as e:
+                last_error = e.orig_msg
+                self.write_debug(f'{video_id}: {last_error}')
+        else:
+            if 'impersonation' not in last_error:
+                self.report_warning(last_error, video_id=video_id)
+                last_error = None
+            return [], {}, last_error
+
+        formats, subtitles = self._parse_m3u8_formats_and_subtitles(
+            m3u8_doc, media_url, 'mp4', m3u8_id='hls', live=live, fatal=False)
+
+        return formats, subtitles, last_error
+
    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url)
        video_id, is_playlist, playlist_id = self._match_valid_url(url).group('id', 'is_playlist', 'playlist_id')
@@ -431,6 +467,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
        is_live = media.get('isOnAir')
        formats = []
        subtitles = {}
+        expected_error = None

        for quality, media_list in metadata['qualities'].items():
            for m in media_list:
@@ -439,9 +476,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
                if not media_url or media_type == 'application/vnd.lumberjack.manifest':
                    continue
                if media_type == 'application/x-mpegURL':
-                    fmt, subs = self._extract_m3u8_formats_and_subtitles(
-                        media_url, video_id, 'mp4', live=is_live, m3u8_id='hls',
-                        fatal=False, headers=self._generate_blockbuster_headers())
+                    fmt, subs, expected_error = self._extract_dailymotion_m3u8_formats_and_subtitles(
+                        media_url, video_id, live=is_live)
                    formats.extend(fmt)
                    self._merge_subtitles(subs, target=subtitles)
                else:
@@ -458,6 +494,10 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
                            'width': width,
                        })
                    formats.append(f)
+
+        if not formats and expected_error:
+            self.raise_no_formats(expected_error, expected=True)
+
        for f in formats:
            f['url'] = f['url'].split('#')[0]
            if not f.get('fps') and f['format_id'].endswith('@60'):
--- a/yt_dlp/extractor/err.py
+++ b/yt_dlp/extractor/err.py
@@ -2,6 +2,7 @@ from .common import InfoExtractor
 from ..utils import (
    clean_html,
    int_or_none,
+    parse_iso8601,
    str_or_none,
    url_or_none,
 )
@@ -222,3 +223,70 @@ class ERRJupiterIE(InfoExtractor):
                'episode_id': ('id', {str_or_none}),
            }) if data.get('type') == 'episode' else {}),
        }
+
+
+class ERRArhiivIE(InfoExtractor):
+    _VALID_URL = r'https://arhiiv\.err\.ee/video/(?:vaata/)?(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://arhiiv.err.ee/video/kontsertpalad',
+        'info_dict': {
+            'id': 'kontsertpalad',
+            'ext': 'mp4',
+            'title': 'Kontsertpalad: 255 | L. Beethoveni sonaat c-moll, "Pateetiline"',
+            'description': 'md5:a70f4ff23c3618f3be63f704bccef063',
+            'series': 'Kontsertpalad',
+            'episode_id': 255,
+            'timestamp': 1666152162,
+            'upload_date': '20221019',
+            'release_year': 1970,
+            'modified_timestamp': 1718620982,
+            'modified_date': '20240617',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://arhiiv.err.ee/video/vaata/koalitsioonileppe-allkirjastamine',
+        'info_dict': {
+            'id': 'koalitsioonileppe-allkirjastamine',
+            'ext': 'mp4',
+            'title': 'Koalitsioonileppe allkirjastamine',
+            'timestamp': 1710728222,
+            'upload_date': '20240318',
+            'release_timestamp': 1611532800,
+            'release_date': '20210125',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        data = self._download_json(
+            f'https://arhiiv.err.ee/api/v1/content/video/{video_id}', video_id)
+
+        formats, subtitles = [], {}
+        if hls_url := traverse_obj(data, ('media', 'src', 'hls', {url_or_none})):
+            fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+            formats.extend(fmts)
+            self._merge_subtitles(subs, target=subtitles)
+        if dash_url := traverse_obj(data, ('media', 'src', 'dash', {url_or_none})):
+            fmts, subs = self._extract_mpd_formats_and_subtitles(
+                dash_url, video_id, mpd_id='dash', fatal=False)
+            formats.extend(fmts)
+            self._merge_subtitles(subs, target=subtitles)
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            **traverse_obj(data, ('info', {
+                'title': ('title', {str}),
+                'series': ('seriesTitle', {str}, filter),
+                'series_id': ('seriesId', {str}, filter),
+                'episode_id': ('episode', {int_or_none}),
+                'description': ('synopsis', {str}, filter),
+                'timestamp': ('uploadDate', {parse_iso8601}),
+                'modified_timestamp': ('dateModified', {parse_iso8601}),
+                'release_timestamp': ('date', {parse_iso8601}),
+                'release_year': ('year', {int_or_none}),
+            })),
+        }
--- a/yt_dlp/extractor/francetv.py
+++ b/yt_dlp/extractor/francetv.py
@@ -371,15 +371,16 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):


 class FranceTVInfoIE(FranceTVBaseInfoExtractor):
-    IE_NAME = 'francetvinfo.fr'
-    _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&.]+)'
+    IE_NAME = 'franceinfo'
+    IE_DESC = 'franceinfo.fr (formerly francetvinfo.fr)'
+    _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.france(?:tv)?info.fr/(?:[^/?#]+/)*(?P<id>[^/?#&.]+)'

    _TESTS = [{
        'url': 'https://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-jeudi-22-aout-2019_3561461.html',
        'info_dict': {
            'id': 'd12458ee-5062-48fe-bfdd-a30d6a01b793',
            'ext': 'mp4',
-            'title': 'Soir 3',
+            'title': 'Soir 3 - Émission du jeudi 22 août 2019',
            'upload_date': '20190822',
            'timestamp': 1566510730,
            'thumbnail': r're:^https?://.*\.jpe?g$',
@@ -398,7 +399,7 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
        'info_dict': {
            'id': '7d204c9e-a2d3-11eb-9e4c-000d3a23d482',
            'ext': 'mp4',
-            'title': 'Covid-19 : une situation catastrophique à New Dehli - Édition du mercredi 21 avril 2021',
+            'title': 'Journal 20h00 - Covid-19 : une situation catastrophique à New Dehli',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'duration': 76,
            'timestamp': 1619028518,
@@ -438,6 +439,18 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
            'thumbnail': r're:https://[^/?#]+/v/[^/?#]+/x1080',
        },
        'add_ie': ['Dailymotion'],
+        'skip': 'Broken Dailymotion link',
+    }, {
+        'url': 'https://www.franceinfo.fr/monde/usa/presidentielle/donald-trump/etats-unis-un-risque-d-embrasement-apres-la-mort-d-un-manifestant_7764542.html',
+        'info_dict': {
+            'id': 'f920fcc2-fa20-11f0-ac98-57a09c50f7ce',
+            'ext': 'mp4',
+            'title': 'Affaires sensibles - Manifestant tué Le risque d\'embrasement',
+            'duration': 118,
+            'thumbnail': r're:https?://.+/.+\.jpg',
+            'timestamp': 1769367756,
+            'upload_date': '20260125',
+        },
    }, {
        'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
        'only_matching': True,
@@ -445,6 +458,9 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
        # "<figure id=" pattern (#28792)
        'url': 'https://www.francetvinfo.fr/culture/patrimoine/incendie-de-notre-dame-de-paris/notre-dame-de-paris-de-l-incendie-de-la-cathedrale-a-sa-reconstruction_4372291.html',
        'only_matching': True,
+    }, {
+        'url': 'https://www.franceinfo.fr/replay-jt/france-2/20-heures/robert-de-niro-portrait-d-un-monument-du-cinema_7245456.html',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
@@ -460,7 +476,7 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):

        video_id = (
            traverse_obj(webpage, (
-                {find_element(tag='button', attr='data-cy', value='francetv-player-wrapper', html=True)},
+                {find_element(tag='(button|div)', attr='data-cy', value='francetv-player-wrapper', html=True, regex=True)},
                {extract_attributes}, 'id'))
            or self._search_regex(
                (r'player\.load[^;]+src:\s*["\']([^"\']+)',
--- a/yt_dlp/extractor/frontro.py
+++ b/yt_dlp/extractor/frontro.py
@@ -104,9 +104,9 @@ class FrontroGroupBaseIE(FrontoBaseIE):
 class TheChosenIE(FrontroVideoBaseIE):
    _CHANNEL_ID = '12884901895'

-    _VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/video/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/watch/(?P<id>[0-9]+)'
    _TESTS = [{
-        'url': 'https://watch.thechosen.tv/video/184683594325',
+        'url': 'https://watch.thechosen.tv/watch/184683594325',
        'md5': '3f878b689588c71b38ec9943c54ff5b0',
        'info_dict': {
            'id': '184683594325',
@@ -124,7 +124,7 @@ class TheChosenIE(FrontroVideoBaseIE):
            'modified_date': str,
        },
    }, {
-        'url': 'https://watch.thechosen.tv/video/184683596189',
+        'url': 'https://watch.thechosen.tv/watch/184683596189',
        'md5': 'd581562f9d29ce82f5b7770415334151',
        'info_dict': {
            'id': '184683596189',
@@ -147,7 +147,7 @@ class TheChosenIE(FrontroVideoBaseIE):
 class TheChosenGroupIE(FrontroGroupBaseIE):
    _CHANNEL_ID = '12884901895'
    _VIDEO_EXTRACTOR = TheChosenIE
-    _VIDEO_URL_TMPL = 'https://watch.thechosen.tv/video/%s'
+    _VIDEO_URL_TMPL = 'https://watch.thechosen.tv/watch/%s'

    _VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/group/(?P<id>[0-9]+)'
    _TESTS = [{
--- a/yt_dlp/extractor/neteasemusic.py
+++ b/yt_dlp/extractor/neteasemusic.py
@@ -156,18 +156,36 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
            'id': '17241424',
            'ext': 'mp3',
            'title': 'Opus 28',
-            'upload_date': '20080211',
-            'timestamp': 1202745600,
+            'upload_date': '20060912',
+            'timestamp': 1158076800,
            'duration': 263,
            'thumbnail': r're:^http.*\.jpg',
-            'album': 'Piano Solos Vol. 2',
+            'album': 'Piano Solos, Vol. 2',
            'album_artist': 'Dustin O\'Halloran',
            'average_rating': int,
-            'description': '[00:05.00]纯音乐，请欣赏\n',
+            'description': 'md5:b566b92c55ca348df65d206c5d689576',
            'album_artists': ['Dustin O\'Halloran'],
            'creators': ['Dustin O\'Halloran'],
            'subtitles': {'lyrics': [{'ext': 'lrc'}]},
        },
+    }, {
+        'url': 'https://music.163.com/#/song?id=2755669231',
+        'info_dict': {
+            'id': '2755669231',
+            'ext': 'mp3',
+            'title': '十二月-Departure',
+            'upload_date': '20251111',
+            'timestamp': 1762876800,
+            'duration': 188,
+            'thumbnail': r're:^http.*\.jpg',
+            'album': '円',
+            'album_artist': 'ひとひら',
+            'average_rating': int,
+            'description': 'md5:deee249c8c9c3e2c54ecdab36e87d174',
+            'album_artists': ['ひとひら'],
+            'creators': ['ひとひら'],
+            'subtitles': {'lyrics': [{'ext': 'lrc', 'data': 'md5:d32b4425a5d6c9fa249ca6e803dd0401'}]},
+        },
    }, {
        'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
        'md5': 'b896be78d8d34bd7bb665b26710913ff',
@@ -241,9 +259,16 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
                'lyrics': [{'data': original, 'ext': 'lrc'}],
            }

-        lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)'
-        original_ts_texts = re.findall(lyrics_expr, original)
-        translation_ts_dict = dict(re.findall(lyrics_expr, translated))
+        def collect_lyrics(lrc):
+            lyrics_expr = r'\[([0-9]{2}):([0-9]{2})[:.]([0-9]{2,})\]([^\n]+)'
+            matches = re.findall(lyrics_expr, lrc)
+            return (
+                (f'[{minute}:{sec}.{msec}]', text)
+                for minute, sec, msec, text in matches
+            )
+
+        original_ts_texts = collect_lyrics(original)
+        translation_ts_dict = dict(collect_lyrics(translated))

        merged = '\n'.join(
            join_nonempty(f'{timestamp}{text}', translation_ts_dict.get(timestamp, ''), delim=' / ')
--- a/yt_dlp/extractor/patreon.py
+++ b/yt_dlp/extractor/patreon.py
@@ -1,6 +1,5 @@
 import functools
 import itertools
-import urllib.parse

 from .common import InfoExtractor
 from .sproutvideo import VidsIoIE
@@ -11,15 +10,23 @@ from ..utils import (
    ExtractorError,
    clean_html,
    determine_ext,
+    extract_attributes,
+    float_or_none,
    int_or_none,
    mimetype2ext,
    parse_iso8601,
    smuggle_url,
    str_or_none,
+    update_url_query,
    url_or_none,
    urljoin,
 )
-from ..utils.traversal import require, traverse_obj, value
+from ..utils.traversal import (
+    find_elements,
+    require,
+    traverse_obj,
+    value,
+)


 class PatreonBaseIE(InfoExtractor):
@@ -121,6 +128,7 @@ class PatreonIE(PatreonBaseIE):
            'channel_is_verified': True,
            'chapters': 'count:4',
            'timestamp': 1423689666,
+            'media_type': 'video',
        },
        'params': {
            'noplaylist': True,
@@ -161,7 +169,7 @@ class PatreonIE(PatreonBaseIE):
            'uploader_url': 'https://www.patreon.com/loish',
            'description': 'md5:e2693e97ee299c8ece47ffdb67e7d9d2',
            'title': 'VIDEO // sketchbook flipthrough',
-            'uploader': 'Loish ',
+            'uploader': 'Loish',
            'tags': ['sketchbook', 'video'],
            'channel_id': '1641751',
            'channel_url': 'https://www.patreon.com/loish',
@@ -274,8 +282,73 @@ class PatreonIE(PatreonBaseIE):
            'channel_id': '9346307',
        },
        'params': {'getcomments': True},
+    }, {
+        # Inlined media in post; uses _extract_from_media_api
+        'url': 'https://www.patreon.com/posts/scottfalco-146966245',
+        'info_dict': {
+            'id': '146966245',
+            'ext': 'mp4',
+            'title': 'scottfalco 1080',
+            'description': 'md5:a3f29bbd0a46b4821ec3400957c98aa2',
+            'uploader': 'Insanimate',
+            'uploader_id': '2828146',
+            'uploader_url': 'https://www.patreon.com/Insanimate',
+            'channel_id': '6260877',
+            'channel_url': 'https://www.patreon.com/Insanimate',
+            'channel_follower_count': int,
+            'comment_count': int,
+            'like_count': int,
+            'duration': 7.833333,
+            'timestamp': 1767061800,
+            'upload_date': '20251230',
+        },
    }]
    _RETURN_TYPE = 'video'
+    _HTTP_HEADERS = {
+        # Must be all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, and Vimeo.
+        # patreon.com URLs redirect to www.patreon.com; this matters when requesting mux.com m3u8s
+        'referer': 'https://www.patreon.com/',
+    }
+
+    def _extract_from_media_api(self, media_id):
+        attributes = traverse_obj(
+            self._call_api(f'media/{media_id}', media_id, fatal=False),
+            ('data', 'attributes', {dict}))
+        if not attributes:
+            return None
+
+        info_dict = traverse_obj(attributes, {
+            'title': ('file_name', {lambda x: x.rpartition('.')[0]}),
+            'timestamp': ('created_at', {parse_iso8601}),
+            'duration': ('display', 'duration', {float_or_none}),
+        })
+        info_dict['id'] = media_id
+
+        playback_url = traverse_obj(
+            attributes, ('display', (None, 'viewer_playback_data'), 'url', {url_or_none}, any))
+        download_url = traverse_obj(attributes, ('download_url', {url_or_none}))
+
+        if playback_url and mimetype2ext(attributes.get('mimetype')) == 'm3u8':
+            info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(
+                playback_url, media_id, 'mp4', fatal=False, headers=self._HTTP_HEADERS)
+            for f in info_dict['formats']:
+                f['http_headers'] = self._HTTP_HEADERS
+            if transcript_url := traverse_obj(attributes, ('display', 'transcript_url', {url_or_none})):
+                info_dict['subtitles'].setdefault('en', []).append({
+                    'url': transcript_url,
+                    'ext': 'vtt',
+                })
+        elif playback_url or download_url:
+            info_dict['formats'] = [{
+                # If playback_url is available, download_url is a duplicate lower resolution format
+                'url': playback_url or download_url,
+                'vcodec': 'none' if attributes.get('media_type') != 'video' else None,
+            }]
+
+        if not info_dict.get('formats'):
+            return None
+
+        return info_dict

    def _real_extract(self, url):
        video_id = self._match_id(url)
@@ -299,6 +372,7 @@ class PatreonIE(PatreonBaseIE):
            'comment_count': ('comment_count', {int_or_none}),
        })

+        seen_media_ids = set()
        entries = []
        idx = 0
        for include in traverse_obj(post, ('included', lambda _, v: v['type'])):
@@ -320,6 +394,8 @@ class PatreonIE(PatreonBaseIE):
                        'url': download_url,
                        'alt_title': traverse_obj(media_attributes, ('file_name', {str})),
                    })
+                if media_id := traverse_obj(include, ('id', {str})):
+                    seen_media_ids.add(media_id)

            elif include_type == 'user':
                info.update(traverse_obj(include, {
@@ -340,34 +416,29 @@ class PatreonIE(PatreonBaseIE):
                    'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
                }))

-        # Must be all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, and Vimeo.
-        # patreon.com URLs redirect to www.patreon.com; this matters when requesting mux.com m3u8s
-        headers = {'referer': 'https://www.patreon.com/'}
+        if embed_url := traverse_obj(attributes, ('embed', 'url', {url_or_none})):
+            # Convert useless vimeo.com URLs to useful player.vimeo.com embed URLs
+            vimeo_id, vimeo_hash = self._search_regex(
+                r'//vimeo\.com/(\d+)(?:/([\da-f]+))?', embed_url,
+                'vimeo id', group=(1, 2), default=(None, None))
+            if vimeo_id:
+                embed_url = update_url_query(
+                    f'https://player.vimeo.com/video/{vimeo_id}',
+                    {'h': vimeo_hash or []})
+            if VimeoIE.suitable(embed_url):
+                entry = self.url_result(
+                    VimeoIE._smuggle_referrer(embed_url, self._HTTP_HEADERS['referer']),
+                    VimeoIE, url_transparent=True)
+            else:
+                entry = self.url_result(smuggle_url(embed_url, self._HTTP_HEADERS))

-        # handle Vimeo embeds
-        if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
-            v_url = urllib.parse.unquote(self._html_search_regex(
-                r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
-                traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '')
-            if url_or_none(v_url) and self._request_webpage(
-                    v_url, video_id, 'Checking Vimeo embed URL', headers=headers,
-                    fatal=False, errnote=False, expected_status=429):  # 429 is TLS fingerprint rejection
-                entries.append(self.url_result(
-                    VimeoIE._smuggle_referrer(v_url, headers['referer']),
-                    VimeoIE, url_transparent=True))
-
-        embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
-        if embed_url and (urlh := self._request_webpage(
-                embed_url, video_id, 'Checking embed URL', headers=headers,
-                fatal=False, errnote=False, expected_status=403)):
-            # Vimeo's Cloudflare anti-bot protection will return HTTP status 200 for 404, so we need
-            # to check for "Sorry, we couldn&amp;rsquo;t find that page" in the meta description tag
-            meta_description = clean_html(self._html_search_meta(
-                'description', self._webpage_read_content(urlh, embed_url, video_id, fatal=False), default=None))
-            # Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie
-            if ((urlh.status != 403 and meta_description != 'Sorry, we couldn’t find that page')
-                    or VidsIoIE.suitable(embed_url)):
-                entries.append(self.url_result(smuggle_url(embed_url, headers)))
+            if urlh := self._request_webpage(
+                embed_url, video_id, 'Checking embed URL', headers=self._HTTP_HEADERS,
+                fatal=False, errnote=False, expected_status=(403, 429),  # Ignore Vimeo 429's
+            ):
+                # Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie
+                if VidsIoIE.suitable(embed_url) or urlh.status != 403:
+                    entries.append(entry)

        post_file = traverse_obj(attributes, ('post_file', {dict}))
        if post_file:
@@ -381,13 +452,27 @@ class PatreonIE(PatreonBaseIE):
                })
            elif name == 'video' or determine_ext(post_file.get('url')) == 'm3u8':
                formats, subtitles = self._extract_m3u8_formats_and_subtitles(
-                    post_file['url'], video_id, headers=headers)
+                    post_file['url'], video_id, headers=self._HTTP_HEADERS)
+                for f in formats:
+                    f['http_headers'] = self._HTTP_HEADERS
                entries.append({
                    'id': video_id,
                    'formats': formats,
                    'subtitles': subtitles,
-                    'http_headers': headers,
                })
+            if media_id := traverse_obj(post_file, ('media_id', {int}, {str_or_none})):
+                seen_media_ids.add(media_id)
+
+        for media_id in traverse_obj(attributes, (
+            'content', {find_elements(attr='data-media-id', value=r'\d+', regex=True, html=True)},
+            ..., {extract_attributes}, 'data-media-id',
+        )):
+            # Inlined media may be duplicates of what was extracted above
+            if media_id in seen_media_ids:
+                continue
+            if media := self._extract_from_media_api(media_id):
+                entries.append(media)
+                seen_media_ids.add(media_id)

        can_view_post = traverse_obj(attributes, 'current_user_can_view')
        comments = None
--- a/yt_dlp/extractor/pbs.py
+++ b/yt_dlp/extractor/pbs.py
@@ -453,6 +453,23 @@ class PBSIE(InfoExtractor):
            'url': 'https://player.pbs.org/portalplayer/3004638221/?uid=',
            'only_matching': True,
        },
+        {
+            # Next.js v13+, see https://github.com/yt-dlp/yt-dlp/issues/13299
+            'url': 'https://www.pbs.org/video/caregiving',
+            'info_dict': {
+                'id': '3101776876',
+                'ext': 'mp4',
+                'title': 'Caregiving - Caregiving',
+                'description': 'A documentary revealing America’s caregiving crisis through intimate stories and expert insight.',
+                'display_id': 'caregiving',
+                'duration': 6783,
+                'thumbnail': 'https://image.pbs.org/video-assets/BSrSkcc-asset-mezzanine-16x9-nlcxQts.jpg',
+                'chapters': [],
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
    ]
    _ERRORS = {
        101: 'We\'re sorry, but this video is not yet available.',
@ -506,6 +523,7 @@ class PBSIE(InfoExtractor):
                r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',",
                r'<div[^>]+\bdata-cove-id=["\'](\d+)"',  # http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/
                r'<iframe[^>]+\bsrc=["\'](?:https?:)?//video\.pbs\.org/widget/partnerplayer/(\d+)',  # https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/
+                r'\\"videoTPMediaId\\":\\\"(\d+)\\"',  # Next.js v13, e.g. https://www.pbs.org/video/caregiving
                r'\bhttps?://player\.pbs\.org/[\w-]+player/(\d+)',      # last pattern to avoid false positives
            ]

--- a/yt_dlp/extractor/volejtv.py
+++ b/yt_dlp/extractor/volejtv.py
@ -1,40 +1,167 @@
+import functools
+
 from .common import InfoExtractor
+from ..utils import (
+    InAdvancePagedList,
+    int_or_none,
+    join_nonempty,
+    orderedSet,
+    str_or_none,
+    strftime_or_none,
+    unified_timestamp,
+    url_or_none,
+)
+from ..utils.traversal import (
+    require,
+    traverse_obj,
+)


-class VolejTVIE(InfoExtractor):
-    _VALID_URL = r'https?://volej\.tv/video/(?P<id>\d+)'
+class VolejTVBaseIE(InfoExtractor):
+    # Lookup from the API's `quality` value (string keys) to a frame height in pixels;
+    # VolejTVIE reads the same `quality` field for both `tbr` and `height`
+    TBR_HEIGHT_MAPPING = {
+        '6000': 1080,
+        '2400': 720,
+        '1500': 480,
+        '800': 360,
+    }
+
+    def _call_api(self, endpoint, display_id, query=None):
+        """Download and return the JSON response of the given API endpoint"""
+        return self._download_json(
+            f'https://api-volejtv-prod.apps.okd4.devopsie.cloud/api/{endpoint}',
+            display_id, query=query)
+
+
+class VolejTVIE(VolejTVBaseIE):
+    IE_NAME = 'volejtv:match'
+    _VALID_URL = r'https?://volej\.tv/match/(?P<id>\d+)'
    _TESTS = [{
-        'url': 'https://volej.tv/video/725742/',
+        'url': 'https://volej.tv/match/270579',
        'info_dict': {
-            'id': '725742',
+            'id': '270579',
            'ext': 'mp4',
-            'description': 'Zápas VK Královo Pole vs VK Prostějov 10.12.2022 v 19:00 na Volej.TV',
-            'thumbnail': 'https://volej.tv/images/og/16/17186/og.png',
-            'title': 'VK Královo Pole vs VK Prostějov',
+            'title': 'SWE-CZE (2024-06-16)',
+            'categories': ['ženy'],
+            'series': 'ZLATÁ EVROPSKÁ VOLEJBALOVÁ LIGA',
+            'season': '2023-2024',
+            'timestamp': 1718553600,
+            'upload_date': '20240616',
        },
    }, {
-        'url': 'https://volej.tv/video/725605/',
+        'url': 'https://volej.tv/match/487520',
        'info_dict': {
-            'id': '725605',
+            'id': '487520',
            'ext': 'mp4',
-            'thumbnail': 'https://volej.tv/images/og/15/17185/og.png',
-            'title': 'VK Lvi Praha vs VK Euro Sitex Příbram',
-            'description': 'Zápas VK Lvi Praha vs VK Euro Sitex Příbram 11.12.2022 v 19:00 na Volej.TV',
+            'thumbnail': r're:https://.+\.(png|jpeg)',
+            'title': 'FRA-CZE (2024-09-06)',
+            'categories': ['mládež'],
+            'series': 'Mistrovství Evropy do 20 let',
+            'season': '2024-2025',
+            'timestamp': 1725627600,
+            'upload_date': '20240906',
+
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        json_data = self._search_json(
-            r'<\s*!\[CDATA[^=]+=', webpage, 'CDATA', video_id)
-        formats, subtitle = self._extract_m3u8_formats_and_subtitles(
-            json_data['urls']['hls'], video_id)
-        return {
+        json_data = self._call_api(f'match/{video_id}', video_id)
+
+        # Formats come from the first entry of `videos` only;
+        # qualities whose `cloud_front_path` is not a valid URL are skipped
+        formats = []
+        for video in traverse_obj(json_data, ('videos', 0, 'qualities', lambda _, v: url_or_none(v['cloud_front_path']))):
+            formats.append(traverse_obj(video, {
+                'url': 'cloud_front_path',
+                'tbr': ('quality', {int_or_none}),
+                'format_id': ('id', {str_or_none}),
+                'height': ('quality', {self.TBR_HEIGHT_MAPPING.get}),
+            }))
+
+        data = {
            'id': video_id,
-            'title': self._html_search_meta(['og:title', 'twitter:title'], webpage),
-            'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage),
-            'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage),
+            **traverse_obj(json_data, {
+                'series': ('competition_name', {str}),
+                'season': ('season', {str}),
+                'timestamp': ('match_time', {unified_timestamp}),
+                'categories': ('category', ('title'), {str}, filter, all, filter),
+                'thumbnail': ('poster', {url_or_none}),
+            }),
            'formats': formats,
-            'subtitles': subtitle,
        }
+
+        # NOTE(review): 'FIN' is dropped when more than two distinct shortcuts are present;
+        # the reason is not visible here - presumably a placeholder team entry, confirm against the API
+        teams = orderedSet(traverse_obj(json_data, ('teams', ..., 'shortcut', {str})))
+        if len(teams) > 2 and 'FIN' in teams:
+            teams.remove('FIN')
+
+        # Title has the form 'TEAM-TEAM (YYYY-MM-DD)'; parts that are unavailable are left out
+        data['title'] = join_nonempty(
+            join_nonempty(*teams, delim='-'),
+            strftime_or_none(data.get('timestamp'), '(%Y-%m-%d)'),
+            delim=' ')
+
+        return data
+
+
+class VolejTVPlaylistBaseIE(VolejTVBaseIE):
+    """Subclasses must set _API_FILTER, _PAGE_SIZE"""
+
+    def _get_page(self, playlist_id, page):
+        # Callers pass a 0-based page index; the request is made with page + 1
+        return self._call_api(
+            f'match/{self._API_FILTER}/{playlist_id}', playlist_id,
+            query={'page': page + 1, 'take': self._PAGE_SIZE, 'order': 'DESC'})
+
+    def _entries(self, playlist_id, first_page_data, page):
+        # Page 0 reuses the response that was already downloaded instead of requesting it again
+        entries = first_page_data if page == 0 else self._get_page(playlist_id, page)
+        for match_id in traverse_obj(entries, ('data', ..., 'id')):
+            yield self.url_result(f'https://volej.tv/match/{match_id}', VolejTVIE)
+
+
+class VolejTVClubPlaylistIE(VolejTVPlaylistBaseIE):
+    IE_NAME = 'volejtv:club'
+    _VALID_URL = r'https?://volej\.tv/klub/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://volej.tv/klub/1173',
+        'info_dict': {
+            'id': '1173',
+            'title': 'VK Jihostroj České Budějovice',
+        },
+        'playlist_mincount': 30,
+    }]
+    _API_FILTER = 'by-team-id-paginated'
+    _PAGE_SIZE = 6
+
+    def _real_extract(self, url):
+        """Return a paged playlist of the matches listed for the club ID in the URL"""
+        playlist_id = self._match_id(url)
+        title = self._call_api(f'team/show/{playlist_id}', playlist_id)['title']
+        # The first page is fetched up front because it carries the total page count
+        first_page_data = self._get_page(playlist_id, 0)
+        total_pages = traverse_obj(first_page_data, ('meta', 'pageCount', {int}, {require('page count')}))
+        return self.playlist_result(InAdvancePagedList(
+            functools.partial(self._entries, playlist_id, first_page_data),
+            total_pages, self._PAGE_SIZE), playlist_id, title)
+
+
+class VolejTVCategoryPlaylistIE(VolejTVPlaylistBaseIE):
+    IE_NAME = 'volejtv:category'
+    # Stop the slug at '/', '?' or '#' so a URL fragment never ends up in the playlist ID
+    _VALID_URL = r'https?://volej\.tv/kategorie/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://volej.tv/kategorie/chance-cesky-pohar',
+        'info_dict': {
+            'id': 'chance-cesky-pohar',
+            'title': 'Chance Český pohár',
+        },
+        'playlist_mincount': 30,
+    }]
+    _API_FILTER = 'by-category-id-paginated'
+    _PAGE_SIZE = 10
+
+    def _get_category(self, playlist_id):
+        """Return the (id, title) of the category whose slug equals playlist_id
+
+        Only entries with a truthy slug, id and title are considered.
+        Raises ExtractorError (through `require`) when no such category is listed,
+        rather than returning None and breaking the caller's tuple unpacking.
+        """
+        categories = self._call_api('category', playlist_id)
+        category = traverse_obj(categories, (
+            lambda _, v: v['slug'] == playlist_id and v['id'] and v['title'],
+            any, {require('category')}))
+        return category['id'], category['title']
+
+    def _real_extract(self, url):
+        """Return a paged playlist of the matches listed for the category slug in the URL"""
+        playlist_id = self._match_id(url)
+        # The URL carries the slug, while the paginated endpoint is queried with the category ID
+        category_id, title = self._get_category(playlist_id)
+        # The first page is fetched up front because it carries the total page count
+        first_page_data = self._get_page(category_id, 0)
+        total_pages = traverse_obj(first_page_data, ('meta', 'pageCount', {int}, {require('page count')}))
+        return self.playlist_result(InAdvancePagedList(
+            functools.partial(self._entries, category_id, first_page_data),
+            total_pages, self._PAGE_SIZE), playlist_id, title)
--- a/yt_dlp/extractor/wat.py
+++ b/yt_dlp/extractor/wat.py
@ -76,7 +76,7 @@ class WatIE(InfoExtractor):
            if error_code == 'GEOBLOCKED':
                self.raise_geo_restricted(error_desc, video_info.get('geoList'))
            elif error_code == 'DELIVERY_ERROR':
-                if traverse_obj(video_data, ('delivery', 'code')) == 500:
+                if traverse_obj(video_data, ('delivery', 'code')) in (403, 500):
                    self.report_drm(video_id)
                error_desc = join_nonempty(
                    error_desc, traverse_obj(video_data, ('delivery', 'error', {str})), delim=': ')
Author	SHA1	Message	Date
rdamas	1c739bf53e	[ie/ERRArhiiv] Add extractor (#15667 ) Closes #15663 Authored by: rdamas	2026-01-27 16:53:38 +00:00
bashonly	e08fdaaec2	[ie/franceinfo] Fix extraction (#15704 ) Closes #15701 Authored by: bashonly	2026-01-27 15:40:47 +00:00
Romain Reignier	ac3a566434	[ie/franceinfo] Support new domain URLs (#15669 ) Closes #13173 Authored by: romainreignier	2026-01-27 14:09:16 +00:00
Alexander Bocken	1f4b26c39f	[ie/TheChosen] Support new URL format (#15687 ) Closes #15686 Authored by: AlexBocken	2026-01-27 14:08:22 +00:00
bashonly	14998eef63	[ie/patreon] Extract inlined media (#15498 ) Closes #15473 Authored by: bashonly	2026-01-27 12:52:49 +00:00
bashonly	a893774096	[ie/dailymotion] Support browser impersonation (#15697 ) Fix 2b61a2a4b20b499d6497c9212207f72a52b922a6 Closes #15526 Authored by: bashonly	2026-01-27 12:47:19 +00:00
nlurker	a810871608	[ie/pbs] Fix extraction (#15083 ) Closes #13299 Authored by: nlurker	2026-01-27 12:45:19 +00:00
Md5Lukas	f9a06197f5	[ie/boosty] Improve metadata extraction (#15543 ) Authored by: Sytm	2026-01-27 12:39:10 +00:00
Mivik	a421eb06d1	[ie/neteasemusic] Fix merged lyrics extraction (#15052 ) Authored by: Mivik	2026-01-27 12:30:11 +00:00
wesson09	bc6ff877dd	[ie/wat.tv] Improve DRM detection (#15659 ) Closes #15647 Authored by: wesson09	2026-01-27 12:29:09 +00:00
Subrat Lima	1effa06dbf	[ie/volejtv] Fix and add extractors (#13226 ) Closes #13203 Authored by: subrat-lima	2026-01-27 12:22:55 +00:00