Use PTMD API for fallback extraction as well to get vp9 formats

2026-07-01 15:59:37 +00:00 · 2025-04-20 19:32:11 +02:00 · 2025-04-20 19:32:11 +02:00 · 67c05c9c43
commit 67c05c9c43
parent 692a396be8
1 changed files with 23 additions and 20 deletions
--- a/yt_dlp/extractor/zdf.py
+++ b/yt_dlp/extractor/zdf.py
@@ -16,7 +16,6 @@ from ..utils import (
    parse_iso8601,
    parse_qs,
    qualities,
    try_get,
    unified_timestamp,
    url_or_none,
    urljoin,
@@ -485,6 +484,16 @@ query VideoByCanonical($canonical: String!) {
 }
    '''
    def _extract_ptmd(self, ptmd_urls, video_id, api_token=None):
        """Run the base-class PTMD extraction and replace its ID with an archive ID.

        Delegates to the parent class's `_extract_ptmd` with the same arguments,
        then records the ID it returned under `_old_archive_ids` (built with
        `make_archive_id(ZDFIE, ...)`) and removes the `id` key from the result,
        so that the caller can supply its own `id`.

        Returns the modified dict from the base-class call.
        Raises KeyError if the base-class result contains no `id` key.
        """
        # NOTE(review): presumably a dict of info-dict fields (formats etc.);
        # the base implementation is not visible here -- confirm.
        ptmd_data = super()._extract_ptmd(ptmd_urls, video_id, api_token)
        # We can't use the ID from PTMD extraction as the video ID
        # because it is not available during playlist extraction.
        # We fix it here manually instead of inside the base class
        # because other extractors do rely on using it as their ID.
        ptmd_data['_old_archive_ids'] = [make_archive_id(ZDFIE, ptmd_data['id'])]
        # Drop the PTMD ID so it cannot override the caller's own `id`
        # when this dict is merged into the final result.
        del ptmd_data['id']
        return ptmd_data
    # This fallback should generally only happen for pages under `zdf.de/nachrichten`.
    # They are on a separate website for which GraphQL often doesn't return results.
    # The API used here is no longer in use by official clients and likely deprecated.
@@ -497,11 +506,10 @@ query VideoByCanonical($canonical: String!) {
            errnote='Failed to download fallback metadata')
        document = video['document']
-        content_id = document.get('basename')
+        ptmd_url = traverse_obj(document, (
-        formats = []
+            ('streamApiUrlAndroid', ('streams', 0, 'streamApiUrlAndroid')),
-        format_urls = set()
+            {url_or_none}, any))
-        for f in traverse_obj(document, ('formitaeten', ..., {dict})):
+        ptmd_data = self._extract_ptmd(ptmd_url, document_id, self._get_api_token(document_id))
            self._extract_format(document_id, formats, format_urls, f)
        thumbnails = []
        for thumbnail_key, thumbnail in traverse_obj(document, ('teaserBild', {dict.items})):
@@ -517,15 +525,16 @@ query VideoByCanonical($canonical: String!) {
        return {
            'id': document_id,
            'title': document.get('titel'),
            'description': document.get('beschreibung'),
            'duration': int_or_none(document.get('length')),
            'timestamp': unified_timestamp(document.get('date')) or unified_timestamp(
                try_get(video, lambda x: x['meta']['editorialDate'], str)),
            'thumbnails': thumbnails,
-            'subtitles': self._extract_subtitles(document.get('captions') or []),
+            **ptmd_data,
-            'formats': formats,
+            **traverse_obj(video, {
-            '_old_archive_ids': [make_archive_id(ZDFIE, content_id)] if content_id else [],
+                'title': ('document', 'titel', {str}),
                'description': ('document', 'beschreibung', {str}),
                'timestamp': (
                    (('document', 'date'), ('meta', 'editorialDate')),
                    {unified_timestamp}, any),
                'subtitles': ('document', 'captions', {self._extract_subtitles}),
            }),
        }
    def _real_extract(self, url):
@@ -543,12 +552,6 @@ query VideoByCanonical($canonical: String!) {
            'currentMedia', 'nodes', ..., 'ptmdTemplate',
            {functools.partial(self._expand_ptmd_template, 'https://api.zdf.de')}))
        ptmd_data = self._extract_ptmd(ptmd_urls, video_id, self._get_api_token(video_id))
        # We can't use the ID from PTMD extraction as the video ID
        # because it is not available during playlist extraction.
        # We fix it here manually instead of inside the method
        # because other extractors do rely on using it as their ID.
        ptmd_data['_old_archive_ids'] = [make_archive_id(ZDFIE, ptmd_data['id'])]
        del ptmd_data['id']
        return {
            'id': video_id,