From 2ad9fd8d8f46f5636bf694d20d39735960c74cbe Mon Sep 17 00:00:00 2001
From: InvalidUsernameException
 <InvalidUsernameException@users.noreply.github.com>
Date: Sat, 29 Mar 2025 21:15:26 +0100
Subject: [PATCH] [ie/ZDF] Fix extraction of video from ZDF heute subsite

---
 yt_dlp/extractor/_extractors.py |  1 +
 yt_dlp/extractor/zdf.py         | 81 ++++++++++++++++++++++++++++++++-
 2 files changed, 81 insertions(+), 1 deletion(-)
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 43317ab37b..0b7ba47763 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -2602,6 +2602,7 @@ from .zattoo import (
 from .zdf import (
     ZDFIE,
     ZDFCollectionIE,
+    ZDFHeuteIE,
 )
 from .zee5 import (
     Zee5IE,
diff --git a/yt_dlp/extractor/zdf.py b/yt_dlp/extractor/zdf.py
index 95839a6ea3..d0ecbbc431 100644
--- a/yt_dlp/extractor/zdf.py
+++ b/yt_dlp/extractor/zdf.py
@@ -16,6 +16,7 @@ from ..utils import (
     qualities,
     traverse_obj,
     try_get,
+    unified_timestamp,
     update_url_query,
     url_or_none,
     urljoin,
@@ -415,6 +416,10 @@ class ZDFIE(ZDFBaseIE):
         'only_matching': True,
     }]
 
+    @classmethod
+    def suitable(cls, url):  # URLs matched by ZDFHeuteIE are left to that extractor
+        return not ZDFHeuteIE.suitable(url) and super().suitable(url)
+
     _GRAPHQL_QUERY = '''
 query VideoByCanonical($canonical: String!) {
   videoByCanonical(canonical: $canonical) {
@@ -568,7 +573,7 @@ class ZDFCollectionIE(ZDFBaseIE):
 
     @classmethod
     def suitable(cls, url):
-        return False if ZDFIE.suitable(url) else super().suitable(url)
+        return False if ZDFIE.suitable(url) or ZDFHeuteIE.suitable(url) else super().suitable(url)
 
     def _real_extract(self, url):
         channel_id = self._match_id(url)
@@ -631,3 +636,77 @@ class ZDFCollectionIE(ZDFBaseIE):
 
         return self.playlist_result(entries, channel_id, title, traverse_obj(
             collection_data, ('data', 'smartCollectionByCanonical', 'infoText', {str})))
+
+
+# TODO: This extractor is a minimal effort implementation and incomplete.
+# It only does what is necessary to get back the functionality that was present
+# before the redesign of the ZDF website in 2025-03.
+# It uses an API that is no longer used by official clients,
+# and likely never was at all for the purpose the extractor uses it for.
+# A proper implementation should likely use the API of the mobile app instead:
+# https://zdf-prod-futura.zdf.de/news/documents/ (note 'news' vs 'mediathekV2')
+class ZDFHeuteIE(ZDFBaseIE):
+    """Extract videos from the ZDF heute news subsite (``/nachrichten/``)."""
+    _VALID_URL = r'https?://(?:www\.)?zdf\.de/nachrichten/(?:[^/?#]+/)*(?P<id>[^/?#]+)\.html'
+    _TESTS = [{
+        'url': 'https://www.zdf.de/nachrichten/zdfheute-live/beckenbauer-gedenkfeier-muenchen-video-100.html',
+        'md5': 'd28621e4cd8bcdc25fdefdf12dc79a1e',
+        'info_dict': {
+            'id': '240119_beckenbauer_gesamt_hli',
+            'ext': 'mp4',
+            'title': 'Gedenkfeier für Franz Beckenbauer',
+            'description': 'md5:a50f2ee818d4a78f20179b88affbe9da',
+            'duration': 6510,
+            'thumbnail': 'https://www.zdf.de/assets/beckenbauer-trauerfeier-muenchen-tn-102~1920x1080?cb=1705669625816',
+            'timestamp': 1705674600,
+            'upload_date': '20240119',
+        },
+    }]
+
+    def _download_v2_doc(self, document_id):
+        """Fetch the mediathekV2 JSON document for ``document_id``."""
+        return self._download_json(
+            f'https://zdf-prod-futura.zdf.de/mediathekV2/document/{document_id}',
+            document_id)
+
+    def _extract_mobile(self, video_id):
+        """Build the info dict for ``video_id`` from its mediathekV2 document.
+
+        A response without a ``document`` dict or ``formitaeten`` list yields
+        an entry with no formats (ID falls back to ``video_id``), not a crash.
+        """
+        video = self._download_v2_doc(video_id)
+        document = try_get(video, lambda x: x['document'], dict) or {}
+        content_id = document.get('basename') or video_id
+
+        formats, format_urls = [], set()
+        for f in try_get(document, lambda x: x['formitaeten'], list) or []:
+            self._extract_format(content_id, formats, format_urls, f)
+
+        thumbnails = []
+        for thumbnail_key, thumbnail in (try_get(document, lambda x: x['teaserBild'], dict) or {}).items():
+            thumbnail_url = try_get(thumbnail, lambda x: x['url'], str)
+            if thumbnail_url:
+                thumbnails.append({
+                    'url': thumbnail_url,
+                    'id': thumbnail_key,
+                    'width': int_or_none(thumbnail.get('width')),
+                    'height': int_or_none(thumbnail.get('height')),
+                })
+
+        return {
+            'id': content_id,
+            'title': document.get('titel'),
+            'description': document.get('beschreibung'),
+            'duration': int_or_none(document.get('length')),
+            'timestamp': unified_timestamp(document.get('date')) or unified_timestamp(
+                try_get(video, lambda x: x['meta']['editorialDate'], str)),
+            'thumbnails': thumbnails,
+            'subtitles': self._extract_subtitles(document.get('captions') or []),
+            'formats': formats,
+        }
+
+    def _real_extract(self, url):
+        """Extract the video identified by the page slug captured from ``url``."""
+        video_id = self._match_id(url)
+        return self._extract_mobile(video_id)