diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index a44601b14d..43317ab37b 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2601,7 +2601,7 @@ from .zattoo import ( ) from .zdf import ( ZDFIE, - ZDFChannelIE, + ZDFCollectionIE, ) from .zee5 import ( Zee5IE, diff --git a/yt_dlp/extractor/dreisat.py b/yt_dlp/extractor/dreisat.py index 376ff672df..bca83f795e 100644 --- a/yt_dlp/extractor/dreisat.py +++ b/yt_dlp/extractor/dreisat.py @@ -1,4 +1,13 @@ from .zdf import ZDFBaseIE +from ..utils import ( + NO_DEFAULT, + ExtractorError, + int_or_none, + merge_dicts, + traverse_obj, + try_get, + unified_timestamp, +) class DreiSatIE(ZDFBaseIE): @@ -12,40 +21,42 @@ class DreiSatIE(ZDFBaseIE): 'title': 'Traumziele Südostasiens (1/2): Die Philippinen und Vietnam', 'description': 'md5:26329ce5197775b596773b939354079d', 'duration': 2625.0, - 'thumbnail': 'https://www.3sat.de/assets/traumziele-suedostasiens-die-philippinen-und-vietnam-100~2400x1350?cb=1699870351148', + 'thumbnail': 'https://www.3sat.de/assets/traumziele-suedostasiens-die-philippinen-und-vietnam-100~original?cb=1699870351148', 'episode': 'Traumziele Südostasiens (1/2): Die Philippinen und Vietnam', 'episode_id': 'POS_cc7ff51c-98cf-4d12-b99d-f7a551de1c95', 'timestamp': 1738593000, 'upload_date': '20250203', }, }, { - # Same as https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html - 'url': 'https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html', - 'md5': '0aff3e7bc72c8813f5e0fae333316a1d', + 'url': 'https://www.3sat.de/film/ab-18/ab-18---mein-fremdes-ich-100.html', + 'md5': '66cb9013ce37f6e008dc99bfcf1356bc', 'info_dict': { - 'id': '141007_ab18_10wochensommer_film', + 'id': '221128_mein_fremdes_ich2_ab18', 'ext': 'mp4', - 'title': 'Ab 18! - 10 Wochen Sommer', - 'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26', - 'duration': 2660, - 'timestamp': 1608604200, - 'upload_date': '20201222', + 'title': 'Ab 18! - Mein fremdes Ich', + 'description': 'md5:cae0c0b27b7426d62ca0dda181738bf0', + 'duration': 2625.0, + 'thumbnail': 'https://www.3sat.de/assets/ab-18---mein-fremdes-ich-106~original?cb=1666081865812', + 'episode': 'Ab 18! - Mein fremdes Ich', + 'episode_id': 'POS_6225d1ca-a0d5-45e3-870b-e783ee6c8a3f', + 'timestamp': 1695081600, + 'upload_date': '20230919', }, - 'skip': '410 Gone', }, { - 'url': 'https://www.3sat.de/gesellschaft/schweizweit/waidmannsheil-100.html', + 'url': 'https://www.3sat.de/gesellschaft/37-grad-leben/aus-dem-leben-gerissen-102.html', + 'md5': '69f276184c9a24147e4baae728fbc3c4', 'info_dict': { - 'id': '140913_sendung_schweizweit', + 'id': '250323_0903_sendung_sgl', 'ext': 'mp4', - 'title': 'Waidmannsheil', - 'description': 'md5:cce00ca1d70e21425e72c86a98a56817', - 'timestamp': 1410623100, - 'upload_date': '20140913', + 'title': 'Plötzlich ohne dich', + 'description': 'md5:380cc10659289dd91510ad8fa717c66b', + 'duration': 1620.0, + 'thumbnail': 'https://www.3sat.de/assets/37-grad-leben-106~original?cb=1645537156810', + 'episode': 'Plötzlich ohne dich', + 'episode_id': 'POS_faa7a93c-c0f2-4d51-823f-ce2ac3ee191b', + 'timestamp': 1743162540, + 'upload_date': '20250328', }, - 'params': { - 'skip_download': True, - }, - 'skip': '404 Not Found', }, { # Same as https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html 'url': 'https://www.3sat.de/film/spielfilm/der-hauptmann-100.html', @@ -56,13 +67,67 @@ class DreiSatIE(ZDFBaseIE): 'only_matching': True, }] + def _extract_player(self, webpage, video_id, fatal=True): + return self._parse_json( + self._search_regex( + r'(?s)data-zdfplayer-jsb=(["\'])(?P{.+?})\1', webpage, + 'player JSON', default='{}' if not fatal else NO_DEFAULT, + group='json'), + video_id) + + def _extract_regular(self, url, player, video_id): + player_url = player['content'] + api_token = f'Bearer {player["apiToken"]}' + + content = self._call_api(player_url, video_id, 'video metadata', api_token) + return self._extract_entry(player_url, api_token, content, video_id) + + def _extract_entry(self, url, api_token, content, video_id): + title = content.get('title') or content['teaserHeadline'] + + t = content['mainVideoContent']['http://zdf.de/rels/target'] + ptmd_path = traverse_obj(t, ( + (('streams', 'default'), None), + ('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template'), + ), get_all=False) + if not ptmd_path: + raise ExtractorError('Could not extract ptmd_path') + + info = self._extract_ptmd(url, ptmd_path, video_id, api_token) + layouts = try_get( + content, lambda x: x['teaserImageRef']['layouts'], dict) + thumbnails = self._extract_thumbnails(layouts) + + chapter_marks = t.get('streamAnchorTag') or [] + chapter_marks.append({'anchorOffset': int_or_none(t.get('duration'))}) + chapters = [{ + 'start_time': chap.get('anchorOffset'), + 'end_time': next_chap.get('anchorOffset'), + 'title': chap.get('anchorLabel'), + } for chap, next_chap in zip(chapter_marks, chapter_marks[1:])] + + return merge_dicts(info, { + 'title': title, + 'description': content.get('leadParagraph') or content.get('teasertext'), + 'duration': int_or_none(t.get('duration')), + 'timestamp': unified_timestamp(content.get('editorialDate')), + 'thumbnails': thumbnails, + 'chapters': chapters or None, + 'episode': title, + **traverse_obj(content, ('programmeItem', 0, 'http://zdf.de/rels/target', { + 'series_id': ('http://zdf.de/rels/cmdm/series', 'seriesUuid', {str}), + 'series': ('http://zdf.de/rels/cmdm/series', 'seriesTitle', {str}), + 'season': ('http://zdf.de/rels/cmdm/season', 'seasonTitle', {str}), + 'season_number': ('http://zdf.de/rels/cmdm/season', 'seasonNumber', {int_or_none}), + 'season_id': ('http://zdf.de/rels/cmdm/season', 'seasonUuid', {str}), + 'episode_number': ('episodeNumber', {int_or_none}), + 'episode_id': ('contentId', {str}), + })), + }) + def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id, fatal=False) - if webpage: - player = self._extract_player(webpage, url, fatal=False) - if player: - return self._extract_regular(url, player, video_id) - - return self._extract_mobile(video_id) + webpage = self._download_webpage(url, video_id) + player = self._extract_player(webpage, url) + return self._extract_regular(url, player, video_id) diff --git a/yt_dlp/extractor/phoenix.py b/yt_dlp/extractor/phoenix.py index 63c256019e..40f9dc1d72 100644 --- a/yt_dlp/extractor/phoenix.py +++ b/yt_dlp/extractor/phoenix.py @@ -1,5 +1,3 @@ -import re - from .youtube import YoutubeIE from .zdf import ZDFBaseIE from ..utils import ( @@ -7,7 +5,6 @@ from ..utils import ( merge_dicts, try_get, unified_timestamp, - urljoin, ) @@ -15,36 +12,20 @@ class PhoenixIE(ZDFBaseIE): IE_NAME = 'phoenix.de' _VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/]+/)*[^/?#&]*-a-(?P\d+)\.html' _TESTS = [{ - # Same as https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html - 'url': 'https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html', - 'md5': '34ec321e7eb34231fd88616c65c92db0', + 'url': 'https://www.phoenix.de/sendungen/dokumentationen/spitzbergen-a-893349.html', + 'md5': '768bbcd67a51fbfb22263ecdbc49146d', 'info_dict': { - 'id': '210222_phx_nachgehakt_corona_protest', + 'id': '221215_phx_spitzbergen', 'ext': 'mp4', - 'title': 'Wohin führt der Protest in der Pandemie?', - 'description': 'md5:7d643fe7f565e53a24aac036b2122fbd', - 'duration': 1691, - 'timestamp': 1613902500, - 'upload_date': '20210221', + 'title': 'Spitzbergen', + 'description': 'Film von Tilmann Bünz', + 'duration': 728.0, + 'timestamp': 1555600500, + 'upload_date': '20190418', 'uploader': 'Phoenix', - 'series': 'corona nachgehakt', - 'episode': 'Wohin führt der Protest in der Pandemie?', - }, - }, { - # Youtube embed - 'url': 'https://www.phoenix.de/sendungen/gespraeche/phoenix-streitgut-brennglas-corona-a-1965505.html', - 'info_dict': { - 'id': 'hMQtqFYjomk', - 'ext': 'mp4', - 'title': 'phoenix streitgut: Brennglas Corona - Wie gerecht ist unsere Gesellschaft?', - 'description': 'md5:ac7a02e2eb3cb17600bc372e4ab28fdd', - 'duration': 3509, - 'upload_date': '20201219', - 'uploader': 'phoenix', - 'uploader_id': 'phoenix', - }, - 'params': { - 'skip_download': True, + 'thumbnail': 'https://www.phoenix.de/sixcms/media.php/21/Bergspitzen1.png', + 'series': 'Dokumentationen', + 'episode': 'Spitzbergen', }, }, { 'url': 'https://www.phoenix.de/entwicklungen-in-russland-a-2044720.html', @@ -90,8 +71,9 @@ class PhoenixIE(ZDFBaseIE): content_id = details['tracking']['nielsen']['content']['assetid'] info = self._extract_ptmd( - f'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/{content_id}', - content_id, None, url) + 'https://tmd.phoenix.de', + f'tmd/2/{{playerId}}/vod/ptmd/phoenix/{content_id}', + content_id) duration = int_or_none(try_get( details, lambda x: x['tracking']['nielsen']['content']['length'])) @@ -101,20 +83,8 @@ class PhoenixIE(ZDFBaseIE): str) episode = title if details.get('contentType') == 'episode' else None - thumbnails = [] teaser_images = try_get(details, lambda x: x['teaserImageRef']['layouts'], dict) or {} - for thumbnail_key, thumbnail_url in teaser_images.items(): - thumbnail_url = urljoin(url, thumbnail_url) - if not thumbnail_url: - continue - thumbnail = { - 'url': thumbnail_url, - } - m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key) - if m: - thumbnail['width'] = int(m.group(1)) - thumbnail['height'] = int(m.group(2)) - thumbnails.append(thumbnail) + thumbnails = self._extract_thumbnails(teaser_images) return merge_dicts(info, { 'id': content_id, diff --git a/yt_dlp/extractor/zdf.py b/yt_dlp/extractor/zdf.py index 4bef1017cc..95839a6ea3 100644 --- a/yt_dlp/extractor/zdf.py +++ b/yt_dlp/extractor/zdf.py @@ -1,8 +1,9 @@ +import json import re +import time from .common import InfoExtractor from ..utils import ( - NO_DEFAULT, ExtractorError, determine_ext, float_or_none, @@ -10,13 +11,15 @@ from ..utils import ( join_nonempty, merge_dicts, parse_codecs, + parse_iso8601, + parse_qs, qualities, traverse_obj, try_get, - unified_timestamp, update_url_query, url_or_none, urljoin, + variadic, ) @@ -24,30 +27,25 @@ class ZDFBaseIE(InfoExtractor): _GEO_COUNTRIES = ['DE'] _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd', 'fhd', 'uhd') - def _download_v2_doc(self, document_id): + def _call_api(self, url, video_id, item, api_token=None): + headers = {'Api-Auth': api_token} if api_token else {} return self._download_json( - f'https://zdf-prod-futura.zdf.de/mediathekV2/document/{document_id}', - document_id) - - def _call_api(self, url, video_id, item, api_token=None, referrer=None): - headers = {} - if api_token: - headers['Api-Auth'] = f'Bearer {api_token}' - if referrer: - headers['Referer'] = referrer - return self._download_json( - url, video_id, f'Downloading JSON {item}', headers=headers) + url, video_id, note=f'Downloading {item}', + errnote=f'Failed to download {item}', headers=headers) @staticmethod def _extract_subtitles(src): + seen_urls = set() subtitles = {} - for caption in try_get(src, lambda x: x['captions'], list) or []: + for caption in src: subtitle_url = url_or_none(caption.get('uri')) - if subtitle_url: - lang = caption.get('language', 'deu') - subtitles.setdefault(lang, []).append({ - 'url': subtitle_url, - }) + if not subtitle_url or subtitle_url in seen_urls: + continue + seen_urls.add(subtitle_url) + lang = caption.get('language', 'deu') + subtitles.setdefault(lang, []).append({ + 'url': subtitle_url, + }) return subtitles def _extract_format(self, video_id, formats, format_urls, meta): @@ -86,246 +84,308 @@ class ZDFBaseIE(InfoExtractor): 'quality': qualities(self._QUALITIES)(meta.get('quality')), }) for f in new_formats) - def _extract_ptmd(self, ptmd_url, video_id, api_token, referrer): - ptmd = self._call_api( - ptmd_url, video_id, 'metadata', api_token, referrer) - - content_id = ptmd.get('basename') or ptmd_url.split('/')[-1] + def _extract_ptmd(self, api_base_url, templates, video_id, api_token=None): + # TODO: HTTPS formats are extracted without resolution information + # However, we know vertical resolution and the caller often knows apsect ratio. + # So we could calculate the correct resulution from those two data points. + templates = variadic(templates) + src_captions = [] + content_id = None + duration = None formats = [] track_uris = set() - for p in ptmd['priorityList']: - formitaeten = p.get('formitaeten') - if not isinstance(formitaeten, list): - continue - for f in formitaeten: - f_qualities = f.get('qualities') - if not isinstance(f_qualities, list): + for template in templates: + ptmd_url = urljoin(api_base_url, template.replace( + '{playerId}', 'android_native_6')) + ptmd = self._call_api(ptmd_url, video_id, 'PTMD data', api_token) + content_id = content_id or ptmd.get('basename') or ptmd_url.split('/')[-1] + duration = (duration or float_or_none(try_get( + ptmd, lambda x: x['attributes']['duration']['value']), scale=1000)) + src_captions += ptmd.get('captions') or [] + for p in ptmd['priorityList']: + formitaeten = p.get('formitaeten') + if not isinstance(formitaeten, list): continue - for quality in f_qualities: - tracks = try_get(quality, lambda x: x['audio']['tracks'], list) - if not tracks: + for f in formitaeten: + f_qualities = f.get('qualities') + if not isinstance(f_qualities, list): continue - for track in tracks: - self._extract_format( - content_id, formats, track_uris, { - 'url': track.get('uri'), - 'type': f.get('type'), - 'mimeType': f.get('mimeType'), - 'quality': quality.get('quality'), - 'class': track.get('class'), - 'language': track.get('language'), - }) - - duration = float_or_none(try_get( - ptmd, lambda x: x['attributes']['duration']['value']), scale=1000) + for quality in f_qualities: + tracks = try_get(quality, lambda x: x['audio']['tracks'], list) + if not tracks: + continue + for track in tracks: + self._extract_format( + content_id, formats, track_uris, { + 'url': track.get('uri'), + 'type': f.get('type'), + 'mimeType': f.get('mimeType'), + 'quality': quality.get('quality'), + 'class': track.get('class'), + 'language': track.get('language'), + }) return { 'extractor_key': ZDFIE.ie_key(), 'id': content_id, 'duration': duration, 'formats': formats, - 'subtitles': self._extract_subtitles(ptmd), + 'subtitles': self._extract_subtitles(src_captions), '_format_sort_fields': ('tbr', 'res', 'quality', 'language_preference'), } - def _extract_player(self, webpage, video_id, fatal=True): - return self._parse_json( - self._search_regex( - r'(?s)data-zdfplayer-jsb=(["\'])(?P{.+?})\1', webpage, - 'player JSON', default='{}' if not fatal else NO_DEFAULT, - group='json'), - video_id) + def _get_api_token(self, video_id): + # As of 2025-03, this API is used by the Android app for getting tokens. + # An equivalent token could be extracted from the webpage should the API become unavailable. + # For now this allows the extractor to avoid dealing with Next.js hydration data. + TOKEN_CACHE_SECTION = 'zdf' + TOKEN_CACHE_KEY = 'api-token' + token_data = self.cache.load(TOKEN_CACHE_SECTION, TOKEN_CACHE_KEY) + if traverse_obj(token_data, ('expires', {int_or_none}), default=0) < int(time.time()): + token_data = self._download_json( + 'https://zdf-prod-futura.zdf.de/mediathekV2/token', video_id, + note='Downloading API token') + self.cache.store(TOKEN_CACHE_SECTION, TOKEN_CACHE_KEY, token_data) + return f'{token_data["type"]} {token_data["token"]}' - def _extract_entry(self, url, player, content, video_id): - title = content.get('title') or content['teaserHeadline'] + def _download_graphql(self, item_id, data_desc, query=None, body=None): + if not query and not body: + raise ExtractorError( + 'GraphQL API requires either query parameters or a body', + video_id=item_id) - t = content['mainVideoContent']['http://zdf.de/rels/target'] - ptmd_path = traverse_obj(t, ( - (('streams', 'default'), None), - ('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template'), - ), get_all=False) - if not ptmd_path: - raise ExtractorError('Could not extract ptmd_path') + return self._download_json( + 'https://api.zdf.de/graphql', item_id, note=f'Downloading {data_desc}', + errnote=f'Failed to download {data_desc}', query=query, + data=json.dumps(body).encode() if body else None, headers={ + 'Api-Auth': self._get_api_token(item_id), + 'Apollo-Require-Preflight': True, + 'Content-Type': 'application/json' if body else None, + }) - info = self._extract_ptmd( - urljoin(url, ptmd_path.replace('{playerId}', 'android_native_5')), video_id, player['apiToken'], url) - - thumbnails = [] - layouts = try_get( - content, lambda x: x['teaserImageRef']['layouts'], dict) - if layouts: - for layout_key, layout_url in layouts.items(): - layout_url = url_or_none(layout_url) - if not layout_url: - continue - thumbnail = { - 'url': layout_url, - 'format_id': layout_key, - } - mobj = re.search(r'(?P\d+)x(?P\d+)', layout_key) - if mobj: - thumbnail.update({ - 'width': int(mobj.group('width')), - 'height': int(mobj.group('height')), - }) - thumbnails.append(thumbnail) - - chapter_marks = t.get('streamAnchorTag') or [] - chapter_marks.append({'anchorOffset': int_or_none(t.get('duration'))}) - chapters = [{ - 'start_time': chap.get('anchorOffset'), - 'end_time': next_chap.get('anchorOffset'), - 'title': chap.get('anchorLabel'), - } for chap, next_chap in zip(chapter_marks, chapter_marks[1:])] - - return merge_dicts(info, { - 'title': title, - 'description': content.get('leadParagraph') or content.get('teasertext'), - 'duration': int_or_none(t.get('duration')), - 'timestamp': unified_timestamp(content.get('editorialDate')), - 'thumbnails': thumbnails, - 'chapters': chapters or None, - 'episode': title, - **traverse_obj(content, ('programmeItem', 0, 'http://zdf.de/rels/target', { - 'series_id': ('http://zdf.de/rels/cmdm/series', 'seriesUuid', {str}), - 'series': ('http://zdf.de/rels/cmdm/series', 'seriesTitle', {str}), - 'season': ('http://zdf.de/rels/cmdm/season', 'seasonTitle', {str}), - 'season_number': ('http://zdf.de/rels/cmdm/season', 'seasonNumber', {int_or_none}), - 'season_id': ('http://zdf.de/rels/cmdm/season', 'seasonUuid', {str}), - 'episode_number': ('episodeNumber', {int_or_none}), - 'episode_id': ('contentId', {str}), - })), - }) - - def _extract_regular(self, url, player, video_id, query=None): - player_url = player['content'] - - content = self._call_api( - update_url_query(player_url, query), - video_id, 'content', player['apiToken'], url) - - return self._extract_entry(player_url, player, content, video_id) - - def _extract_mobile(self, video_id): - video = self._download_v2_doc(video_id) - - formats = [] - formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list) - document = formitaeten and video['document'] - if formitaeten: - title = document['titel'] - content_id = document['basename'] - - format_urls = set() - for f in formitaeten or []: - self._extract_format(content_id, formats, format_urls, f) - - thumbnails = [] - teaser_bild = document.get('teaserBild') - if isinstance(teaser_bild, dict): - for thumbnail_key, thumbnail in teaser_bild.items(): - thumbnail_url = try_get( - thumbnail, lambda x: x['url'], str) - if thumbnail_url: - thumbnails.append({ - 'url': thumbnail_url, - 'id': thumbnail_key, - 'width': int_or_none(thumbnail.get('width')), - 'height': int_or_none(thumbnail.get('height')), - }) - - return { - 'id': content_id, - 'title': title, - 'description': document.get('beschreibung'), - 'duration': int_or_none(document.get('length')), - 'timestamp': unified_timestamp(document.get('date')) or unified_timestamp( - try_get(video, lambda x: x['meta']['editorialDate'], str)), - 'thumbnails': thumbnails, - 'subtitles': self._extract_subtitles(document), - 'formats': formats, - } + @staticmethod + def _extract_thumbnails(source): + return [{ + 'id': format_id, + 'url': url, + 'preference': 1 if format_id == 'original' else 0, + **traverse_obj(re.search(r'(?P\d+|auto)[Xx](?P\d+|auto)', format_id), { + 'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + }), + } for format_id, url in (source or {}).items() if url] class ZDFIE(ZDFBaseIE): - _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P[^/?#&]+)\.html' + _VALID_URL = [ + # Legacy URLs end in .html and redirect + r'https?://(?:www\.)?zdf\.de/(?:[^/?#]+/)*(?P[^/?#]+)\.html', + r'https?://(?:www\.)?zdf\.de/(?:video|play)/(?:[^/?#]+/)*(?P[^/?#]+)/?', + ] _TESTS = [{ - # Same as https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html - 'url': 'https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html', - 'md5': '34ec321e7eb34231fd88616c65c92db0', + # Standalone video (i.e. not part of a playlist), video URL + 'url': 'https://www.zdf.de/video/dokus/sylt---deutschlands-edles-nordlicht-movie-100/sylt-deutschlands-edles-nordlicht-100', 'info_dict': { - 'id': '210222_phx_nachgehakt_corona_protest', + 'id': 'sylt-deutschlands-edles-nordlicht-100', 'ext': 'mp4', - 'title': 'Wohin führt der Protest in der Pandemie?', - 'description': 'md5:7d643fe7f565e53a24aac036b2122fbd', - 'duration': 1691, - 'timestamp': 1613948400, - 'upload_date': '20210221', + 'title': 'Sylt - Deutschlands edles Nordlicht', + 'description': 'md5:35407b810c2e1e33efbe15ef6e4c06c3', + 'duration': 810.0, + 'thumbnail': 'https://www.zdf.de/assets/sylt-118~original?cb=1613992485011', + 'series': 'Sylt - Deutschlands edles Nordlicht', + 'series_id': 'sylt---deutschlands-edles-nordlicht-movie-100', + 'timestamp': 1612462500, + 'upload_date': '20210204', + '_old_archive_ids': ['210402_1915_sendung_dok'], }, - 'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"', }, { - # Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html - 'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html', - 'md5': '0aff3e7bc72c8813f5e0fae333316a1d', + # Standalone video (i.e. not part of a playlist), play URL + 'url': 'https://www.zdf.de/play/dokus/sylt---deutschlands-edles-nordlicht-movie-100/sylt-deutschlands-edles-nordlicht-100', 'info_dict': { - 'id': '141007_ab18_10wochensommer_film', + 'id': 'sylt-deutschlands-edles-nordlicht-100', 'ext': 'mp4', - 'title': 'Ab 18! - 10 Wochen Sommer', - 'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26', - 'duration': 2660, - 'timestamp': 1608604200, - 'upload_date': '20201222', + 'title': 'Sylt - Deutschlands edles Nordlicht', + 'description': 'md5:35407b810c2e1e33efbe15ef6e4c06c3', + 'duration': 810.0, + 'thumbnail': 'https://www.zdf.de/assets/sylt-118~original?cb=1613992485011', + 'series': 'Sylt - Deutschlands edles Nordlicht', + 'series_id': 'sylt---deutschlands-edles-nordlicht-movie-100', + 'timestamp': 1612462500, + 'upload_date': '20210204', + '_old_archive_ids': ['210402_1915_sendung_dok'], }, - 'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"', + 'params': {'skip_download': True}, }, { - 'url': 'https://www.zdf.de/nachrichten/heute-journal/heute-journal-vom-30-12-2021-100.html', + # Standalone video (i.e. not part of a playlist), legacy URL before website redesign in 2025-03 + 'url': 'https://www.zdf.de/dokumentation/dokumentation-sonstige/sylt-deutschlands-edles-nordlicht-100.html', 'info_dict': { - 'id': '211230_sendung_hjo', + 'id': 'sylt-deutschlands-edles-nordlicht-100', 'ext': 'mp4', - 'description': 'md5:47dff85977bde9fb8cba9e9c9b929839', - 'duration': 1890.0, - 'upload_date': '20211230', - 'chapters': list, - 'thumbnail': 'md5:e65f459f741be5455c952cd820eb188e', - 'title': 'heute journal vom 30.12.2021', - 'timestamp': 1640897100, + 'title': 'Sylt - Deutschlands edles Nordlicht', + 'description': 'md5:35407b810c2e1e33efbe15ef6e4c06c3', + 'duration': 810.0, + 'thumbnail': 'https://www.zdf.de/assets/sylt-118~original?cb=1613992485011', + 'series': 'Sylt - Deutschlands edles Nordlicht', + 'series_id': 'sylt---deutschlands-edles-nordlicht-movie-100', + 'timestamp': 1612462500, + 'upload_date': '20210204', + '_old_archive_ids': ['210402_1915_sendung_dok'], }, - 'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"', + 'params': {'skip_download': True}, }, { - 'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html', + # Video belongs to a playlist, video URL + 'url': 'https://www.zdf.de/video/dokus/die-magie-der-farben-116/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100', + 'md5': '1eda17eb40a9ead3046326e10b9c5973', 'info_dict': { - 'id': '151025_magie_farben2_tex', + 'id': 'die-magie-der-farben-von-koenigspurpur-und-jeansblau-100', 'ext': 'mp4', - 'duration': 2615.0, - 'title': 'Die Magie der Farben (2/2)', + 'title': 'Von Königspurpur bis Jeansblau', 'description': 'md5:a89da10c928c6235401066b60a6d5c1a', - 'timestamp': 1465021200, - 'thumbnail': 'https://www.zdf.de/assets/mauve-im-labor-100~768x432?cb=1464909117806', - 'upload_date': '20160604', - 'episode': 'Die Magie der Farben (2/2)', - 'episode_id': 'POS_954f4170-36a5-4a41-a6cf-78f1f3b1f127', - 'season': 'Staffel 1', + 'duration': 2615.0, + 'thumbnail': 'https://www.zdf.de/assets/koenigspurpur-bis-jeansblau-100~original?cb=1741857765971', 'series': 'Die Magie der Farben', + 'series_id': 'die-magie-der-farben-116', + 'season': 'Season 1', 'season_number': 1, - 'series_id': 'a39900dd-cdbd-4a6a-a413-44e8c6ae18bc', - 'season_id': '5a92e619-8a0f-4410-a3d5-19c76fbebb37', + 'episode': 'Episode 2', 'episode_number': 2, + 'timestamp': 1445797800, + 'upload_date': '20151025', + '_old_archive_ids': ['151025_magie_farben2_tex'], + }, + }, { + # Video belongs to a playlist, play URL + 'url': 'https://www.zdf.de/play/dokus/die-magie-der-farben-116/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100', + 'md5': '1eda17eb40a9ead3046326e10b9c5973', + 'info_dict': { + 'id': 'die-magie-der-farben-von-koenigspurpur-und-jeansblau-100', + 'ext': 'mp4', + 'title': 'Von Königspurpur bis Jeansblau', + 'description': 'md5:a89da10c928c6235401066b60a6d5c1a', + 'duration': 2615.0, + 'thumbnail': 'https://www.zdf.de/assets/koenigspurpur-bis-jeansblau-100~original?cb=1741857765971', + 'series': 'Die Magie der Farben', + 'series_id': 'die-magie-der-farben-116', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Episode 2', + 'episode_number': 2, + 'timestamp': 1445797800, + 'upload_date': '20151025', + '_old_archive_ids': ['151025_magie_farben2_tex'], + }, + 'params': {'skip_download': True}, + }, { + # Video belongs to a playlist, legacy URL before website redesign in 2025-03 + 'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html', + 'md5': '1eda17eb40a9ead3046326e10b9c5973', + 'info_dict': { + 'id': 'die-magie-der-farben-von-koenigspurpur-und-jeansblau-100', + 'ext': 'mp4', + 'title': 'Von Königspurpur bis Jeansblau', + 'description': 'md5:a89da10c928c6235401066b60a6d5c1a', + 'duration': 2615.0, + 'thumbnail': 'https://www.zdf.de/assets/koenigspurpur-bis-jeansblau-100~original?cb=1741857765971', + 'series': 'Die Magie der Farben', + 'series_id': 'die-magie-der-farben-116', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Episode 2', + 'episode_number': 2, + 'timestamp': 1445797800, + 'upload_date': '20151025', + '_old_archive_ids': ['151025_magie_farben2_tex'], + }, + 'params': {'skip_download': True}, + }, { + # Video with chapters + 'url': 'https://www.zdf.de/video/magazine/heute-journal-104/heute-journal-vom-19-12-2021-100', + 'md5': '1175003f28507bd27b266181c4de9f56', + 'info_dict': { + 'id': 'heute-journal-vom-19-12-2021-100', + 'ext': 'mp4', + 'title': 'heute journal vom 19.12.2021', + 'description': 'md5:02504cf3b03777ff32fcc927d260c5dd', + 'duration': 1770.0, + 'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/273e5545-16e7-4ca3-898e-52fe9e06d964?layout=1920x1080', + 'chapters': 'count:11', + 'series': 'heute journal', + 'series_id': 'heute-journal-104', + 'season': 'Season 2021', + 'season_number': 2021, + 'episode': 'Episode 370', + 'episode_number': 370, + 'timestamp': 1639946700, + 'upload_date': '20211219', + '_old_archive_ids': ['211219_sendung_hjo_dgs'], }, }, { 'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html', 'md5': '57af4423db0455a3975d2dc4578536bc', 'info_dict': { + 'id': 'funk-alles-ist-verzaubert-102', 'ext': 'mp4', - 'id': 'video_funk_1770473', - 'duration': 1278.0, 'title': 'Alles ist verzaubert', 'description': 'Die Neue an der Schule verdreht Ismail den Kopf.', + 'duration': 1278.0, + 'thumbnail': 'https://www.zdf.de/assets/teaser-funk-alles-ist-verzaubert-102~original?cb=1663848412907', + 'series': 'DRUCK', + 'series_id': 'funk-collection-funk-11790-1590', + 'season': 'Season 7', + 'season_number': 7, + 'episode': 'Episode 1', + 'episode_number': 1, 'timestamp': 1635520560, - 'thumbnail': 'https://www.zdf.de/assets/teaser-funk-alles-ist-verzaubert-102~1920x1080?cb=1663848412907', 'upload_date': '20211029', - 'episode': 'Alles ist verzaubert', + '_old_archive_ids': ['video_funk_1770473'], }, + }, { + 'url': 'https://www.zdf.de/serien/soko-stuttgart/das-geld-anderer-leute-100.html', + 'info_dict': { + 'id': 'das-geld-anderer-leute-100', + 'ext': 'mp4', + 'title': 'Das Geld anderer Leute', + 'description': 'md5:cb6f660850dc5eb7d1ab776ea094959d', + 'duration': 2581.0, + 'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/e2d7e55a-09f0-424e-ac73-6cac4dd65f35?layout=1920x1080', + 'series': 'SOKO Stuttgart', + 'series_id': 'soko-stuttgart-104', + 'season': 'Season 11', + 'season_number': 11, + 'episode': 'Episode 10', + 'episode_number': 10, + 'timestamp': 1728983700, + 'upload_date': '20241015', + '_old_archive_ids': ['191205_1800_sendung_sok8'], + }, + }, { + 'url': 'https://www.zdf.de/serien/northern-lights/begegnung-auf-der-bruecke-100.html', + 'info_dict': { + 'id': 'begegnung-auf-der-bruecke-100', + 'ext': 'mp4', + 'title': 'Begegnung auf der Brücke', + 'description': 'md5:e53a555da87447f7f1207f10353f8e45', + 'duration': 3083.0, + 'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/c5ff1d1f-f5c8-4468-86ac-1b2f1dbecc76?layout=1920x1080', + 'series': 'Northern Lights', + 'series_id': 'northern-lights-100', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Episode 1', + 'episode_number': 1, + 'timestamp': 1738546500, + 'upload_date': '20250203', + '_old_archive_ids': ['240319_2310_sendung_not'], + }, + }, { + # Same as https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html + 'url': 'https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html', + 'only_matching': True, + }, { + # Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html + 'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html', + 'only_matching': True, }, { # Same as https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche 'url': 'https://www.zdf.de/politik/phoenix-sendungen/die-gesten-der-maechtigen-100.html', @@ -349,108 +409,156 @@ class ZDFIE(ZDFBaseIE): 'only_matching': True, }, { 'url': 'https://www.zdf.de/arte/todliche-flucht/page-video-artede-toedliche-flucht-16-100.html', - 'info_dict': { - 'id': 'video_artede_083871-001-A', - 'ext': 'mp4', - 'title': 'Tödliche Flucht (1/6)', - 'description': 'md5:e34f96a9a5f8abd839ccfcebad3d5315', - 'duration': 3193.0, - 'timestamp': 1641355200, - 'upload_date': '20220105', - }, - 'skip': 'No longer available "Diese Seite wurde leider nicht gefunden"', - }, { - 'url': 'https://www.zdf.de/serien/soko-stuttgart/das-geld-anderer-leute-100.html', - 'info_dict': { - 'id': '191205_1800_sendung_sok8', - 'ext': 'mp4', - 'title': 'Das Geld anderer Leute', - 'description': 'md5:cb6f660850dc5eb7d1ab776ea094959d', - 'duration': 2581.0, - 'timestamp': 1728983700, - 'upload_date': '20241015', - 'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/e2d7e55a-09f0-424e-ac73-6cac4dd65f35?layout=2400x1350', - 'series': 'SOKO Stuttgart', - 'series_id': 'f862ce9a-6dd1-4388-a698-22b36ac4c9e9', - 'season': 'Staffel 11', - 'season_number': 11, - 'season_id': 'ae1b4990-6d87-4970-a571-caccf1ba2879', - 'episode': 'Das Geld anderer Leute', - 'episode_number': 10, - 'episode_id': 'POS_7f367934-f2f0-45cb-9081-736781ff2d23', - }, + 'only_matching': True, }, { 'url': 'https://www.zdf.de/dokumentation/terra-x/unser-gruener-planet-wuesten-doku-100.html', - 'info_dict': { - 'id': '220525_green_planet_makingof_1_tropen_tex', - 'ext': 'mp4', - 'title': 'Making-of Unser grüner Planet - Tropen', - 'description': 'md5:d7c6949dc7c75c73c4ad51c785fb0b79', - 'duration': 435.0, - 'timestamp': 1653811200, - 'upload_date': '20220529', - 'format_note': 'hd, main', - 'thumbnail': 'https://www.zdf.de/assets/unser-gruener-planet-making-of-1-tropen-100~3840x2160?cb=1653493335577', - 'episode': 'Making-of Unser grüner Planet - Tropen', - }, - 'skip': 'No longer available: "Leider kein Video verfügbar"', - }, { - 'url': 'https://www.zdf.de/serien/northern-lights/begegnung-auf-der-bruecke-100.html', - 'info_dict': { - 'id': '240319_2310_sendung_not', - 'ext': 'mp4', - 'title': 'Begegnung auf der Brücke', - 'description': 'md5:e53a555da87447f7f1207f10353f8e45', - 'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/c5ff1d1f-f5c8-4468-86ac-1b2f1dbecc76?layout=2400x1350', - 'upload_date': '20250203', - 'duration': 3083.0, - 'timestamp': 1738546500, - 'series_id': '1d7a1879-01ee-4468-8237-c6b4ecd633c7', - 'series': 'Northern Lights', - 'season': 'Staffel 1', - 'season_number': 1, - 'season_id': '22ac26a2-4ea2-4055-ac0b-98b755cdf718', - 'episode': 'Begegnung auf der Brücke', - 'episode_number': 1, - 'episode_id': 'POS_71049438-024b-471f-b472-4fe2e490d1fb', - }, + 'only_matching': True, }] + _GRAPHQL_QUERY = ''' +query VideoByCanonical($canonical: String!) { + videoByCanonical(canonical: $canonical) { + canonical + title + leadParagraph + editorialDate + teaser { + description + image { + list + } + } + episodeInfo { + episodeNumber + seasonNumber + } + smartCollection { + canonical + title + } + currentMedia { + nodes { + ptmdTemplate + ... on VodMedia { + duration + aspectRatio + streamAnchorTags { + nodes { + anchorOffset + anchorLabel + } + } + vodMediaType + label + } + ... on LiveMedia { + start + stop + encryption + liveMediaType + label + } + id + } + } + } +} + ''' + def _real_extract(self, url): video_id = self._match_id(url) + video_data = self._download_graphql(video_id, 'video metadata', body={ + 'operationName': 'VideoByCanonical', + 'query': self._GRAPHQL_QUERY, + 'variables': {'canonical': video_id}, + }) + # TODO: If there are multiple PTMD templates, + # usually one of them is a sign-language variant of the video. + # The format order works out fine as is and prefers the "normal" video, + # but this should probably be made more explicit. + ptmd_templates = traverse_obj( + video_data, + ('data', 'videoByCanonical', 'currentMedia', 'nodes', ..., 'ptmdTemplate')) + ptmd_data = self._extract_ptmd( + 'https://api.zdf.de', ptmd_templates, video_id, + self._get_api_token(video_id)) + # We can't use the ID from PTMD extraction as the video ID + # because it is not available during playlist extraction. + # We fix it here manually instead of inside the method + # because other extractors do rely on using it as their ID. + ptmd_data['_old_archive_ids'] = [ptmd_data['id']] + del ptmd_data['id'] - webpage = self._download_webpage(url, video_id, fatal=False) - if webpage: - player = self._extract_player(webpage, url, fatal=False) - if player: - return self._extract_regular(url, player, video_id, query={'profile': 'player-3'}) + return { + 'id': video_id, + **ptmd_data, + **traverse_obj(video_data, ('data', 'videoByCanonical', { + 'title': ('title', {str}), + 'description': (('leadParagraph', ('teaser', 'description')), any, {str}), + 'timestamp': ('editorialDate', {parse_iso8601}), + 'thumbnails': ('teaser', 'image', 'list', {self._extract_thumbnails}), + 'episode_number': ('episodeInfo', 'episodeNumber', {int_or_none}), + 'season_number': ('episodeInfo', 'seasonNumber', {int_or_none}), + 'series': ('smartCollection', 'title', {str}), + 'series_id': ('smartCollection', 'canonical', {str}), + 'chapters': ('currentMedia', 'nodes', 0, {self._extract_chapters}), + })), + } - return self._extract_mobile(video_id) + def _extract_chapters(self, data): + chapter_marks = traverse_obj(data, ('streamAnchorTags', 'nodes')) + chapter_marks.append({'anchorOffset': float_or_none(data.get('duration'))}) + return [{ + 'start_time': chap.get('anchorOffset'), + 'end_time': next_chap.get('anchorOffset'), + 'title': chap.get('anchorLabel'), + } for chap, next_chap in zip(chapter_marks, chapter_marks[1:])] or None -class ZDFChannelIE(ZDFBaseIE): - _VALID_URL = r'https?://www\.zdf\.de/(?:[^/?#]+/)*(?P[^/?#&]+)' +class ZDFCollectionIE(ZDFBaseIE): + _VALID_URL = r'https?://www\.zdf\.de/(?:[^/?#]+/)*(?P[^/?#]+)' _TESTS = [{ + # Playlist, legacy URL before website redesign in 2025-03 'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio', 'info_dict': { - 'id': 'das-aktuelle-sportstudio', + 'id': 'das-aktuelle-sportstudio-220', 'title': 'das aktuelle sportstudio', + 'description': 'md5:e46c785324238a03edcf8b301c5fd5dc', }, 'playlist_mincount': 18, }, { - 'url': 'https://www.zdf.de/dokumentation/planet-e', + # Playlist, current URL + 'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio-220', 'info_dict': { - 'id': 'planet-e', - 'title': 'planet e.', - 'description': 'md5:87e3b9c66a63cf1407ee443d2c4eb88e', + 'id': 'das-aktuelle-sportstudio-220', + 'title': 'das aktuelle sportstudio', + 'description': 'md5:e46c785324238a03edcf8b301c5fd5dc', }, - 'playlist_mincount': 50, + 'playlist_mincount': 18, + }, { + # Standalone video (i.e. not part of a playlist), collection URL + 'add_ie': [ZDFIE.ie_key()], + 'url': 'https://www.zdf.de/dokus/sylt---deutschlands-edles-nordlicht-movie-100', + 'info_dict': { + 'id': 'sylt-deutschlands-edles-nordlicht-100', + 'ext': 'mp4', + 'title': 'Sylt - Deutschlands edles Nordlicht', + 'description': 'md5:35407b810c2e1e33efbe15ef6e4c06c3', + 'duration': 810.0, + 'thumbnail': 'https://www.zdf.de/assets/sylt-118~original?cb=1613992485011', + 'series': 'Sylt - Deutschlands edles Nordlicht', + 'series_id': 'sylt---deutschlands-edles-nordlicht-movie-100', + 'timestamp': 1612462500, + 'upload_date': '20210204', + '_old_archive_ids': ['210402_1915_sendung_dok'], + }, + 'params': {'skip_download': True}, }, { 'url': 'https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest', 'info_dict': { - 'id': 'aktenzeichen-xy-ungeloest', + 'id': 'aktenzeichen-xy-ungeloest-110', 'title': 'Aktenzeichen XY... Ungelöst', - 'description': 'md5:623ede5819c400c6d04943fa8100e6e7', + 'description': 'md5:b79ac0d64b979e53cbe510c0ca9cb7be', }, 'playlist_mincount': 2, }, { @@ -462,42 +570,64 @@ class ZDFChannelIE(ZDFBaseIE): def suitable(cls, url): return False if ZDFIE.suitable(url) else super().suitable(url) - def _extract_entry(self, entry): - return self.url_result( - entry['sharingUrl'], ZDFIE, **traverse_obj(entry, { - 'id': ('basename', {str}), - 'title': ('titel', {str}), - 'description': ('beschreibung', {str}), - 'duration': ('length', {float_or_none}), - 'season_number': ('seasonNumber', {int_or_none}), - 'episode_number': ('episodeNumber', {int_or_none}), - })) - - def _entries(self, data, document_id): - for entry in traverse_obj(data, ( - 'cluster', lambda _, v: v['type'] == 'teaser', - # If 'brandId' differs, it is a 'You might also like' video. Filter these out - 'teaser', lambda _, v: v['type'] == 'video' and v['brandId'] == document_id and v['sharingUrl'], - )): - yield self._extract_entry(entry) - def _real_extract(self, url): channel_id = self._match_id(url) - webpage = self._download_webpage(url, channel_id) - document_id = self._search_regex( - r'docId\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, 'document id', group='doc_id') - data = self._download_v2_doc(document_id) + webpage, urlh = self._download_webpage_handle(url, channel_id) + # Make sure to get the correct ID in case of redirects + channel_id = self._search_regex(self._VALID_URL, urlh.url, 'channel id', group='id') - main_video = traverse_obj(data, ( - 'cluster', lambda _, v: v['type'] == 'teaserContent', - 'teaser', lambda _, v: v['type'] == 'video' and v['basename'] and v['sharingUrl'], any)) or {} + collection_data = self._download_graphql( + channel_id, 'smart collection data', query={ + 'operationName': 'GetSmartCollectionByCanonical', + 'variables': json.dumps({ + 'canonical': channel_id, + 'videoPageSize': 100, + }), + 'extensions': json.dumps({ + 'persistedQuery': { + 'version': 1, + 'sha256Hash': 'cb49420e133bd668ad895a8cea0e65cba6aa11ac1cacb02341ff5cf32a17cd02', + }, + }), + }) - if not self._yes_playlist(channel_id, main_video.get('basename')): - return self._extract_entry(main_video) + video_data = traverse_obj( + collection_data, ('data', 'smartCollectionByCanonical', 'video')) + season_data = traverse_obj( + collection_data, ('data', 'smartCollectionByCanonical', 'seasons')) - return self.playlist_result( - self._entries(data, document_id), channel_id, - re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', self._og_search_title(webpage) or '')[0] or None, - join_nonempty( - 'headline', 'text', delim='\n\n', - from_dict=traverse_obj(data, ('shortText', {dict}), default={})) or None) + if not self._yes_playlist( + channel_id if season_data else None, + traverse_obj(video_data, ('canonical', {str}))): + return self.url_result(video_data['sharingUrl'], ie=ZDFIE) + + title = (traverse_obj(collection_data, ('data', 'smartCollectionByCanonical', 'title')) + or self._html_search_meta( + ['og:title', 'title', 'twitter:title'], webpage, 'title', fatal=False)) + + needs_pagination = traverse_obj(season_data, ( + 'seasons', ..., 'episodes', 'pageInfo', 'hasNextPage', + lambda _, v: v is True, any), default=False) + if needs_pagination: + # TODO: Implement pagination for collections with long seasons + # e.g. https://www.zdf.de/magazine/heute-journal-104 + self.report_warning('This collections contains seasons with more than 100 episodes, some episodes are missing from the result.') + + videos = traverse_obj(season_data, ('seasons', ..., 'episodes', 'videos', ...)) + season_id = parse_qs(url).get('staffel', [None])[-1] + videos = [v for v in videos + if not season_id or season_id == str(traverse_obj(v, ('episodeInfo', 'seasonNumber')))] + entries = [self.url_result(video['sharingUrl'], ZDFIE, **traverse_obj(video, { + 'id': ('canonical', {str}), + 'title': ('teaser', 'title', {str}), + 'description': (('leadParagraph', ('teaser', 'description')), any, {str}), + 'timestamp': ('editorialDate', {parse_iso8601}), + 'thumbnails': ('teaser', 'imageWithoutLogo', 'layouts', {self._extract_thumbnails}), + 'episode_number': ('episodeInfo', 'episodeNumber', {int_or_none}), + 'season_number': ('episodeInfo', 'seasonNumber', {int_or_none}), + 'series_id': {lambda _: channel_id}, + 'series': {lambda _: title}, + })) for video in videos or [] if video.get('currentMediaType') != 'NOVIDEO'] + + return self.playlist_result(entries, channel_id, title, traverse_obj( + collection_data, ('data', 'smartCollectionByCanonical', 'infoText', {str})))