Use PTMD API for fallback extraction as well to get vp9 formats

This commit is contained in:
InvalidUsernameException 2025-04-20 19:32:11 +02:00
parent 692a396be8
commit 67c05c9c43

View File

@ -16,7 +16,6 @@ from ..utils import (
parse_iso8601, parse_iso8601,
parse_qs, parse_qs,
qualities, qualities,
try_get,
unified_timestamp, unified_timestamp,
url_or_none, url_or_none,
urljoin, urljoin,
@ -485,6 +484,16 @@ query VideoByCanonical($canonical: String!) {
} }
''' '''
def _extract_ptmd(self, ptmd_urls, video_id, api_token=None):
ptmd_data = super()._extract_ptmd(ptmd_urls, video_id, api_token)
# We can't use the ID from PTMD extraction as the video ID
# because it is not available during playlist extraction.
# We fix it here manually instead of inside the base class
# because other extractors do rely on using it as their ID.
ptmd_data['_old_archive_ids'] = [make_archive_id(ZDFIE, ptmd_data['id'])]
del ptmd_data['id']
return ptmd_data
# This fallback should generally only happen for pages under `zdf.de/nachrichten`. # This fallback should generally only happen for pages under `zdf.de/nachrichten`.
# They are on a separate website for which GraphQL often doesn't return results. # They are on a separate website for which GraphQL often doesn't return results.
# The API used here is no longer in use by official clients and likely deprecated. # The API used here is no longer in use by official clients and likely deprecated.
@ -497,11 +506,10 @@ query VideoByCanonical($canonical: String!) {
errnote='Failed to download fallback metadata') errnote='Failed to download fallback metadata')
document = video['document'] document = video['document']
content_id = document.get('basename') ptmd_url = traverse_obj(document, (
formats = [] ('streamApiUrlAndroid', ('streams', 0, 'streamApiUrlAndroid')),
format_urls = set() {url_or_none}, any))
for f in traverse_obj(document, ('formitaeten', ..., {dict})): ptmd_data = self._extract_ptmd(ptmd_url, document_id, self._get_api_token(document_id))
self._extract_format(document_id, formats, format_urls, f)
thumbnails = [] thumbnails = []
for thumbnail_key, thumbnail in traverse_obj(document, ('teaserBild', {dict.items})): for thumbnail_key, thumbnail in traverse_obj(document, ('teaserBild', {dict.items})):
@ -517,15 +525,16 @@ query VideoByCanonical($canonical: String!) {
return { return {
'id': document_id, 'id': document_id,
'title': document.get('titel'),
'description': document.get('beschreibung'),
'duration': int_or_none(document.get('length')),
'timestamp': unified_timestamp(document.get('date')) or unified_timestamp(
try_get(video, lambda x: x['meta']['editorialDate'], str)),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'subtitles': self._extract_subtitles(document.get('captions') or []), **ptmd_data,
'formats': formats, **traverse_obj(video, {
'_old_archive_ids': [make_archive_id(ZDFIE, content_id)] if content_id else [], 'title': ('document', 'titel', {str}),
'description': ('document', 'beschreibung', {str}),
'timestamp': (
(('document', 'date'), ('meta', 'editorialDate')),
{unified_timestamp}, any),
'subtitles': ('document', 'captions', {self._extract_subtitles}),
}),
} }
def _real_extract(self, url): def _real_extract(self, url):
@ -543,12 +552,6 @@ query VideoByCanonical($canonical: String!) {
'currentMedia', 'nodes', ..., 'ptmdTemplate', 'currentMedia', 'nodes', ..., 'ptmdTemplate',
{functools.partial(self._expand_ptmd_template, 'https://api.zdf.de')})) {functools.partial(self._expand_ptmd_template, 'https://api.zdf.de')}))
ptmd_data = self._extract_ptmd(ptmd_urls, video_id, self._get_api_token(video_id)) ptmd_data = self._extract_ptmd(ptmd_urls, video_id, self._get_api_token(video_id))
# We can't use the ID from PTMD extraction as the video ID
# because it is not available during playlist extraction.
# We fix it here manually instead of inside the method
# because other extractors do rely on using it as their ID.
ptmd_data['_old_archive_ids'] = [make_archive_id(ZDFIE, ptmd_data['id'])]
del ptmd_data['id']
return { return {
'id': video_id, 'id': video_id,