Fix archive ID and rewrite _extract_fallback

This commit is contained in:
sepro 2025-04-06 22:03:39 +02:00
parent a5903d16ed
commit 56e116cc19

View File

@ -9,18 +9,19 @@ from ..utils import (
float_or_none, float_or_none,
int_or_none, int_or_none,
join_nonempty, join_nonempty,
make_archive_id,
merge_dicts, merge_dicts,
parse_codecs, parse_codecs,
parse_iso8601, parse_iso8601,
parse_qs, parse_qs,
qualities, qualities,
traverse_obj,
try_get, try_get,
unified_timestamp, unified_timestamp,
url_or_none, url_or_none,
urljoin, urljoin,
variadic, variadic,
) )
from ..utils.traversal import traverse_obj
class ZDFBaseIE(InfoExtractor): class ZDFBaseIE(InfoExtractor):
@ -190,7 +191,7 @@ class ZDFIE(ZDFBaseIE):
'series_id': 'sylt---deutschlands-edles-nordlicht-movie-100', 'series_id': 'sylt---deutschlands-edles-nordlicht-movie-100',
'timestamp': 1612462500, 'timestamp': 1612462500,
'upload_date': '20210204', 'upload_date': '20210204',
'_old_archive_ids': ['210402_1915_sendung_dok'], '_old_archive_ids': ['zdf 210402_1915_sendung_dok'],
}, },
}, { }, {
# Standalone video (i.e. not part of a playlist), play URL # Standalone video (i.e. not part of a playlist), play URL
@ -206,7 +207,7 @@ class ZDFIE(ZDFBaseIE):
'series_id': 'sylt---deutschlands-edles-nordlicht-movie-100', 'series_id': 'sylt---deutschlands-edles-nordlicht-movie-100',
'timestamp': 1612462500, 'timestamp': 1612462500,
'upload_date': '20210204', 'upload_date': '20210204',
'_old_archive_ids': ['210402_1915_sendung_dok'], '_old_archive_ids': ['zdf 210402_1915_sendung_dok'],
}, },
'params': {'skip_download': True}, 'params': {'skip_download': True},
}, { }, {
@ -223,7 +224,7 @@ class ZDFIE(ZDFBaseIE):
'series_id': 'sylt---deutschlands-edles-nordlicht-movie-100', 'series_id': 'sylt---deutschlands-edles-nordlicht-movie-100',
'timestamp': 1612462500, 'timestamp': 1612462500,
'upload_date': '20210204', 'upload_date': '20210204',
'_old_archive_ids': ['210402_1915_sendung_dok'], '_old_archive_ids': ['zdf 210402_1915_sendung_dok'],
}, },
'params': {'skip_download': True}, 'params': {'skip_download': True},
}, { }, {
@ -245,7 +246,7 @@ class ZDFIE(ZDFBaseIE):
'episode_number': 2, 'episode_number': 2,
'timestamp': 1445797800, 'timestamp': 1445797800,
'upload_date': '20151025', 'upload_date': '20151025',
'_old_archive_ids': ['151025_magie_farben2_tex'], '_old_archive_ids': ['zdf 151025_magie_farben2_tex'],
}, },
}, { }, {
# Video belongs to a playlist, play URL # Video belongs to a playlist, play URL
@ -266,7 +267,7 @@ class ZDFIE(ZDFBaseIE):
'episode_number': 2, 'episode_number': 2,
'timestamp': 1445797800, 'timestamp': 1445797800,
'upload_date': '20151025', 'upload_date': '20151025',
'_old_archive_ids': ['151025_magie_farben2_tex'], '_old_archive_ids': ['zdf 151025_magie_farben2_tex'],
}, },
'params': {'skip_download': True}, 'params': {'skip_download': True},
}, { }, {
@ -288,7 +289,7 @@ class ZDFIE(ZDFBaseIE):
'episode_number': 2, 'episode_number': 2,
'timestamp': 1445797800, 'timestamp': 1445797800,
'upload_date': '20151025', 'upload_date': '20151025',
'_old_archive_ids': ['151025_magie_farben2_tex'], '_old_archive_ids': ['zdf 151025_magie_farben2_tex'],
}, },
'params': {'skip_download': True}, 'params': {'skip_download': True},
}, { }, {
@ -311,7 +312,7 @@ class ZDFIE(ZDFBaseIE):
'episode_number': 370, 'episode_number': 370,
'timestamp': 1639946700, 'timestamp': 1639946700,
'upload_date': '20211219', 'upload_date': '20211219',
'_old_archive_ids': ['211219_sendung_hjo_dgs'], '_old_archive_ids': ['zdf 211219_sendung_hjo_dgs'],
}, },
}, { }, {
# Video that requires fallback extraction # Video that requires fallback extraction
@ -326,7 +327,7 @@ class ZDFIE(ZDFBaseIE):
'thumbnail': 'https://www.zdf.de/assets/dobrindt-csu-berlin-direkt-100~1920x1080?cb=1743357653736', 'thumbnail': 'https://www.zdf.de/assets/dobrindt-csu-berlin-direkt-100~1920x1080?cb=1743357653736',
'timestamp': 1743374520, 'timestamp': 1743374520,
'upload_date': '20250330', 'upload_date': '20250330',
'_old_archive_ids': ['250330_clip_2_bdi'], '_old_archive_ids': ['zdf 250330_clip_2_bdi'],
}, },
}, { }, {
'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html', 'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html',
@ -346,7 +347,7 @@ class ZDFIE(ZDFBaseIE):
'episode_number': 1, 'episode_number': 1,
'timestamp': 1635520560, 'timestamp': 1635520560,
'upload_date': '20211029', 'upload_date': '20211029',
'_old_archive_ids': ['video_funk_1770473'], '_old_archive_ids': ['zdf video_funk_1770473'],
}, },
}, { }, {
'url': 'https://www.zdf.de/serien/soko-stuttgart/das-geld-anderer-leute-100.html', 'url': 'https://www.zdf.de/serien/soko-stuttgart/das-geld-anderer-leute-100.html',
@ -365,7 +366,7 @@ class ZDFIE(ZDFBaseIE):
'episode_number': 10, 'episode_number': 10,
'timestamp': 1728983700, 'timestamp': 1728983700,
'upload_date': '20241015', 'upload_date': '20241015',
'_old_archive_ids': ['191205_1800_sendung_sok8'], '_old_archive_ids': ['zdf 191205_1800_sendung_sok8'],
}, },
}, { }, {
'url': 'https://www.zdf.de/serien/northern-lights/begegnung-auf-der-bruecke-100.html', 'url': 'https://www.zdf.de/serien/northern-lights/begegnung-auf-der-bruecke-100.html',
@ -384,7 +385,7 @@ class ZDFIE(ZDFBaseIE):
'episode_number': 1, 'episode_number': 1,
'timestamp': 1738546500, 'timestamp': 1738546500,
'upload_date': '20250203', 'upload_date': '20250203',
'_old_archive_ids': ['240319_2310_sendung_not'], '_old_archive_ids': ['zdf 240319_2310_sendung_not'],
}, },
}, { }, {
# Same as https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html # Same as https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html
@ -483,35 +484,29 @@ query VideoByCanonical($canonical: String!) {
f'https://zdf-prod-futura.zdf.de/mediathekV2/document/{document_id}', f'https://zdf-prod-futura.zdf.de/mediathekV2/document/{document_id}',
document_id, note='Downloading fallback metadata', document_id, note='Downloading fallback metadata',
errnote='Failed to download fallback metadata') errnote='Failed to download fallback metadata')
document = video['document']
content_id = document.get('basename')
formats = [] formats = []
formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list) format_urls = set()
document = formitaeten and video['document'] for f in traverse_obj(document, ('formitaeten', ..., {dict})):
if formitaeten: self._extract_format(document_id, formats, format_urls, f)
title = document['titel']
content_id = document['basename']
format_urls = set()
for f in formitaeten or []:
self._extract_format(content_id, formats, format_urls, f)
thumbnails = [] thumbnails = []
teaser_bild = document.get('teaserBild') for thumbnail_key, thumbnail in traverse_obj(document, ('teaserBild', {dict.items})):
if isinstance(teaser_bild, dict): thumbnail_url = traverse_obj(thumbnail, ('url', {url_or_none}))
for thumbnail_key, thumbnail in teaser_bild.items(): if not thumbnail_url:
thumbnail_url = try_get( continue
thumbnail, lambda x: x['url'], str) thumbnails.append({
if thumbnail_url: 'url': thumbnail_url,
thumbnails.append({ 'id': thumbnail_key,
'url': thumbnail_url, 'width': int_or_none(thumbnail.get('width')),
'id': thumbnail_key, 'height': int_or_none(thumbnail.get('height')),
'width': int_or_none(thumbnail.get('width')), })
'height': int_or_none(thumbnail.get('height')),
})
return { return {
'id': document_id, 'id': document_id,
'title': title, 'title': document.get('titel'),
'description': document.get('beschreibung'), 'description': document.get('beschreibung'),
'duration': int_or_none(document.get('length')), 'duration': int_or_none(document.get('length')),
'timestamp': unified_timestamp(document.get('date')) or unified_timestamp( 'timestamp': unified_timestamp(document.get('date')) or unified_timestamp(
@ -519,7 +514,7 @@ query VideoByCanonical($canonical: String!) {
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'subtitles': self._extract_subtitles(document.get('captions') or []), 'subtitles': self._extract_subtitles(document.get('captions') or []),
'formats': formats, 'formats': formats,
'_old_archive_ids': [content_id], '_old_archive_ids': [make_archive_id(ZDFIE, content_id)] if content_id else [],
} }
def _real_extract(self, url): def _real_extract(self, url):
@ -546,7 +541,7 @@ query VideoByCanonical($canonical: String!) {
# because it is not available during playlist extraction. # because it is not available during playlist extraction.
# We fix it here manually instead of inside the method # We fix it here manually instead of inside the method
# because other extractors do rely on using it as their ID. # because other extractors do rely on using it as their ID.
ptmd_data['_old_archive_ids'] = [ptmd_data['id']] ptmd_data['_old_archive_ids'] = [make_archive_id(ZDFIE, ptmd_data['id'])]
del ptmd_data['id'] del ptmd_data['id']
return { return {