From a5903d16edd4750b22775f2447bb4574e90f19c2 Mon Sep 17 00:00:00 2001 From: InvalidUsernameException Date: Tue, 1 Apr 2025 22:05:13 +0200 Subject: [PATCH] [ie/ZDF] Simplify and improve format extraction --- yt_dlp/extractor/zdf.py | 66 ++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 41 deletions(-) diff --git a/yt_dlp/extractor/zdf.py b/yt_dlp/extractor/zdf.py index 29b77b797c..f559b6d188 100644 --- a/yt_dlp/extractor/zdf.py +++ b/yt_dlp/extractor/zdf.py @@ -17,7 +17,6 @@ from ..utils import ( traverse_obj, try_get, unified_timestamp, - update_url_query, url_or_none, urljoin, variadic, @@ -56,40 +55,36 @@ class ZDFBaseIE(InfoExtractor): return subtitles def _extract_format(self, video_id, formats, format_urls, meta): - format_url = url_or_none(meta.get('url')) - if not format_url or format_url in format_urls: + format_url = meta['url'] + if format_url in format_urls: return format_urls.add(format_url) - - mime_type, ext = meta.get('mimeType'), determine_ext(format_url) - if mime_type == 'application/x-mpegURL' or ext == 'm3u8': - new_formats = self._extract_m3u8_formats( - format_url, video_id, 'mp4', m3u8_id='hls', - entry_protocol='m3u8_native', fatal=False) - elif mime_type == 'application/f4m+xml' or ext == 'f4m': - new_formats = self._extract_f4m_formats( - update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False) + ext = determine_ext(format_url) + if ext == 'm3u8': + fmts = self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id='hls', fatal=False) elif ext == 'mpd': - new_formats = self._extract_mpd_formats( + fmts = self._extract_mpd_formats( format_url, video_id, mpd_id='dash', fatal=False) else: f = parse_codecs(meta.get('mimeCodec')) if not f and meta.get('type'): data = meta['type'].split('_') - if try_get(data, lambda x: x[2]) == ext: + if len(data) >= 3 and data[2] == ext: f = {'vcodec': data[0], 'acodec': data[1]} f.update({ 'url': format_url, + 'height': int_or_none(meta.get('highestVerticalResolution')), 'format_id': join_nonempty('http', meta.get('type'), meta.get('quality')), 'tbr': int_or_none(self._search_regex(r'_(\d+)k_', format_url, 'tbr', default=None)), }) - new_formats = [f] + fmts = [f] formats.extend(merge_dicts(f, { - 'format_note': join_nonempty('quality', 'class', from_dict=meta, delim=', '), + 'format_note': join_nonempty(meta.get('quality'), meta.get('class'), delim=', '), 'language': meta.get('language'), 'language_preference': 10 if meta.get('class') == 'main' else -10 if meta.get('class') == 'ad' else -1, 'quality': qualities(self._QUALITIES)(meta.get('quality')), - }) for f in new_formats) + }) for f in fmts) def _extract_ptmd(self, api_base_url, templates, video_id, api_token=None): # TODO: HTTPS formats are extracted without resolution information @@ -107,31 +102,20 @@ class ZDFBaseIE(InfoExtractor): '{playerId}', 'android_native_6')) ptmd = self._call_api(ptmd_url, video_id, 'PTMD data', api_token) content_id = content_id or ptmd.get('basename') or ptmd_url.split('/')[-1] - duration = (duration or float_or_none(try_get( - ptmd, lambda x: x['attributes']['duration']['value']), scale=1000)) + duration = (duration or traverse_obj(ptmd, ('attributes', 'duration', 'value', {float_or_none(scale=1000)}))) src_captions += ptmd.get('captions') or [] - for p in ptmd['priorityList']: - formitaeten = p.get('formitaeten') - if not isinstance(formitaeten, list): - continue - for f in formitaeten: - f_qualities = f.get('qualities') - if not isinstance(f_qualities, list): - continue - for quality in f_qualities: - tracks = try_get(quality, lambda x: x['audio']['tracks'], list) - if not tracks: - continue - for track in tracks: - self._extract_format( - content_id, formats, track_uris, { - 'url': track.get('uri'), - 'type': f.get('type'), - 'mimeType': f.get('mimeType'), - 'quality': quality.get('quality'), - 'class': track.get('class'), - 'language': track.get('language'), - }) + for stream in traverse_obj(ptmd, ('priorityList', ..., 'formitaeten', ..., {dict})): + for quality in traverse_obj(stream, ('qualities', ..., {dict})): + for variant in traverse_obj(quality, ('audio', 'tracks', lambda _, v: url_or_none(v['uri']))): + self._extract_format( + content_id, formats, track_uris, { + 'url': variant.get('uri'), + 'type': stream.get('type'), + 'mimeCodec': quality.get('mimeCodec'), + 'quality': quality.get('quality'), + 'class': variant.get('class'), + 'language': variant.get('language'), + }) return { 'extractor_key': ZDFIE.ie_key(),