[ie/ZDF] Simplify and improve format extraction

This commit is contained in:
InvalidUsernameException 2025-04-01 22:05:13 +02:00
parent d15e2ee36f
commit a5903d16ed

View File

@ -17,7 +17,6 @@ from ..utils import (
traverse_obj, traverse_obj,
try_get, try_get,
unified_timestamp, unified_timestamp,
update_url_query,
url_or_none, url_or_none,
urljoin, urljoin,
variadic, variadic,
@ -56,40 +55,36 @@ class ZDFBaseIE(InfoExtractor):
return subtitles return subtitles
def _extract_format(self, video_id, formats, format_urls, meta): def _extract_format(self, video_id, formats, format_urls, meta):
format_url = url_or_none(meta.get('url')) format_url = meta['url']
if not format_url or format_url in format_urls: if format_url in format_urls:
return return
format_urls.add(format_url) format_urls.add(format_url)
ext = determine_ext(format_url)
mime_type, ext = meta.get('mimeType'), determine_ext(format_url) if ext == 'm3u8':
if mime_type == 'application/x-mpegURL' or ext == 'm3u8': fmts = self._extract_m3u8_formats(
new_formats = self._extract_m3u8_formats( format_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
format_url, video_id, 'mp4', m3u8_id='hls',
entry_protocol='m3u8_native', fatal=False)
elif mime_type == 'application/f4m+xml' or ext == 'f4m':
new_formats = self._extract_f4m_formats(
update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False)
elif ext == 'mpd': elif ext == 'mpd':
new_formats = self._extract_mpd_formats( fmts = self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False) format_url, video_id, mpd_id='dash', fatal=False)
else: else:
f = parse_codecs(meta.get('mimeCodec')) f = parse_codecs(meta.get('mimeCodec'))
if not f and meta.get('type'): if not f and meta.get('type'):
data = meta['type'].split('_') data = meta['type'].split('_')
if try_get(data, lambda x: x[2]) == ext: if len(data) >= 3 and data[2] == ext:
f = {'vcodec': data[0], 'acodec': data[1]} f = {'vcodec': data[0], 'acodec': data[1]}
f.update({ f.update({
'url': format_url, 'url': format_url,
'height': int_or_none(meta.get('highestVerticalResolution')),
'format_id': join_nonempty('http', meta.get('type'), meta.get('quality')), 'format_id': join_nonempty('http', meta.get('type'), meta.get('quality')),
'tbr': int_or_none(self._search_regex(r'_(\d+)k_', format_url, 'tbr', default=None)), 'tbr': int_or_none(self._search_regex(r'_(\d+)k_', format_url, 'tbr', default=None)),
}) })
new_formats = [f] fmts = [f]
formats.extend(merge_dicts(f, { formats.extend(merge_dicts(f, {
'format_note': join_nonempty('quality', 'class', from_dict=meta, delim=', '), 'format_note': join_nonempty(meta.get('quality'), meta.get('class'), delim=', '),
'language': meta.get('language'), 'language': meta.get('language'),
'language_preference': 10 if meta.get('class') == 'main' else -10 if meta.get('class') == 'ad' else -1, 'language_preference': 10 if meta.get('class') == 'main' else -10 if meta.get('class') == 'ad' else -1,
'quality': qualities(self._QUALITIES)(meta.get('quality')), 'quality': qualities(self._QUALITIES)(meta.get('quality')),
}) for f in new_formats) }) for f in fmts)
def _extract_ptmd(self, api_base_url, templates, video_id, api_token=None): def _extract_ptmd(self, api_base_url, templates, video_id, api_token=None):
# TODO: HTTPS formats are extracted without resolution information # TODO: HTTPS formats are extracted without resolution information
@ -107,31 +102,20 @@ class ZDFBaseIE(InfoExtractor):
'{playerId}', 'android_native_6')) '{playerId}', 'android_native_6'))
ptmd = self._call_api(ptmd_url, video_id, 'PTMD data', api_token) ptmd = self._call_api(ptmd_url, video_id, 'PTMD data', api_token)
content_id = content_id or ptmd.get('basename') or ptmd_url.split('/')[-1] content_id = content_id or ptmd.get('basename') or ptmd_url.split('/')[-1]
duration = (duration or float_or_none(try_get( duration = (duration or traverse_obj(ptmd, ('attributes', 'duration', 'value', {float_or_none(scale=1000)})))
ptmd, lambda x: x['attributes']['duration']['value']), scale=1000))
src_captions += ptmd.get('captions') or [] src_captions += ptmd.get('captions') or []
for p in ptmd['priorityList']: for stream in traverse_obj(ptmd, ('priorityList', ..., 'formitaeten', ..., {dict})):
formitaeten = p.get('formitaeten') for quality in traverse_obj(stream, ('qualities', ..., {dict})):
if not isinstance(formitaeten, list): for variant in traverse_obj(quality, ('audio', 'tracks', lambda _, v: url_or_none(v['uri']))):
continue self._extract_format(
for f in formitaeten: content_id, formats, track_uris, {
f_qualities = f.get('qualities') 'url': variant.get('uri'),
if not isinstance(f_qualities, list): 'type': stream.get('type'),
continue 'mimeCodec': quality.get('mimeCodec'),
for quality in f_qualities: 'quality': quality.get('quality'),
tracks = try_get(quality, lambda x: x['audio']['tracks'], list) 'class': variant.get('class'),
if not tracks: 'language': variant.get('language'),
continue })
for track in tracks:
self._extract_format(
content_id, formats, track_uris, {
'url': track.get('uri'),
'type': f.get('type'),
'mimeType': f.get('mimeType'),
'quality': quality.get('quality'),
'class': track.get('class'),
'language': track.get('language'),
})
return { return {
'extractor_key': ZDFIE.ie_key(), 'extractor_key': ZDFIE.ie_key(),