Merge branch 'yt-dlp:master' into misc/cleanup

This commit is contained in:
bashonly 2025-04-30 17:54:36 -05:00
commit 3addb82d1c
No known key found for this signature in database
GPG Key ID: 783F096F253D15B0
4 changed files with 759 additions and 479 deletions

View File

@ -1,9 +1,15 @@
from .zdf import ZDFBaseIE
from ..utils import (
int_or_none,
merge_dicts,
parse_iso8601,
)
from ..utils.traversal import require, traverse_obj
class DreiSatIE(ZDFBaseIE):
IE_NAME = '3sat'
_VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
_VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/?#]+/)*(?P<id>[^/?#&]+)\.html'
_TESTS = [{
'url': 'https://www.3sat.de/dokumentation/reise/traumziele-suedostasiens-die-philippinen-und-vietnam-102.html',
'info_dict': {
@ -12,40 +18,59 @@ class DreiSatIE(ZDFBaseIE):
'title': 'Traumziele Südostasiens (1/2): Die Philippinen und Vietnam',
'description': 'md5:26329ce5197775b596773b939354079d',
'duration': 2625.0,
'thumbnail': 'https://www.3sat.de/assets/traumziele-suedostasiens-die-philippinen-und-vietnam-100~2400x1350?cb=1699870351148',
'thumbnail': 'https://www.3sat.de/assets/traumziele-suedostasiens-die-philippinen-und-vietnam-100~original?cb=1699870351148',
'episode': 'Traumziele Südostasiens (1/2): Die Philippinen und Vietnam',
'episode_id': 'POS_cc7ff51c-98cf-4d12-b99d-f7a551de1c95',
'timestamp': 1738593000,
'upload_date': '20250203',
'timestamp': 1747920900,
'upload_date': '20250522',
},
}, {
# Same as https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html
'url': 'https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html',
'md5': '0aff3e7bc72c8813f5e0fae333316a1d',
'url': 'https://www.3sat.de/film/ab-18/ab-18---mein-fremdes-ich-100.html',
'md5': 'f92638413a11d759bdae95c9d8ec165c',
'info_dict': {
'id': '141007_ab18_10wochensommer_film',
'id': '221128_mein_fremdes_ich2_ab18',
'ext': 'mp4',
'title': 'Ab 18! - 10 Wochen Sommer',
'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26',
'duration': 2660,
'timestamp': 1608604200,
'upload_date': '20201222',
'title': 'Ab 18! - Mein fremdes Ich',
'description': 'md5:cae0c0b27b7426d62ca0dda181738bf0',
'duration': 2625.0,
'thumbnail': 'https://www.3sat.de/assets/ab-18---mein-fremdes-ich-106~original?cb=1666081865812',
'episode': 'Ab 18! - Mein fremdes Ich',
'episode_id': 'POS_6225d1ca-a0d5-45e3-870b-e783ee6c8a3f',
'timestamp': 1695081600,
'upload_date': '20230919',
},
'skip': '410 Gone',
}, {
'url': 'https://www.3sat.de/gesellschaft/schweizweit/waidmannsheil-100.html',
'url': 'https://www.3sat.de/gesellschaft/37-grad-leben/aus-dem-leben-gerissen-102.html',
'md5': 'a903eaf8d1fd635bd3317cd2ad87ec84',
'info_dict': {
'id': '140913_sendung_schweizweit',
'id': '250323_0903_sendung_sgl',
'ext': 'mp4',
'title': 'Waidmannsheil',
'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
'timestamp': 1410623100,
'upload_date': '20140913',
'title': 'Plötzlich ohne dich',
'description': 'md5:380cc10659289dd91510ad8fa717c66b',
'duration': 1620.0,
'thumbnail': 'https://www.3sat.de/assets/37-grad-leben-106~original?cb=1645537156810',
'episode': 'Plötzlich ohne dich',
'episode_id': 'POS_faa7a93c-c0f2-4d51-823f-ce2ac3ee191b',
'timestamp': 1743162540,
'upload_date': '20250328',
},
'params': {
'skip_download': True,
}, {
# Video with chapters
'url': 'https://www.3sat.de/kultur/buchmesse/dein-buch-das-beste-von-der-leipziger-buchmesse-2025-teil-1-100.html',
'md5': '6b95790ce52e75f0d050adcdd2711ee6',
'info_dict': {
'id': '250330_dein_buch1_bum',
'ext': 'mp4',
'title': 'dein buch - Das Beste von der Leipziger Buchmesse 2025 - Teil 1',
'description': 'md5:bae51bfc22f15563ce3acbf97d2e8844',
'duration': 5399.0,
'thumbnail': 'https://www.3sat.de/assets/buchmesse-kerkeling-100~original?cb=1743329640903',
'chapters': 'count:24',
'episode': 'dein buch - Das Beste von der Leipziger Buchmesse 2025 - Teil 1',
'episode_id': 'POS_1ef236cc-b390-401e-acd0-4fb4b04315fb',
'timestamp': 1743327000,
'upload_date': '20250330',
},
'skip': '404 Not Found',
}, {
# Same as https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html
'url': 'https://www.3sat.de/film/spielfilm/der-hauptmann-100.html',
@ -58,11 +83,42 @@ class DreiSatIE(ZDFBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
player = self._search_json(
r'data-zdfplayer-jsb=(["\'])', webpage, 'player JSON', video_id)
player_url = player['content']
api_token = f'Bearer {player["apiToken"]}'
webpage = self._download_webpage(url, video_id, fatal=False)
if webpage:
player = self._extract_player(webpage, url, fatal=False)
if player:
return self._extract_regular(url, player, video_id)
content = self._call_api(player_url, video_id, 'video metadata', api_token)
return self._extract_mobile(video_id)
video_target = content['mainVideoContent']['http://zdf.de/rels/target']
ptmd_path = traverse_obj(video_target, (
(('streams', 'default'), None),
('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template'),
{str}, any, {require('ptmd path')}))
ptmd_url = self._expand_ptmd_template(player_url, ptmd_path)
aspect_ratio = self._parse_aspect_ratio(video_target.get('aspectRatio'))
info = self._extract_ptmd(ptmd_url, video_id, api_token, aspect_ratio)
return merge_dicts(info, {
**traverse_obj(content, {
'title': (('title', 'teaserHeadline'), {str}, any),
'episode': (('title', 'teaserHeadline'), {str}, any),
'description': (('leadParagraph', 'teasertext'), {str}, any),
'timestamp': ('editorialDate', {parse_iso8601}),
}),
**traverse_obj(video_target, {
'duration': ('duration', {int_or_none}),
'chapters': ('streamAnchorTag', {self._extract_chapters}),
}),
'thumbnails': self._extract_thumbnails(traverse_obj(content, ('teaserImageRef', 'layouts', {dict}))),
**traverse_obj(content, ('programmeItem', 0, 'http://zdf.de/rels/target', {
'series_id': ('http://zdf.de/rels/cmdm/series', 'seriesUuid', {str}),
'series': ('http://zdf.de/rels/cmdm/series', 'seriesTitle', {str}),
'season': ('http://zdf.de/rels/cmdm/season', 'seasonTitle', {str}),
'season_number': ('http://zdf.de/rels/cmdm/season', 'seasonNumber', {int_or_none}),
'season_id': ('http://zdf.de/rels/cmdm/season', 'seasonUuid', {str}),
'episode_number': ('episodeNumber', {int_or_none}),
'episode_id': ('contentId', {str}),
})),
})

View File

@ -1,5 +1,3 @@
import re
from .youtube import YoutubeIE
from .zdf import ZDFBaseIE
from ..utils import (
@ -7,44 +5,27 @@ from ..utils import (
merge_dicts,
try_get,
unified_timestamp,
urljoin,
)
class PhoenixIE(ZDFBaseIE):
IE_NAME = 'phoenix.de'
_VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/]+/)*[^/?#&]*-a-(?P<id>\d+)\.html'
_VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/?#]+/)*[^/?#&]*-a-(?P<id>\d+)\.html'
_TESTS = [{
# Same as https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html
'url': 'https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html',
'md5': '34ec321e7eb34231fd88616c65c92db0',
'url': 'https://www.phoenix.de/sendungen/dokumentationen/spitzbergen-a-893349.html',
'md5': 'a79e86d9774d0b3f2102aff988a0bd32',
'info_dict': {
'id': '210222_phx_nachgehakt_corona_protest',
'id': '221215_phx_spitzbergen',
'ext': 'mp4',
'title': 'Wohin führt der Protest in der Pandemie?',
'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
'duration': 1691,
'timestamp': 1613902500,
'upload_date': '20210221',
'title': 'Spitzbergen',
'description': 'Film von Tilmann Bünz',
'duration': 728.0,
'timestamp': 1555600500,
'upload_date': '20190418',
'uploader': 'Phoenix',
'series': 'corona nachgehakt',
'episode': 'Wohin führt der Protest in der Pandemie?',
},
}, {
# Youtube embed
'url': 'https://www.phoenix.de/sendungen/gespraeche/phoenix-streitgut-brennglas-corona-a-1965505.html',
'info_dict': {
'id': 'hMQtqFYjomk',
'ext': 'mp4',
'title': 'phoenix streitgut: Brennglas Corona - Wie gerecht ist unsere Gesellschaft?',
'description': 'md5:ac7a02e2eb3cb17600bc372e4ab28fdd',
'duration': 3509,
'upload_date': '20201219',
'uploader': 'phoenix',
'uploader_id': 'phoenix',
},
'params': {
'skip_download': True,
'thumbnail': 'https://www.phoenix.de/sixcms/media.php/21/Bergspitzen1.png',
'series': 'Dokumentationen',
'episode': 'Spitzbergen',
},
}, {
'url': 'https://www.phoenix.de/entwicklungen-in-russland-a-2044720.html',
@ -90,8 +71,8 @@ class PhoenixIE(ZDFBaseIE):
content_id = details['tracking']['nielsen']['content']['assetid']
info = self._extract_ptmd(
f'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/{content_id}',
content_id, None, url)
f'https://tmd.phoenix.de/tmd/2/android_native_6/vod/ptmd/phoenix/{content_id}',
content_id)
duration = int_or_none(try_get(
details, lambda x: x['tracking']['nielsen']['content']['length']))
@ -101,20 +82,8 @@ class PhoenixIE(ZDFBaseIE):
str)
episode = title if details.get('contentType') == 'episode' else None
thumbnails = []
teaser_images = try_get(details, lambda x: x['teaserImageRef']['layouts'], dict) or {}
for thumbnail_key, thumbnail_url in teaser_images.items():
thumbnail_url = urljoin(url, thumbnail_url)
if not thumbnail_url:
continue
thumbnail = {
'url': thumbnail_url,
}
m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
if m:
thumbnail['width'] = int(m.group(1))
thumbnail['height'] = int(m.group(2))
thumbnails.append(thumbnail)
thumbnails = self._extract_thumbnails(teaser_images)
return merge_dicts(info, {
'id': content_id,

View File

@ -2122,23 +2122,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return ret
return inner
def _load_nsig_code_from_cache(self, player_url):
cache_id = ('youtube-nsig', self._player_js_cache_key(player_url))
def _load_player_data_from_cache(self, name, player_url):
cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url))
if func_code := self._player_cache.get(cache_id):
return func_code
if data := self._player_cache.get(cache_id):
return data
func_code = self.cache.load(*cache_id, min_ver='2025.03.31')
if func_code:
self._player_cache[cache_id] = func_code
data = self.cache.load(*cache_id, min_ver='2025.03.31')
if data:
self._player_cache[cache_id] = data
return func_code
return data
def _store_nsig_code_to_cache(self, player_url, func_code):
cache_id = ('youtube-nsig', self._player_js_cache_key(player_url))
def _store_player_data_to_cache(self, name, player_url, data):
cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url))
if cache_id not in self._player_cache:
self.cache.store(*cache_id, func_code)
self._player_cache[cache_id] = func_code
self.cache.store(*cache_id, data)
self._player_cache[cache_id] = data
def _decrypt_signature(self, s, video_id, player_url):
"""Turn the encrypted s field into a working signature"""
@ -2181,7 +2181,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self.write_debug(f'Decrypted nsig {s} => {ret}')
# Only cache nsig func JS code to disk if successful, and only once
self._store_nsig_code_to_cache(player_url, func_code)
self._store_player_data_to_cache('nsig', player_url, func_code)
return ret
def _extract_n_function_name(self, jscode, player_url=None):
@ -2300,7 +2300,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_code(self, video_id, player_url):
player_id = self._extract_player_info(player_url)
func_code = self._load_nsig_code_from_cache(player_url)
func_code = self._load_player_data_from_cache('nsig', player_url)
jscode = func_code or self._load_player(video_id, player_url)
jsi = JSInterpreter(jscode)
@ -2336,23 +2336,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
Extract signatureTimestamp (sts)
Required to tell API what sig/player version is in use.
"""
sts = None
if isinstance(ytcfg, dict):
sts = int_or_none(ytcfg.get('STS'))
if sts := traverse_obj(ytcfg, ('STS', {int_or_none})):
return sts
if not player_url:
error_msg = 'Cannot extract signature timestamp without player url'
if fatal:
raise ExtractorError(error_msg)
self.report_warning(error_msg)
return None
sts = self._load_player_data_from_cache('sts', player_url)
if sts:
return sts
if code := self._load_player(video_id, player_url, fatal=fatal):
sts = int_or_none(self._search_regex(
r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
'JS player signature timestamp', group='sts', fatal=fatal))
if sts:
self._store_player_data_to_cache('sts', player_url, sts)
if not sts:
# Attempt to extract from player
if player_url is None:
error_msg = 'Cannot extract signature timestamp without player_url.'
if fatal:
raise ExtractorError(error_msg)
self.report_warning(error_msg)
return
code = self._load_player(video_id, player_url, fatal=fatal)
if code:
sts = int_or_none(self._search_regex(
r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
'JS player signature timestamp', group='sts', fatal=fatal))
return sts
def _mark_watched(self, video_id, player_responses):
@ -3234,12 +3238,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
encrypted_sig = try_get(sc, lambda x: x['s'][0])
if not all((sc, fmt_url, player_url, encrypted_sig)):
self.report_warning(
f'Some {client_name} client https formats have been skipped as they are missing a url. '
f'{"Your account" if self.is_authenticated else "The current session"} may have '
f'the SSAP (server-side ads) experiment which interferes with yt-dlp. '
f'Please see https://github.com/yt-dlp/yt-dlp/issues/12482 for more details.',
video_id, only_once=True)
msg = f'Some {client_name} client https formats have been skipped as they are missing a url. '
if client_name == 'web':
msg += 'YouTube is forcing SABR streaming for this client. '
else:
msg += (
f'YouTube may have enabled the SABR-only or Server-Side Ad Placement experiment for '
f'{"your account" if self.is_authenticated else "the current session"}. '
)
msg += 'See https://github.com/yt-dlp/yt-dlp/issues/12482 for more details'
self.report_warning(msg, video_id, only_once=True)
continue
try:
fmt_url += '&{}={}'.format(
@ -3326,8 +3334,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'width': int_or_none(fmt.get('width')),
'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
'language_preference': PREFERRED_LANG_VALUE if is_original else 5 if is_default else -10 if is_descriptive else -1,
# Strictly de-prioritize broken, damaged and 3gp formats
'preference': -20 if require_po_token else -10 if is_damaged else -2 if itag == '17' else None,
# Strictly de-prioritize damaged and 3gp formats
'preference': -10 if is_damaged else -2 if itag == '17' else None,
}
mime_mobj = re.match(
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')

File diff suppressed because it is too large Load Diff