From ea5b13fd0dd6ee9e3dd16e3dd6b6a7cc32bc19a0 Mon Sep 17 00:00:00 2001 From: sepro Date: Mon, 11 Nov 2024 20:02:07 +0100 Subject: [PATCH] Simplify code --- yt_dlp/extractor/_extractors.py | 4 -- yt_dlp/extractor/spreaker.py | 105 ++++++++------------------------ 2 files changed, 26 insertions(+), 83 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index a645dd2a08..0b935fe3a5 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1938,12 +1938,8 @@ from .spotify import ( SpotifyShowIE, ) from .spreaker import ( - SpreakerFeedPageIE, SpreakerIE, - SpreakerPageIE, - SpreakerPodcastPageIE, SpreakerShowIE, - SpreakerShowPageIE, ) from .springboardplatform import SpringboardPlatformIE from .sprout import SproutIE diff --git a/yt_dlp/extractor/spreaker.py b/yt_dlp/extractor/spreaker.py index 87931384bf..c64c2fcd2e 100644 --- a/yt_dlp/extractor/spreaker.py +++ b/yt_dlp/extractor/spreaker.py @@ -2,6 +2,7 @@ import itertools from .common import InfoExtractor from ..utils import ( + filter_dict, float_or_none, int_or_none, parse_qs, @@ -119,48 +120,46 @@ class SpreakerIE(InfoExtractor): def _real_extract(self, url): episode_id = self._match_id(url) data = self._download_json( - f'https://api.spreaker.com/v2/episodes/{episode_id}', - episode_id, query=traverse_obj(parse_qs(url), {'key': ('key', 0)}))['response']['episode'] + f'https://api.spreaker.com/v2/episodes/{episode_id}', episode_id, + query=traverse_obj(parse_qs(url), {'key': ('key', 0)}))['response']['episode'] return _extract_episode(data, episode_id) -class SpreakerPageIE(InfoExtractor): - # NOTE:the sample test doesnt work, need to futher investigate - _VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P[^/?#&]+)' - _TESTS = [{ - 'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - episode_id = self._search_regex( - (r'data-episode_id=["\'](?P\d+)', - r'episode_id\s*:\s*(?P\d+)'), webpage, 'episode id') - return self.url_result( - f'https://api.spreaker.com/episode/{episode_id}', - ie=SpreakerIE.ie_key(), video_id=episode_id) - - class SpreakerShowIE(InfoExtractor): - _VALID_URL = r'https?://api\.spreaker\.com/show/(?P\d+)' + _VALID_URL = [ + r'https?://api\.spreaker\.com/show/(?P\d+)', + r'https?://(?:www\.)?spreaker\.com/podcast/[\w-]+--(?P[\d]+)', + r'https?://(?:www\.)?spreaker\.com/show/(?P\d+)/episodes/feed', + ] _TESTS = [{ 'url': 'https://api.spreaker.com/show/4652058', 'info_dict': { 'id': '4652058', }, 'playlist_mincount': 118, + }, { + 'url': 'https://www.spreaker.com/podcast/health-wealth--5918323', + 'info_dict': { + 'id': '5918323', + }, + 'playlist_mincount': 60, + }, { + 'url': 'https://www.spreaker.com/show/5887186/episodes/feed', + 'info_dict': { + 'id': '5887186', + }, + 'playlist_mincount': 290, }] - def _entries(self, show_id): + def _entries(self, show_id, key=None): for page_num in itertools.count(1): episodes = self._download_json( f'https://api.spreaker.com/show/{show_id}/episodes', - show_id, note=f'Downloading JSON page {page_num}', query={ + show_id, note=f'Downloading JSON page {page_num}', query=filter_dict({ 'page': page_num, 'max_per_page': 100, - }) + 'key': key, + })) pager = try_get(episodes, lambda x: x['response']['pager'], dict) if not pager: break @@ -176,57 +175,5 @@ class SpreakerShowIE(InfoExtractor): def _real_extract(self, url): show_id = self._match_id(url) - return self.playlist_result(self._entries(show_id), playlist_id=show_id) - - -class SpreakerShowPageIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P[^/?#&]+)(?:[?#]|$)(?!/episodes/feed)' - _TESTS = [{ - 'url': 'https://www.spreaker.com/show/success-with-music', - 'info_dict': { - 'id': '2317431', - }, - 'playlist_mincount': 30, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - redirect_url = self._og_search_property('url', webpage, 'redirect url') - return self.url_result( - redirect_url, - SpreakerPodcastPageIE) - - -class SpreakerPodcastPageIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?spreaker\.com/podcast/[\w-]+--(?P[\d]+)' - _TESTS = [{ - 'url': 'https://www.spreaker.com/podcast/health-wealth--5918323', - 'info_dict': { - 'id': '5918323', - }, - 'playlist_mincount': 60, - }] - - def _real_extract(self, url): - show_id = self._match_id(url) - return self.url_result( - f'https://api.spreaker.com/show/{show_id}', - ie=SpreakerShowIE.ie_key(), video_id=show_id) - - -class SpreakerFeedPageIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P\d+)/episodes/feed' - _TESTS = [{ - 'url': 'https://www.spreaker.com/show/5887186/episodes/feed', - 'info_dict': { - 'id': '5887186', - }, - 'playlist_mincount': 290, - }] - - def _real_extract(self, url): - show_id = self._match_id(url) - return self.url_result( - f'https://api.spreaker.com/show/{show_id}', - ie=SpreakerShowIE.ie_key(), video_id=show_id) + key = traverse_obj(parse_qs(url), ('key', 0)) + return self.playlist_result(self._entries(show_id, key), playlist_id=show_id)