Add necessary imports

Fix url regex
Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2026-06-25 20:24:47 +00:00 · 2025-05-05 15:49:54 -04:00 · 2025-05-05 15:46:27 -04:00 · 2025-05-05 15:46:12 -04:00 · 2025-05-05 15:45:25 -04:00 · 2025-05-05 15:43:55 -04:00
1 changed file with 15 additions and 25 deletions
--- a/yt_dlp/extractor/playerfm.py
+++ b/yt_dlp/extractor/playerfm.py
@@ -1,11 +1,11 @@

-from .common import ExtractorError, InfoExtractor
-from ..utils import determine_ext, join_nonempty
+from .common import InfoExtractor
+from ..utils import clean_html, int_or_none, url_or_none
 from ..utils.traversal import traverse_obj


 class PlayerFmIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www.)?player.fm/(?:series)?/[a-z\d-]+/(?P<id>[a-z\d-]+)'
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?player\.fm/(?:series/)?[\w-]+/(?P<id>[\w-]+))'
    _TESTS = [{
        'url': 'https://player.fm/series/chapo-trap-house/movie-mindset-33-casino-feat-felix',
        'info_dict': {
@@ -13,7 +13,7 @@ class PlayerFmIE(InfoExtractor):
            'thumbnail': r're:^https://.*\.(jpg|png)',
            'title': 'Movie Mindset 33 - Casino feat. Felix',
            'creators': ['Chapo Trap House'],
-            'description': 'The first episode of this season of Movie Mindset is free for all listeners as always. To listen to the rest of the season, subscribe at <a href="http://www.patreon.com/chapotraphouse">www.patreon.com/chapotraphouse</a> When you love movies, you’ve got to watch them. There’s no other way…Movie Mindset Season 3 commences with our first ever single feature on the most referenced movie in Chapo Trap House history: Martin Scorsese’s masterpiece Casino. Will and Hesse are joined by Felix to take a kaleidoscopic and dizzying dive into the inferno of American greed that is Las Vegas. Anchored by a triumvirate of all career great performances from Robert De Niro, Sharon Stone and Joe Pesci in FULL PSYCHO MODE, Casino is by equal turns hilarious and stomach turning and stands alone as Scorsese’s grandest and most generous examination of evil and the tragic flaws that doom us all. Should you listen even if you haven’t seen this movie? Why take a chance? At least that the way we feel about it.',
+            'description': r're:The first episode of this season of Movie Mindset is free .+ we feel about it\.',
            'duration': 6830,
            'ext': 'mp3',
        },
@@ -43,28 +43,18 @@ class PlayerFmIE(InfoExtractor):

    def _real_extract(self, url):
        # podcast url is always after last slash
-        video_id = self._match_id(url)
-        data = self._download_json(url + '.json', None)
+        video_id, url = self._match_valid_url(url).group('id', 'url')
+        data = self._download_json(url + '.json', video_id)

-        title = data.get('title')
-        description = data.get('description')
-        duration = data.get('duration')
-        thumbnail = traverse_obj(data, ('image', 'url'), ('series', 'image', 'url'))
-        creators = [traverse_obj(data, ('series', 'author'))]
-
-        video_url = join_nonempty('https', self._search_regex(r'redirect.mp3/(.*)', data['url'], 'redirect'), delim='://')
-        if not video_url:
-            raise ExtractorError('URL to podcast not found', expected=True)
-        formats = [{
-            'url': video_url,
-            'ext': determine_ext(video_url, default_ext=''),
-        }]
        return {
            'id': video_id,
-            'thumbnail': thumbnail,
-            'title': title,
-            'creators': creators,
-            'description': description,
-            'duration': duration,
-            'formats': formats,
+            'url': 'https://' + self._search_regex(r'redirect\.mp3/(.+)', data['url'], 'video url'),
+            'vcodec': 'none',
+            **traverse_obj(data, {
+                'title': ('title', {str}),
+                'description': ('description', {clean_html}),
+                'duration': ('duration', {int_or_none}),
+                'thumbnail': (('image', ('series', 'image')), 'url', {url_or_none}, any),
+                'creators': ('series', 'author', {str}, filter, all, filter),
+            }),
        }
Author	SHA1	Message	Date
Rohan Wadhwa	730a510e97	Add necessary imports	2025-05-05 15:49:54 -04:00
R0hanW	d661f35cd6	Fix url regex Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2025-05-05 15:46:27 -04:00
R0hanW	59979a0680	Use traverse_obj to unpack json data Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2025-05-05 15:46:12 -04:00
R0hanW	9c4772bc12	Add vcodec to return; remove unnecessary code Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2025-05-05 15:45:25 -04:00
R0hanW	fdf89a1d39	Inline code Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2025-05-05 15:43:55 -04:00
R0hanW	a181f56cb0	Use url capture group instead of adding .json to url Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2025-05-05 15:43:43 -04:00
R0hanW	9ddb1aac65	Use regex for description in test case Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2025-05-05 15:43:19 -04:00