Compare commits

...

7 Commits

Author SHA1 Message Date
Rohan Wadhwa
730a510e97 Add necessary imports 2025-05-05 15:49:54 -04:00
R0hanW
d661f35cd6
Fix url regex
Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2025-05-05 15:46:27 -04:00
R0hanW
59979a0680
Use traverse_obj to unpack json data
Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2025-05-05 15:46:12 -04:00
R0hanW
9c4772bc12
Add vcodec to return; remove unnecessary code
Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2025-05-05 15:45:25 -04:00
R0hanW
fdf89a1d39
Inline code
Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2025-05-05 15:43:55 -04:00
R0hanW
a181f56cb0
Use url capture group instead of adding .json to url
Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2025-05-05 15:43:43 -04:00
R0hanW
9ddb1aac65
Use regex for description in test case
Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2025-05-05 15:43:19 -04:00

View File

@ -1,11 +1,11 @@
from .common import ExtractorError, InfoExtractor
from ..utils import determine_ext, join_nonempty
from .common import InfoExtractor
from ..utils import clean_html, int_or_none, url_or_none
from ..utils.traversal import traverse_obj
class PlayerFmIE(InfoExtractor):
_VALID_URL = r'https?://(?:www.)?player.fm/(?:series)?/[a-z\d-]+/(?P<id>[a-z\d-]+)'
_VALID_URL = r'(?P<url>https?://(?:www\.)?player\.fm/(?:series/)?[\w-]+/(?P<id>[\w-]+))'
_TESTS = [{
'url': 'https://player.fm/series/chapo-trap-house/movie-mindset-33-casino-feat-felix',
'info_dict': {
@ -13,7 +13,7 @@ class PlayerFmIE(InfoExtractor):
'thumbnail': r're:^https://.*\.(jpg|png)',
'title': 'Movie Mindset 33 - Casino feat. Felix',
'creators': ['Chapo Trap House'],
'description': 'The first episode of this season of Movie Mindset is free for all listeners as always. To listen to the rest of the season, subscribe at <a href="http://www.patreon.com/chapotraphouse">www.patreon.com/chapotraphouse</a> When you love movies, youve got to watch them. Theres no other way…Movie Mindset Season 3 commences with our first ever single feature on the most referenced movie in Chapo Trap House history: Martin Scorseses masterpiece Casino. Will and Hesse are joined by Felix to take a kaleidoscopic and dizzying dive into the inferno of American greed that is Las Vegas. Anchored by a triumvirate of all career great performances from Robert De Niro, Sharon Stone and Joe Pesci in FULL PSYCHO MODE, Casino is by equal turns hilarious and stomach turning and stands alone as Scorseses grandest and most generous examination of evil and the tragic flaws that doom us all. Should you listen even if you havent seen this movie? Why take a chance? At least that the way we feel about it.',
'description': r're:The first episode of this season of Movie Mindset is free .+ we feel about it\.',
'duration': 6830,
'ext': 'mp3',
},
@ -43,28 +43,18 @@ class PlayerFmIE(InfoExtractor):
def _real_extract(self, url):
# podcast id is always after the last slash of the URL
video_id = self._match_id(url)
data = self._download_json(url + '.json', None)
video_id, url = self._match_valid_url(url).group('id', 'url')
data = self._download_json(url + '.json', video_id)
title = data.get('title')
description = data.get('description')
duration = data.get('duration')
thumbnail = traverse_obj(data, ('image', 'url'), ('series', 'image', 'url'))
creators = [traverse_obj(data, ('series', 'author'))]
video_url = join_nonempty('https', self._search_regex(r'redirect.mp3/(.*)', data['url'], 'redirect'), delim='://')
if not video_url:
raise ExtractorError('URL to podcast not found', expected=True)
formats = [{
'url': video_url,
'ext': determine_ext(video_url, default_ext=''),
}]
return {
'id': video_id,
'thumbnail': thumbnail,
'title': title,
'creators': creators,
'description': description,
'duration': duration,
'formats': formats,
'url': 'https://' + self._search_regex(r'redirect\.mp3/(.+)', data['url'], 'video url'),
'vcodec': 'none',
**traverse_obj(data, {
'title': ('title', {str}),
'description': ('description', {clean_html}),
'duration': ('duration', {int_or_none}),
'thumbnail': (('image', ('series', 'image')), 'url', {url_or_none}, any),
'creators': ('series', 'author', {str}, filter, all, filter),
}),
}