Rework extractor

This commit is contained in:
sepro 2025-01-12 10:07:00 +01:00
parent 8503b2b62c
commit 46f8f1d38a

View File

@ -1,52 +1,51 @@
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
from ..utils import extract_attributes, smuggle_url
from ..utils import url_or_none
from ..utils.traversal import traverse_obj
class DrTalksIE(InfoExtractor):
    """Extractor for single video pages under ``drtalks.com/videos/<slug>``.

    The page's embedded Next.js data supplies a Brightcove video link; the
    actual media extraction is handed off to ``BrightcoveNewIE`` while title,
    description and thumbnail are taken from the page data.
    """

    # The slug after /videos/ is used as the display/video id for page requests.
    _VALID_URL = r'https?://(?:www\.)?drtalks\.com/videos/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://drtalks.com/videos/six-pillars-of-resilience-tools-for-managing-stress-and-flourishing/',
        'info_dict': {
            'id': '6366193757112',
            'ext': 'mp4',
            'uploader_id': '6314452011001',
            'tags': ['resilience'],
            'description': 'md5:9c6805aee237ee6de8052461855b9dda',
            'timestamp': 1734546659,
            'thumbnail': 'https://drtalks.com/wp-content/uploads/2024/12/Episode-82-Eva-Selhub-DrTalks-Thumbs.jpg',
            'title': 'Six Pillars of Resilience: Tools for Managing Stress and Flourishing',
            'duration': 2800.682,
            'upload_date': '20241218',
        },
    }, {
        'url': 'https://drtalks.com/videos/the-pcos-puzzle-mastering-metabolic-health-with-marcelle-pick/',
        'info_dict': {
            'id': '6364699891112',
            'ext': 'mp4',
            'title': 'The PCOS Puzzle: Mastering Metabolic Health with Marcelle Pick',
            'description': 'md5:e87cbe00ca50135d5702787fc4043aaa',
            'thumbnail': 'https://drtalks.com/wp-content/uploads/2024/11/Episode-34-Marcelle-Pick-OBGYN-NP-DrTalks.jpg',
            'duration': 3515.2,
            'tags': ['pcos'],
            'upload_date': '20241114',
            'timestamp': 1731592119,
            'uploader_id': '6314452011001',
        },
    }]

    def _real_extract(self, url):
        """Resolve a drtalks.com video page to its Brightcove video.

        Downloads the page, reads the ``video`` object from the embedded
        Next.js data, and returns a ``url_transparent`` result pointing at
        the Brightcove link with page-level metadata attached.

        Raises ``KeyError`` (via plain indexing) if the expected
        ``props/pageProps/data/video`` path or the Brightcove link is absent.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # Plain indexing on purpose: without this object there is nothing to extract.
        next_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['data']['video']

        return self.url_result(
            # The Brightcove link is mandatory, hence the direct subscript.
            next_data['videos']['brightcoveVideoLink'], BrightcoveNewIE, video_id,
            url_transparent=True,
            # Optional metadata: missing or wrongly-typed fields are simply omitted.
            **traverse_obj(next_data, {
                'title': ('title', {str}),
                # NOTE(review): 'summury' looks like a misspelling of 'summary';
                # presumably it mirrors the site's own field name - confirm
                # against a live payload before changing it.
                'description': ('videos', 'summury', {str}),
                'thumbnail': ('featuredImage', 'node', 'sourceUrl', {url_or_none}),
            }))