Compare commits

...

2 Commits

Author SHA1 Message Date
Gareth Seddon
8d6e0b29bf
[ie/MatchiTV] Add extractor (#15204)
Authored by: gseddon
2026-02-12 08:14:56 +00:00
Corey Wright
1ea7329cc9
[ie/ApplePodcasts] Fix extractor (#15901)
Closes #15900
Authored by: coreywright
2026-02-12 08:09:37 +00:00
3 changed files with 54 additions and 15 deletions

View File

@ -1077,6 +1077,7 @@ from .markiza import (
) )
from .massengeschmacktv import MassengeschmackTVIE from .massengeschmacktv import MassengeschmackTVIE
from .masters import MastersIE from .masters import MastersIE
from .matchitv import MatchiTVIE
from .matchtv import MatchTVIE from .matchtv import MatchTVIE
from .mave import ( from .mave import (
MaveChannelIE, MaveChannelIE,

View File

@ -11,18 +11,18 @@ from ..utils.traversal import traverse_obj
class ApplePodcastsIE(InfoExtractor): class ApplePodcastsIE(InfoExtractor):
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)' _VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://podcasts.apple.com/us/podcast/ferreck-dawn-to-the-break-of-dawn-117/id1625658232?i=1000665010654', 'url': 'https://podcasts.apple.com/us/podcast/urbana-podcast-724-by-david-penn/id1531349107?i=1000748574256',
'md5': '82cc219b8cc1dcf8bfc5a5e99b23b172', 'md5': 'f8a6f92735d0cfbd5e6a7294151e28d8',
'info_dict': { 'info_dict': {
'id': '1000665010654', 'id': '1000748574256',
'ext': 'mp3', 'ext': 'm4a',
'title': 'Ferreck Dawn - To The Break of Dawn 117', 'title': 'URBANA PODCAST 724 BY DAVID PENN',
'episode': 'Ferreck Dawn - To The Break of Dawn 117', 'episode': 'URBANA PODCAST 724 BY DAVID PENN',
'description': 'md5:8c4f5c2c30af17ed6a98b0b9daf15b76', 'description': 'md5:fec77bacba32db8c9b3dda5486ed085f',
'upload_date': '20240812', 'upload_date': '20260206',
'timestamp': 1723449600, 'timestamp': 1770400801,
'duration': 3596, 'duration': 3602,
'series': 'Ferreck Dawn - To The Break of Dawn', 'series': 'Urbana Radio Show',
'thumbnail': 're:.+[.](png|jpe?g|webp)', 'thumbnail': 're:.+[.](png|jpe?g|webp)',
}, },
}, { }, {
@ -57,22 +57,22 @@ class ApplePodcastsIE(InfoExtractor):
webpage = self._download_webpage(url, episode_id) webpage = self._download_webpage(url, episode_id)
server_data = self._search_json( server_data = self._search_json(
r'<script [^>]*\bid=["\']serialized-server-data["\'][^>]*>', webpage, r'<script [^>]*\bid=["\']serialized-server-data["\'][^>]*>', webpage,
'server data', episode_id, contains_pattern=r'\[{(?s:.+)}\]')[0]['data'] 'server data', episode_id)['data'][0]['data']
model_data = traverse_obj(server_data, ( model_data = traverse_obj(server_data, (
'headerButtonItems', lambda _, v: v['$kind'] == 'share' and v['modelType'] == 'EpisodeLockup', 'headerButtonItems', lambda _, v: v['$kind'] == 'share' and v['modelType'] == 'EpisodeLockup',
'model', {dict}, any)) 'model', {dict}, any))
return { return {
'id': episode_id, 'id': episode_id,
**self._json_ld(
traverse_obj(server_data, ('seoData', 'schemaContent', {dict}))
or self._yield_json_ld(webpage, episode_id, fatal=False), episode_id, fatal=False),
**traverse_obj(model_data, { **traverse_obj(model_data, {
'title': ('title', {str}), 'title': ('title', {str}),
'description': ('summary', {clean_html}), 'description': ('summary', {clean_html}),
'url': ('playAction', 'episodeOffer', 'streamUrl', {clean_podcast_url}), 'url': ('playAction', 'episodeOffer', 'streamUrl', {clean_podcast_url}),
'timestamp': ('releaseDate', {parse_iso8601}), 'timestamp': ('releaseDate', {parse_iso8601}),
'duration': ('duration', {int_or_none}), 'duration': ('duration', {int_or_none}),
'episode': ('title', {str}),
'episode_number': ('episodeNumber', {int_or_none}),
'series': ('showTitle', {str}),
}), }),
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage),
'vcodec': 'none', 'vcodec': 'none',

View File

@ -0,0 +1,38 @@
from .common import InfoExtractor
from ..utils import join_nonempty, unified_strdate
from ..utils.traversal import traverse_obj
class MatchiTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?matchi\.tv/watch/?\?(?:[^#]+&)?s=(?P<id>[0-9a-zA-Z]+)'
_TESTS = [{
'url': 'https://matchi.tv/watch?s=0euhjzrxsjm',
'info_dict': {
'id': '0euhjzrxsjm',
'ext': 'mp4',
'title': 'Court 2 at Stratford Padel Club 2024-07-13T18:32:24',
'thumbnail': 'https://thumbnails.padelgo.tv/0euhjzrxsjm.jpg',
'upload_date': '20240713',
},
}, {
'url': 'https://matchi.tv/watch?s=FkKDJ9SvAx1',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
loaded_media = traverse_obj(
self._search_nextjs_data(webpage, video_id, fatal=False),
('props', 'pageProps', 'loadedMedia', {dict})) or {}
start_date_time = traverse_obj(loaded_media, ('startDateTime', {str}))
return {
'id': video_id,
'title': join_nonempty(loaded_media.get('courtDescription'), start_date_time, delim=' '),
'thumbnail': f'https://thumbnails.padelgo.tv/{video_id}.jpg',
'upload_date': unified_strdate(start_date_time),
'formats': self._extract_m3u8_formats(
f'https://streams.padelgo.tv/v2/streams/m3u8/{video_id}/anonymous/playlist.m3u8',
video_id, 'mp4', m3u8_id='hls'),
}