Compare commits

...

2 Commits

Author SHA1 Message Date
fries1234
682334e4b3
[ie/tvw:news] Add extractor (#12907)
Authored by: fries1234
2025-07-27 22:26:33 +02:00
Florentin Le Moal
b831406a1d
[ie/rtve.es:program] Add extractor
Authored by: meGAmeS1, seproDev

Co-authored-by: sepro <sepro@sepr0.com>
2025-07-27 21:52:05 +02:00
3 changed files with 117 additions and 2 deletions

View File

@ -1781,6 +1781,7 @@ from .rtve import (
RTVEALaCartaIE,
RTVEAudioIE,
RTVELiveIE,
RTVEProgramIE,
RTVETelevisionIE,
)
from .rtvs import RTVSIE
@ -2234,6 +2235,7 @@ from .tvplay import (
from .tvplayer import TVPlayerIE
from .tvw import (
TvwIE,
TvwNewsIE,
TvwTvChannelsIE,
)
from .tweakers import TweakersIE

View File

@ -6,9 +6,11 @@ import urllib.parse
from .common import InfoExtractor
from ..utils import (
ExtractorError,
InAdvancePagedList,
clean_html,
determine_ext,
float_or_none,
int_or_none,
make_archive_id,
parse_iso8601,
qualities,
@ -371,3 +373,62 @@ class RTVETelevisionIE(InfoExtractor):
raise ExtractorError('The webpage doesn\'t contain any video', expected=True)
return self.url_result(play_url, ie=RTVEALaCartaIE.ie_key())
class RTVEProgramIE(RTVEBaseIE):
IE_NAME = 'rtve.es:program'
IE_DESC = 'RTVE.es programs'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/play/videos/(?P<id>[\w-]+)/?(?:[?#]|$)'
_TESTS = [{
'url': 'https://www.rtve.es/play/videos/saber-vivir/',
'info_dict': {
'id': '111570',
'title': 'Saber vivir - Programa de ciencia y futuro en RTVE Play',
},
'playlist_mincount': 400,
}]
_PAGE_SIZE = 60
def _fetch_page(self, program_id, page_num):
return self._download_json(
f'https://www.rtve.es/api/programas/{program_id}/videos',
program_id, note=f'Downloading page {page_num}',
query={
'type': 39816,
'page': page_num,
'size': 60,
})
def _entries(self, page_data):
for video in traverse_obj(page_data, ('page', 'items', lambda _, v: url_or_none(v['htmlUrl']))):
yield self.url_result(
video['htmlUrl'], RTVEALaCartaIE, url_transparent=True,
**traverse_obj(video, {
'id': ('id', {str}),
'title': ('longTitle', {str}),
'description': ('shortDescription', {str}),
'duration': ('duration', {float_or_none(scale=1000)}),
'series': (('programInfo', 'title'), {str}, any),
'season_number': ('temporadaOrden', {int_or_none}),
'season_id': ('temporadaId', {str}),
'season': ('temporada', {str}),
'episode_number': ('episode', {int_or_none}),
'episode': ('title', {str}),
'thumbnail': ('thumbnail', {url_or_none}),
}),
)
def _real_extract(self, url):
program_slug = self._match_id(url)
program_page = self._download_webpage(url, program_slug)
program_id = self._html_search_meta('DC.identifier', program_page, 'Program ID', fatal=True)
first_page = self._fetch_page(program_id, 1)
page_count = traverse_obj(first_page, ('page', 'totalPages', {int})) or 1
entries = InAdvancePagedList(
lambda idx: self._entries(self._fetch_page(program_id, idx + 1) if idx else first_page),
page_count, self._PAGE_SIZE)
return self.playlist_result(entries, program_id, self._html_extract_title(program_page))

View File

@ -10,12 +10,15 @@ from ..utils import (
unified_timestamp,
url_or_none,
)
from ..utils.traversal import find_element, traverse_obj
from ..utils.traversal import find_element, find_elements, traverse_obj
class TvwIE(InfoExtractor):
IE_NAME = 'tvw'
_VALID_URL = r'https?://(?:www\.)?tvw\.org/video/(?P<id>[^/?#]+)'
_VALID_URL = [
r'https?://(?:www\.)?tvw\.org/video/(?P<id>[^/?#]+)',
r'https?://(?:www\.)?tvw\.org/watch/?\?(?:[^#]+&)?eventID=(?P<id>\d+)',
]
_TESTS = [{
'url': 'https://tvw.org/video/billy-frank-jr-statue-maquette-unveiling-ceremony-2024011211/',
'md5': '9ceb94fe2bb7fd726f74f16356825703',
@ -75,6 +78,20 @@ class TvwIE(InfoExtractor):
'display_id': 'washington-to-washington-a-new-space-race-2022041111',
'categories': ['Washington to Washington', 'General Interest'],
},
}, {
'url': 'https://tvw.org/watch?eventID=2025041235',
'md5': '7d697c02f110b37d6a47622ea608ca90',
'info_dict': {
'id': '2025041235',
'ext': 'mp4',
'title': 'Legislative Review - Medicaid Postpartum Bill Sparks Debate & Senate Approves Automatic Voter Registration',
'thumbnail': r're:^https?://.*\.(?:jpe?g|png)$',
'description': 'md5:37d0f3a9187ae520aac261b3959eaee6',
'timestamp': 1745006400,
'upload_date': '20250418',
'location': 'Hayner Media Center',
'categories': ['Legislative Review'],
},
}]
def _real_extract(self, url):
@ -125,6 +142,41 @@ class TvwIE(InfoExtractor):
}
class TvwNewsIE(InfoExtractor):
IE_NAME = 'tvw:news'
_VALID_URL = r'https?://(?:www\.)?tvw\.org/\d{4}/\d{2}/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://tvw.org/2024/01/the-impact-issues-to-watch-in-the-2024-legislative-session/',
'info_dict': {
'id': 'the-impact-issues-to-watch-in-the-2024-legislative-session',
'title': 'The Impact - Issues to Watch in the 2024 Legislative Session',
'description': 'md5:65f0b33ec8f18ff1cd401c5547aa5441',
},
'playlist_count': 6,
}, {
'url': 'https://tvw.org/2024/06/the-impact-water-rights-and-the-skookumchuck-dam-debate/',
'info_dict': {
'id': 'the-impact-water-rights-and-the-skookumchuck-dam-debate',
'title': 'The Impact - Water Rights and the Skookumchuck Dam Debate',
'description': 'md5:185f3a2350ef81e3fa159ac3e040a94b',
},
'playlist_count': 1,
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
video_ids = traverse_obj(webpage, (
{find_elements(cls='invintus-player', html=True)}, ..., {extract_attributes}, 'data-eventid'))
return self.playlist_from_matches(
video_ids, playlist_id,
playlist_title=remove_end(self._og_search_title(webpage, default=None), ' - TVW'),
playlist_description=self._og_search_description(webpage, default=None),
getter=lambda x: f'https://tvw.org/watch?eventID={x}', ie=TvwIE)
class TvwTvChannelsIE(InfoExtractor):
IE_NAME = 'tvw:tvchannels'
_VALID_URL = r'https?://(?:www\.)?tvw\.org/tvchannels/(?P<id>[^/?#]+)'