Compare commits

...

3 Commits

Author SHA1 Message Date
doe1080
a13f281012
[ie/tvo] Add extractor (#15903)
Authored by: doe1080
2026-02-09 20:57:54 +00:00
doe1080
02ce3efbfe
[ie/tver:olympic] Add extractor (#15885)
Authored by: doe1080
2026-02-09 20:56:39 +00:00
doe1080
1a9c4b8238
[ie/steam] Fix extractor (#15028)
Closes #15014
Authored by: doe1080
2026-02-09 20:33:36 +00:00
6 changed files with 319 additions and 35 deletions

View File

@ -2180,11 +2180,15 @@ from .tvc import (
TVCIE, TVCIE,
TVCArticleIE, TVCArticleIE,
) )
from .tver import TVerIE from .tver import (
TVerIE,
TVerOlympicIE,
)
from .tvigle import TvigleIE from .tvigle import TvigleIE
from .tviplayer import TVIPlayerIE from .tviplayer import TVIPlayerIE
from .tvn24 import TVN24IE from .tvn24 import TVN24IE
from .tvnoe import TVNoeIE from .tvnoe import TVNoeIE
from .tvo import TvoIE
from .tvopengr import ( from .tvopengr import (
TVOpenGrEmbedIE, TVOpenGrEmbedIE,
TVOpenGrWatchIE, TVOpenGrWatchIE,

View File

@ -8,15 +8,12 @@ from ..utils import (
extract_attributes, extract_attributes,
join_nonempty, join_nonempty,
js_to_json, js_to_json,
parse_resolution,
str_or_none, str_or_none,
url_basename,
url_or_none, url_or_none,
) )
from ..utils.traversal import ( from ..utils.traversal import find_element, traverse_obj
find_element,
find_elements,
traverse_obj,
trim_str,
)
class SteamIE(InfoExtractor): class SteamIE(InfoExtractor):
@ -27,7 +24,7 @@ class SteamIE(InfoExtractor):
'id': '105600', 'id': '105600',
'title': 'Terraria', 'title': 'Terraria',
}, },
'playlist_mincount': 3, 'playlist_mincount': 5,
}, { }, {
'url': 'https://store.steampowered.com/app/271590/Grand_Theft_Auto_V/', 'url': 'https://store.steampowered.com/app/271590/Grand_Theft_Auto_V/',
'info_dict': { 'info_dict': {
@ -37,6 +34,39 @@ class SteamIE(InfoExtractor):
'playlist_mincount': 26, 'playlist_mincount': 26,
}] }]
def _entries(self, app_id, app_name, data_props):
    """Yield an info-dict for every trailer listed in the carousel data-props."""
    # Only keep trailers whose 'id' coerces to a usable string
    trailers = traverse_obj(data_props, (
        'trailers', lambda _, v: str_or_none(v['id']),
    ))
    for trailer in trailers:
        movie_id = str_or_none(trailer['id'])
        # Resolution is encoded in the thumbnail filename, e.g. "...616x353.jpg"
        thumbnails = [{
            'url': thumb_url,
            **parse_resolution(url_basename(thumb_url)),
        } for thumb_url in traverse_obj(trailer, (
            ('poster', 'thumbnail'), {url_or_none},
        ))]
        formats = []
        m3u8_url = traverse_obj(trailer, ('hlsManifest', {url_or_none}))
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, app_id, 'mp4', m3u8_id='hls', fatal=False))
        for mpd_url in traverse_obj(trailer, ('dashManifests', ..., {url_or_none})):
            formats.extend(self._extract_mpd_formats(
                mpd_url, app_id, mpd_id='dash', fatal=False))
        # Multiple manifests may describe the same renditions
        self._remove_duplicate_formats(formats)
        yield {
            'id': join_nonempty(app_id, movie_id),
            'title': join_nonempty(app_name, 'video', movie_id, delim=' '),
            'formats': formats,
            'series': app_name,
            'series_id': app_id,
            'thumbnails': thumbnails,
        }
def _real_extract(self, url): def _real_extract(self, url):
app_id = self._match_id(url) app_id = self._match_id(url)
@ -45,32 +75,13 @@ class SteamIE(InfoExtractor):
self._set_cookie('store.steampowered.com', 'lastagecheckage', '1-January-2000') self._set_cookie('store.steampowered.com', 'lastagecheckage', '1-January-2000')
webpage = self._download_webpage(url, app_id) webpage = self._download_webpage(url, app_id)
app_name = traverse_obj(webpage, ({find_element(cls='apphub_AppName')}, {clean_html})) data_props = traverse_obj(webpage, (
{find_element(cls='gamehighlight_desktopcarousel', html=True)},
{extract_attributes}, 'data-props', {json.loads}, {dict}))
app_name = traverse_obj(data_props, ('appName', {clean_html}))
entries = [] return self.playlist_result(
for data_prop in traverse_obj(webpage, ( self._entries(app_id, app_name, data_props), app_id, app_name)
{find_elements(cls='highlight_player_item highlight_movie', html=True)},
..., {extract_attributes}, 'data-props', {json.loads}, {dict},
)):
formats = []
if hls_manifest := traverse_obj(data_prop, ('hlsManifest', {url_or_none})):
formats.extend(self._extract_m3u8_formats(
hls_manifest, app_id, 'mp4', m3u8_id='hls', fatal=False))
for dash_manifest in traverse_obj(data_prop, ('dashManifests', ..., {url_or_none})):
formats.extend(self._extract_mpd_formats(
dash_manifest, app_id, mpd_id='dash', fatal=False))
movie_id = traverse_obj(data_prop, ('id', {trim_str(start='highlight_movie_')}))
entries.append({
'id': movie_id,
'title': join_nonempty(app_name, 'video', movie_id, delim=' '),
'formats': formats,
'series': app_name,
'series_id': app_id,
'thumbnail': traverse_obj(data_prop, ('screenshot', {url_or_none})),
})
return self.playlist_result(entries, app_id, app_name)
class SteamCommunityIE(InfoExtractor): class SteamCommunityIE(InfoExtractor):

View File

@ -22,7 +22,7 @@ class StreaksBaseIE(InfoExtractor):
_GEO_BYPASS = False _GEO_BYPASS = False
_GEO_COUNTRIES = ['JP'] _GEO_COUNTRIES = ['JP']
def _extract_from_streaks_api(self, project_id, media_id, headers=None, query=None, ssai=False): def _extract_from_streaks_api(self, project_id, media_id, headers=None, query=None, ssai=False, live_from_start=False):
try: try:
response = self._download_json( response = self._download_json(
self._API_URL_TEMPLATE.format('playback', project_id, media_id, ''), self._API_URL_TEMPLATE.format('playback', project_id, media_id, ''),
@ -83,6 +83,10 @@ class StreaksBaseIE(InfoExtractor):
fmts, subs = self._extract_m3u8_formats_and_subtitles( fmts, subs = self._extract_m3u8_formats_and_subtitles(
src_url, media_id, 'mp4', m3u8_id='hls', fatal=False, live=is_live, query=query) src_url, media_id, 'mp4', m3u8_id='hls', fatal=False, live=is_live, query=query)
for fmt in fmts:
if live_from_start:
fmt.setdefault('downloader_options', {}).update({'ffmpeg_args': ['-live_start_index', '0']})
fmt['is_from_start'] = True
formats.extend(fmts) formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles) self._merge_subtitles(subs, target=subtitles)

View File

@ -4,6 +4,7 @@ from .streaks import StreaksBaseIE
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
GeoRestrictedError, GeoRestrictedError,
clean_html,
int_or_none, int_or_none,
join_nonempty, join_nonempty,
make_archive_id, make_archive_id,
@ -11,7 +12,9 @@ from ..utils import (
str_or_none, str_or_none,
strip_or_none, strip_or_none,
time_seconds, time_seconds,
unified_timestamp,
update_url_query, update_url_query,
url_or_none,
) )
from ..utils.traversal import require, traverse_obj from ..utils.traversal import require, traverse_obj
@ -257,3 +260,113 @@ class TVerIE(StreaksBaseIE):
'id': video_id, 'id': video_id,
'_old_archive_ids': [make_archive_id('BrightcoveNew', brightcove_id)] if brightcove_id else None, '_old_archive_ids': [make_archive_id('BrightcoveNew', brightcove_id)] if brightcove_id else None,
} }
class TVerOlympicIE(StreaksBaseIE):
    """Extractor for TVer's Milano-Cortina 2026 Olympics live/VOD pages.

    Playback is delivered through the STREAKS API (see StreaksBaseIE);
    descriptive metadata comes from a separate TVer Olympic data API.
    """
    IE_NAME = 'tver:olympic'
    _API_BASE = 'https://olympic-data.tver.jp/api'
    _VALID_URL = r'https?://(?:www\.)?tver\.jp/olympic/milanocortina2026/(?P<type>live|video)/play/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://tver.jp/olympic/milanocortina2026/video/play/3b1d4462150b42558d9cc8aabb5238d0/',
        'info_dict': {
            'id': '3b1d4462150b42558d9cc8aabb5238d0',
            'ext': 'mp4',
            'title': '【開会式】ぎゅっと凝縮ハイライト',
            'display_id': 'ref:3b1d4462150b42558d9cc8aabb5238d0',
            'duration': 712.045,
            'live_status': 'not_live',
            'modified_date': r're:\d{8}',
            'modified_timestamp': int,
            'tags': 'count:1',
            'thumbnail': r're:https://.+\.(?:jpg|png)',
            'timestamp': 1770420187,
            'upload_date': '20260206',
            'uploader_id': 'tver-olympic',
        },
    }, {
        'url': 'https://tver.jp/olympic/milanocortina2026/live/play/glts313itwvj/',
        'info_dict': {
            'id': 'glts313itwvj',
            'ext': 'mp4',
            'title': '開会式ハイライト',
            'channel_id': 'ntv',
            'display_id': 'ref:sp_260207_spc_01_dvr',
            'duration': 7680,
            'live_status': 'was_live',
            'modified_date': r're:\d{8}',
            'modified_timestamp': int,
            'thumbnail': r're:https://.+\.(?:jpg|png)',
            'timestamp': 1770420300,
            'upload_date': '20260206',
            'uploader_id': 'tver-olympic-live',
        },
    }]

    def _real_extract(self, url):
        video_type, video_id = self._match_valid_url(url).group('type', 'id')
        live_from_start = self.get_param('live_from_start')

        if video_type == 'live':
            # Live pages use a different STREAKS project/API key than VOD pages
            project_id = 'tver-olympic-live'
            api_key = 'a35ebb1ca7d443758dc7fcc5d99b1f72'
            olympic_data = traverse_obj(self._download_json(
                f'{self._API_BASE}/live/{video_id}', video_id), ('contents', 'live', {dict}))
            media_id = traverse_obj(olympic_data, ('video_id', {str}))
            now = time_seconds()
            # On-air window timestamps are given in JST (UTC+9)
            start_timestamp_str = traverse_obj(olympic_data, ('onair_start_date', {str}))
            start_timestamp = unified_timestamp(start_timestamp_str, tz_offset=9)
            if not start_timestamp:
                raise ExtractorError('Unable to extract on-air start time')
            end_timestamp = traverse_obj(olympic_data, (
                'onair_end_date', {unified_timestamp(tz_offset=9)}, {require('on-air end time')}))
            if now < start_timestamp:
                # Broadcast has not started yet: return a formats-less
                # "upcoming" result so --wait-for-video can act on it
                self.raise_no_formats(
                    f'This program is scheduled to start at {start_timestamp_str} JST', expected=True)
                return {
                    'id': video_id,
                    'live_status': 'is_upcoming',
                    'release_timestamp': start_timestamp,
                }
            elif start_timestamp <= now < end_timestamp:
                live_status = 'is_live'
                if live_from_start:
                    # The "_dvr" stream variant allows seeking back to the start
                    media_id += '_dvr'
            elif end_timestamp <= now:
                # After the broadcast ends, the DVR stream may stay up for a while
                dvr_end_timestamp = traverse_obj(olympic_data, (
                    'dvr_end_date', {unified_timestamp(tz_offset=9)}))
                if dvr_end_timestamp and now < dvr_end_timestamp:
                    live_status = 'was_live'
                    media_id += '_dvr'
                else:
                    raise ExtractorError(
                        'This program is no longer available', expected=True)
        else:
            # VOD: the page ID is used directly as the STREAKS media reference
            project_id = 'tver-olympic'
            api_key = '4b55a4db3cce4ad38df6dd8543e3e46a'
            media_id = video_id
            live_status = 'not_live'
            olympic_data = traverse_obj(self._download_json(
                f'{self._API_BASE}/video/{video_id}', video_id), ('contents', 'video', {dict}))

        return {
            **self._extract_from_streaks_api(project_id, f'ref:{media_id}', {
                'Origin': 'https://tver.jp',
                'Referer': 'https://tver.jp/',
                'X-Streaks-Api-Key': api_key,
            }, live_from_start=live_from_start),
            # Metadata from the Olympic data API overrides the STREAKS response
            **traverse_obj(olympic_data, {
                'title': ('title', {clean_html}, filter),
                'alt_title': ('sub_title', {clean_html}, filter),
                'channel': ('channel', {clean_html}, filter),
                'channel_id': ('channel_id', {clean_html}, filter),
                'description': (('description', 'description_l', 'description_s'), {clean_html}, filter, any),
                'timestamp': ('onair_start_date', {unified_timestamp(tz_offset=9)}),
                'thumbnail': (('picture_l_url', 'picture_m_url', 'picture_s_url'), {url_or_none}, any),
            }),
            'id': video_id,
            'live_status': live_status,
        }

152
yt_dlp/extractor/tvo.py Normal file
View File

@ -0,0 +1,152 @@
import json
import urllib.parse
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
from ..utils import (
clean_html,
int_or_none,
parse_duration,
parse_iso8601,
smuggle_url,
str_or_none,
url_or_none,
)
from ..utils.traversal import (
require,
traverse_obj,
trim_str,
)
class TvoIE(InfoExtractor):
    """Extractor for TVO (tvo.org) video pages.

    Metadata is fetched from TVO's GraphQL API; actual playback is
    delegated to BrightcoveNewIE via a url_transparent result.
    """
    IE_NAME = 'TVO'
    _VALID_URL = r'https?://(?:www\.)?tvo\.org/video(?:/documentaries)?/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.tvo.org/video/how-can-ontario-survive-the-trade-war',
        'info_dict': {
            'id': '6377531034112',
            'ext': 'mp4',
            'title': 'How Can Ontario Survive the Trade War?',
            'description': 'md5:e7455d9cd4b6b1270141922044161457',
            'display_id': 'how-can-ontario-survive-the-trade-war',
            'duration': 3531,
            'episode': 'How Can Ontario Survive the Trade War?',
            'episode_id': 'how-can-ontario-survive-the-trade-war',
            'episode_number': 1,
            'season': 'Season 1',
            'season_number': 1,
            'series': 'TVO at AMO',
            'series_id': 'tvo-at-amo',
            'tags': 'count:17',
            'thumbnail': r're:https?://.+',
            'timestamp': 1756944016,
            'upload_date': '20250904',
            'uploader_id': '18140038001',
        },
    }, {
        'url': 'https://www.tvo.org/video/documentaries/the-pitch',
        'info_dict': {
            'id': '6382500333112',
            'ext': 'mp4',
            'title': 'The Pitch',
            'categories': ['Documentaries'],
            'description': 'md5:9d4246b70dce772a3a396c4bd84c8506',
            'display_id': 'the-pitch',
            'duration': 5923,
            'episode': 'The Pitch',
            'episode_id': 'the-pitch',
            'episode_number': 1,
            'season': 'Season 1',
            'season_number': 1,
            'series': 'The Pitch',
            'series_id': 'the-pitch',
            'tags': 'count:8',
            'thumbnail': r're:https?://.+',
            'timestamp': 1762693216,
            'upload_date': '20251109',
            'uploader_id': '18140038001',
        },
    }, {
        'url': 'https://www.tvo.org/video/documentaries/valentines-day',
        'info_dict': {
            'id': '6387298331112',
            'ext': 'mp4',
            'title': 'Valentine\'s Day',
            'categories': ['Documentaries'],
            'description': 'md5:b142149beb2d3a855244816c50cd2f14',
            'display_id': 'valentines-day',
            'duration': 3121,
            'episode': 'Valentine\'s Day',
            'episode_id': 'valentines-day',
            'episode_number': 2,
            'season': 'Season 1',
            'season_number': 1,
            'series': 'How We Celebrate',
            'series_id': 'how-we-celebrate',
            'tags': 'count:6',
            'thumbnail': r're:https?://.+',
            'timestamp': 1770386416,
            'upload_date': '20260206',
            'uploader_id': '18140038001',
        },
    }]
    # 18140038001 is TVO's Brightcove account ID; %s is the ref: video ID
    BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/18140038001/default_default/index.html?videoId=%s'

    def _real_extract(self, url):
        display_id = self._match_id(url)
        # The GraphQL endpoint keys videos by URL path ("slug"), not by ID
        video_data = self._download_json(
            'https://hmy0rc1bo2.execute-api.ca-central-1.amazonaws.com/graphql',
            display_id, headers={'Content-Type': 'application/json'},
            data=json.dumps({
                'operationName': 'getVideo',
                'variables': {'slug': urllib.parse.urlparse(url).path.rstrip('/')},
                'query': '''query getVideo($slug: String) {
                getTVOOrgVideo(slug: $slug) {
                    contentCategory
                    description
                    length
                    program {
                        nodeUrl
                        title
                    }
                    programOrder
                    publishedAt
                    season
                    tags
                    thumbnail
                    title
                    videoSource {
                        brightcoveRefId
                    }
                }
            }''',
            }, separators=(',', ':')).encode(),
        )['data']['getTVOOrgVideo']
        # Without a Brightcove reference ID there is nothing to play
        brightcove_id = traverse_obj(video_data, (
            'videoSource', 'brightcoveRefId', {str_or_none}, {require('Brightcove ID')}))

        return {
            '_type': 'url_transparent',
            'ie_key': BrightcoveNewIE.ie_key(),
            # Content is geo-restricted to Canada
            'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {'geo_countries': ['CA']}),
            'display_id': display_id,
            'episode_id': display_id,
            **traverse_obj(video_data, {
                'title': ('title', {clean_html}, filter),
                'categories': ('contentCategory', {clean_html}, filter, all, filter),
                'description': ('description', {clean_html}, filter),
                'duration': ('length', {parse_duration}),
                'episode': ('title', {clean_html}, filter),
                'episode_number': ('programOrder', {int_or_none}),
                'season_number': ('season', {int_or_none}),
                'tags': ('tags', ..., {clean_html}, filter),
                'thumbnail': ('thumbnail', {url_or_none}),
                'timestamp': ('publishedAt', {parse_iso8601}),
            }),
            # Series info lives under the nested "program" object;
            # nodeUrl is like "/programs/<slug>", trimmed to the bare slug
            **traverse_obj(video_data, ('program', {
                'series': ('title', {clean_html}, filter),
                'series_id': ('nodeUrl', {clean_html}, {trim_str(start='/programs/')}, filter),
            })),
        }

View File

@ -511,7 +511,7 @@ def create_parser():
general.add_option( general.add_option(
'--live-from-start', '--live-from-start',
action='store_true', dest='live_from_start', action='store_true', dest='live_from_start',
help='Download livestreams from the start. Currently experimental and only supported for YouTube and Twitch') help='Download livestreams from the start. Currently experimental and only supported for YouTube, Twitch, and TVer')
general.add_option( general.add_option(
'--no-live-from-start', '--no-live-from-start',
action='store_false', dest='live_from_start', action='store_false', dest='live_from_start',