Compare commits

...

4 Commits

Author SHA1 Message Date
InvalidUsernameException
1e28f6bf74
[ie/kick:vod] Support ongoing livestream VODs (#14154)
Authored by: InvalidUsernameException
2025-08-28 01:26:49 +00:00
garret1317
0b51005b48
[ie/ITVBTCC] Fix extractor (#14161)
Closes #14156
Authored by: garret1317
2025-08-28 01:19:25 +00:00
Abdulmohsen
223baa81f6
[ie/tver] Extract more metadata (#14165)
Authored by: arabcoders
2025-08-28 01:18:10 +00:00
Gegham Zakaryan
18fe696df9
[ie/googledrive] Fix subtitles extraction (#14139)
Authored by: zakaryan2004
2025-08-28 01:12:08 +00:00
4 changed files with 57 additions and 23 deletions

View File

@ -12,6 +12,7 @@ from ..utils import (
get_element_html_by_id,
int_or_none,
lowercase_escape,
parse_qs,
try_get,
update_url_query,
)
@ -111,14 +112,18 @@ class GoogleDriveIE(InfoExtractor):
self._caption_formats_ext.append(f.attrib['fmt_code'])
def _get_captions_by_type(self, video_id, subtitles_id, caption_type,
origin_lang_code=None):
origin_lang_code=None, origin_lang_name=None):
if not subtitles_id or not caption_type:
return
captions = {}
for caption_entry in self._captions_xml.findall(
self._CAPTIONS_ENTRY_TAG[caption_type]):
caption_lang_code = caption_entry.attrib.get('lang_code')
if not caption_lang_code:
caption_name = caption_entry.attrib.get('name') or origin_lang_name
if not caption_lang_code or not caption_name:
self.report_warning(f'Missing necessary caption metadata. '
f'Need lang_code and name attributes. '
f'Found: {caption_entry.attrib}')
continue
caption_format_data = []
for caption_format in self._caption_formats_ext:
@ -129,7 +134,7 @@ class GoogleDriveIE(InfoExtractor):
'lang': (caption_lang_code if origin_lang_code is None
else origin_lang_code),
'type': 'track',
'name': '',
'name': caption_name,
'kind': '',
}
if origin_lang_code is not None:
@ -155,14 +160,15 @@ class GoogleDriveIE(InfoExtractor):
self._download_subtitles_xml(video_id, subtitles_id, hl)
if not self._captions_xml:
return
track = self._captions_xml.find('track')
track = next((t for t in self._captions_xml.findall('track') if t.attrib.get('cantran') == 'true'), None)
if track is None:
return
origin_lang_code = track.attrib.get('lang_code')
if not origin_lang_code:
origin_lang_name = track.attrib.get('name')
if not origin_lang_code or not origin_lang_name:
return
return self._get_captions_by_type(
video_id, subtitles_id, 'automatic_captions', origin_lang_code)
video_id, subtitles_id, 'automatic_captions', origin_lang_code, origin_lang_name)
def _real_extract(self, url):
video_id = self._match_id(url)
@ -268,10 +274,8 @@ class GoogleDriveIE(InfoExtractor):
subtitles_id = None
ttsurl = get_value('ttsurl')
if ttsurl:
# the video Id for subtitles will be the last value in the ttsurl
# query string
subtitles_id = ttsurl.encode().decode(
'unicode_escape').split('=')[-1]
# the subtitles ID is the vid param of the ttsurl query
subtitles_id = parse_qs(ttsurl).get('vid', [None])[-1]
self.cookiejar.clear(domain='.google.com', path='/', name='NID')

View File

@ -18,6 +18,7 @@ from ..utils import (
url_or_none,
urljoin,
)
from ..utils.traversal import traverse_obj
class ITVIE(InfoExtractor):
@ -223,6 +224,7 @@ class ITVBTCCIE(InfoExtractor):
},
'playlist_count': 12,
}, {
# news page, can have absent `data` field
'url': 'https://www.itv.com/news/2021-10-27/i-have-to-protect-the-country-says-rishi-sunak-as-uk-faces-interest-rate-hike',
'info_dict': {
'id': 'i-have-to-protect-the-country-says-rishi-sunak-as-uk-faces-interest-rate-hike',
@ -243,7 +245,7 @@ class ITVBTCCIE(InfoExtractor):
entries = []
for video in json_map:
if not any(video['data'].get(attr) == 'Brightcove' for attr in ('name', 'type')):
if not any(traverse_obj(video, ('data', attr)) == 'Brightcove' for attr in ('name', 'type')):
continue
video_id = video['data']['id']
account_id = video['data']['accountId']

View File

@ -95,26 +95,47 @@ class KickVODIE(KickBaseIE):
IE_NAME = 'kick:vod'
_VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+/videos/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
_TESTS = [{
'url': 'https://kick.com/xqc/videos/8dd97a8d-e17f-48fb-8bc3-565f88dbc9ea',
'md5': '3870f94153e40e7121a6e46c068b70cb',
# Regular VOD
'url': 'https://kick.com/xqc/videos/5c697a87-afce-4256-b01f-3c8fe71ef5cb',
'info_dict': {
'id': '8dd97a8d-e17f-48fb-8bc3-565f88dbc9ea',
'id': '5c697a87-afce-4256-b01f-3c8fe71ef5cb',
'ext': 'mp4',
'title': '18+ #ad 🛑LIVE🛑CLICK🛑DRAMA🛑NEWS🛑STUFF🛑REACT🛑GET IN HHERE🛑BOP BOP🛑WEEEE WOOOO🛑',
'title': '🐗LIVE🐗CLICK🐗HERE🐗DRAMA🐗ALL DAY🐗NEWS🐗VIDEOS🐗CLIPS🐗GAMES🐗STUFF🐗WOW🐗IM HERE🐗LETS GO🐗COOL🐗VERY NICE🐗',
'description': 'THE BEST AT ABSOLUTELY EVERYTHING. THE JUICER. LEADER OF THE JUICERS.',
'channel': 'xqc',
'channel_id': '668',
'uploader': 'xQc',
'uploader_id': '676',
'upload_date': '20240909',
'timestamp': 1725919141,
'duration': 10155.0,
'thumbnail': r're:^https?://.*\.jpg',
'channel': 'xqc',
'channel_id': '668',
'view_count': int,
'categories': ['Just Chatting'],
'age_limit': 0,
'age_limit': 18,
'duration': 22278.0,
'thumbnail': r're:^https?://.*\.jpg',
'categories': ['Deadlock'],
'timestamp': 1756082443,
'upload_date': '20250825',
},
'params': {'skip_download': 'm3u8'},
}, {
# VOD of ongoing livestream (at the time of writing the test, ID rotates every two days)
'url': 'https://kick.com/a-log-burner/videos/5230df84-ea38-46e1-be4f-f5949ae55641',
'info_dict': {
'id': '5230df84-ea38-46e1-be4f-f5949ae55641',
'ext': 'mp4',
'title': r're:😴 Cozy Fireplace ASMR 🔥 | Relax, Focus, Sleep 💤',
'description': 'md5:080bc713eac0321a7b376a1b53816d1b',
'uploader': 'A_Log_Burner',
'uploader_id': '65114691',
'channel': 'a-log-burner',
'channel_id': '63967687',
'view_count': int,
'age_limit': 18,
'thumbnail': r're:^https?://.*\.jpg',
'categories': ['Other, Watch Party'],
'timestamp': int,
'upload_date': str,
'live_status': 'is_live',
},
'skip': 'live',
}]
def _real_extract(self, url):
@ -137,6 +158,7 @@ class KickVODIE(KickBaseIE):
'categories': ('livestream', 'categories', ..., 'name', {str}),
'view_count': ('views', {int_or_none}),
'age_limit': ('livestream', 'is_mature', {bool}, {lambda x: 18 if x else 0}),
'is_live': ('livestream', 'is_live', {bool}),
}),
}

View File

@ -45,6 +45,8 @@ class TVerIE(StreaksBaseIE):
'release_timestamp': 1651453200,
'release_date': '20220502',
'_old_archive_ids': ['brightcovenew ref:baeebeac-a2a6-4dbf-9eb3-c40d59b40068'],
'series_id': 'sru35hwdd2',
'season_id': 'ss2lcn4af6',
},
}, {
# via Brightcove backend (deprecated)
@ -67,6 +69,8 @@ class TVerIE(StreaksBaseIE):
'upload_date': '20220501',
'release_timestamp': 1651453200,
'release_date': '20220502',
'series_id': 'sru35hwdd2',
'season_id': 'ss2lcn4af6',
},
'params': {'extractor_args': {'tver': {'backend': ['brightcove']}}},
}, {
@ -202,6 +206,8 @@ class TVerIE(StreaksBaseIE):
'description': ('description', {str}),
'release_timestamp': ('viewStatus', 'startAt', {int_or_none}),
'episode_number': ('no', {int_or_none}),
'series_id': ('seriesID', {str}),
'season_id': ('seasonID', {str}),
}),
}