Merge branch 'yt-dlp:master' into misc/cleanup

This commit is contained in:
bashonly 2025-04-24 12:43:37 -05:00
commit 9fb1744bdc
No known key found for this signature in database
GPG Key ID: 783F096F253D15B0
37 changed files with 1300 additions and 673 deletions

View File

@ -1770,7 +1770,7 @@ The following extractors use this feature:
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios` * `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all` * `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`

View File

@ -136,7 +136,7 @@ def _iter_differences(got, expected, field):
return return
if op == 'startswith': if op == 'startswith':
if not val.startswith(got): if not got.startswith(val):
yield field, f'should start with {val!r}, got {got!r}' yield field, f'should start with {val!r}, got {got!r}'
return return

View File

@ -39,6 +39,7 @@ from yt_dlp.cookies import YoutubeDLCookieJar
from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3 from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3
from yt_dlp.networking import ( from yt_dlp.networking import (
HEADRequest, HEADRequest,
PATCHRequest,
PUTRequest, PUTRequest,
Request, Request,
RequestDirector, RequestDirector,
@ -1856,6 +1857,7 @@ class TestRequest:
def test_request_helpers(self): def test_request_helpers(self):
assert HEADRequest('http://example.com').method == 'HEAD' assert HEADRequest('http://example.com').method == 'HEAD'
assert PATCHRequest('http://example.com').method == 'PATCH'
assert PUTRequest('http://example.com').method == 'PUT' assert PUTRequest('http://example.com').method == 'PUT'
def test_headers(self): def test_headers(self):

View File

@ -659,6 +659,8 @@ class TestUtil(unittest.TestCase):
self.assertEqual(url_or_none('mms://foo.de'), 'mms://foo.de') self.assertEqual(url_or_none('mms://foo.de'), 'mms://foo.de')
self.assertEqual(url_or_none('rtspu://foo.de'), 'rtspu://foo.de') self.assertEqual(url_or_none('rtspu://foo.de'), 'rtspu://foo.de')
self.assertEqual(url_or_none('ftps://foo.de'), 'ftps://foo.de') self.assertEqual(url_or_none('ftps://foo.de'), 'ftps://foo.de')
self.assertEqual(url_or_none('ws://foo.de'), 'ws://foo.de')
self.assertEqual(url_or_none('wss://foo.de'), 'wss://foo.de')
def test_parse_age_limit(self): def test_parse_age_limit(self):
self.assertEqual(parse_age_limit(None), None) self.assertEqual(parse_age_limit(None), None)

View File

@ -85,6 +85,7 @@ class NiconicoLiveFD(FileDownloader):
'quality': live_quality, 'quality': live_quality,
'protocol': 'hls+fmp4', 'protocol': 'hls+fmp4',
'latency': live_latency, 'latency': live_latency,
'accessRightMethod': 'single_cookie',
'chasePlay': False, 'chasePlay': False,
}, },
'room': { 'room': {

View File

@ -903,6 +903,7 @@ from .ivi import (
IviIE, IviIE,
) )
from .ivideon import IvideonIE from .ivideon import IvideonIE
from .ivoox import IvooxIE
from .iwara import ( from .iwara import (
IwaraIE, IwaraIE,
IwaraPlaylistIE, IwaraPlaylistIE,
@ -960,7 +961,10 @@ from .kick import (
) )
from .kicker import KickerIE from .kicker import KickerIE
from .kickstarter import KickStarterIE from .kickstarter import KickStarterIE
from .kika import KikaIE from .kika import (
KikaIE,
KikaPlaylistIE,
)
from .kinja import KinjaEmbedIE from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE from .kinopoisk import KinoPoiskIE
from .kommunetv import KommunetvIE from .kommunetv import KommunetvIE
@ -1061,6 +1065,7 @@ from .loom import (
from .lovehomeporn import LoveHomePornIE from .lovehomeporn import LoveHomePornIE
from .lrt import ( from .lrt import (
LRTVODIE, LRTVODIE,
LRTRadioIE,
LRTStreamIE, LRTStreamIE,
) )
from .lsm import ( from .lsm import (
@ -1493,6 +1498,10 @@ from .paramountplus import (
) )
from .parler import ParlerIE from .parler import ParlerIE
from .parlview import ParlviewIE from .parlview import ParlviewIE
from .parti import (
PartiLivestreamIE,
PartiVideoIE,
)
from .patreon import ( from .patreon import (
PatreonCampaignIE, PatreonCampaignIE,
PatreonIE, PatreonIE,
@ -1774,7 +1783,6 @@ from .rtvcplay import (
from .rtve import ( from .rtve import (
RTVEALaCartaIE, RTVEALaCartaIE,
RTVEAudioIE, RTVEAudioIE,
RTVEInfantilIE,
RTVELiveIE, RTVELiveIE,
RTVETelevisionIE, RTVETelevisionIE,
) )
@ -2228,7 +2236,10 @@ from .tvplay import (
TVPlayIE, TVPlayIE,
) )
from .tvplayer import TVPlayerIE from .tvplayer import TVPlayerIE
from .tvw import TvwIE from .tvw import (
TvwIE,
TvwTvChannelsIE,
)
from .tweakers import TweakersIE from .tweakers import TweakersIE
from .twentymin import TwentyMinutenIE from .twentymin import TwentyMinutenIE
from .twentythreevideo import TwentyThreeVideoIE from .twentythreevideo import TwentyThreeVideoIE

View File

@ -21,6 +21,7 @@ from ..utils import (
int_or_none, int_or_none,
time_seconds, time_seconds,
traverse_obj, traverse_obj,
update_url,
update_url_query, update_url_query,
) )
@ -417,6 +418,10 @@ class AbemaTVIE(AbemaTVBaseIE):
'is_live': is_live, 'is_live': is_live,
'availability': availability, 'availability': availability,
}) })
if thumbnail := update_url(self._og_search_thumbnail(webpage, default=''), query=None):
info['thumbnails'] = [{'url': thumbnail}]
return info return info

View File

@ -146,7 +146,7 @@ class TokFMPodcastIE(InfoExtractor):
'url': 'https://audycje.tokfm.pl/podcast/91275,-Systemowy-rasizm-Czy-zamieszki-w-USA-po-morderstwie-w-Minneapolis-doprowadza-do-zmian-w-sluzbach-panstwowych', 'url': 'https://audycje.tokfm.pl/podcast/91275,-Systemowy-rasizm-Czy-zamieszki-w-USA-po-morderstwie-w-Minneapolis-doprowadza-do-zmian-w-sluzbach-panstwowych',
'info_dict': { 'info_dict': {
'id': '91275', 'id': '91275',
'ext': 'aac', 'ext': 'mp3',
'title': 'md5:a9b15488009065556900169fb8061cce', 'title': 'md5:a9b15488009065556900169fb8061cce',
'episode': 'md5:a9b15488009065556900169fb8061cce', 'episode': 'md5:a9b15488009065556900169fb8061cce',
'series': 'Analizy', 'series': 'Analizy',
@ -164,23 +164,20 @@ class TokFMPodcastIE(InfoExtractor):
raise ExtractorError('No such podcast', expected=True) raise ExtractorError('No such podcast', expected=True)
metadata = metadata[0] metadata = metadata[0]
formats = [] mp3_url = self._download_json(
for ext in ('aac', 'mp3'): 'https://api.podcast.radioagora.pl/api4/getSongUrl',
url_data = self._download_json( media_id, 'Downloading podcast mp3 URL', query={
f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}', 'podcast_id': media_id,
media_id, f'Downloading podcast {ext} URL') 'device_id': str(uuid.uuid4()),
# prevents inserting the mp3 (default) multiple times 'ppre': 'false',
if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']: 'audio': 'mp3',
formats.append({ })['link_ssl']
'url': url_data['link_ssl'],
'ext': ext,
'vcodec': 'none',
'acodec': ext,
})
return { return {
'id': media_id, 'id': media_id,
'formats': formats, 'url': mp3_url,
'vcodec': 'none',
'ext': 'mp3',
'title': metadata.get('podcast_name'), 'title': metadata.get('podcast_name'),
'series': metadata.get('series_name'), 'series': metadata.get('series_name'),
'episode': metadata.get('podcast_name'), 'episode': metadata.get('podcast_name'),

View File

@ -1,64 +1,105 @@
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..networking.exceptions import HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
parse_age_limit,
url_or_none,
urlencode_postdata, urlencode_postdata,
) )
from ..utils.traversal import traverse_obj
class AtresPlayerIE(InfoExtractor): class AtresPlayerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})' _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/(?:[^/?#]+/){4}(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
_NETRC_MACHINE = 'atresplayer' _NETRC_MACHINE = 'atresplayer'
_TESTS = [ _TESTS = [{
{ 'url': 'https://www.atresplayer.com/lasexta/programas/el-objetivo/clips/mbappe-describe-como-entrenador-a-carlo-ancelotti-sabe-cuando-tiene-que-ser-padre-jefe-amigo-entrenador_67f2dfb2fb6ab0e4c7203849/',
'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/', 'info_dict': {
'info_dict': { 'ext': 'mp4',
'id': '5d4aa2c57ed1a88fc715a615', 'id': '67f2dfb2fb6ab0e4c7203849',
'ext': 'mp4', 'display_id': 'md5:c203f8d4e425ed115ba56a1c6e4b3e6c',
'title': 'Capítulo 7: Asuntos pendientes', 'title': 'Mbappé describe como entrenador a Carlo Ancelotti: "Sabe cuándo tiene que ser padre, jefe, amigo, entrenador..."',
'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc', 'channel': 'laSexta',
'duration': 3413, 'duration': 31,
}, 'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages02/2025/04/06/B02DBE1E-D59B-4683-8404-1A9595D15269/1920x1080.jpg',
'skip': 'This video is only available for registered users', 'tags': ['Entrevista informativa', 'Actualidad', 'Debate informativo', 'Política', 'Economía', 'Sociedad', 'Cara a cara', 'Análisis', 'Más periodismo'],
'series': 'El Objetivo',
'season': 'Temporada 12',
'timestamp': 1743970079,
'upload_date': '20250406',
}, },
{ }, {
'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/', 'url': 'https://www.atresplayer.com/antena3/programas/el-hormiguero/clips/revive-la-entrevista-completa-a-miguel-bose-en-el-hormiguero_67f836baa4a5b0e4147ca59a/',
'only_matching': True, 'info_dict': {
'ext': 'mp4',
'id': '67f836baa4a5b0e4147ca59a',
'display_id': 'revive-la-entrevista-completa-a-miguel-bose-en-el-hormiguero',
'title': 'Revive la entrevista completa a Miguel Bosé en El Hormiguero',
'description': 'md5:c6d2b591408d45a7bc2986dfb938eb72',
'channel': 'Antena 3',
'duration': 2556,
'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages02/2025/04/10/9076395F-F1FD-48BE-9F18-540DBA10EBAD/1920x1080.jpg',
'tags': ['Entrevista', 'Variedades', 'Humor', 'Entretenimiento', 'Te sigo', 'Buen rollo', 'Cara a cara'],
'series': 'El Hormiguero ',
'season': 'Temporada 14',
'timestamp': 1744320111,
'upload_date': '20250410',
}, },
{ }, {
'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/', 'url': 'https://www.atresplayer.com/flooxer/series/biara-proyecto-lazarus/temporada-1/capitulo-3-supervivientes_67a6038b64ceca00070f4f69/',
'only_matching': True, 'info_dict': {
'ext': 'mp4',
'id': '67a6038b64ceca00070f4f69',
'display_id': 'capitulo-3-supervivientes',
'title': 'Capítulo 3: Supervivientes',
'description': 'md5:65b231f20302f776c2b0dd24594599a1',
'channel': 'Flooxer',
'duration': 1196,
'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages01/2025/02/14/17CF90D3-FE67-40C5-A941-7825B3E13992/1920x1080.jpg',
'tags': ['Juvenil', 'Terror', 'Piel de gallina', 'Te sigo', 'Un break', 'Del tirón'],
'series': 'BIARA: Proyecto Lázarus',
'season': 'Temporada 1',
'season_number': 1,
'episode': 'Episode 3',
'episode_number': 3,
'timestamp': 1743095191,
'upload_date': '20250327',
}, },
] }, {
'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
'only_matching': True,
}, {
'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
'only_matching': True,
}]
_API_BASE = 'https://api.atresplayer.com/' _API_BASE = 'https://api.atresplayer.com/'
def _perform_login(self, username, password): def _perform_login(self, username, password):
self._request_webpage(
self._API_BASE + 'login', None, 'Downloading login page')
try: try:
target_url = self._download_json( self._download_webpage(
'https://account.atresmedia.com/api/login', None, 'https://account.atresplayer.com/auth/v1/login', None,
'Logging in', headers={ 'Logging in', 'Failed to log in', data=urlencode_postdata({
'Content-Type': 'application/x-www-form-urlencoded',
}, data=urlencode_postdata({
'username': username, 'username': username,
'password': password, 'password': password,
}))['targetUrl'] }))
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 400: if isinstance(e.cause, HTTPError) and e.cause.status == 400:
raise ExtractorError('Invalid username and/or password', expected=True) raise ExtractorError('Invalid username and/or password', expected=True)
raise raise
self._request_webpage(target_url, None, 'Following Target URL')
def _real_extract(self, url): def _real_extract(self, url):
display_id, video_id = self._match_valid_url(url).groups() display_id, video_id = self._match_valid_url(url).groups()
metadata_url = self._download_json(
self._API_BASE + 'client/v1/url', video_id, 'Downloading API endpoint data',
query={'href': urllib.parse.urlparse(url).path})['href']
metadata = self._download_json(metadata_url, video_id)
try: try:
episode = self._download_json( video_data = self._download_json(metadata['urlVideo'], video_id, 'Downloading video data')
self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 403: if isinstance(e.cause, HTTPError) and e.cause.status == 403:
error = self._parse_json(e.cause.response.read(), None) error = self._parse_json(e.cause.response.read(), None)
@ -67,37 +108,45 @@ class AtresPlayerIE(InfoExtractor):
raise ExtractorError(error['error_description'], expected=True) raise ExtractorError(error['error_description'], expected=True)
raise raise
title = episode['titulo']
formats = [] formats = []
subtitles = {} subtitles = {}
for source in episode.get('sources', []): for source in traverse_obj(video_data, ('sources', lambda _, v: url_or_none(v['src']))):
src = source.get('src') src_url = source['src']
if not src:
continue
src_type = source.get('type') src_type = source.get('type')
if src_type == 'application/vnd.apple.mpegurl': if src_type in ('application/vnd.apple.mpegurl', 'application/hls+legacy', 'application/hls+hevc'):
formats, subtitles = self._extract_m3u8_formats( fmts, subs = self._extract_m3u8_formats_and_subtitles(
src, video_id, 'mp4', 'm3u8_native', src_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
m3u8_id='hls', fatal=False) elif src_type in ('application/dash+xml', 'application/dash+hevc'):
elif src_type == 'application/dash+xml': fmts, subs = self._extract_mpd_formats_and_subtitles(
formats, subtitles = self._extract_mpd_formats( src_url, video_id, mpd_id='dash', fatal=False)
src, video_id, mpd_id='dash', fatal=False) else:
continue
heartbeat = episode.get('heartbeat') or {} formats.extend(fmts)
omniture = episode.get('omniture') or {} self._merge_subtitles(subs, target=subtitles)
get_meta = lambda x: heartbeat.get(x) or omniture.get(x)
return { return {
'display_id': display_id, 'display_id': display_id,
'id': video_id, 'id': video_id,
'title': title,
'description': episode.get('descripcion'),
'thumbnail': episode.get('imgPoster'),
'duration': int_or_none(episode.get('duration')),
'formats': formats, 'formats': formats,
'channel': get_meta('channel'),
'season': get_meta('season'),
'episode_number': int_or_none(get_meta('episodeNumber')),
'subtitles': subtitles, 'subtitles': subtitles,
**traverse_obj(video_data, {
'title': ('titulo', {str}),
'description': ('descripcion', {str}),
'duration': ('duration', {int_or_none}),
'thumbnail': ('imgPoster', {url_or_none}, {lambda v: f'{v}1920x1080.jpg'}),
'age_limit': ('ageRating', {parse_age_limit}),
}),
**traverse_obj(metadata, {
'title': ('title', {str}),
'description': ('description', {str}),
'duration': ('duration', {int_or_none}),
'tags': ('tags', ..., 'title', {str}),
'age_limit': ('ageRating', {parse_age_limit}),
'series': ('format', 'title', {str}),
'season': ('currentSeason', 'title', {str}),
'season_number': ('currentSeason', 'seasonNumber', {int_or_none}),
'episode_number': ('numberOfEpisode', {int_or_none}),
'timestamp': ('publicationDate', {int_or_none(scale=1000)}),
'channel': ('channel', 'title', {str}),
}),
} }

View File

@ -13,16 +13,17 @@ from ..compat import compat_ord
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
OnDemandPagedList, OnDemandPagedList,
determine_ext,
float_or_none, float_or_none,
int_or_none, int_or_none,
merge_dicts, merge_dicts,
multipart_encode, multipart_encode,
parse_duration, parse_duration,
traverse_obj,
try_call, try_call,
try_get, url_or_none,
urljoin, urljoin,
) )
from ..utils.traversal import traverse_obj
class CDAIE(InfoExtractor): class CDAIE(InfoExtractor):
@ -290,34 +291,47 @@ class CDAIE(InfoExtractor):
if not video or 'file' not in video: if not video or 'file' not in video:
self.report_warning(f'Unable to extract {version} version information') self.report_warning(f'Unable to extract {version} version information')
return return
if video['file'].startswith('uggc'):
video['file'] = codecs.decode(video['file'], 'rot_13')
if video['file'].endswith('adc.mp4'):
video['file'] = video['file'].replace('adc.mp4', '.mp4')
elif not video['file'].startswith('http'):
video['file'] = decrypt_file(video['file'])
video_quality = video.get('quality') video_quality = video.get('quality')
qualities = video.get('qualities', {}) qualities = video.get('qualities', {})
video_quality = next((k for k, v in qualities.items() if v == video_quality), video_quality) video_quality = next((k for k, v in qualities.items() if v == video_quality), video_quality)
info_dict['formats'].append({ if video.get('file'):
'url': video['file'], if video['file'].startswith('uggc'):
'format_id': video_quality, video['file'] = codecs.decode(video['file'], 'rot_13')
'height': int_or_none(video_quality[:-1]), if video['file'].endswith('adc.mp4'):
}) video['file'] = video['file'].replace('adc.mp4', '.mp4')
elif not video['file'].startswith('http'):
video['file'] = decrypt_file(video['file'])
info_dict['formats'].append({
'url': video['file'],
'format_id': video_quality,
'height': int_or_none(video_quality[:-1]),
})
for quality, cda_quality in qualities.items(): for quality, cda_quality in qualities.items():
if quality == video_quality: if quality == video_quality:
continue continue
data = {'jsonrpc': '2.0', 'method': 'videoGetLink', 'id': 2, data = {'jsonrpc': '2.0', 'method': 'videoGetLink', 'id': 2,
'params': [video_id, cda_quality, video.get('ts'), video.get('hash2'), {}]} 'params': [video_id, cda_quality, video.get('ts'), video.get('hash2'), {}]}
data = json.dumps(data).encode() data = json.dumps(data).encode()
video_url = self._download_json( response = self._download_json(
f'https://www.cda.pl/video/{video_id}', video_id, headers={ f'https://www.cda.pl/video/{video_id}', video_id, headers={
'Content-Type': 'application/json', 'Content-Type': 'application/json',
'X-Requested-With': 'XMLHttpRequest', 'X-Requested-With': 'XMLHttpRequest',
}, data=data, note=f'Fetching {quality} url', }, data=data, note=f'Fetching {quality} url',
errnote=f'Failed to fetch {quality} url', fatal=False) errnote=f'Failed to fetch {quality} url', fatal=False)
if try_get(video_url, lambda x: x['result']['status']) == 'ok': if (
video_url = try_get(video_url, lambda x: x['result']['resp']) traverse_obj(response, ('result', 'status')) != 'ok'
or not traverse_obj(response, ('result', 'resp', {url_or_none}))
):
continue
video_url = response['result']['resp']
ext = determine_ext(video_url)
if ext == 'mpd':
info_dict['formats'].extend(self._extract_mpd_formats(
video_url, video_id, mpd_id='dash', fatal=False))
elif ext == 'm3u8':
info_dict['formats'].extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
else:
info_dict['formats'].append({ info_dict['formats'].append({
'url': video_url, 'url': video_url,
'format_id': quality, 'format_id': quality,
@ -353,7 +367,7 @@ class CDAIE(InfoExtractor):
class CDAFolderIE(InfoExtractor): class CDAFolderIE(InfoExtractor):
_MAX_PAGE_SIZE = 36 _MAX_PAGE_SIZE = 36
_VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>\w+)/folder/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>[\w-]+)/folder/(?P<id>\d+)'
_TESTS = [ _TESTS = [
{ {
'url': 'https://www.cda.pl/domino264/folder/31188385', 'url': 'https://www.cda.pl/domino264/folder/31188385',
@ -378,6 +392,9 @@ class CDAFolderIE(InfoExtractor):
'title': 'TESTY KOSMETYKÓW', 'title': 'TESTY KOSMETYKÓW',
}, },
'playlist_mincount': 139, 'playlist_mincount': 139,
}, {
'url': 'https://www.cda.pl/FILMY-SERIALE-ANIME-KRESKOWKI-BAJKI/folder/18493422',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -1570,6 +1570,8 @@ class InfoExtractor:
"""Yield all json ld objects in the html""" """Yield all json ld objects in the html"""
if default is not NO_DEFAULT: if default is not NO_DEFAULT:
fatal = False fatal = False
if not fatal and not isinstance(html, str):
return
for mobj in re.finditer(JSON_LD_RE, html): for mobj in re.finditer(JSON_LD_RE, html):
json_ld_item = self._parse_json( json_ld_item = self._parse_json(
mobj.group('json_ld'), video_id, fatal=fatal, mobj.group('json_ld'), video_id, fatal=fatal,

View File

@ -16,7 +16,6 @@ from ..utils import (
MEDIA_EXTENSIONS, MEDIA_EXTENSIONS,
ExtractorError, ExtractorError,
UnsupportedError, UnsupportedError,
base_url,
determine_ext, determine_ext,
determine_protocol, determine_protocol,
dict_get, dict_get,
@ -38,6 +37,7 @@ from ..utils import (
unescapeHTML, unescapeHTML,
unified_timestamp, unified_timestamp,
unsmuggle_url, unsmuggle_url,
update_url,
update_url_query, update_url_query,
url_or_none, url_or_none,
urlhandle_detect_ext, urlhandle_detect_ext,
@ -2538,12 +2538,13 @@ class GenericIE(InfoExtractor):
return self.playlist_result( return self.playlist_result(
self._parse_xspf( self._parse_xspf(
doc, video_id, xspf_url=url, doc, video_id, xspf_url=url,
xspf_base_url=full_response.url), xspf_base_url=new_url),
video_id) video_id)
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
info_dict['formats'], info_dict['subtitles'] = self._parse_mpd_formats_and_subtitles( info_dict['formats'], info_dict['subtitles'] = self._parse_mpd_formats_and_subtitles(
doc, doc,
mpd_base_url=base_url(full_response.url), # Do not use yt_dlp.utils.base_url here since it will raise on file:// URLs
mpd_base_url=update_url(new_url, query=None, fragment=None).rpartition('/')[0],
mpd_url=url) mpd_url=url)
info_dict['live_status'] = 'is_live' if doc.get('type') == 'dynamic' else None info_dict['live_status'] = 'is_live' if doc.get('type') == 'dynamic' else None
self._extra_manifest_info(info_dict, url) self._extra_manifest_info(info_dict, url)

View File

@ -8,7 +8,7 @@ from ..utils.traversal import traverse_obj
class GetCourseRuPlayerIE(InfoExtractor): class GetCourseRuPlayerIE(InfoExtractor):
_VALID_URL = r'https?://player02\.getcourse\.ru/sign-player/?\?(?:[^#]+&)?json=[^#&]+' _VALID_URL = r'https?://(?:player02\.getcourse\.ru|cf-api-2\.vhcdn\.com)/sign-player/?\?(?:[^#]+&)?json=[^#&]+'
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL}[^\'"]*)'] _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL}[^\'"]*)']
_TESTS = [{ _TESTS = [{
'url': 'http://player02.getcourse.ru/sign-player/?json=eyJ2aWRlb19oYXNoIjoiMTkwYmRmOTNmMWIyOTczNTMwOTg1M2E3YTE5ZTI0YjMiLCJ1c2VyX2lkIjozNTk1MjUxODMsInN1Yl9sb2dpbl91c2VyX2lkIjpudWxsLCJsZXNzb25faWQiOm51bGwsImlwIjoiNDYuMTQyLjE4Mi4yNDciLCJnY19ob3N0IjoiYWNhZGVteW1lbC5vbmxpbmUiLCJ0aW1lIjoxNzA1NDQ5NjQyLCJwYXlsb2FkIjoidV8zNTk1MjUxODMiLCJ1aV9sYW5ndWFnZSI6InJ1IiwiaXNfaGF2ZV9jdXN0b21fc3R5bGUiOnRydWV9&s=354ad2c993d95d5ac629e3133d6cefea&vh-static-feature=zigzag', 'url': 'http://player02.getcourse.ru/sign-player/?json=eyJ2aWRlb19oYXNoIjoiMTkwYmRmOTNmMWIyOTczNTMwOTg1M2E3YTE5ZTI0YjMiLCJ1c2VyX2lkIjozNTk1MjUxODMsInN1Yl9sb2dpbl91c2VyX2lkIjpudWxsLCJsZXNzb25faWQiOm51bGwsImlwIjoiNDYuMTQyLjE4Mi4yNDciLCJnY19ob3N0IjoiYWNhZGVteW1lbC5vbmxpbmUiLCJ0aW1lIjoxNzA1NDQ5NjQyLCJwYXlsb2FkIjoidV8zNTk1MjUxODMiLCJ1aV9sYW5ndWFnZSI6InJ1IiwiaXNfaGF2ZV9jdXN0b21fc3R5bGUiOnRydWV9&s=354ad2c993d95d5ac629e3133d6cefea&vh-static-feature=zigzag',
@ -20,6 +20,16 @@ class GetCourseRuPlayerIE(InfoExtractor):
'duration': 1693, 'duration': 1693,
}, },
'skip': 'JWT expired', 'skip': 'JWT expired',
}, {
'url': 'https://cf-api-2.vhcdn.com/sign-player/?json=example',
'info_dict': {
'id': '435735291',
'title': '8afd7c489952108e00f019590f3711f3',
'ext': 'mp4',
'thumbnail': 'https://preview-htz.vhcdn.com/preview/8afd7c489952108e00f019590f3711f3/preview.jpg?version=1682170973&host=vh-72',
'duration': 777,
},
'skip': 'JWT expired',
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -168,7 +178,7 @@ class GetCourseRuIE(InfoExtractor):
playlist_id = self._search_regex( playlist_id = self._search_regex(
r'window\.(?:lessonId|gcsObjectId)\s*=\s*(\d+)', webpage, 'playlist id', default=display_id) r'window\.(?:lessonId|gcsObjectId)\s*=\s*(\d+)', webpage, 'playlist id', default=display_id)
title = self._og_search_title(webpage) or self._html_extract_title(webpage) title = self._og_search_title(webpage, default=None) or self._html_extract_title(webpage)
return self.playlist_from_matches( return self.playlist_from_matches(
re.findall(GetCourseRuPlayerIE._EMBED_REGEX[0], webpage), re.findall(GetCourseRuPlayerIE._EMBED_REGEX[0], webpage),

78
yt_dlp/extractor/ivoox.py Normal file
View File

@ -0,0 +1,78 @@
from .common import InfoExtractor
from ..utils import int_or_none, parse_iso8601, url_or_none, urljoin
from ..utils.traversal import traverse_obj
class IvooxIE(InfoExtractor):
_VALID_URL = (
r'https?://(?:www\.)?ivoox\.com/(?:\w{2}/)?[^/?#]+_rf_(?P<id>[0-9]+)_1\.html',
r'https?://go\.ivoox\.com/rf/(?P<id>[0-9]+)',
)
_TESTS = [{
'url': 'https://www.ivoox.com/dex-08x30-rostros-del-mal-los-asesinos-en-audios-mp3_rf_143594959_1.html',
'md5': '993f712de5b7d552459fc66aa3726885',
'info_dict': {
'id': '143594959',
'ext': 'mp3',
'timestamp': 1742731200,
'channel': 'DIAS EXTRAÑOS con Santiago Camacho',
'title': 'DEx 08x30 Rostros del mal: Los asesinos en serie que aterrorizaron España',
'description': 'md5:eae8b4b9740d0216d3871390b056bb08',
'uploader': 'Santiago Camacho',
'thumbnail': 'https://static-1.ivoox.com/audios/c/d/5/2/cd52f46783fe735000c33a803dce2554_XXL.jpg',
'upload_date': '20250323',
'episode': 'DEx 08x30 Rostros del mal: Los asesinos en serie que aterrorizaron España',
'duration': 11837,
'tags': ['españa', 'asesinos en serie', 'arropiero', 'historia criminal', 'mataviejas'],
},
}, {
'url': 'https://go.ivoox.com/rf/143594959',
'only_matching': True,
}, {
'url': 'https://www.ivoox.com/en/campodelgas-28-03-2025-audios-mp3_rf_144036942_1.html',
'only_matching': True,
}]
def _real_extract(self, url):
media_id = self._match_id(url)
webpage = self._download_webpage(url, media_id, fatal=False)
data = self._search_nuxt_data(
webpage, media_id, fatal=False, traverse=('data', 0, 'data', 'audio'))
direct_download = self._download_json(
f'https://vcore-web.ivoox.com/v1/public/audios/{media_id}/download-url', media_id, fatal=False,
note='Fetching direct download link', headers={'Referer': url})
download_paths = {
*traverse_obj(direct_download, ('data', 'downloadUrl', {str}, filter, all)),
*traverse_obj(data, (('downloadUrl', 'mediaUrl'), {str}, filter)),
}
formats = []
for path in download_paths:
formats.append({
'url': urljoin('https://ivoox.com', path),
'http_headers': {'Referer': url},
})
return {
'id': media_id,
'formats': formats,
'uploader': self._html_search_regex(r'data-prm-author="([^"]+)"', webpage, 'author', default=None),
'timestamp': parse_iso8601(
self._html_search_regex(r'data-prm-pubdate="([^"]+)"', webpage, 'timestamp', default=None)),
'channel': self._html_search_regex(r'data-prm-podname="([^"]+)"', webpage, 'channel', default=None),
'title': self._html_search_regex(r'data-prm-title="([^"]+)"', webpage, 'title', default=None),
'thumbnail': self._og_search_thumbnail(webpage, default=None),
'description': self._og_search_description(webpage, default=None),
**self._search_json_ld(webpage, media_id, default={}),
**traverse_obj(data, {
'title': ('title', {str}),
'description': ('description', {str}),
'thumbnail': ('image', {url_or_none}),
'timestamp': ('uploadDate', {parse_iso8601(delimiter=' ')}),
'duration': ('duration', {int_or_none}),
'tags': ('tags', ..., 'name', {str}),
}),
}

View File

@ -1,3 +1,5 @@
import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
@ -124,3 +126,43 @@ class KikaIE(InfoExtractor):
'vbr': ('bitrateVideo', {int_or_none}, {lambda x: None if x == -1 else x}), 'vbr': ('bitrateVideo', {int_or_none}, {lambda x: None if x == -1 else x}),
}), }),
} }
class KikaPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?kika\.de/[\w-]+/(?P<id>[a-z-]+\d+)'
_TESTS = [{
'url': 'https://www.kika.de/logo/logo-die-welt-und-ich-562',
'info_dict': {
'id': 'logo-die-welt-und-ich-562',
'title': 'logo!',
'description': 'md5:7b9d7f65561b82fa512f2cfb553c397d',
},
'playlist_count': 100,
}]
def _entries(self, playlist_url, playlist_id):
for page in itertools.count(1):
data = self._download_json(playlist_url, playlist_id, note=f'Downloading page {page}')
for item in traverse_obj(data, ('content', lambda _, v: url_or_none(v['api']['url']))):
yield self.url_result(
item['api']['url'], ie=KikaIE,
**traverse_obj(item, {
'id': ('id', {str}),
'title': ('title', {str}),
'duration': ('duration', {int_or_none}),
'timestamp': ('date', {parse_iso8601}),
}))
playlist_url = traverse_obj(data, ('links', 'next', {url_or_none}))
if not playlist_url:
break
def _real_extract(self, url):
playlist_id = self._match_id(url)
brand_data = self._download_json(
f'https://www.kika.de/_next-api/proxy/v1/brands/{playlist_id}', playlist_id)
return self.playlist_result(
self._entries(brand_data['videoSubchannel']['videosPageUrl'], playlist_id),
playlist_id, title=brand_data.get('title'), description=brand_data.get('description'))

View File

@ -82,7 +82,10 @@ class LinkedInLearningBaseIE(LinkedInBaseIE):
class LinkedInIE(LinkedInBaseIE): class LinkedInIE(LinkedInBaseIE):
_VALID_URL = r'https?://(?:www\.)?linkedin\.com/posts/[^/?#]+-(?P<id>\d+)-\w{4}/?(?:[?#]|$)' _VALID_URL = [
r'https?://(?:www\.)?linkedin\.com/posts/[^/?#]+-(?P<id>\d+)-\w{4}/?(?:[?#]|$)',
r'https?://(?:www\.)?linkedin\.com/feed/update/urn:li:activity:(?P<id>\d+)',
]
_TESTS = [{ _TESTS = [{
'url': 'https://www.linkedin.com/posts/mishalkhawaja_sendinblueviews-toronto-digitalmarketing-ugcPost-6850898786781339649-mM20', 'url': 'https://www.linkedin.com/posts/mishalkhawaja_sendinblueviews-toronto-digitalmarketing-ugcPost-6850898786781339649-mM20',
'info_dict': { 'info_dict': {
@ -106,6 +109,9 @@ class LinkedInIE(LinkedInBaseIE):
'like_count': int, 'like_count': int,
'subtitles': 'mincount:1', 'subtitles': 'mincount:1',
}, },
}, {
'url': 'https://www.linkedin.com/feed/update/urn:li:activity:7016901149999955968/?utm_source=share&utm_medium=member_desktop',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -1,5 +1,9 @@
import json
import random
import time
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none, url_or_none from ..utils import int_or_none, jwt_decode_hs256, try_call, url_or_none
from ..utils.traversal import require, traverse_obj from ..utils.traversal import require, traverse_obj
@ -55,13 +59,81 @@ class LocoIE(InfoExtractor):
'upload_date': '20250226', 'upload_date': '20250226',
'modified_date': '20250226', 'modified_date': '20250226',
}, },
}, {
# Requires video authorization
'url': 'https://loco.com/stream/ac854641-ae0f-497c-a8ea-4195f6d8cc53',
'md5': '0513edf85c1e65c9521f555f665387d5',
'info_dict': {
'id': 'ac854641-ae0f-497c-a8ea-4195f6d8cc53',
'ext': 'mp4',
'title': 'DUAS CONTAS DESAFIANTE, RUSH TOP 1 NO BRASIL!',
'description': 'md5:aa77818edd6fe00dd4b6be75cba5f826',
'uploader_id': '7Y9JNAZC3Q',
'channel': 'ayellol',
'channel_follower_count': int,
'comment_count': int,
'view_count': int,
'concurrent_view_count': int,
'like_count': int,
'duration': 1229,
'thumbnail': 'https://static.ivory.getloconow.com/default_thumb/f5aa678b-6d04-45d9-a89a-859af0a8028f.jpg',
'tags': ['Gameplay', 'Carry'],
'series': 'League of Legends',
'timestamp': 1741182253,
'upload_date': '20250305',
'modified_timestamp': 1741182419,
'modified_date': '20250305',
},
}] }]
# From _app.js
_CLIENT_ID = 'TlwKp1zmF6eKFpcisn3FyR18WkhcPkZtzwPVEEC3'
_CLIENT_SECRET = 'Kp7tYlUN7LXvtcSpwYvIitgYcLparbtsQSe5AdyyCdiEJBP53Vt9J8eB4AsLdChIpcO2BM19RA3HsGtqDJFjWmwoonvMSG3ZQmnS8x1YIM8yl82xMXZGbE3NKiqmgBVU'
def _is_jwt_expired(self, token):
return jwt_decode_hs256(token)['exp'] - time.time() < 300
def _get_access_token(self, video_id):
access_token = try_call(lambda: self._get_cookies('https://loco.com')['access_token'].value)
if access_token and not self._is_jwt_expired(access_token):
return access_token
access_token = traverse_obj(self._download_json(
'https://api.getloconow.com/v3/user/device_profile/', video_id,
'Downloading access token', fatal=False, data=json.dumps({
'platform': 7,
'client_id': self._CLIENT_ID,
'client_secret': self._CLIENT_SECRET,
'model': 'Mozilla',
'os_name': 'Win32',
'os_ver': '5.0 (Windows)',
'app_ver': '5.0 (Windows)',
}).encode(), headers={
'Content-Type': 'application/json;charset=utf-8',
'DEVICE-ID': ''.join(random.choices('0123456789abcdef', k=32)) + 'live',
'X-APP-LANG': 'en',
'X-APP-LOCALE': 'en-US',
'X-CLIENT-ID': self._CLIENT_ID,
'X-CLIENT-SECRET': self._CLIENT_SECRET,
'X-PLATFORM': '7',
}), 'access_token')
if access_token and not self._is_jwt_expired(access_token):
self._set_cookie('.loco.com', 'access_token', access_token)
return access_token
def _real_extract(self, url): def _real_extract(self, url):
video_type, video_id = self._match_valid_url(url).group('type', 'id') video_type, video_id = self._match_valid_url(url).group('type', 'id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
stream = traverse_obj(self._search_nextjs_data(webpage, video_id), ( stream = traverse_obj(self._search_nextjs_data(webpage, video_id), (
'props', 'pageProps', ('liveStreamData', 'stream'), {dict}, any, {require('stream info')})) 'props', 'pageProps', ('liveStreamData', 'stream', 'liveStream'), {dict}, any, {require('stream info')}))
if access_token := self._get_access_token(video_id):
self._request_webpage(
'https://drm.loco.com/v1/streams/playback/', video_id,
'Downloading video authorization', fatal=False, headers={
'authorization': access_token,
}, query={
'stream_uid': stream['uid'],
})
return { return {
'formats': self._extract_m3u8_formats(stream['conf']['hls'], video_id), 'formats': self._extract_m3u8_formats(stream['conf']['hls'], video_id),

View File

@ -2,8 +2,11 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html, clean_html,
merge_dicts, merge_dicts,
str_or_none,
traverse_obj, traverse_obj,
unified_timestamp,
url_or_none, url_or_none,
urljoin,
) )
@ -80,7 +83,7 @@ class LRTVODIE(LRTBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
path, video_id = self._match_valid_url(url).groups() path, video_id = self._match_valid_url(url).group('path', 'id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
media_url = self._extract_js_var(webpage, 'main_url', path) media_url = self._extract_js_var(webpage, 'main_url', path)
@ -106,3 +109,42 @@ class LRTVODIE(LRTBaseIE):
} }
return merge_dicts(clean_info, jw_data, json_ld_data) return merge_dicts(clean_info, jw_data, json_ld_data)
class LRTRadioIE(LRTBaseIE):
_VALID_URL = r'https?://(?:www\.)?lrt\.lt/radioteka/irasas/(?P<id>\d+)/(?P<path>[^?#/]+)'
_TESTS = [{
# m3u8 download
'url': 'https://www.lrt.lt/radioteka/irasas/2000359728/nemarios-eiles-apie-pragarus-ir-skaistyklas-su-aiste-kiltinaviciute',
'info_dict': {
'id': '2000359728',
'ext': 'm4a',
'title': 'Nemarios eilės: apie pragarus ir skaistyklas su Aiste Kiltinavičiūte',
'description': 'md5:5eee9a0e86a55bf547bd67596204625d',
'timestamp': 1726143120,
'upload_date': '20240912',
'tags': 'count:5',
'thumbnail': r're:https?://.+/.+\.jpe?g',
'categories': ['Daiktiniai įrodymai'],
},
}, {
'url': 'https://www.lrt.lt/radioteka/irasas/2000304654/vakaras-su-knyga-svetlana-aleksijevic-cernobylio-malda-v-dalis?season=%2Fmediateka%2Faudio%2Fvakaras-su-knyga%2F2023',
'only_matching': True,
}]
def _real_extract(self, url):
video_id, path = self._match_valid_url(url).group('id', 'path')
media = self._download_json(
'https://www.lrt.lt/radioteka/api/media', video_id,
query={'url': f'/mediateka/irasas/{video_id}/{path}'})
return traverse_obj(media, {
'id': ('id', {int}, {str_or_none}),
'title': ('title', {str}),
'tags': ('tags', ..., 'name', {str}),
'categories': ('playlist_item', 'category', {str}, filter, all, filter),
'description': ('content', {clean_html}, {str}),
'timestamp': ('date', {lambda x: x.replace('.', '/')}, {unified_timestamp}),
'thumbnail': ('playlist_item', 'image', {urljoin('https://www.lrt.lt')}),
'formats': ('playlist_item', 'file', {lambda x: self._extract_m3u8_formats(x, video_id)}),
})

View File

@ -1,31 +1,38 @@
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html,
determine_ext, determine_ext,
extract_attributes,
int_or_none, int_or_none,
str_to_int, join_nonempty,
parse_count,
parse_duration,
parse_iso8601,
url_or_none, url_or_none,
urlencode_postdata,
) )
from ..utils.traversal import traverse_obj
class ManyVidsIE(InfoExtractor): class ManyVidsIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)' _VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
# preview video # preview video
'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/', 'url': 'https://www.manyvids.com/Video/530341/mv-tips-tricks',
'md5': '03f11bb21c52dd12a05be21a5c7dcc97', 'md5': '738dc723f7735ee9602f7ea352a6d058',
'info_dict': { 'info_dict': {
'id': '133957', 'id': '530341-preview',
'ext': 'mp4', 'ext': 'mp4',
'title': 'everthing about me (Preview)', 'title': 'MV Tips & Tricks (Preview)',
'uploader': 'ellyxxix', 'description': r're:I will take you on a tour around .{1313}$',
'thumbnail': r're:https://cdn5\.manyvids\.com/php_uploads/video_images/DestinyDiaz/.+\.jpg',
'uploader': 'DestinyDiaz',
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'release_timestamp': 1508419904,
'tags': ['AdultSchool', 'BBW', 'SFW', 'TeacherFetish'],
'release_date': '20171019',
'duration': 3167.0,
}, },
'expected_warnings': ['Only extracting preview'],
}, { }, {
# full video # full video
'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/', 'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
@ -34,129 +41,68 @@ class ManyVidsIE(InfoExtractor):
'id': '935718', 'id': '935718',
'ext': 'mp4', 'ext': 'mp4',
'title': 'MY FACE REVEAL', 'title': 'MY FACE REVEAL',
'description': 'md5:ec5901d41808b3746fed90face161612', 'description': r're:Today is the day!! I am finally taking off my mask .{445}$',
'thumbnail': r're:https://ods\.manyvids\.com/1001061960/3aa5397f2a723ec4597e344df66ab845/screenshots/.+\.jpg',
'uploader': 'Sarah Calanthe', 'uploader': 'Sarah Calanthe',
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'release_date': '20181110',
'tags': ['EyeContact', 'Interviews', 'MaskFetish', 'MouthFetish', 'Redhead'],
'release_timestamp': 1541851200,
'duration': 224.0,
}, },
}] }]
_API_BASE = 'https://www.manyvids.com/bff/store/video'
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video_data = self._download_json(f'{self._API_BASE}/{video_id}/private', video_id)['data']
formats, preview_only = [], True
real_url = f'https://www.manyvids.com/video/{video_id}/gtm.js' for format_id, path in [
try: ('preview', ['teaser', 'filepath']),
webpage = self._download_webpage(real_url, video_id) ('transcoded', ['transcodedFilepath']),
except Exception: ('filepath', ['filepath']),
# probably useless fallback ]:
webpage = self._download_webpage(url, video_id) format_url = traverse_obj(video_data, (*path, {url_or_none}))
if not format_url:
info = self._search_regex(
r'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''',
webpage, 'meta details', default='')
info = extract_attributes(info)
player = self._search_regex(
r'''(<div\b[^>]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''',
webpage, 'player details', default='')
player = extract_attributes(player)
video_urls_and_ids = (
(info.get('data-meta-video'), 'video'),
(player.get('data-video-transcoded'), 'transcoded'),
(player.get('data-video-filepath'), 'filepath'),
(self._og_search_video_url(webpage, secure=False, default=None), 'og_video'),
)
def txt_or_none(s, default=None):
return (s.strip() or default) if isinstance(s, str) else default
uploader = txt_or_none(info.get('data-meta-author'))
def mung_title(s):
if uploader:
s = re.sub(rf'^\s*{re.escape(uploader)}\s+[|-]', '', s)
return txt_or_none(s)
title = (
mung_title(info.get('data-meta-title'))
or self._html_search_regex(
(r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
webpage, 'title', default=None)
or self._html_search_meta(
'twitter:title', webpage, 'title', fatal=True))
title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title
if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
title += ' (Preview)'
mv_token = self._search_regex(
r'data-mvtoken=(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
'mv token', default=None, group='value')
if mv_token:
# Sets some cookies
self._download_webpage(
'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
video_id, note='Setting format cookies', fatal=False,
data=urlencode_postdata({
'mvtoken': mv_token,
'vid': video_id,
}), headers={
'Referer': url,
'X-Requested-With': 'XMLHttpRequest',
})
formats = []
for v_url, fmt in video_urls_and_ids:
v_url = url_or_none(v_url)
if not v_url:
continue continue
if determine_ext(v_url) == 'm3u8': if determine_ext(format_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id=format_id))
v_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls'))
else: else:
formats.append({ formats.append({
'url': v_url, 'url': format_url,
'format_id': fmt, 'format_id': format_id,
'preference': -10 if format_id == 'preview' else None,
'quality': 10 if format_id == 'filepath' else None,
'height': int_or_none(
self._search_regex(r'_(\d{2,3}[02468])_', format_url, 'height', default=None)),
}) })
if format_id != 'preview':
preview_only = False
self._remove_duplicate_formats(formats) metadata = traverse_obj(
self._download_json(f'{self._API_BASE}/{video_id}', video_id, fatal=False), 'data')
title = traverse_obj(metadata, ('title', {clean_html}))
for f in formats: if preview_only:
if f.get('height') is None: title = join_nonempty(title, '(Preview)', delim=' ')
f['height'] = int_or_none( video_id += '-preview'
self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None)) self.report_warning(
if '/preview/' in f['url']: f'Only extracting preview. Video may be paid or subscription only. {self._login_hint()}')
f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview')))
f['preference'] = -10
if 'transcoded' in f['format_id']:
f['preference'] = f.get('preference', -1) - 1
def get_likes():
likes = self._search_regex(
rf'''(<a\b[^>]*\bdata-id\s*=\s*(['"]){video_id}\2[^>]*>)''',
webpage, 'likes', default='')
likes = extract_attributes(likes)
return int_or_none(likes.get('data-likes'))
def get_views():
return str_to_int(self._html_search_regex(
r'''(?s)<span\b[^>]*\bclass\s*=["']views-wrapper\b[^>]+>.+?<span\b[^>]+>\s*(\d[\d,.]*)\s*</span>''',
webpage, 'view count', default=None))
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'formats': formats, 'formats': formats,
'description': txt_or_none(info.get('data-meta-description')), **traverse_obj(metadata, {
'uploader': txt_or_none(info.get('data-meta-author')), 'description': ('description', {clean_html}),
'thumbnail': ( 'uploader': ('model', 'displayName', {clean_html}),
url_or_none(info.get('data-meta-image')) 'thumbnail': (('screenshot', 'thumbnail'), {url_or_none}, any),
or url_or_none(player.get('data-video-screenshot'))), 'view_count': ('views', {parse_count}),
'view_count': get_views(), 'like_count': ('likes', {parse_count}),
'like_count': get_likes(), 'release_timestamp': ('launchDate', {parse_iso8601}),
'duration': ('videoDuration', {parse_duration}),
'tags': ('tagList', ..., 'label', {str}, filter, all, filter),
}),
} }

View File

@ -10,7 +10,9 @@ from ..utils import (
parse_iso8601, parse_iso8601,
strip_or_none, strip_or_none,
try_get, try_get,
url_or_none,
) )
from ..utils.traversal import traverse_obj
class MixcloudBaseIE(InfoExtractor): class MixcloudBaseIE(InfoExtractor):
@ -37,7 +39,7 @@ class MixcloudIE(MixcloudBaseIE):
'ext': 'm4a', 'ext': 'm4a',
'title': 'Cryptkeeper', 'title': 'Cryptkeeper',
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', 'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
'uploader': 'Daniel Holbach', 'uploader': 'dholbach',
'uploader_id': 'dholbach', 'uploader_id': 'dholbach',
'thumbnail': r're:https?://.*\.jpg', 'thumbnail': r're:https?://.*\.jpg',
'view_count': int, 'view_count': int,
@ -46,10 +48,11 @@ class MixcloudIE(MixcloudBaseIE):
'uploader_url': 'https://www.mixcloud.com/dholbach/', 'uploader_url': 'https://www.mixcloud.com/dholbach/',
'artist': 'Submorphics & Chino , Telekinesis, Porter Robinson, Enei, Breakage ft Jess Mills', 'artist': 'Submorphics & Chino , Telekinesis, Porter Robinson, Enei, Breakage ft Jess Mills',
'duration': 3723, 'duration': 3723,
'tags': [], 'tags': ['liquid drum and bass', 'drum and bass'],
'comment_count': int, 'comment_count': int,
'repost_count': int, 'repost_count': int,
'like_count': int, 'like_count': int,
'artists': list,
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, { }, {
@ -67,7 +70,7 @@ class MixcloudIE(MixcloudBaseIE):
'upload_date': '20150203', 'upload_date': '20150203',
'uploader_url': 'https://www.mixcloud.com/gillespeterson/', 'uploader_url': 'https://www.mixcloud.com/gillespeterson/',
'duration': 2992, 'duration': 2992,
'tags': [], 'tags': ['jazz', 'soul', 'world music', 'funk'],
'comment_count': int, 'comment_count': int,
'repost_count': int, 'repost_count': int,
'like_count': int, 'like_count': int,
@ -149,8 +152,6 @@ class MixcloudIE(MixcloudBaseIE):
elif reason: elif reason:
raise ExtractorError('Track is restricted', expected=True) raise ExtractorError('Track is restricted', expected=True)
title = cloudcast['name']
stream_info = cloudcast['streamInfo'] stream_info = cloudcast['streamInfo']
formats = [] formats = []
@ -182,47 +183,39 @@ class MixcloudIE(MixcloudBaseIE):
self.raise_login_required(metadata_available=True) self.raise_login_required(metadata_available=True)
comments = [] comments = []
for edge in (try_get(cloudcast, lambda x: x['comments']['edges']) or []): for node in traverse_obj(cloudcast, ('comments', 'edges', ..., 'node', {dict})):
node = edge.get('node') or {}
text = strip_or_none(node.get('comment')) text = strip_or_none(node.get('comment'))
if not text: if not text:
continue continue
user = node.get('user') or {}
comments.append({ comments.append({
'author': user.get('displayName'),
'author_id': user.get('username'),
'text': text, 'text': text,
'timestamp': parse_iso8601(node.get('created')), **traverse_obj(node, {
'author': ('user', 'displayName', {str}),
'author_id': ('user', 'username', {str}),
'timestamp': ('created', {parse_iso8601}),
}),
}) })
tags = []
for t in cloudcast.get('tags'):
tag = try_get(t, lambda x: x['tag']['name'], str)
if not tag:
tags.append(tag)
get_count = lambda x: int_or_none(try_get(cloudcast, lambda y: y[x]['totalCount']))
owner = cloudcast.get('owner') or {}
return { return {
'id': track_id, 'id': track_id,
'title': title,
'formats': formats, 'formats': formats,
'description': cloudcast.get('description'),
'thumbnail': try_get(cloudcast, lambda x: x['picture']['url'], str),
'uploader': owner.get('displayName'),
'timestamp': parse_iso8601(cloudcast.get('publishDate')),
'uploader_id': owner.get('username'),
'uploader_url': owner.get('url'),
'duration': int_or_none(cloudcast.get('audioLength')),
'view_count': int_or_none(cloudcast.get('plays')),
'like_count': get_count('favorites'),
'repost_count': get_count('reposts'),
'comment_count': get_count('comments'),
'comments': comments, 'comments': comments,
'tags': tags, **traverse_obj(cloudcast, {
'artist': ', '.join(cloudcast.get('featuringArtistList') or []) or None, 'title': ('name', {str}),
'description': ('description', {str}),
'thumbnail': ('picture', 'url', {url_or_none}),
'timestamp': ('publishDate', {parse_iso8601}),
'duration': ('audioLength', {int_or_none}),
'uploader': ('owner', 'displayName', {str}),
'uploader_id': ('owner', 'username', {str}),
'uploader_url': ('owner', 'url', {url_or_none}),
'view_count': ('plays', {int_or_none}),
'like_count': ('favorites', 'totalCount', {int_or_none}),
'repost_count': ('reposts', 'totalCount', {int_or_none}),
'comment_count': ('comments', 'totalCount', {int_or_none}),
'tags': ('tags', ..., 'tag', 'name', {str}, filter, all, filter),
'artists': ('featuringArtistList', ..., {str}, filter, all, filter),
}),
} }
@ -295,7 +288,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'url': 'http://www.mixcloud.com/dholbach/', 'url': 'http://www.mixcloud.com/dholbach/',
'info_dict': { 'info_dict': {
'id': 'dholbach_uploads', 'id': 'dholbach_uploads',
'title': 'Daniel Holbach (uploads)', 'title': 'dholbach (uploads)',
'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b', 'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
}, },
'playlist_mincount': 36, 'playlist_mincount': 36,
@ -303,7 +296,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'url': 'http://www.mixcloud.com/dholbach/uploads/', 'url': 'http://www.mixcloud.com/dholbach/uploads/',
'info_dict': { 'info_dict': {
'id': 'dholbach_uploads', 'id': 'dholbach_uploads',
'title': 'Daniel Holbach (uploads)', 'title': 'dholbach (uploads)',
'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b', 'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
}, },
'playlist_mincount': 36, 'playlist_mincount': 36,
@ -311,7 +304,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'url': 'http://www.mixcloud.com/dholbach/favorites/', 'url': 'http://www.mixcloud.com/dholbach/favorites/',
'info_dict': { 'info_dict': {
'id': 'dholbach_favorites', 'id': 'dholbach_favorites',
'title': 'Daniel Holbach (favorites)', 'title': 'dholbach (favorites)',
'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b', 'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
}, },
# 'params': { # 'params': {
@ -337,7 +330,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'title': 'First Ear (stream)', 'title': 'First Ear (stream)',
'description': 'we maraud for ears', 'description': 'we maraud for ears',
}, },
'playlist_mincount': 269, 'playlist_mincount': 267,
}] }]
_TITLE_KEY = 'displayName' _TITLE_KEY = 'displayName'
@ -361,7 +354,7 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
'id': 'maxvibes_jazzcat-on-ness-radio', 'id': 'maxvibes_jazzcat-on-ness-radio',
'title': 'Ness Radio sessions', 'title': 'Ness Radio sessions',
}, },
'playlist_mincount': 59, 'playlist_mincount': 58,
}] }]
_TITLE_KEY = 'name' _TITLE_KEY = 'name'
_DESCRIPTION_KEY = 'description' _DESCRIPTION_KEY = 'description'

View File

@ -27,6 +27,7 @@ from ..utils import (
traverse_obj, traverse_obj,
try_get, try_get,
unescapeHTML, unescapeHTML,
unified_timestamp,
update_url_query, update_url_query,
url_basename, url_basename,
url_or_none, url_or_none,
@ -985,6 +986,7 @@ class NiconicoLiveIE(InfoExtractor):
'quality': 'abr', 'quality': 'abr',
'protocol': 'hls+fmp4', 'protocol': 'hls+fmp4',
'latency': latency, 'latency': latency,
'accessRightMethod': 'single_cookie',
'chasePlay': False, 'chasePlay': False,
}, },
'room': { 'room': {
@ -1005,6 +1007,7 @@ class NiconicoLiveIE(InfoExtractor):
if data.get('type') == 'stream': if data.get('type') == 'stream':
m3u8_url = data['data']['uri'] m3u8_url = data['data']['uri']
qualities = data['data']['availableQualities'] qualities = data['data']['availableQualities']
cookies = data['data']['cookies']
break break
elif data.get('type') == 'disconnect': elif data.get('type') == 'disconnect':
self.write_debug(recv) self.write_debug(recv)
@ -1043,6 +1046,11 @@ class NiconicoLiveIE(InfoExtractor):
**res, **res,
}) })
for cookie in cookies:
self._set_cookie(
cookie['domain'], cookie['name'], cookie['value'],
expire_time=unified_timestamp(cookie['expires']), path=cookie['path'], secure=cookie['secure'])
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True) formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True)
for fmt, q in zip(formats, reversed(qualities[1:])): for fmt, q in zip(formats, reversed(qualities[1:])):
fmt.update({ fmt.update({

View File

@ -14,8 +14,9 @@ from ..utils import (
int_or_none, int_or_none,
parse_qs, parse_qs,
srt_subtitles_timecode, srt_subtitles_timecode,
traverse_obj, url_or_none,
) )
from ..utils.traversal import traverse_obj
class PanoptoBaseIE(InfoExtractor): class PanoptoBaseIE(InfoExtractor):
@ -345,21 +346,16 @@ class PanoptoIE(PanoptoBaseIE):
subtitles = {} subtitles = {}
for stream in streams or []: for stream in streams or []:
stream_formats = [] stream_formats = []
http_stream_url = stream.get('StreamHttpUrl') for stream_url in set(traverse_obj(stream, (('StreamHttpUrl', 'StreamUrl'), {url_or_none}))):
stream_url = stream.get('StreamUrl')
if http_stream_url:
stream_formats.append({'url': http_stream_url})
if stream_url:
media_type = stream.get('ViewerMediaFileTypeName') media_type = stream.get('ViewerMediaFileTypeName')
if media_type in ('hls', ): if media_type in ('hls', ):
m3u8_formats, stream_subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, video_id) fmts, subs = self._extract_m3u8_formats_and_subtitles(stream_url, video_id, m3u8_id='hls', fatal=False)
stream_formats.extend(m3u8_formats) stream_formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, stream_subtitles) self._merge_subtitles(subs, target=subtitles)
else: else:
stream_formats.append({ stream_formats.append({
'url': stream_url, 'url': stream_url,
'ext': media_type,
}) })
for fmt in stream_formats: for fmt in stream_formats:
fmt.update({ fmt.update({

101
yt_dlp/extractor/parti.py Normal file
View File

@ -0,0 +1,101 @@
from .common import InfoExtractor
from ..utils import UserNotLive, int_or_none, parse_iso8601, url_or_none, urljoin
from ..utils.traversal import traverse_obj
class PartiBaseIE(InfoExtractor):
def _call_api(self, path, video_id, note=None):
return self._download_json(
f'https://api-backend.parti.com/parti_v2/profile/{path}', video_id, note)
class PartiVideoIE(PartiBaseIE):
IE_NAME = 'parti:video'
_VALID_URL = r'https?://(?:www\.)?parti\.com/video/(?P<id>\d+)'
_TESTS = [{
'url': 'https://parti.com/video/66284',
'info_dict': {
'id': '66284',
'ext': 'mp4',
'title': 'NOW LIVE ',
'upload_date': '20250327',
'categories': ['Gaming'],
'thumbnail': 'https://assets.parti.com/351424_eb9e5250-2821-484a-9c5f-ca99aa666c87.png',
'channel': 'ItZTMGG',
'timestamp': 1743044379,
},
'params': {'skip_download': 'm3u8'},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._call_api(f'get_livestream_channel_info/recent/{video_id}', video_id)
return {
'id': video_id,
'formats': self._extract_m3u8_formats(
urljoin('https://watch.parti.com', data['livestream_recording']), video_id, 'mp4'),
**traverse_obj(data, {
'title': ('event_title', {str}),
'channel': ('user_name', {str}),
'thumbnail': ('event_file', {url_or_none}),
'categories': ('category_name', {str}, filter, all),
'timestamp': ('event_start_ts', {int_or_none}),
}),
}
class PartiLivestreamIE(PartiBaseIE):
IE_NAME = 'parti:livestream'
_VALID_URL = r'https?://(?:www\.)?parti\.com/creator/(?P<service>[\w]+)/(?P<id>[\w/-]+)'
_TESTS = [{
'url': 'https://parti.com/creator/parti/Capt_Robs_Adventures',
'info_dict': {
'id': 'Capt_Robs_Adventures',
'ext': 'mp4',
'title': r"re:I'm Live on Parti \d{4}-\d{2}-\d{2} \d{2}:\d{2}",
'view_count': int,
'thumbnail': r're:https://assets\.parti\.com/.+\.png',
'timestamp': 1743879776,
'upload_date': '20250405',
'live_status': 'is_live',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://parti.com/creator/discord/sazboxgaming/0',
'only_matching': True,
}]
def _real_extract(self, url):
service, creator_slug = self._match_valid_url(url).group('service', 'id')
encoded_creator_slug = creator_slug.replace('/', '%23')
creator_id = self._call_api(
f'get_user_by_social_media/{service}/{encoded_creator_slug}',
creator_slug, note='Fetching user ID')
data = self._call_api(
f'get_livestream_channel_info/{creator_id}', creator_id,
note='Fetching user profile feed')['channel_info']
if not traverse_obj(data, ('channel', 'is_live', {bool})):
raise UserNotLive(video_id=creator_id)
channel_info = data['channel']
return {
'id': creator_slug,
'formats': self._extract_m3u8_formats(
channel_info['playback_url'], creator_slug, live=True, query={
'token': channel_info['playback_auth_token'],
'player_version': '1.17.0',
}),
'is_live': True,
**traverse_obj(data, {
'title': ('livestream_event_info', 'event_name', {str}),
'description': ('livestream_event_info', 'event_description', {str}),
'thumbnail': ('livestream_event_info', 'livestream_preview_file', {url_or_none}),
'timestamp': ('stream', 'start_time', {parse_iso8601}),
'view_count': ('stream', 'viewer_count', {int_or_none}),
}),
}

View File

@ -1,35 +1,142 @@
import base64 import base64
import io import io
import struct import struct
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
clean_html,
determine_ext, determine_ext,
float_or_none, float_or_none,
make_archive_id,
parse_iso8601,
qualities, qualities,
remove_end, url_or_none,
remove_start,
try_get,
) )
from ..utils.traversal import subs_list_to_dict, traverse_obj
class RTVEALaCartaIE(InfoExtractor): class RTVEBaseIE(InfoExtractor):
# Reimplementation of https://js2.rtve.es/pages/app-player/3.5.1/js/pf_video.js
@staticmethod
def _decrypt_url(png):
encrypted_data = io.BytesIO(base64.b64decode(png)[8:])
while True:
length_data = encrypted_data.read(4)
length = struct.unpack('!I', length_data)[0]
chunk_type = encrypted_data.read(4)
if chunk_type == b'IEND':
break
data = encrypted_data.read(length)
if chunk_type == b'tEXt':
data = bytes(filter(None, data))
alphabet_data, _, url_data = data.partition(b'#')
quality_str, _, url_data = url_data.rpartition(b'%%')
quality_str = quality_str.decode() or ''
alphabet = RTVEBaseIE._get_alphabet(alphabet_data)
url = RTVEBaseIE._get_url(alphabet, url_data)
yield quality_str, url
encrypted_data.read(4) # CRC
@staticmethod
def _get_url(alphabet, url_data):
url = ''
f = 0
e = 3
b = 1
for char in url_data.decode('iso-8859-1'):
if f == 0:
l = int(char) * 10
f = 1
else:
if e == 0:
l += int(char)
url += alphabet[l]
e = (b + 3) % 4
f = 0
b += 1
else:
e -= 1
return url
@staticmethod
def _get_alphabet(alphabet_data):
alphabet = []
e = 0
d = 0
for char in alphabet_data.decode('iso-8859-1'):
if d == 0:
alphabet.append(char)
d = e = (e + 1) % 4
else:
d -= 1
return alphabet
def _extract_png_formats_and_subtitles(self, video_id, media_type='videos'):
formats, subtitles = [], {}
q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
for manager in ('rtveplayw', 'default'):
png = self._download_webpage(
f'http://www.rtve.es/ztnr/movil/thumbnail/{manager}/{media_type}/{video_id}.png',
video_id, 'Downloading url information', query={'q': 'v2'}, fatal=False)
if not png:
continue
for quality, video_url in self._decrypt_url(png):
ext = determine_ext(video_url)
if ext == 'm3u8':
fmts, subs = self._extract_m3u8_formats_and_subtitles(
video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
elif ext == 'mpd':
fmts, subs = self._extract_mpd_formats_and_subtitles(
video_url, video_id, 'dash', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
else:
formats.append({
'format_id': quality,
'quality': q(quality),
'url': video_url,
})
return formats, subtitles
def _parse_metadata(self, metadata):
return traverse_obj(metadata, {
'title': ('title', {str.strip}),
'alt_title': ('alt', {str.strip}),
'description': ('description', {clean_html}),
'timestamp': ('dateOfEmission', {parse_iso8601(delimiter=' ')}),
'release_timestamp': ('publicationDate', {parse_iso8601(delimiter=' ')}),
'modified_timestamp': ('modificationDate', {parse_iso8601(delimiter=' ')}),
'thumbnail': (('thumbnail', 'image', 'imageSEO'), {url_or_none}, any),
'duration': ('duration', {float_or_none(scale=1000)}),
'is_live': ('live', {bool}),
'series': (('programTitle', ('programInfo', 'title')), {clean_html}, any),
})
class RTVEALaCartaIE(RTVEBaseIE):
IE_NAME = 'rtve.es:alacarta' IE_NAME = 'rtve.es:alacarta'
IE_DESC = 'RTVE a la carta' IE_DESC = 'RTVE a la carta and Play'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)' _VALID_URL = [
r'https?://(?:www\.)?rtve\.es/(?:m/)?(?:(?:alacarta|play)/videos|filmoteca)/(?!directo)(?:[^/?#]+/){2}(?P<id>\d+)',
r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/?#]+/video/[^/?#]+/(?P<id>\d+)',
]
_TESTS = [{ _TESTS = [{
'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', 'url': 'http://www.rtve.es/alacarta/videos/la-aventura-del-saber/aventuraentornosilla/3088905/',
'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43', 'md5': 'a964547824359a5753aef09d79fe984b',
'info_dict': { 'info_dict': {
'id': '2491869', 'id': '3088905',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia', 'title': 'En torno a la silla',
'duration': 5024.566, 'duration': 1216.981,
'series': 'Balonmano', 'series': 'La aventura del Saber',
'thumbnail': 'https://img2.rtve.es/v/aventuraentornosilla_3088905.png',
}, },
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
}, { }, {
'note': 'Live stream', 'note': 'Live stream',
'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/', 'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
@ -38,140 +145,88 @@ class RTVEALaCartaIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'is_live': True, 'is_live': True,
'live_status': 'is_live',
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
}, },
'params': { 'params': {
'skip_download': 'live stream', 'skip_download': 'live stream',
}, },
}, { }, {
'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/', 'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
'md5': 'd850f3c8731ea53952ebab489cf81cbf', 'md5': 'f3cf0d1902d008c48c793e736706c174',
'info_dict': { 'info_dict': {
'id': '4236788', 'id': '4236788',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Servir y proteger - Capítulo 104', 'title': 'Episodio 104',
'duration': 3222.0, 'duration': 3222.8,
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
'series': 'Servir y proteger',
}, },
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
}, { }, {
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve', 'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/', 'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.rtve.es/play/videos/saber-vivir/07-07-24/16177116/',
'md5': 'a5b24fcdfa3ff5cb7908aba53d22d4b6',
'info_dict': {
'id': '16177116',
'ext': 'mp4',
'title': 'Saber vivir - 07/07/24',
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
'duration': 2162.68,
'series': 'Saber vivir',
},
}, {
'url': 'https://www.rtve.es/infantil/serie/agus-lui-churros-crafts/video/gusano/7048976/',
'info_dict': {
'id': '7048976',
'ext': 'mp4',
'title': 'Gusano',
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
'duration': 292.86,
'series': 'Agus & Lui: Churros y Crafts',
'_old_archive_ids': ['rtveinfantil 7048976'],
},
}] }]
def _real_initialize(self): def _get_subtitles(self, video_id):
user_agent_b64 = base64.b64encode(self.get_param('http_headers')['User-Agent'].encode()).decode('utf-8') subtitle_data = self._download_json(
self._manager = self._download_json( f'https://api2.rtve.es/api/videos/{video_id}/subtitulos.json', video_id,
'http://www.rtve.es/odin/loki/' + user_agent_b64, 'Downloading subtitles info')
None, 'Fetching manager info')['manager'] return traverse_obj(subtitle_data, ('page', 'items', ..., {
'id': ('lang', {str}),
@staticmethod 'url': ('src', {url_or_none}),
def _decrypt_url(png): }, all, {subs_list_to_dict(lang='es')}))
encrypted_data = io.BytesIO(base64.b64decode(png)[8:])
while True:
length = struct.unpack('!I', encrypted_data.read(4))[0]
chunk_type = encrypted_data.read(4)
if chunk_type == b'IEND':
break
data = encrypted_data.read(length)
if chunk_type == b'tEXt':
alphabet_data, text = data.split(b'\0')
quality, url_data = text.split(b'%%')
alphabet = []
e = 0
d = 0
for l in alphabet_data.decode('iso-8859-1'):
if d == 0:
alphabet.append(l)
d = e = (e + 1) % 4
else:
d -= 1
url = ''
f = 0
e = 3
b = 1
for letter in url_data.decode('iso-8859-1'):
if f == 0:
l = int(letter) * 10
f = 1
else:
if e == 0:
l += int(letter)
url += alphabet[l]
e = (b + 3) % 4
f = 0
b += 1
else:
e -= 1
yield quality.decode(), url
encrypted_data.read(4) # CRC
def _extract_png_formats(self, video_id):
png = self._download_webpage(
f'http://www.rtve.es/ztnr/movil/thumbnail/{self._manager}/videos/{video_id}.png',
video_id, 'Downloading url information', query={'q': 'v2'})
q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
formats = []
for quality, video_url in self._decrypt_url(png):
ext = determine_ext(video_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
video_url, video_id, 'dash', fatal=False))
else:
formats.append({
'format_id': quality,
'quality': q(quality),
'url': video_url,
})
return formats
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
info = self._download_json( metadata = self._download_json(
f'http://www.rtve.es/api/videos/{video_id}/config/alacarta_videos.json', f'http://www.rtve.es/api/videos/{video_id}/config/alacarta_videos.json',
video_id)['page']['items'][0] video_id)['page']['items'][0]
if info['state'] == 'DESPU': if metadata['state'] == 'DESPU':
raise ExtractorError('The video is no longer available', expected=True) raise ExtractorError('The video is no longer available', expected=True)
title = info['title'].strip() formats, subtitles = self._extract_png_formats_and_subtitles(video_id)
formats = self._extract_png_formats(video_id)
subtitles = None self._merge_subtitles(self.extract_subtitles(video_id), target=subtitles)
sbt_file = info.get('sbtFile')
if sbt_file:
subtitles = self.extract_subtitles(video_id, sbt_file)
is_live = info.get('live') is True is_infantil = urllib.parse.urlparse(url).path.startswith('/infantil/')
return { return {
'id': video_id, 'id': video_id,
'title': title,
'formats': formats, 'formats': formats,
'thumbnail': info.get('image'),
'subtitles': subtitles, 'subtitles': subtitles,
'duration': float_or_none(info.get('duration'), 1000), **self._parse_metadata(metadata),
'is_live': is_live, '_old_archive_ids': [make_archive_id('rtveinfantil', video_id)] if is_infantil else None,
'series': info.get('programTitle'),
} }
def _get_subtitles(self, video_id, sub_file):
subs = self._download_json(
sub_file + '.json', video_id,
'Downloading subtitles info')['page']['items']
return dict(
(s['lang'], [{'ext': 'vtt', 'url': s['src']}])
for s in subs)
class RTVEAudioIE(RTVEBaseIE):
class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'rtve.es:audio' IE_NAME = 'rtve.es:audio'
IE_DESC = 'RTVE audio' IE_DESC = 'RTVE audio'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/[^/]+/[^/]+/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/(?:[^/?#]+/){2}(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.rtve.es/alacarta/audios/a-hombros-de-gigantes/palabra-ingeniero-codigos-informaticos-27-04-21/5889192/', 'url': 'https://www.rtve.es/alacarta/audios/a-hombros-de-gigantes/palabra-ingeniero-codigos-informaticos-27-04-21/5889192/',
@ -180,9 +235,11 @@ class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
'id': '5889192', 'id': '5889192',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Códigos informáticos', 'title': 'Códigos informáticos',
'thumbnail': r're:https?://.+/1598856591583.jpg', 'alt_title': 'Códigos informáticos - Escuchar ahora',
'duration': 349.440, 'duration': 349.440,
'series': 'A hombros de gigantes', 'series': 'A hombros de gigantes',
'description': 'md5:72b0d7c1ca20fd327bdfff7ac0171afb',
'thumbnail': 'https://img2.rtve.es/a/palabra-ingeniero-codigos-informaticos-270421_5889192.png',
}, },
}, { }, {
'url': 'https://www.rtve.es/play/audios/en-radio-3/ignatius-farray/5791165/', 'url': 'https://www.rtve.es/play/audios/en-radio-3/ignatius-farray/5791165/',
@ -191,9 +248,11 @@ class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
'id': '5791165', 'id': '5791165',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Ignatius Farray', 'title': 'Ignatius Farray',
'alt_title': 'En Radio 3 - Ignatius Farray - 13/02/21 - escuchar ahora',
'thumbnail': r're:https?://.+/1613243011863.jpg', 'thumbnail': r're:https?://.+/1613243011863.jpg',
'duration': 3559.559, 'duration': 3559.559,
'series': 'En Radio 3', 'series': 'En Radio 3',
'description': 'md5:124aa60b461e0b1724a380bad3bc4040',
}, },
}, { }, {
'url': 'https://www.rtve.es/play/audios/frankenstein-o-el-moderno-prometeo/capitulo-26-ultimo-muerte-victor-juan-jose-plans-mary-shelley/6082623/', 'url': 'https://www.rtve.es/play/audios/frankenstein-o-el-moderno-prometeo/capitulo-26-ultimo-muerte-victor-juan-jose-plans-mary-shelley/6082623/',
@ -202,126 +261,101 @@ class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
'id': '6082623', 'id': '6082623',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Capítulo 26 y último: La muerte de Victor', 'title': 'Capítulo 26 y último: La muerte de Victor',
'alt_title': 'Frankenstein o el moderno Prometeo - Capítulo 26 y último: La muerte de Victor',
'thumbnail': r're:https?://.+/1632147445707.jpg', 'thumbnail': r're:https?://.+/1632147445707.jpg',
'duration': 3174.086, 'duration': 3174.086,
'series': 'Frankenstein o el moderno Prometeo', 'series': 'Frankenstein o el moderno Prometeo',
'description': 'md5:4ee6fcb82ebe2e46d267e1d1c1a8f7b5',
}, },
}] }]
def _extract_png_formats(self, audio_id):
"""
This function retrieves media related png thumbnail which obfuscate
valuable information about the media. This information is decrypted
via base class _decrypt_url function providing media quality and
media url
"""
png = self._download_webpage(
f'http://www.rtve.es/ztnr/movil/thumbnail/{self._manager}/audios/{audio_id}.png',
audio_id, 'Downloading url information', query={'q': 'v2'})
q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
formats = []
for quality, audio_url in self._decrypt_url(png):
ext = determine_ext(audio_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
audio_url, audio_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
audio_url, audio_id, 'dash', fatal=False))
else:
formats.append({
'format_id': quality,
'quality': q(quality),
'url': audio_url,
})
return formats
def _real_extract(self, url): def _real_extract(self, url):
audio_id = self._match_id(url) audio_id = self._match_id(url)
info = self._download_json( metadata = self._download_json(
f'https://www.rtve.es/api/audios/{audio_id}.json', f'https://www.rtve.es/api/audios/{audio_id}.json', audio_id)['page']['items'][0]
audio_id)['page']['items'][0]
formats, subtitles = self._extract_png_formats_and_subtitles(audio_id, media_type='audios')
return { return {
'id': audio_id, 'id': audio_id,
'title': info['title'].strip(), 'formats': formats,
'thumbnail': info.get('thumbnail'), 'subtitles': subtitles,
'duration': float_or_none(info.get('duration'), 1000), **self._parse_metadata(metadata),
'series': try_get(info, lambda x: x['programInfo']['title']),
'formats': self._extract_png_formats(audio_id),
} }
class RTVEInfantilIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE class RTVELiveIE(RTVEBaseIE):
IE_NAME = 'rtve.es:infantil'
IE_DESC = 'RTVE infantil'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'
_TESTS = [{
'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
'md5': '5747454717aedf9f9fdf212d1bcfc48d',
'info_dict': {
'id': '3040283',
'ext': 'mp4',
'title': 'Maneras de vivir',
'thumbnail': r're:https?://.+/1426182947956\.JPG',
'duration': 357.958,
},
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
}]
class RTVELiveIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'rtve.es:live' IE_NAME = 'rtve.es:live'
IE_DESC = 'RTVE.es live streams' IE_DESC = 'RTVE.es live streams'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)' _VALID_URL = [
r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)',
r'https?://(?:www\.)?rtve\.es/play/videos/directo/[^/?#]+/(?P<id>[a-zA-Z0-9-]+)',
]
_TESTS = [{ _TESTS = [{
'url': 'http://www.rtve.es/directo/la-1/', 'url': 'http://www.rtve.es/directo/la-1/',
'info_dict': { 'info_dict': {
'id': 'la-1', 'id': 'la-1',
'ext': 'mp4', 'ext': 'mp4',
'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'live_status': 'is_live',
'title': str,
'description': str,
'thumbnail': r're:https://img\d\.rtve\.es/resources/thumbslive/\d+\.jpg',
'timestamp': int,
'upload_date': str,
}, },
'params': { 'params': {'skip_download': 'live stream'},
'skip_download': 'live stream', }, {
'url': 'https://www.rtve.es/play/videos/directo/deportes/tdp/',
'info_dict': {
'id': 'tdp',
'ext': 'mp4',
'live_status': 'is_live',
'title': str,
'description': str,
'thumbnail': r're:https://img2\d\.rtve\.es/resources/thumbslive/\d+\.jpg',
'timestamp': int,
'upload_date': str,
}, },
'params': {'skip_download': 'live stream'},
}, {
'url': 'http://www.rtve.es/play/videos/directo/canales-lineales/la-1/',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = self._match_valid_url(url) video_id = self._match_id(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
title = remove_start(title, 'Estoy viendo ')
vidplayer_id = self._search_regex( data_setup = self._search_json(
(r'playerId=player([0-9]+)', r'<div[^>]+class="[^"]*videoPlayer[^"]*"[^>]*data-setup=\'',
r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)', webpage, 'data_setup', video_id)
r'data-id=["\'](\d+)'),
webpage, 'internal video ID') formats, subtitles = self._extract_png_formats_and_subtitles(data_setup['idAsset'])
return { return {
'id': video_id, 'id': video_id,
'title': title, **self._search_json_ld(webpage, video_id, fatal=False),
'formats': self._extract_png_formats(vidplayer_id), 'title': self._html_extract_title(webpage),
'formats': formats,
'subtitles': subtitles,
'is_live': True, 'is_live': True,
} }
class RTVETelevisionIE(InfoExtractor): class RTVETelevisionIE(InfoExtractor):
IE_NAME = 'rtve.es:television' IE_NAME = 'rtve.es:television'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml' _VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/?#]+/[^/?#]+/(?P<id>\d+).shtml'
_TEST = { _TEST = {
'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml', 'url': 'https://www.rtve.es/television/20091103/video-inedito-del-8o-programa/299020.shtml',
'info_dict': { 'info_dict': {
'id': '3069778', 'id': '572515',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Documentos TV - La revolución del móvil', 'title': 'Clase inédita',
'duration': 3496.948, 'duration': 335.817,
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
'series': 'El coro de la cárcel',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -332,11 +366,8 @@ class RTVETelevisionIE(InfoExtractor):
page_id = self._match_id(url) page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id) webpage = self._download_webpage(url, page_id)
alacarta_url = self._search_regex( play_url = self._html_search_meta('contentUrl', webpage)
r'data-location="alacarta_videos"[^<]+url&quot;:&quot;(http://www\.rtve\.es/alacarta.+?)&', if play_url is None:
webpage, 'alacarta url', default=None) raise ExtractorError('The webpage doesn\'t contain any video', expected=True)
if alacarta_url is None:
raise ExtractorError(
'The webpage doesn\'t contain any video', expected=True)
return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key()) return self.url_result(play_url, ie=RTVEALaCartaIE.ie_key())

View File

@ -7,7 +7,6 @@ from ..utils import (
ExtractorError, ExtractorError,
UnsupportedError, UnsupportedError,
clean_html, clean_html,
determine_ext,
extract_attributes, extract_attributes,
format_field, format_field,
get_element_by_class, get_element_by_class,
@ -36,7 +35,7 @@ class RumbleEmbedIE(InfoExtractor):
'upload_date': '20191020', 'upload_date': '20191020',
'channel_url': 'https://rumble.com/c/WMAR', 'channel_url': 'https://rumble.com/c/WMAR',
'channel': 'WMAR', 'channel': 'WMAR',
'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.qR4e-small-WMAR-2-News-Latest-Headline.jpg', 'thumbnail': r're:https://.+\.jpg',
'duration': 234, 'duration': 234,
'uploader': 'WMAR', 'uploader': 'WMAR',
'live_status': 'not_live', 'live_status': 'not_live',
@ -52,7 +51,7 @@ class RumbleEmbedIE(InfoExtractor):
'upload_date': '20220217', 'upload_date': '20220217',
'channel_url': 'https://rumble.com/c/CyberTechNews', 'channel_url': 'https://rumble.com/c/CyberTechNews',
'channel': 'CTNews', 'channel': 'CTNews',
'thumbnail': 'https://sp.rmbl.ws/s8/6/7/i/9/h/7i9hd.OvCc.jpg', 'thumbnail': r're:https://.+\.jpg',
'duration': 901, 'duration': 901,
'uploader': 'CTNews', 'uploader': 'CTNews',
'live_status': 'not_live', 'live_status': 'not_live',
@ -114,6 +113,22 @@ class RumbleEmbedIE(InfoExtractor):
'live_status': 'was_live', 'live_status': 'was_live',
}, },
'params': {'skip_download': True}, 'params': {'skip_download': True},
}, {
'url': 'https://rumble.com/embed/v6pezdb',
'info_dict': {
'id': 'v6pezdb',
'ext': 'mp4',
'title': '"Es war einmal ein Mädchen" Ein filmisches Zeitzeugnis aus Leningrad 1944',
'uploader': 'RT DE',
'channel': 'RT DE',
'channel_url': 'https://rumble.com/c/RTDE',
'duration': 309,
'thumbnail': 'https://1a-1791.com/video/fww1/dc/s8/1/n/z/2/y/nz2yy.qR4e-small-Es-war-einmal-ein-Mdchen-Ei.jpg',
'timestamp': 1743703500,
'upload_date': '20250403',
'live_status': 'not_live',
},
'params': {'skip_download': True},
}, { }, {
'url': 'https://rumble.com/embed/ufe9n.v5pv5f', 'url': 'https://rumble.com/embed/ufe9n.v5pv5f',
'only_matching': True, 'only_matching': True,
@ -168,40 +183,42 @@ class RumbleEmbedIE(InfoExtractor):
live_status = None live_status = None
formats = [] formats = []
for ext, ext_info in (video.get('ua') or {}).items(): for format_type, format_info in (video.get('ua') or {}).items():
if isinstance(ext_info, dict): if isinstance(format_info, dict):
for height, video_info in ext_info.items(): for height, video_info in format_info.items():
if not traverse_obj(video_info, ('meta', 'h', {int_or_none})): if not traverse_obj(video_info, ('meta', 'h', {int_or_none})):
video_info.setdefault('meta', {})['h'] = height video_info.setdefault('meta', {})['h'] = height
ext_info = ext_info.values() format_info = format_info.values()
for video_info in ext_info: for video_info in format_info:
meta = video_info.get('meta') or {} meta = video_info.get('meta') or {}
if not video_info.get('url'): if not video_info.get('url'):
continue continue
if ext == 'hls': # With default query params returns m3u8 variants which are duplicates, without returns tar files
if format_type == 'tar':
continue
if format_type == 'hls':
if meta.get('live') is True and video.get('live') == 1: if meta.get('live') is True and video.get('live') == 1:
live_status = 'post_live' live_status = 'post_live'
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
video_info['url'], video_id, video_info['url'], video_id,
ext='mp4', m3u8_id='hls', fatal=False, live=live_status == 'is_live')) ext='mp4', m3u8_id='hls', fatal=False, live=live_status == 'is_live'))
continue continue
timeline = ext == 'timeline' is_timeline = format_type == 'timeline'
if timeline: is_audio = format_type == 'audio'
ext = determine_ext(video_info['url'])
formats.append({ formats.append({
'ext': ext, 'acodec': 'none' if is_timeline else None,
'acodec': 'none' if timeline else None, 'vcodec': 'none' if is_audio else None,
'url': video_info['url'], 'url': video_info['url'],
'format_id': join_nonempty(ext, format_field(meta, 'h', '%sp')), 'format_id': join_nonempty(format_type, format_field(meta, 'h', '%sp')),
'format_note': 'Timeline' if timeline else None, 'format_note': 'Timeline' if is_timeline else None,
'fps': None if timeline else video.get('fps'), 'fps': None if is_timeline or is_audio else video.get('fps'),
**traverse_obj(meta, { **traverse_obj(meta, {
'tbr': 'bitrate', 'tbr': ('bitrate', {int_or_none}),
'filesize': 'size', 'filesize': ('size', {int_or_none}),
'width': 'w', 'width': ('w', {int_or_none}),
'height': 'h', 'height': ('h', {int_or_none}),
}, expected_type=lambda x: int(x) or None), }),
}) })
subtitles = { subtitles = {

View File

@ -513,7 +513,7 @@ class TVPVODBaseIE(InfoExtractor):
class TVPVODVideoIE(TVPVODBaseIE): class TVPVODVideoIE(TVPVODBaseIE):
IE_NAME = 'tvp:vod' IE_NAME = 'tvp:vod'
_VALID_URL = r'https?://vod\.tvp\.pl/(?P<category>[a-z\d-]+,\d+)/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek-\d+,S\d+E\d+)?,(?P<id>\d+)/?(?:[?#]|$)' _VALID_URL = r'https?://vod\.tvp\.pl/(?P<category>[a-z\d-]+,\d+)/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek--?\d+,S-?\d+E-?\d+)?,(?P<id>\d+)/?(?:[?#]|$)'
_TESTS = [{ _TESTS = [{
'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357', 'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357',
@ -568,6 +568,9 @@ class TVPVODVideoIE(TVPVODBaseIE):
'live_status': 'is_live', 'live_status': 'is_live',
'thumbnail': 're:https?://.+', 'thumbnail': 're:https?://.+',
}, },
}, {
'url': 'https://vod.tvp.pl/informacje-i-publicystyka,205/konskie-2025-debata-przedwyborcza-odcinki,2028435/odcinek--1,S01E-1,2028419',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -1,13 +1,21 @@
import json import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import clean_html, remove_end, unified_timestamp, url_or_none from ..utils import (
from ..utils.traversal import traverse_obj clean_html,
extract_attributes,
parse_qs,
remove_end,
require,
unified_timestamp,
url_or_none,
)
from ..utils.traversal import find_element, traverse_obj
class TvwIE(InfoExtractor): class TvwIE(InfoExtractor):
IE_NAME = 'tvw'
_VALID_URL = r'https?://(?:www\.)?tvw\.org/video/(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:www\.)?tvw\.org/video/(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://tvw.org/video/billy-frank-jr-statue-maquette-unveiling-ceremony-2024011211/', 'url': 'https://tvw.org/video/billy-frank-jr-statue-maquette-unveiling-ceremony-2024011211/',
'md5': '9ceb94fe2bb7fd726f74f16356825703', 'md5': '9ceb94fe2bb7fd726f74f16356825703',
@ -115,3 +123,43 @@ class TvwIE(InfoExtractor):
'is_live': ('eventStatus', {lambda x: x == 'live'}), 'is_live': ('eventStatus', {lambda x: x == 'live'}),
}), }),
} }
class TvwTvChannelsIE(InfoExtractor):
IE_NAME = 'tvw:tvchannels'
_VALID_URL = r'https?://(?:www\.)?tvw\.org/tvchannels/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://tvw.org/tvchannels/air/',
'info_dict': {
'id': 'air',
'ext': 'mp4',
'title': r're:TVW Cable Channel Live Stream',
'thumbnail': r're:https?://.+/.+\.(?:jpe?g|png)$',
'live_status': 'is_live',
},
}, {
'url': 'https://tvw.org/tvchannels/tvw2/',
'info_dict': {
'id': 'tvw2',
'ext': 'mp4',
'title': r're:TVW-2 Broadcast Channel',
'thumbnail': r're:https?://.+/.+\.(?:jpe?g|png)$',
'live_status': 'is_live',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
m3u8_url = traverse_obj(webpage, (
{find_element(id='invintus-persistent-stream-frame', html=True)}, {extract_attributes},
'src', {parse_qs}, 'encoder', 0, {json.loads}, 'live247URI', {url_or_none}, {require('stream url')}))
return {
'id': video_id,
'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', live=True),
'title': remove_end(self._og_search_title(webpage, default=None), ' - TVW'),
'thumbnail': self._og_search_thumbnail(webpage, default=None),
'is_live': True,
}

View File

@ -14,12 +14,13 @@ from ..utils import (
parse_duration, parse_duration,
qualities, qualities,
str_to_int, str_to_int,
traverse_obj,
try_get, try_get,
unified_timestamp, unified_timestamp,
url_or_none,
urlencode_postdata, urlencode_postdata,
urljoin, urljoin,
) )
from ..utils.traversal import traverse_obj
class TwitCastingIE(InfoExtractor): class TwitCastingIE(InfoExtractor):
@ -138,13 +139,7 @@ class TwitCastingIE(InfoExtractor):
r'data-toggle="true"[^>]+datetime="([^"]+)"', r'data-toggle="true"[^>]+datetime="([^"]+)"',
webpage, 'datetime', None)) webpage, 'datetime', None))
stream_server_data = self._download_json(
f'https://twitcasting.tv/streamserver.php?target={uploader_id}&mode=client', video_id,
'Downloading live info', fatal=False)
is_live = any(f'data-{x}' in webpage for x in ['is-onlive="true"', 'live-type="live"', 'status="online"']) is_live = any(f'data-{x}' in webpage for x in ['is-onlive="true"', 'live-type="live"', 'status="online"'])
if not traverse_obj(stream_server_data, 'llfmp4') and is_live:
self.raise_login_required(method='cookies')
base_dict = { base_dict = {
'title': title, 'title': title,
@ -165,28 +160,37 @@ class TwitCastingIE(InfoExtractor):
return [data_movie_url] return [data_movie_url]
m3u8_urls = (try_get(webpage, find_dmu, list) m3u8_urls = (try_get(webpage, find_dmu, list)
or traverse_obj(video_js_data, (..., 'source', 'url')) or traverse_obj(video_js_data, (..., 'source', 'url')))
or ([f'https://twitcasting.tv/{uploader_id}/metastream.m3u8'] if is_live else None))
if not m3u8_urls:
raise ExtractorError('Failed to get m3u8 playlist')
if is_live: if is_live:
m3u8_url = m3u8_urls[0] stream_data = self._download_json(
formats = self._extract_m3u8_formats( 'https://twitcasting.tv/streamserver.php',
m3u8_url, video_id, ext='mp4', m3u8_id='hls', video_id, 'Downloading live info', query={
live=True, headers=self._M3U8_HEADERS) 'target': uploader_id,
'mode': 'client',
'player': 'pc_web',
})
if traverse_obj(stream_server_data, ('hls', 'source')): formats = []
formats.extend(self._extract_m3u8_formats( # low: 640x360, medium: 1280x720, high: 1920x1080
m3u8_url, video_id, ext='mp4', m3u8_id='source', qq = qualities(['low', 'medium', 'high'])
live=True, query={'mode': 'source'}, for quality, m3u8_url in traverse_obj(stream_data, (
note='Downloading source quality m3u8', 'tc-hls', 'streams', {dict.items}, lambda _, v: url_or_none(v[1]),
headers=self._M3U8_HEADERS, fatal=False)) )):
formats.append({
'url': m3u8_url,
'format_id': f'hls-{quality}',
'ext': 'mp4',
'quality': qq(quality),
'protocol': 'm3u8',
'http_headers': self._M3U8_HEADERS,
})
if websockets: if websockets:
qq = qualities(['base', 'mobilesource', 'main']) qq = qualities(['base', 'mobilesource', 'main'])
streams = traverse_obj(stream_server_data, ('llfmp4', 'streams')) or {} for mode, ws_url in traverse_obj(stream_data, (
for mode, ws_url in streams.items(): 'llfmp4', 'streams', {dict.items}, lambda _, v: url_or_none(v[1]),
)):
formats.append({ formats.append({
'url': ws_url, 'url': ws_url,
'format_id': f'ws-{mode}', 'format_id': f'ws-{mode}',
@ -197,10 +201,15 @@ class TwitCastingIE(InfoExtractor):
'protocol': 'websocket_frag', 'protocol': 'websocket_frag',
}) })
if not formats:
self.raise_login_required()
infodict = { infodict = {
'formats': formats, 'formats': formats,
'_format_sort_fields': ('source', ), '_format_sort_fields': ('source', ),
} }
elif not m3u8_urls:
raise ExtractorError('Failed to get m3u8 playlist')
elif len(m3u8_urls) == 1: elif len(m3u8_urls) == 1:
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
m3u8_urls[0], video_id, 'mp4', headers=self._M3U8_HEADERS) m3u8_urls[0], video_id, 'mp4', headers=self._M3U8_HEADERS)

View File

@ -244,9 +244,13 @@ class VimeoBaseInfoExtractor(InfoExtractor):
join_nonempty(f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':'), join_nonempty(f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':'),
video_id, 'Downloading API JSON', headers={ video_id, 'Downloading API JSON', headers={
'Authorization': f'jwt {jwt_token}', 'Authorization': f'jwt {jwt_token}',
'Accept': 'application/json', 'Accept': 'application/vnd.vimeo.*+json;version=3.4.10',
}, query={ }, query={
# TODO: Reverse-engineer generating the 'anon_signature' param
# Ref: https://f.vimeocdn.com/js_opt/app/vimeo-next/_next/static/chunks/60908-af70235e46909bce.js
'outro': 'beginning', # Needed to avoid https://github.com/yt-dlp/yt-dlp/issues/12974
'fields': ','.join(( 'fields': ','.join((
# 'embed_player_config_url' is a viable alternative to 'config_url'
'config_url', 'created_time', 'description', 'download', 'license', 'config_url', 'created_time', 'description', 'download', 'license',
'metadata.connections.comments.total', 'metadata.connections.likes.total', 'metadata.connections.comments.total', 'metadata.connections.likes.total',
'release_time', 'stats.plays')), 'release_time', 'stats.plays')),
@ -410,6 +414,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'duration': 10, 'duration': 10,
'comment_count': int, 'comment_count': int,
'like_count': int, 'like_count': int,
'view_count': int,
'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d', 'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d',
}, },
'params': { 'params': {
@ -500,15 +505,16 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader': 'The DMCI', 'uploader': 'The DMCI',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/dmci', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/dmci',
'uploader_id': 'dmci', 'uploader_id': 'dmci',
'timestamp': 1324343742, 'timestamp': 1324361742,
'upload_date': '20111220', 'upload_date': '20111220',
'description': 'md5:ae23671e82d05415868f7ad1aec21147', 'description': 'md5:f37b4ad0f3ded6fa16f38ecde16c3c44',
'duration': 60, 'duration': 60,
'comment_count': int, 'comment_count': int,
'view_count': int, 'view_count': int,
'thumbnail': 'https://i.vimeocdn.com/video/231174622-dd07f015e9221ff529d451e1cc31c982b5d87bfafa48c4189b1da72824ee289a-d', 'thumbnail': 'https://i.vimeocdn.com/video/231174622-dd07f015e9221ff529d451e1cc31c982b5d87bfafa48c4189b1da72824ee289a-d',
'like_count': int, 'like_count': int,
'tags': 'count:11', 'release_timestamp': 1324361742,
'release_date': '20111220',
}, },
# 'params': {'format': 'Original'}, # 'params': {'format': 'Original'},
'expected_warnings': ['Failed to parse XML: not well-formed'], 'expected_warnings': ['Failed to parse XML: not well-formed'],
@ -521,15 +527,18 @@ class VimeoIE(VimeoBaseInfoExtractor):
'id': '393756517', 'id': '393756517',
# 'ext': 'mov', # 'ext': 'mov',
'ext': 'mp4', 'ext': 'mp4',
'timestamp': 1582642091, 'timestamp': 1582660091,
'uploader_id': 'frameworkla', 'uploader_id': 'frameworkla',
'title': 'Straight To Hell - Sabrina: Netflix', 'title': 'Straight To Hell - Sabrina: Netflix',
'uploader': 'Framework Studio', 'uploader': 'Framework Studio',
'description': 'md5:f2edc61af3ea7a5592681ddbb683db73',
'upload_date': '20200225', 'upload_date': '20200225',
'duration': 176, 'duration': 176,
'thumbnail': 'https://i.vimeocdn.com/video/859377297-836494a4ef775e9d4edbace83937d9ad34dc846c688c0c419c0e87f7ab06c4b3-d', 'thumbnail': 'https://i.vimeocdn.com/video/859377297-836494a4ef775e9d4edbace83937d9ad34dc846c688c0c419c0e87f7ab06c4b3-d',
'uploader_url': 'https://vimeo.com/frameworkla', 'uploader_url': 'https://vimeo.com/frameworkla',
'comment_count': int,
'like_count': int,
'release_timestamp': 1582660091,
'release_date': '20200225',
}, },
# 'params': {'format': 'source'}, # 'params': {'format': 'source'},
'expected_warnings': ['Failed to parse XML: not well-formed'], 'expected_warnings': ['Failed to parse XML: not well-formed'],
@ -630,7 +639,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'description': str, # FIXME: Dynamic SEO spam description 'description': str, # FIXME: Dynamic SEO spam description
'upload_date': '20150209', 'upload_date': '20150209',
'timestamp': 1423518307, 'timestamp': 1423518307,
'thumbnail': 'https://i.vimeocdn.com/video/default', 'thumbnail': r're:https://i\.vimeocdn\.com/video/default',
'duration': 10, 'duration': 10,
'like_count': int, 'like_count': int,
'uploader_url': 'https://vimeo.com/user20132939', 'uploader_url': 'https://vimeo.com/user20132939',
@ -667,6 +676,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'like_count': int, 'like_count': int,
'uploader_url': 'https://vimeo.com/aliniamedia', 'uploader_url': 'https://vimeo.com/aliniamedia',
'release_date': '20160329', 'release_date': '20160329',
'view_count': int,
}, },
'params': {'skip_download': True}, 'params': {'skip_download': True},
'expected_warnings': ['Failed to parse XML: not well-formed'], 'expected_warnings': ['Failed to parse XML: not well-formed'],
@ -678,18 +688,19 @@ class VimeoIE(VimeoBaseInfoExtractor):
# 'ext': 'm4v', # 'ext': 'm4v',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Eastnor Castle 2015 Firework Champions - The Promo!', 'title': 'Eastnor Castle 2015 Firework Champions - The Promo!',
'description': 'md5:5967e090768a831488f6e74b7821b3c1', 'description': 'md5:9441e6829ae94f380cc6417d982f63ac',
'uploader_id': 'fireworkchampions', 'uploader_id': 'fireworkchampions',
'uploader': 'Firework Champions', 'uploader': 'Firework Champions',
'upload_date': '20150910', 'upload_date': '20150910',
'timestamp': 1441901895, 'timestamp': 1441916295,
'thumbnail': 'https://i.vimeocdn.com/video/534715882-6ff8e4660cbf2fea68282876d8d44f318825dfe572cc4016e73b3266eac8ae3a-d', 'thumbnail': 'https://i.vimeocdn.com/video/534715882-6ff8e4660cbf2fea68282876d8d44f318825dfe572cc4016e73b3266eac8ae3a-d',
'uploader_url': 'https://vimeo.com/fireworkchampions', 'uploader_url': 'https://vimeo.com/fireworkchampions',
'tags': 'count:6',
'duration': 229, 'duration': 229,
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'comment_count': int, 'comment_count': int,
'release_timestamp': 1441916295,
'release_date': '20150910',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -820,7 +831,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader': 'Raja Virdi', 'uploader': 'Raja Virdi',
'uploader_id': 'rajavirdi', 'uploader_id': 'rajavirdi',
'uploader_url': 'https://vimeo.com/rajavirdi', 'uploader_url': 'https://vimeo.com/rajavirdi',
'duration': 309, 'duration': 300,
'thumbnail': r're:https://i\.vimeocdn\.com/video/1716727772-[\da-f]+-d', 'thumbnail': r're:https://i\.vimeocdn\.com/video/1716727772-[\da-f]+-d',
}, },
# 'params': {'format': 'source'}, # 'params': {'format': 'source'},
@ -1122,7 +1133,7 @@ class VimeoOndemandIE(VimeoIE): # XXX: Do not subclass from concrete IE
'description': 'md5:aeeba3dbd4d04b0fa98a4fdc9c639998', 'description': 'md5:aeeba3dbd4d04b0fa98a4fdc9c639998',
'upload_date': '20140906', 'upload_date': '20140906',
'timestamp': 1410032453, 'timestamp': 1410032453,
'thumbnail': 'https://i.vimeocdn.com/video/488238335-d7bf151c364cff8d467f1b73784668fe60aae28a54573a35d53a1210ae283bd8-d_1280', 'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
'comment_count': int, 'comment_count': int,
'license': 'https://creativecommons.org/licenses/by-nc-nd/3.0/', 'license': 'https://creativecommons.org/licenses/by-nc-nd/3.0/',
'duration': 53, 'duration': 53,
@ -1132,7 +1143,7 @@ class VimeoOndemandIE(VimeoIE): # XXX: Do not subclass from concrete IE
'params': { 'params': {
'format': 'best[protocol=https]', 'format': 'best[protocol=https]',
}, },
'expected_warnings': ['Unable to download JSON metadata'], 'expected_warnings': ['Failed to parse XML: not well-formed'],
}, { }, {
# requires Referer to be passed along with og:video:url # requires Referer to be passed along with og:video:url
'url': 'https://vimeo.com/ondemand/36938/126682985', 'url': 'https://vimeo.com/ondemand/36938/126682985',
@ -1149,13 +1160,14 @@ class VimeoOndemandIE(VimeoIE): # XXX: Do not subclass from concrete IE
'duration': 121, 'duration': 121,
'comment_count': int, 'comment_count': int,
'view_count': int, 'view_count': int,
'thumbnail': 'https://i.vimeocdn.com/video/517077723-7066ae1d9a79d3eb361334fb5d58ec13c8f04b52f8dd5eadfbd6fb0bcf11f613-d_1280', 'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
'like_count': int, 'like_count': int,
'tags': 'count:5',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'expected_warnings': ['Unable to download JSON metadata'], 'expected_warnings': ['Failed to parse XML: not well-formed'],
}, { }, {
'url': 'https://vimeo.com/ondemand/nazmaalik', 'url': 'https://vimeo.com/ondemand/nazmaalik',
'only_matching': True, 'only_matching': True,
@ -1237,7 +1249,7 @@ class VimeoUserIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE
_TESTS = [{ _TESTS = [{
'url': 'https://vimeo.com/nkistudio/videos', 'url': 'https://vimeo.com/nkistudio/videos',
'info_dict': { 'info_dict': {
'title': 'Nki', 'title': 'AKAMA',
'id': 'nkistudio', 'id': 'nkistudio',
}, },
'playlist_mincount': 66, 'playlist_mincount': 66,
@ -1370,10 +1382,10 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
'uploader_id': 'user170863801', 'uploader_id': 'user170863801',
'uploader_url': 'https://vimeo.com/user170863801', 'uploader_url': 'https://vimeo.com/user170863801',
'duration': 30, 'duration': 30,
'thumbnail': 'https://i.vimeocdn.com/video/1912612821-09a43bd2e75c203d503aed89de7534f28fc4474a48f59c51999716931a246af5-d_1280', 'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
'expected_warnings': ['Failed to parse XML'], 'expected_warnings': ['Failed to parse XML: not well-formed'],
}, { }, {
'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d', 'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
'md5': 'c507a72f780cacc12b2248bb4006d253', 'md5': 'c507a72f780cacc12b2248bb4006d253',
@ -1528,20 +1540,22 @@ class VimeoProIE(VimeoBaseInfoExtractor):
'uploader_id': 'openstreetmapus', 'uploader_id': 'openstreetmapus',
'uploader': 'OpenStreetMap US', 'uploader': 'OpenStreetMap US',
'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography', 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
'description': 'md5:2c362968038d4499f4d79f88458590c1', 'description': 'md5:8cf69a1a435f2d763f4adf601e9c3125',
'duration': 1595, 'duration': 1595,
'upload_date': '20130610', 'upload_date': '20130610',
'timestamp': 1370893156, 'timestamp': 1370907556,
'license': 'by', 'license': 'by',
'thumbnail': 'https://i.vimeocdn.com/video/440260469-19b0d92fca3bd84066623b53f1eb8aaa3980c6c809e2d67b6b39ab7b4a77a344-d_960', 'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
'view_count': int, 'view_count': int,
'comment_count': int, 'comment_count': int,
'like_count': int, 'like_count': int,
'tags': 'count:1', 'release_timestamp': 1370907556,
'release_date': '20130610',
}, },
'params': { 'params': {
'format': 'best[protocol=https]', 'format': 'best[protocol=https]',
}, },
'expected_warnings': ['Failed to parse XML: not well-formed'],
}, { }, {
# password-protected VimeoPro page with Vimeo player embed # password-protected VimeoPro page with Vimeo player embed
'url': 'https://vimeopro.com/cadfem/simulation-conference-mechanische-systeme-in-perfektion', 'url': 'https://vimeopro.com/cadfem/simulation-conference-mechanische-systeme-in-perfektion',
@ -1549,7 +1563,7 @@ class VimeoProIE(VimeoBaseInfoExtractor):
'id': '764543723', 'id': '764543723',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Mechanische Systeme in Perfektion: Realität erfassen, Innovation treiben', 'title': 'Mechanische Systeme in Perfektion: Realität erfassen, Innovation treiben',
'thumbnail': 'https://i.vimeocdn.com/video/1543784598-a1a750494a485e601110136b9fe11e28c2131942452b3a5d30391cb3800ca8fd-d_1280', 'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
'description': 'md5:2a9d195cd1b0f6f79827107dc88c2420', 'description': 'md5:2a9d195cd1b0f6f79827107dc88c2420',
'uploader': 'CADFEM', 'uploader': 'CADFEM',
'uploader_id': 'cadfem', 'uploader_id': 'cadfem',
@ -1561,6 +1575,7 @@ class VimeoProIE(VimeoBaseInfoExtractor):
'videopassword': 'Conference2022', 'videopassword': 'Conference2022',
'skip_download': True, 'skip_download': True,
}, },
'expected_warnings': ['Failed to parse XML: not well-formed'],
}] }]
def _real_extract(self, url): def _real_extract(self, url):

20
yt_dlp/extractor/vk.py Normal file → Executable file
View File

@ -300,6 +300,24 @@ class VKIE(VKBaseIE):
'upload_date': '20250130', 'upload_date': '20250130',
}, },
}, },
{
'url': 'https://vkvideo.ru/video-50883936_456244102',
'info_dict': {
'id': '-50883936_456244102',
'ext': 'mp4',
'title': 'Добивание Украины // Техник в коме // МОЯ ЗЛОСТЬ №140',
'description': 'md5:a9bc46181e9ebd0fdd82cef6c0191140',
'uploader': 'Стас Ай, Как Просто!',
'uploader_id': '-50883936',
'comment_count': int,
'like_count': int,
'duration': 4651,
'thumbnail': r're:https?://.+\.jpg',
'chapters': 'count:59',
'timestamp': 1743333869,
'upload_date': '20250330',
},
},
{ {
# live stream, hls and rtmp links, most likely already finished live # live stream, hls and rtmp links, most likely already finished live
# stream by the time you are reading this comment # stream by the time you are reading this comment
@ -540,7 +558,7 @@ class VKIE(VKBaseIE):
'title': ('md_title', {unescapeHTML}), 'title': ('md_title', {unescapeHTML}),
'description': ('description', {clean_html}, filter), 'description': ('description', {clean_html}, filter),
'thumbnail': ('jpg', {url_or_none}), 'thumbnail': ('jpg', {url_or_none}),
'uploader': ('md_author', {str}), 'uploader': ('md_author', {unescapeHTML}),
'uploader_id': (('author_id', 'authorId'), {str_or_none}, any), 'uploader_id': (('author_id', 'authorId'), {str_or_none}, any),
'duration': ('duration', {int_or_none}), 'duration': ('duration', {int_or_none}),
'chapters': ('time_codes', lambda _, v: isinstance(v['time'], int), { 'chapters': ('time_codes', lambda _, v: isinstance(v['time'], int), {

View File

@ -2,15 +2,17 @@ import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
bug_reports_message,
determine_ext, determine_ext,
extract_attributes,
int_or_none, int_or_none,
lowercase_escape, lowercase_escape,
parse_qs, parse_qs,
traverse_obj, qualities,
try_get, try_get,
update_url_query,
url_or_none, url_or_none,
) )
from ..utils.traversal import traverse_obj
class YandexVideoIE(InfoExtractor): class YandexVideoIE(InfoExtractor):
@ -186,7 +188,22 @@ class YandexVideoPreviewIE(InfoExtractor):
return self.url_result(data_json['video']['url']) return self.url_result(data_json['video']['url'])
class ZenYandexIE(InfoExtractor): class ZenYandexBaseIE(InfoExtractor):
def _fetch_ssr_data(self, url, video_id):
webpage = self._download_webpage(url, video_id)
redirect = self._search_json(
r'(?:var|let|const)\s+it\s*=', webpage, 'redirect', video_id, default={}).get('retpath')
if redirect:
video_id = self._match_id(redirect)
webpage = self._download_webpage(redirect, video_id, note='Redirecting')
return video_id, self._search_json(
r'(?:var|let|const)\s+_params\s*=\s*\(', webpage, 'metadata', video_id,
contains_pattern=r'{["\']ssrData.+}')['ssrData']
class ZenYandexIE(ZenYandexBaseIE):
IE_NAME = 'dzen.ru'
IE_DESC = 'Дзен (dzen) formerly Яндекс.Дзен (Yandex Zen)'
_VALID_URL = r'https?://(zen\.yandex|dzen)\.ru(?:/video)?/(media|watch)/(?:(?:id/[^/]+/|[^/]+/)(?:[a-z0-9-]+)-)?(?P<id>[a-z0-9-]+)' _VALID_URL = r'https?://(zen\.yandex|dzen)\.ru(?:/video)?/(media|watch)/(?:(?:id/[^/]+/|[^/]+/)(?:[a-z0-9-]+)-)?(?P<id>[a-z0-9-]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://zen.yandex.ru/media/id/606fd806cc13cb3c58c05cf5/vot-eto-focus-dedy-morozy-na-gidrociklah-60c7c443da18892ebfe85ed7', 'url': 'https://zen.yandex.ru/media/id/606fd806cc13cb3c58c05cf5/vot-eto-focus-dedy-morozy-na-gidrociklah-60c7c443da18892ebfe85ed7',
@ -216,6 +233,7 @@ class ZenYandexIE(InfoExtractor):
'timestamp': 1573465585, 'timestamp': 1573465585,
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
'skip': 'The page does not exist',
}, { }, {
'url': 'https://zen.yandex.ru/video/watch/6002240ff8b1af50bb2da5e3', 'url': 'https://zen.yandex.ru/video/watch/6002240ff8b1af50bb2da5e3',
'info_dict': { 'info_dict': {
@ -227,6 +245,9 @@ class ZenYandexIE(InfoExtractor):
'uploader': 'TechInsider', 'uploader': 'TechInsider',
'timestamp': 1611378221, 'timestamp': 1611378221,
'upload_date': '20210123', 'upload_date': '20210123',
'view_count': int,
'duration': 243,
'tags': ['опыт', 'эксперимент', 'огонь'],
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, { }, {
@ -240,6 +261,9 @@ class ZenYandexIE(InfoExtractor):
'uploader': 'TechInsider', 'uploader': 'TechInsider',
'upload_date': '20210123', 'upload_date': '20210123',
'timestamp': 1611378221, 'timestamp': 1611378221,
'view_count': int,
'duration': 243,
'tags': ['опыт', 'эксперимент', 'огонь'],
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, { }, {
@ -252,44 +276,56 @@ class ZenYandexIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) video_id, ssr_data = self._fetch_ssr_data(url, video_id)
redirect = self._search_json(r'var it\s*=', webpage, 'redirect', id, default={}).get('retpath') video_data = ssr_data['videoMetaResponse']
if redirect:
video_id = self._match_id(redirect)
webpage = self._download_webpage(redirect, video_id, note='Redirecting')
data_json = self._search_json(
r'("data"\s*:|data\s*=)', webpage, 'metadata', video_id, contains_pattern=r'{["\']_*serverState_*video.+}')
serverstate = self._search_regex(r'(_+serverState_+video-site_[^_]+_+)', webpage, 'server state')
uploader = self._search_regex(r'(<a\s*class=["\']card-channel-link[^"\']+["\'][^>]+>)',
webpage, 'uploader', default='<a>')
uploader_name = extract_attributes(uploader).get('aria-label')
item_id = traverse_obj(data_json, (serverstate, 'videoViewer', 'openedItemId', {str}))
video_json = traverse_obj(data_json, (serverstate, 'videoViewer', 'items', item_id, {dict})) or {}
formats, subtitles = [], {} formats, subtitles = [], {}
for s_url in traverse_obj(video_json, ('video', 'streams', ..., {url_or_none})): quality = qualities(('4', '0', '1', '2', '3', '5', '6', '7'))
# Deduplicate stream URLs. The "dzen_dash" query parameter is present in some URLs but can be omitted
stream_urls = set(traverse_obj(video_data, (
'video', ('id', ('streams', ...), ('mp4Streams', ..., 'url'), ('oneVideoStreams', ..., 'url')),
{url_or_none}, {update_url_query(query={'dzen_dash': []})})))
for s_url in stream_urls:
ext = determine_ext(s_url) ext = determine_ext(s_url)
if ext == 'mpd': content_type = traverse_obj(parse_qs(s_url), ('ct', 0))
fmts, subs = self._extract_mpd_formats_and_subtitles(s_url, video_id, mpd_id='dash') if ext == 'mpd' or content_type == '6':
elif ext == 'm3u8': fmts, subs = self._extract_mpd_formats_and_subtitles(s_url, video_id, mpd_id='dash', fatal=False)
fmts, subs = self._extract_m3u8_formats_and_subtitles(s_url, video_id, 'mp4') elif ext == 'm3u8' or content_type == '8':
fmts, subs = self._extract_m3u8_formats_and_subtitles(s_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
elif content_type == '0':
format_type = traverse_obj(parse_qs(s_url), ('type', 0))
formats.append({
'url': s_url,
'format_id': format_type,
'ext': 'mp4',
'quality': quality(format_type),
})
continue
else:
self.report_warning(f'Unsupported stream URL: {s_url}{bug_reports_message()}')
continue
formats.extend(fmts) formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs) self._merge_subtitles(subs, target=subtitles)
return { return {
'id': video_id, 'id': video_id,
'title': video_json.get('title') or self._og_search_title(webpage),
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'duration': int_or_none(video_json.get('duration')), **traverse_obj(video_data, {
'view_count': int_or_none(video_json.get('views')), 'title': ('title', {str}),
'timestamp': int_or_none(video_json.get('publicationDate')), 'description': ('description', {str}),
'uploader': uploader_name or data_json.get('authorName') or try_get(data_json, lambda x: x['publisher']['name']), 'thumbnail': ('image', {url_or_none}),
'description': video_json.get('description') or self._og_search_description(webpage), 'duration': ('video', 'duration', {int_or_none}),
'thumbnail': self._og_search_thumbnail(webpage) or try_get(data_json, lambda x: x['og']['imageUrl']), 'view_count': ('video', 'views', {int_or_none}),
'timestamp': ('publicationDate', {int_or_none}),
'tags': ('tags', ..., {str}),
'uploader': ('source', 'title', {str}),
}),
} }
class ZenYandexChannelIE(InfoExtractor): class ZenYandexChannelIE(ZenYandexBaseIE):
IE_NAME = 'dzen.ru:channel'
_VALID_URL = r'https?://(zen\.yandex|dzen)\.ru/(?!media|video)(?:id/)?(?P<id>[a-z0-9-_]+)' _VALID_URL = r'https?://(zen\.yandex|dzen)\.ru/(?!media|video)(?:id/)?(?P<id>[a-z0-9-_]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://zen.yandex.ru/tok_media', 'url': 'https://zen.yandex.ru/tok_media',
@ -323,8 +359,8 @@ class ZenYandexChannelIE(InfoExtractor):
'url': 'https://zen.yandex.ru/jony_me', 'url': 'https://zen.yandex.ru/jony_me',
'info_dict': { 'info_dict': {
'id': 'jony_me', 'id': 'jony_me',
'description': 'md5:ce0a5cad2752ab58701b5497835b2cc5', 'description': 'md5:7c30d11dc005faba8826feae99da3113',
'title': 'JONY ', 'title': 'JONY',
}, },
'playlist_count': 18, 'playlist_count': 18,
}, { }, {
@ -333,9 +369,8 @@ class ZenYandexChannelIE(InfoExtractor):
'url': 'https://zen.yandex.ru/tatyanareva', 'url': 'https://zen.yandex.ru/tatyanareva',
'info_dict': { 'info_dict': {
'id': 'tatyanareva', 'id': 'tatyanareva',
'description': 'md5:40a1e51f174369ec3ba9d657734ac31f', 'description': 'md5:92e56fa730a932ca2483ba5c2186ad96',
'title': 'Татьяна Рева', 'title': 'Татьяна Рева',
'entries': 'maxcount:200',
}, },
'playlist_mincount': 46, 'playlist_mincount': 46,
}, { }, {
@ -348,43 +383,31 @@ class ZenYandexChannelIE(InfoExtractor):
'playlist_mincount': 657, 'playlist_mincount': 657,
}] }]
def _entries(self, item_id, server_state_json, server_settings_json): def _entries(self, feed_data, channel_id):
items = (traverse_obj(server_state_json, ('feed', 'items', ...))
or traverse_obj(server_settings_json, ('exportData', 'items', ...)))
more = (traverse_obj(server_state_json, ('links', 'more'))
or traverse_obj(server_settings_json, ('exportData', 'more', 'link')))
next_page_id = None next_page_id = None
for page in itertools.count(1): for page in itertools.count(1):
for item in items or []: for item in traverse_obj(feed_data, (
if item.get('type') != 'gif': (None, ('items', lambda _, v: v['tab'] in ('shorts', 'longs'))),
continue 'items', lambda _, v: url_or_none(v['link']),
video_id = traverse_obj(item, 'publication_id', 'publicationId') or '' )):
yield self.url_result(item['link'], ZenYandexIE, video_id.split(':')[-1]) yield self.url_result(item['link'], ZenYandexIE, item.get('id'), title=item.get('title'))
more = traverse_obj(feed_data, ('more', 'link', {url_or_none}))
current_page_id = next_page_id current_page_id = next_page_id
next_page_id = traverse_obj(parse_qs(more), ('next_page_id', -1)) next_page_id = traverse_obj(parse_qs(more), ('next_page_id', -1))
if not all((more, items, next_page_id, next_page_id != current_page_id)): if not all((more, next_page_id, next_page_id != current_page_id)):
break break
data = self._download_json(more, item_id, note=f'Downloading Page {page}') feed_data = self._download_json(more, channel_id, note=f'Downloading Page {page}')
items, more = data.get('items'), traverse_obj(data, ('more', 'link'))
def _real_extract(self, url): def _real_extract(self, url):
item_id = self._match_id(url) channel_id = self._match_id(url)
webpage = self._download_webpage(url, item_id) channel_id, ssr_data = self._fetch_ssr_data(url, channel_id)
redirect = self._search_json( channel_data = ssr_data['exportResponse']
r'var it\s*=', webpage, 'redirect', item_id, default={}).get('retpath')
if redirect:
item_id = self._match_id(redirect)
webpage = self._download_webpage(redirect, item_id, note='Redirecting')
data = self._search_json(
r'("data"\s*:|data\s*=)', webpage, 'channel data', item_id, contains_pattern=r'{\"__serverState__.+}')
server_state_json = traverse_obj(data, lambda k, _: k.startswith('__serverState__'), get_all=False)
server_settings_json = traverse_obj(data, lambda k, _: k.startswith('__serverSettings__'), get_all=False)
return self.playlist_result( return self.playlist_result(
self._entries(item_id, server_state_json, server_settings_json), self._entries(channel_data['feedData'], channel_id),
item_id, traverse_obj(server_state_json, ('channel', 'source', 'title')), channel_id, **traverse_obj(channel_data, ('channel', 'source', {
traverse_obj(server_state_json, ('channel', 'source', 'description'))) 'title': ('title', {str}),
'description': ('description', {str}),
})))

View File

@ -524,10 +524,16 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
response = self._extract_response( response = self._extract_response(
item_id=f'{item_id} page {page_num}', item_id=f'{item_id} page {page_num}',
query=continuation, headers=headers, ytcfg=ytcfg, query=continuation, headers=headers, ytcfg=ytcfg,
check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints')) check_get_keys=(
'continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints',
# Playlist recommendations may return with no data - ignore
('responseContext', 'serviceTrackingParams', ..., 'params', ..., lambda k, v: k == 'key' and v == 'GetRecommendedMusicPlaylists_rid'),
))
if not response: if not response:
break break
continuation = None
# Extracting updated visitor data is required to prevent an infinite extraction loop in some cases # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
# See: https://github.com/ytdl-org/youtube-dl/issues/28702 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
visitor_data = self._extract_visitor_data(response) or visitor_data visitor_data = self._extract_visitor_data(response) or visitor_data
@ -564,7 +570,13 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
yield from func(video_items_renderer) yield from func(video_items_renderer)
continuation = continuation_list[0] or self._extract_continuation(video_items_renderer) continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
if not video_items_renderer: # In the case only a continuation is returned, try to follow it.
# We extract this after trying to extract non-continuation items as otherwise this
# may be prioritized over other continuations.
# see: https://github.com/yt-dlp/yt-dlp/issues/12933
continuation = continuation or self._extract_continuation({'contents': [continuation_item]})
if not continuation and not video_items_renderer:
break break
@staticmethod @staticmethod
@ -999,14 +1011,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 94, 'playlist_mincount': 94,
'info_dict': { 'info_dict': {
'id': 'UCqj7Cz7revf5maW9g5pgNcg', 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
'title': 'Igor Kleiner Ph.D. - Playlists', 'title': 'Igor Kleiner - Playlists',
'description': 'md5:15d7dd9e333cb987907fcb0d604b233a', 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
'uploader': 'Igor Kleiner Ph.D.', 'uploader': 'Igor Kleiner ',
'uploader_id': '@IgorDataScience', 'uploader_id': '@IgorDataScience',
'uploader_url': 'https://www.youtube.com/@IgorDataScience', 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
'channel': 'Igor Kleiner Ph.D.', 'channel': 'Igor Kleiner ',
'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg', 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'], 'tags': 'count:23',
'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg', 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
'channel_follower_count': int, 'channel_follower_count': int,
}, },
@ -1016,18 +1028,19 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 94, 'playlist_mincount': 94,
'info_dict': { 'info_dict': {
'id': 'UCqj7Cz7revf5maW9g5pgNcg', 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
'title': 'Igor Kleiner Ph.D. - Playlists', 'title': 'Igor Kleiner - Playlists',
'description': 'md5:15d7dd9e333cb987907fcb0d604b233a', 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
'uploader': 'Igor Kleiner Ph.D.', 'uploader': 'Igor Kleiner ',
'uploader_id': '@IgorDataScience', 'uploader_id': '@IgorDataScience',
'uploader_url': 'https://www.youtube.com/@IgorDataScience', 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'], 'tags': 'count:23',
'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg', 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
'channel': 'Igor Kleiner Ph.D.', 'channel': 'Igor Kleiner ',
'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg', 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
'channel_follower_count': int, 'channel_follower_count': int,
}, },
}, { }, {
# TODO: fix channel_is_verified extraction
'note': 'playlists, series', 'note': 'playlists, series',
'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3', 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
'playlist_mincount': 5, 'playlist_mincount': 5,
@ -1066,22 +1079,23 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'url': 'https://www.youtube.com/c/ChristophLaimer/playlists', 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
'only_matching': True, 'only_matching': True,
}, { }, {
# TODO: fix availability extraction
'note': 'basic, single video playlist', 'note': 'basic, single video playlist',
'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlSLRHmI1qNm0wjyVNWw1pCU',
'info_dict': { 'info_dict': {
'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', 'id': 'PLt5yu3-wZAlSLRHmI1qNm0wjyVNWw1pCU',
'title': 'youtube-dl public playlist', 'title': 'single video playlist',
'description': '', 'description': '',
'tags': [], 'tags': [],
'view_count': int, 'view_count': int,
'modified_date': '20201130', 'modified_date': '20250417',
'channel': 'Sergey M.', 'channel': 'cole-dlp-test-acc',
'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA', 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
'availability': 'public', 'availability': 'public',
'uploader': 'Sergey M.', 'uploader': 'cole-dlp-test-acc',
'uploader_url': 'https://www.youtube.com/@sergeym.6173', 'uploader_url': 'https://www.youtube.com/@coletdjnz',
'uploader_id': '@sergeym.6173', 'uploader_id': '@coletdjnz',
}, },
'playlist_count': 1, 'playlist_count': 1,
}, { }, {
@ -1171,11 +1185,11 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, },
'playlist_mincount': 17, 'playlist_mincount': 17,
}, { }, {
'note': 'Community tab', 'note': 'Posts tab',
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community', 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
'info_dict': { 'info_dict': {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Community', 'title': 'lex will - Posts',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
'channel': 'lex will', 'channel': 'lex will',
'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
@ -1188,30 +1202,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, },
'playlist_mincount': 18, 'playlist_mincount': 18,
}, { }, {
'note': 'Channels tab', # TODO: fix channel_is_verified extraction
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
'info_dict': {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Channels',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
'channel': 'lex will',
'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'tags': ['bible', 'history', 'prophesy'],
'channel_follower_count': int,
'uploader_url': 'https://www.youtube.com/@lexwill718',
'uploader_id': '@lexwill718',
'uploader': 'lex will',
},
'playlist_mincount': 12,
}, {
'note': 'Search tab', 'note': 'Search tab',
'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra', 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
'playlist_mincount': 40, 'playlist_mincount': 40,
'info_dict': { 'info_dict': {
'id': 'UCYO_jab_esuFRV4b17AJtAw', 'id': 'UCYO_jab_esuFRV4b17AJtAw',
'title': '3Blue1Brown - Search - linear algebra', 'title': '3Blue1Brown - Search - linear algebra',
'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9', 'description': 'md5:602e3789e6a0cb7d9d352186b720e395',
'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw', 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
'tags': ['Mathematics'], 'tags': ['Mathematics'],
'channel': '3Blue1Brown', 'channel': '3Blue1Brown',
@ -1232,6 +1230,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
'only_matching': True, 'only_matching': True,
}, { }, {
# TODO: fix availability extraction
'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.', 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
'info_dict': { 'info_dict': {
@ -1294,24 +1293,25 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, },
'playlist_mincount': 21, 'playlist_mincount': 21,
}, { }, {
# TODO: fix availability extraction
'note': 'Playlist with "show unavailable videos" button', 'note': 'Playlist with "show unavailable videos" button',
'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q', 'url': 'https://www.youtube.com/playlist?list=PLYwq8WOe86_xGmR7FrcJq8Sb7VW8K3Tt2',
'info_dict': { 'info_dict': {
'title': 'Uploads from Phim Siêu Nhân Nhật Bản', 'title': 'The Memes Of 2010s.....',
'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q', 'id': 'PLYwq8WOe86_xGmR7FrcJq8Sb7VW8K3Tt2',
'view_count': int, 'view_count': int,
'channel': 'Phim Siêu Nhân Nhật Bản', 'channel': "I'm Not JiNxEd",
'tags': [], 'tags': [],
'description': '', 'description': 'md5:44dc3b315ba69394feaafa2f40e7b2a1',
'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q', 'channel_url': 'https://www.youtube.com/channel/UC5H5H85D1QE5-fuWWQ1hdNg',
'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q', 'channel_id': 'UC5H5H85D1QE5-fuWWQ1hdNg',
'modified_date': r're:\d{8}', 'modified_date': r're:\d{8}',
'availability': 'public', 'availability': 'public',
'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban', 'uploader_url': 'https://www.youtube.com/@imnotjinxed1998',
'uploader_id': '@phimsieunhannhatban', 'uploader_id': '@imnotjinxed1998',
'uploader': 'Phim Siêu Nhân Nhật Bản', 'uploader': "I'm Not JiNxEd",
}, },
'playlist_mincount': 200, 'playlist_mincount': 150,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'], 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
}, { }, {
'note': 'Playlist with unavailable videos in page 7', 'note': 'Playlist with unavailable videos in page 7',
@ -1334,6 +1334,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 1000, 'playlist_mincount': 1000,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'], 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
}, { }, {
# TODO: fix availability extraction
'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844', 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
'info_dict': { 'info_dict': {
@ -1384,7 +1385,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, { }, {
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live', 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
'info_dict': { 'info_dict': {
'id': 'hGkQjiJLjWQ', # This will keep changing 'id': 'YDvsBbKfLPA', # This will keep changing
'ext': 'mp4', 'ext': 'mp4',
'title': str, 'title': str,
'upload_date': r're:\d{8}', 'upload_date': r're:\d{8}',
@ -1409,6 +1410,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_id': '@SkyNews', 'uploader_id': '@SkyNews',
'uploader': 'Sky News', 'uploader': 'Sky News',
'channel_is_verified': True, 'channel_is_verified': True,
'media_type': 'livestream',
'timestamp': int,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -1496,6 +1499,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng', 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
'only_matching': True, 'only_matching': True,
}, { }, {
# TODO: fix availability extraction
'note': 'VLPL, should redirect to playlist?list=PL...', 'note': 'VLPL, should redirect to playlist?list=PL...',
'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
'info_dict': { 'info_dict': {
@ -1537,6 +1541,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, { }, {
# Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg) # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
# Treat as a general feed # Treat as a general feed
# TODO: fix extraction
'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg', 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
'info_dict': { 'info_dict': {
'id': 'UCtFRv9O2AHqOZjjynzrv-xg', 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
@ -1560,21 +1565,21 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'expected_warnings': ['YouTube Music is not directly supported'], 'expected_warnings': ['YouTube Music is not directly supported'],
}, { }, {
'note': 'unlisted single video playlist', 'note': 'unlisted single video playlist',
'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf', 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQLfIN0MMgp0wVV6MP3bM4_',
'info_dict': { 'info_dict': {
'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf', 'id': 'PLt5yu3-wZAlQLfIN0MMgp0wVV6MP3bM4_',
'title': 'yt-dlp unlisted playlist test', 'title': 'unlisted playlist',
'availability': 'unlisted', 'availability': 'unlisted',
'tags': [], 'tags': [],
'modified_date': '20220418', 'modified_date': '20250417',
'channel': 'colethedj', 'channel': 'cole-dlp-test-acc',
'view_count': int, 'view_count': int,
'description': '', 'description': '',
'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q', 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q', 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
'uploader_url': 'https://www.youtube.com/@colethedj1894', 'uploader_url': 'https://www.youtube.com/@coletdjnz',
'uploader_id': '@colethedj1894', 'uploader_id': '@coletdjnz',
'uploader': 'colethedj', 'uploader': 'cole-dlp-test-acc',
}, },
'playlist': [{ 'playlist': [{
'info_dict': { 'info_dict': {
@ -1596,6 +1601,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_count': 1, 'playlist_count': 1,
'params': {'extract_flat': True}, 'params': {'extract_flat': True},
}, { }, {
# By default, recommended is always empty.
'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData', 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
'url': 'https://www.youtube.com/feed/recommended', 'url': 'https://www.youtube.com/feed/recommended',
'info_dict': { 'info_dict': {
@ -1603,7 +1609,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'title': 'recommended', 'title': 'recommended',
'tags': [], 'tags': [],
}, },
'playlist_mincount': 50, 'playlist_count': 0,
'params': { 'params': {
'skip_download': True, 'skip_download': True,
'extractor_args': {'youtubetab': {'skip': ['webpage']}}, 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
@ -1628,6 +1634,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, },
'skip': 'Query for sorting no longer works', 'skip': 'Query for sorting no longer works',
}, { }, {
# TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos
'note': 'API Fallback: Topic, should redirect to playlist?list=UU...', 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw', 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
'info_dict': { 'info_dict': {
@ -1654,11 +1661,12 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ', 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
'only_matching': True, 'only_matching': True,
}, { }, {
# TODO: fix metadata extraction
'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")', 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6', 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
'info_dict': { 'info_dict': {
'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6', 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
'modified_date': '20220407', 'modified_date': '20250115',
'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q', 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
'tags': [], 'tags': [],
'availability': 'unlisted', 'availability': 'unlisted',
@ -1692,6 +1700,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'expected_warnings': ['Preferring "ja"'], 'expected_warnings': ['Preferring "ja"'],
}, { }, {
# XXX: this should really check flat playlist entries, but the test suite doesn't support that # XXX: this should really check flat playlist entries, but the test suite doesn't support that
# TODO: fix availability extraction
'note': 'preferred lang set with playlist with translated video titles', 'note': 'preferred lang set with playlist with translated video titles',
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0', 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
'info_dict': { 'info_dict': {
@ -1714,6 +1723,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, { }, {
# shorts audio pivot for 2GtVksBMYFM. # shorts audio pivot for 2GtVksBMYFM.
'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==', 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
# TODO: fix extraction
'info_dict': { 'info_dict': {
'id': 'sfv_audio_pivot', 'id': 'sfv_audio_pivot',
'title': 'sfv_audio_pivot', 'title': 'sfv_audio_pivot',
@ -1751,6 +1761,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 8, 'playlist_mincount': 8,
}, { }, {
# Should get three playlists for videos, shorts and streams tabs # Should get three playlists for videos, shorts and streams tabs
# TODO: fix channel_is_verified extraction
'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA', 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
'info_dict': { 'info_dict': {
'id': 'UCK9V2B22uJYu3N7eR_BT9QA', 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
@ -1758,7 +1769,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_follower_count': int, 'channel_follower_count': int,
'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA', 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA', 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
'description': 'md5:49809d8bf9da539bc48ed5d1f83c33f2', 'description': 'md5:01e53f350ab8ad6fcf7c4fedb3c1b99f',
'channel': 'Polka Ch. 尾丸ポルカ', 'channel': 'Polka Ch. 尾丸ポルカ',
'tags': 'count:35', 'tags': 'count:35',
'uploader_url': 'https://www.youtube.com/@OmaruPolka', 'uploader_url': 'https://www.youtube.com/@OmaruPolka',
@ -1769,14 +1780,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_count': 3, 'playlist_count': 3,
}, { }, {
# Shorts tab with channel with handle # Shorts tab with channel with handle
# TODO: fix channel description # TODO: fix channel_is_verified extraction
'url': 'https://www.youtube.com/@NotJustBikes/shorts', 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
'info_dict': { 'info_dict': {
'id': 'UC0intLFzLaudFG-xAvUEO-A', 'id': 'UC0intLFzLaudFG-xAvUEO-A',
'title': 'Not Just Bikes - Shorts', 'title': 'Not Just Bikes - Shorts',
'tags': 'count:10', 'tags': 'count:10',
'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A', 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
'description': 'md5:5e82545b3a041345927a92d0585df247', 'description': 'md5:1d9fc1bad7f13a487299d1fe1712e031',
'channel_follower_count': int, 'channel_follower_count': int,
'channel_id': 'UC0intLFzLaudFG-xAvUEO-A', 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
'channel': 'Not Just Bikes', 'channel': 'Not Just Bikes',
@ -1797,7 +1808,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig', 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
'channel': '中村悠一', 'channel': '中村悠一',
'channel_follower_count': int, 'channel_follower_count': int,
'description': 'md5:e744f6c93dafa7a03c0c6deecb157300', 'description': 'md5:e8fd705073a594f27d6d6d020da560dc',
'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura', 'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
'uploader_id': '@Yuichi-Nakamura', 'uploader_id': '@Yuichi-Nakamura',
'uploader': '中村悠一', 'uploader': '中村悠一',
@ -1815,6 +1826,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'only_matching': True, 'only_matching': True,
}, { }, {
# No videos tab but has a shorts tab # No videos tab but has a shorts tab
# TODO: fix metadata extraction
'url': 'https://www.youtube.com/c/TKFShorts', 'url': 'https://www.youtube.com/c/TKFShorts',
'info_dict': { 'info_dict': {
'id': 'UCgJ5_1F6yJhYLnyMszUdmUg', 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
@ -1851,6 +1863,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, { }, {
# Shorts url result in shorts tab # Shorts url result in shorts tab
# TODO: Fix channel id extraction # TODO: Fix channel id extraction
# TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts', 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
'info_dict': { 'info_dict': {
'id': 'UCiu-3thuViMebBjw_5nWYrA', 'id': 'UCiu-3thuViMebBjw_5nWYrA',
@ -1879,6 +1892,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'params': {'extract_flat': True}, 'params': {'extract_flat': True},
}, { }, {
# Live video status should be extracted # Live video status should be extracted
# TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live', 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
'info_dict': { 'info_dict': {
'id': 'UCQvWX73GQygcwXOTSf_VDVg', 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
@ -1907,6 +1921,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 1, 'playlist_mincount': 1,
}, { }, {
# Channel renderer metadata. Contains number of videos on the channel # Channel renderer metadata. Contains number of videos on the channel
# TODO: channels tab removed, change this test to use another page with channel renderer
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels', 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
'info_dict': { 'info_dict': {
'id': 'UCiu-3thuViMebBjw_5nWYrA', 'id': 'UCiu-3thuViMebBjw_5nWYrA',
@ -1940,7 +1955,9 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, },
}], }],
'params': {'extract_flat': True}, 'params': {'extract_flat': True},
'skip': 'channels tab removed',
}, { }, {
# TODO: fix channel_is_verified extraction
'url': 'https://www.youtube.com/@3blue1brown/about', 'url': 'https://www.youtube.com/@3blue1brown/about',
'info_dict': { 'info_dict': {
'id': '@3blue1brown', 'id': '@3blue1brown',
@ -1950,7 +1967,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_id': 'UCYO_jab_esuFRV4b17AJtAw', 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
'channel': '3Blue1Brown', 'channel': '3Blue1Brown',
'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw', 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9', 'description': 'md5:602e3789e6a0cb7d9d352186b720e395',
'uploader_url': 'https://www.youtube.com/@3blue1brown', 'uploader_url': 'https://www.youtube.com/@3blue1brown',
'uploader_id': '@3blue1brown', 'uploader_id': '@3blue1brown',
'uploader': '3Blue1Brown', 'uploader': '3Blue1Brown',
@ -1976,6 +1993,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_count': 5, 'playlist_count': 5,
}, { }, {
# Releases tab, with rich entry playlistRenderers (same as Podcasts tab) # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
# TODO: fix channel_is_verified extraction
'url': 'https://www.youtube.com/@AHimitsu/releases', 'url': 'https://www.youtube.com/@AHimitsu/releases',
'info_dict': { 'info_dict': {
'id': 'UCgFwu-j5-xNJml2FtTrrB3A', 'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
@ -2015,6 +2033,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 100, 'playlist_mincount': 100,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'], 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
}, { }, {
# TODO: fix channel_is_verified extraction
'note': 'Tags containing spaces', 'note': 'Tags containing spaces',
'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ', 'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
'playlist_count': 3, 'playlist_count': 3,
@ -2035,6 +2054,24 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'challenges', 'sketches', 'scary games', 'funny games', 'rage games', 'challenges', 'sketches', 'scary games', 'funny games', 'rage games',
'mark fischbach'], 'mark fischbach'],
}, },
}, {
# https://github.com/yt-dlp/yt-dlp/issues/12933
'note': 'streams tab, some scheduled streams. Empty intermediate response with only continuation - must follow',
'url': 'https://www.youtube.com/@sbcitygov/streams',
'playlist_mincount': 150,
'info_dict': {
'id': 'UCH6-qfQwlUgz9SAf05jvc_w',
'channel': 'sbcitygov',
'channel_id': 'UCH6-qfQwlUgz9SAf05jvc_w',
'title': 'sbcitygov - Live',
'channel_follower_count': int,
'description': 'md5:ca1a92059835c071e33b3db52f4a6d67',
'uploader_id': '@sbcitygov',
'uploader_url': 'https://www.youtube.com/@sbcitygov',
'uploader': 'sbcitygov',
'channel_url': 'https://www.youtube.com/channel/UCH6-qfQwlUgz9SAf05jvc_w',
'tags': [],
},
}] }]
@classmethod @classmethod

View File

@ -3646,6 +3646,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if 'sign in' in reason.lower(): if 'sign in' in reason.lower():
reason = remove_end(reason, 'This helps protect our community. Learn more') reason = remove_end(reason, 'This helps protect our community. Learn more')
reason = f'{remove_end(reason.strip(), ".")}. {self._youtube_login_hint}' reason = f'{remove_end(reason.strip(), ".")}. {self._youtube_login_hint}'
elif get_first(playability_statuses, ('errorScreen', 'playerCaptchaViewModel', {dict})):
reason += '. YouTube is requiring a captcha challenge before playback'
self.raise_no_formats(reason, expected=True) self.raise_no_formats(reason, expected=True)
keywords = get_first(video_details, 'keywords', expected_type=list) or [] keywords = get_first(video_details, 'keywords', expected_type=list) or []
@ -3874,7 +3876,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if not traverse_obj(initial_data, 'contents'): if not traverse_obj(initial_data, 'contents'):
self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.') self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
initial_data = None initial_data = None
if not initial_data: if not initial_data and 'initial_data' not in self._configuration_arg('player_skip'):
query = {'videoId': video_id} query = {'videoId': video_id}
query.update(self._get_checkok_params()) query.update(self._get_checkok_params())
initial_data = self._extract_response( initial_data = self._extract_response(

View File

@ -3,6 +3,7 @@ import warnings
from .common import ( from .common import (
HEADRequest, HEADRequest,
PATCHRequest,
PUTRequest, PUTRequest,
Request, Request,
RequestDirector, RequestDirector,

View File

@ -505,6 +505,7 @@ class Request:
HEADRequest = functools.partial(Request, method='HEAD') HEADRequest = functools.partial(Request, method='HEAD')
PATCHRequest = functools.partial(Request, method='PATCH')
PUTRequest = functools.partial(Request, method='PUT') PUTRequest = functools.partial(Request, method='PUT')

View File

@ -150,6 +150,15 @@ class _YoutubeDLHelpFormatter(optparse.IndentedHelpFormatter):
return opts return opts
_PRESET_ALIASES = {
'mp3': ['-f', 'ba[acodec^=mp3]/ba/b', '-x', '--audio-format', 'mp3'],
'aac': ['-f', 'ba[acodec^=aac]/ba[acodec^=mp4a.40.]/ba/b', '-x', '--audio-format', 'aac'],
'mp4': ['--merge-output-format', 'mp4', '--remux-video', 'mp4', '-S', 'vcodec:h264,lang,quality,res,fps,hdr:12,acodec:aac'],
'mkv': ['--merge-output-format', 'mkv', '--remux-video', 'mkv'],
'sleep': ['--sleep-subtitles', '5', '--sleep-requests', '0.75', '--sleep-interval', '10', '--max-sleep-interval', '20'],
}
class _YoutubeDLOptionParser(optparse.OptionParser): class _YoutubeDLOptionParser(optparse.OptionParser):
# optparse is deprecated since Python 3.2. So assume a stable interface even for private methods # optparse is deprecated since Python 3.2. So assume a stable interface even for private methods
ALIAS_DEST = '_triggered_aliases' ALIAS_DEST = '_triggered_aliases'
@ -215,6 +224,22 @@ class _YoutubeDLOptionParser(optparse.OptionParser):
return e.possibilities[0] return e.possibilities[0]
raise raise
def format_option_help(self, formatter=None):
assert formatter, 'Formatter can not be None'
formatted_help = super().format_option_help(formatter=formatter)
formatter.indent()
heading = formatter.format_heading('Preset Aliases')
formatter.indent()
result = []
for name, args in _PRESET_ALIASES.items():
option = optparse.Option('-t', help=shlex.join(args))
formatter.option_strings[option] = f'-t {name}'
result.append(formatter.format_option(option))
formatter.dedent()
formatter.dedent()
help_lines = '\n'.join(result)
return f'{formatted_help}\n{heading}{help_lines}'
def create_parser(): def create_parser():
def _list_from_options_callback(option, opt_str, value, parser, append=True, delim=',', process=str.strip): def _list_from_options_callback(option, opt_str, value, parser, append=True, delim=',', process=str.strip):
@ -317,6 +342,13 @@ def create_parser():
parser.rargs[:0] = shlex.split( parser.rargs[:0] = shlex.split(
opts if value is None else opts.format(*map(shlex.quote, value))) opts if value is None else opts.format(*map(shlex.quote, value)))
def _preset_alias_callback(option, opt_str, value, parser):
if not value:
return
if value not in _PRESET_ALIASES:
raise optparse.OptionValueError(f'Unknown preset alias: {value}')
parser.rargs[:0] = _PRESET_ALIASES[value]
general = optparse.OptionGroup(parser, 'General Options') general = optparse.OptionGroup(parser, 'General Options')
general.add_option( general.add_option(
'-h', '--help', dest='print_help', action='store_true', '-h', '--help', dest='print_help', action='store_true',
@ -519,6 +551,15 @@ def create_parser():
'Alias options can trigger more aliases; so be careful to avoid defining recursive options. ' 'Alias options can trigger more aliases; so be careful to avoid defining recursive options. '
f'As a safety measure, each alias may be triggered a maximum of {_YoutubeDLOptionParser.ALIAS_TRIGGER_LIMIT} times. ' f'As a safety measure, each alias may be triggered a maximum of {_YoutubeDLOptionParser.ALIAS_TRIGGER_LIMIT} times. '
'This option can be used multiple times')) 'This option can be used multiple times'))
general.add_option(
'-t', '--preset-alias',
metavar='PRESET', dest='_', type='str',
action='callback', callback=_preset_alias_callback,
help=(
'Applies a predefined set of options. e.g. --preset-alias mp3. '
f'The following presets are available: {", ".join(_PRESET_ALIASES)}. '
'See the "Preset Aliases" section at the end for more info. '
'This option can be used multiple times'))
network = optparse.OptionGroup(parser, 'Network Options') network = optparse.OptionGroup(parser, 'Network Options')
network.add_option( network.add_option(

View File

@ -2044,7 +2044,7 @@ def url_or_none(url):
if not url or not isinstance(url, str): if not url or not isinstance(url, str):
return None return None
url = url.strip() url = url.strip()
return url if re.match(r'(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None return url if re.match(r'(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?|wss?):)?//', url) else None
def strftime_or_none(timestamp, date_format='%Y%m%d', default=None): def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):