Compare commits

..

No commits in common. "8032ad0af53dc3edcba7f6ecdd8976fb6902b7be" and "9615ae99c0487d8bf8484d657f52a891cfe0a84f" have entirely different histories.

18 changed files with 482 additions and 703 deletions

View File

@ -1770,7 +1770,7 @@ The following extractors use this feature:
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios` * `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all` * `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`

View File

@ -136,7 +136,7 @@ def _iter_differences(got, expected, field):
return return
if op == 'startswith': if op == 'startswith':
if not got.startswith(val): if not val.startswith(got):
yield field, f'should start with {val!r}, got {got!r}' yield field, f'should start with {val!r}, got {got!r}'
return return

View File

@ -39,7 +39,6 @@ from yt_dlp.cookies import YoutubeDLCookieJar
from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3 from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3
from yt_dlp.networking import ( from yt_dlp.networking import (
HEADRequest, HEADRequest,
PATCHRequest,
PUTRequest, PUTRequest,
Request, Request,
RequestDirector, RequestDirector,
@ -1857,7 +1856,6 @@ class TestRequest:
def test_request_helpers(self): def test_request_helpers(self):
assert HEADRequest('http://example.com').method == 'HEAD' assert HEADRequest('http://example.com').method == 'HEAD'
assert PATCHRequest('http://example.com').method == 'PATCH'
assert PUTRequest('http://example.com').method == 'PUT' assert PUTRequest('http://example.com').method == 'PUT'
def test_headers(self): def test_headers(self):

View File

@ -1783,6 +1783,7 @@ from .rtvcplay import (
from .rtve import ( from .rtve import (
RTVEALaCartaIE, RTVEALaCartaIE,
RTVEAudioIE, RTVEAudioIE,
RTVEInfantilIE,
RTVELiveIE, RTVELiveIE,
RTVETelevisionIE, RTVETelevisionIE,
) )
@ -2236,11 +2237,7 @@ from .tvplay import (
TVPlayIE, TVPlayIE,
) )
from .tvplayer import TVPlayerIE from .tvplayer import TVPlayerIE
from .tvw import ( from .tvw import TvwIE, TvwNewsIE
TvwIE,
TvwNewsIE,
TvwTvChannelsIE,
)
from .tweakers import TweakersIE from .tweakers import TweakersIE
from .twentymin import TwentyMinutenIE from .twentymin import TwentyMinutenIE
from .twentythreevideo import TwentyThreeVideoIE from .twentythreevideo import TwentyThreeVideoIE

View File

@ -21,7 +21,6 @@ from ..utils import (
int_or_none, int_or_none,
time_seconds, time_seconds,
traverse_obj, traverse_obj,
update_url,
update_url_query, update_url_query,
) )
@ -418,10 +417,6 @@ class AbemaTVIE(AbemaTVBaseIE):
'is_live': is_live, 'is_live': is_live,
'availability': availability, 'availability': availability,
}) })
if thumbnail := update_url(self._og_search_thumbnail(webpage, default=''), query=None):
info['thumbnails'] = [{'url': thumbnail}]
return info return info

View File

@ -1,105 +1,64 @@
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..networking.exceptions import HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
parse_age_limit,
url_or_none,
urlencode_postdata, urlencode_postdata,
) )
from ..utils.traversal import traverse_obj
class AtresPlayerIE(InfoExtractor): class AtresPlayerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/(?:[^/?#]+/){4}(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})' _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
_NETRC_MACHINE = 'atresplayer' _NETRC_MACHINE = 'atresplayer'
_TESTS = [{ _TESTS = [
'url': 'https://www.atresplayer.com/lasexta/programas/el-objetivo/clips/mbappe-describe-como-entrenador-a-carlo-ancelotti-sabe-cuando-tiene-que-ser-padre-jefe-amigo-entrenador_67f2dfb2fb6ab0e4c7203849/', {
'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/',
'info_dict': { 'info_dict': {
'id': '5d4aa2c57ed1a88fc715a615',
'ext': 'mp4', 'ext': 'mp4',
'id': '67f2dfb2fb6ab0e4c7203849', 'title': 'Capítulo 7: Asuntos pendientes',
'display_id': 'md5:c203f8d4e425ed115ba56a1c6e4b3e6c', 'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc',
'title': 'Mbappé describe como entrenador a Carlo Ancelotti: "Sabe cuándo tiene que ser padre, jefe, amigo, entrenador..."', 'duration': 3413,
'channel': 'laSexta',
'duration': 31,
'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages02/2025/04/06/B02DBE1E-D59B-4683-8404-1A9595D15269/1920x1080.jpg',
'tags': ['Entrevista informativa', 'Actualidad', 'Debate informativo', 'Política', 'Economía', 'Sociedad', 'Cara a cara', 'Análisis', 'Más periodismo'],
'series': 'El Objetivo',
'season': 'Temporada 12',
'timestamp': 1743970079,
'upload_date': '20250406',
}, },
}, { 'skip': 'This video is only available for registered users',
'url': 'https://www.atresplayer.com/antena3/programas/el-hormiguero/clips/revive-la-entrevista-completa-a-miguel-bose-en-el-hormiguero_67f836baa4a5b0e4147ca59a/',
'info_dict': {
'ext': 'mp4',
'id': '67f836baa4a5b0e4147ca59a',
'display_id': 'revive-la-entrevista-completa-a-miguel-bose-en-el-hormiguero',
'title': 'Revive la entrevista completa a Miguel Bosé en El Hormiguero',
'description': 'md5:c6d2b591408d45a7bc2986dfb938eb72',
'channel': 'Antena 3',
'duration': 2556,
'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages02/2025/04/10/9076395F-F1FD-48BE-9F18-540DBA10EBAD/1920x1080.jpg',
'tags': ['Entrevista', 'Variedades', 'Humor', 'Entretenimiento', 'Te sigo', 'Buen rollo', 'Cara a cara'],
'series': 'El Hormiguero ',
'season': 'Temporada 14',
'timestamp': 1744320111,
'upload_date': '20250410',
}, },
}, { {
'url': 'https://www.atresplayer.com/flooxer/series/biara-proyecto-lazarus/temporada-1/capitulo-3-supervivientes_67a6038b64ceca00070f4f69/',
'info_dict': {
'ext': 'mp4',
'id': '67a6038b64ceca00070f4f69',
'display_id': 'capitulo-3-supervivientes',
'title': 'Capítulo 3: Supervivientes',
'description': 'md5:65b231f20302f776c2b0dd24594599a1',
'channel': 'Flooxer',
'duration': 1196,
'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages01/2025/02/14/17CF90D3-FE67-40C5-A941-7825B3E13992/1920x1080.jpg',
'tags': ['Juvenil', 'Terror', 'Piel de gallina', 'Te sigo', 'Un break', 'Del tirón'],
'series': 'BIARA: Proyecto Lázarus',
'season': 'Temporada 1',
'season_number': 1,
'episode': 'Episode 3',
'episode_number': 3,
'timestamp': 1743095191,
'upload_date': '20250327',
},
}, {
'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/', 'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
'only_matching': True, 'only_matching': True,
}, { },
{
'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/', 'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
'only_matching': True, 'only_matching': True,
}] },
]
_API_BASE = 'https://api.atresplayer.com/' _API_BASE = 'https://api.atresplayer.com/'
def _perform_login(self, username, password): def _perform_login(self, username, password):
self._request_webpage(
self._API_BASE + 'login', None, 'Downloading login page')
try: try:
self._download_webpage( target_url = self._download_json(
'https://account.atresplayer.com/auth/v1/login', None, 'https://account.atresmedia.com/api/login', None,
'Logging in', 'Failed to log in', data=urlencode_postdata({ 'Logging in', headers={
'Content-Type': 'application/x-www-form-urlencoded',
}, data=urlencode_postdata({
'username': username, 'username': username,
'password': password, 'password': password,
})) }))['targetUrl']
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 400: if isinstance(e.cause, HTTPError) and e.cause.status == 400:
raise ExtractorError('Invalid username and/or password', expected=True) raise ExtractorError('Invalid username and/or password', expected=True)
raise raise
self._request_webpage(target_url, None, 'Following Target URL')
def _real_extract(self, url): def _real_extract(self, url):
display_id, video_id = self._match_valid_url(url).groups() display_id, video_id = self._match_valid_url(url).groups()
metadata_url = self._download_json(
self._API_BASE + 'client/v1/url', video_id, 'Downloading API endpoint data',
query={'href': urllib.parse.urlparse(url).path})['href']
metadata = self._download_json(metadata_url, video_id)
try: try:
video_data = self._download_json(metadata['urlVideo'], video_id, 'Downloading video data') episode = self._download_json(
self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 403: if isinstance(e.cause, HTTPError) and e.cause.status == 403:
error = self._parse_json(e.cause.response.read(), None) error = self._parse_json(e.cause.response.read(), None)
@ -108,45 +67,37 @@ class AtresPlayerIE(InfoExtractor):
raise ExtractorError(error['error_description'], expected=True) raise ExtractorError(error['error_description'], expected=True)
raise raise
title = episode['titulo']
formats = [] formats = []
subtitles = {} subtitles = {}
for source in traverse_obj(video_data, ('sources', lambda _, v: url_or_none(v['src']))): for source in episode.get('sources', []):
src_url = source['src'] src = source.get('src')
src_type = source.get('type') if not src:
if src_type in ('application/vnd.apple.mpegurl', 'application/hls+legacy', 'application/hls+hevc'):
fmts, subs = self._extract_m3u8_formats_and_subtitles(
src_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
elif src_type in ('application/dash+xml', 'application/dash+hevc'):
fmts, subs = self._extract_mpd_formats_and_subtitles(
src_url, video_id, mpd_id='dash', fatal=False)
else:
continue continue
formats.extend(fmts) src_type = source.get('type')
self._merge_subtitles(subs, target=subtitles) if src_type == 'application/vnd.apple.mpegurl':
formats, subtitles = self._extract_m3u8_formats(
src, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)
elif src_type == 'application/dash+xml':
formats, subtitles = self._extract_mpd_formats(
src, video_id, mpd_id='dash', fatal=False)
heartbeat = episode.get('heartbeat') or {}
omniture = episode.get('omniture') or {}
get_meta = lambda x: heartbeat.get(x) or omniture.get(x)
return { return {
'display_id': display_id, 'display_id': display_id,
'id': video_id, 'id': video_id,
'title': title,
'description': episode.get('descripcion'),
'thumbnail': episode.get('imgPoster'),
'duration': int_or_none(episode.get('duration')),
'formats': formats, 'formats': formats,
'channel': get_meta('channel'),
'season': get_meta('season'),
'episode_number': int_or_none(get_meta('episodeNumber')),
'subtitles': subtitles, 'subtitles': subtitles,
**traverse_obj(video_data, {
'title': ('titulo', {str}),
'description': ('descripcion', {str}),
'duration': ('duration', {int_or_none}),
'thumbnail': ('imgPoster', {url_or_none}, {lambda v: f'{v}1920x1080.jpg'}),
'age_limit': ('ageRating', {parse_age_limit}),
}),
**traverse_obj(metadata, {
'title': ('title', {str}),
'description': ('description', {str}),
'duration': ('duration', {int_or_none}),
'tags': ('tags', ..., 'title', {str}),
'age_limit': ('ageRating', {parse_age_limit}),
'series': ('format', 'title', {str}),
'season': ('currentSeason', 'title', {str}),
'season_number': ('currentSeason', 'seasonNumber', {int_or_none}),
'episode_number': ('numberOfEpisode', {int_or_none}),
'timestamp': ('publicationDate', {int_or_none(scale=1000)}),
'channel': ('channel', 'title', {str}),
}),
} }

View File

@ -353,7 +353,7 @@ class CDAIE(InfoExtractor):
class CDAFolderIE(InfoExtractor): class CDAFolderIE(InfoExtractor):
_MAX_PAGE_SIZE = 36 _MAX_PAGE_SIZE = 36
_VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>[\w-]+)/folder/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>\w+)/folder/(?P<id>\d+)'
_TESTS = [ _TESTS = [
{ {
'url': 'https://www.cda.pl/domino264/folder/31188385', 'url': 'https://www.cda.pl/domino264/folder/31188385',
@ -378,9 +378,6 @@ class CDAFolderIE(InfoExtractor):
'title': 'TESTY KOSMETYKÓW', 'title': 'TESTY KOSMETYKÓW',
}, },
'playlist_mincount': 139, 'playlist_mincount': 139,
}, {
'url': 'https://www.cda.pl/FILMY-SERIALE-ANIME-KRESKOWKI-BAJKI/folder/18493422',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -82,10 +82,7 @@ class LinkedInLearningBaseIE(LinkedInBaseIE):
class LinkedInIE(LinkedInBaseIE): class LinkedInIE(LinkedInBaseIE):
_VALID_URL = [ _VALID_URL = r'https?://(?:www\.)?linkedin\.com/posts/[^/?#]+-(?P<id>\d+)-\w{4}/?(?:[?#]|$)'
r'https?://(?:www\.)?linkedin\.com/posts/[^/?#]+-(?P<id>\d+)-\w{4}/?(?:[?#]|$)',
r'https?://(?:www\.)?linkedin\.com/feed/update/urn:li:activity:(?P<id>\d+)',
]
_TESTS = [{ _TESTS = [{
'url': 'https://www.linkedin.com/posts/mishalkhawaja_sendinblueviews-toronto-digitalmarketing-ugcPost-6850898786781339649-mM20', 'url': 'https://www.linkedin.com/posts/mishalkhawaja_sendinblueviews-toronto-digitalmarketing-ugcPost-6850898786781339649-mM20',
'info_dict': { 'info_dict': {
@ -109,9 +106,6 @@ class LinkedInIE(LinkedInBaseIE):
'like_count': int, 'like_count': int,
'subtitles': 'mincount:1', 'subtitles': 'mincount:1',
}, },
}, {
'url': 'https://www.linkedin.com/feed/update/urn:li:activity:7016901149999955968/?utm_source=share&utm_medium=member_desktop',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -1,9 +1,5 @@
import json
import random
import time
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none, jwt_decode_hs256, try_call, url_or_none from ..utils import int_or_none, url_or_none
from ..utils.traversal import require, traverse_obj from ..utils.traversal import require, traverse_obj
@ -59,81 +55,13 @@ class LocoIE(InfoExtractor):
'upload_date': '20250226', 'upload_date': '20250226',
'modified_date': '20250226', 'modified_date': '20250226',
}, },
}, {
# Requires video authorization
'url': 'https://loco.com/stream/ac854641-ae0f-497c-a8ea-4195f6d8cc53',
'md5': '0513edf85c1e65c9521f555f665387d5',
'info_dict': {
'id': 'ac854641-ae0f-497c-a8ea-4195f6d8cc53',
'ext': 'mp4',
'title': 'DUAS CONTAS DESAFIANTE, RUSH TOP 1 NO BRASIL!',
'description': 'md5:aa77818edd6fe00dd4b6be75cba5f826',
'uploader_id': '7Y9JNAZC3Q',
'channel': 'ayellol',
'channel_follower_count': int,
'comment_count': int,
'view_count': int,
'concurrent_view_count': int,
'like_count': int,
'duration': 1229,
'thumbnail': 'https://static.ivory.getloconow.com/default_thumb/f5aa678b-6d04-45d9-a89a-859af0a8028f.jpg',
'tags': ['Gameplay', 'Carry'],
'series': 'League of Legends',
'timestamp': 1741182253,
'upload_date': '20250305',
'modified_timestamp': 1741182419,
'modified_date': '20250305',
},
}] }]
# From _app.js
_CLIENT_ID = 'TlwKp1zmF6eKFpcisn3FyR18WkhcPkZtzwPVEEC3'
_CLIENT_SECRET = 'Kp7tYlUN7LXvtcSpwYvIitgYcLparbtsQSe5AdyyCdiEJBP53Vt9J8eB4AsLdChIpcO2BM19RA3HsGtqDJFjWmwoonvMSG3ZQmnS8x1YIM8yl82xMXZGbE3NKiqmgBVU'
def _is_jwt_expired(self, token):
return jwt_decode_hs256(token)['exp'] - time.time() < 300
def _get_access_token(self, video_id):
access_token = try_call(lambda: self._get_cookies('https://loco.com')['access_token'].value)
if access_token and not self._is_jwt_expired(access_token):
return access_token
access_token = traverse_obj(self._download_json(
'https://api.getloconow.com/v3/user/device_profile/', video_id,
'Downloading access token', fatal=False, data=json.dumps({
'platform': 7,
'client_id': self._CLIENT_ID,
'client_secret': self._CLIENT_SECRET,
'model': 'Mozilla',
'os_name': 'Win32',
'os_ver': '5.0 (Windows)',
'app_ver': '5.0 (Windows)',
}).encode(), headers={
'Content-Type': 'application/json;charset=utf-8',
'DEVICE-ID': ''.join(random.choices('0123456789abcdef', k=32)) + 'live',
'X-APP-LANG': 'en',
'X-APP-LOCALE': 'en-US',
'X-CLIENT-ID': self._CLIENT_ID,
'X-CLIENT-SECRET': self._CLIENT_SECRET,
'X-PLATFORM': '7',
}), 'access_token')
if access_token and not self._is_jwt_expired(access_token):
self._set_cookie('.loco.com', 'access_token', access_token)
return access_token
def _real_extract(self, url): def _real_extract(self, url):
video_type, video_id = self._match_valid_url(url).group('type', 'id') video_type, video_id = self._match_valid_url(url).group('type', 'id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
stream = traverse_obj(self._search_nextjs_data(webpage, video_id), ( stream = traverse_obj(self._search_nextjs_data(webpage, video_id), (
'props', 'pageProps', ('liveStreamData', 'stream', 'liveStream'), {dict}, any, {require('stream info')})) 'props', 'pageProps', ('liveStreamData', 'stream'), {dict}, any, {require('stream info')}))
if access_token := self._get_access_token(video_id):
self._request_webpage(
'https://drm.loco.com/v1/streams/playback/', video_id,
'Downloading video authorization', fatal=False, headers={
'authorization': access_token,
}, query={
'stream_uid': stream['uid'],
})
return { return {
'formats': self._extract_m3u8_formats(stream['conf']['hls'], video_id), 'formats': self._extract_m3u8_formats(stream['conf']['hls'], video_id),

View File

@ -1,38 +1,31 @@
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html,
determine_ext, determine_ext,
extract_attributes,
int_or_none, int_or_none,
join_nonempty, str_to_int,
parse_count,
parse_duration,
parse_iso8601,
url_or_none, url_or_none,
urlencode_postdata,
) )
from ..utils.traversal import traverse_obj
class ManyVidsIE(InfoExtractor): class ManyVidsIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)' _VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
# preview video # preview video
'url': 'https://www.manyvids.com/Video/530341/mv-tips-tricks', 'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
'md5': '738dc723f7735ee9602f7ea352a6d058', 'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
'info_dict': { 'info_dict': {
'id': '530341-preview', 'id': '133957',
'ext': 'mp4', 'ext': 'mp4',
'title': 'MV Tips & Tricks (Preview)', 'title': 'everthing about me (Preview)',
'description': r're:I will take you on a tour around .{1313}$', 'uploader': 'ellyxxix',
'thumbnail': r're:https://cdn5\.manyvids\.com/php_uploads/video_images/DestinyDiaz/.+\.jpg',
'uploader': 'DestinyDiaz',
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'release_timestamp': 1508419904,
'tags': ['AdultSchool', 'BBW', 'SFW', 'TeacherFetish'],
'release_date': '20171019',
'duration': 3167.0,
}, },
'expected_warnings': ['Only extracting preview'],
}, { }, {
# full video # full video
'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/', 'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
@ -41,68 +34,129 @@ class ManyVidsIE(InfoExtractor):
'id': '935718', 'id': '935718',
'ext': 'mp4', 'ext': 'mp4',
'title': 'MY FACE REVEAL', 'title': 'MY FACE REVEAL',
'description': r're:Today is the day!! I am finally taking off my mask .{445}$', 'description': 'md5:ec5901d41808b3746fed90face161612',
'thumbnail': r're:https://ods\.manyvids\.com/1001061960/3aa5397f2a723ec4597e344df66ab845/screenshots/.+\.jpg',
'uploader': 'Sarah Calanthe', 'uploader': 'Sarah Calanthe',
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'release_date': '20181110',
'tags': ['EyeContact', 'Interviews', 'MaskFetish', 'MouthFetish', 'Redhead'],
'release_timestamp': 1541851200,
'duration': 224.0,
}, },
}] }]
_API_BASE = 'https://www.manyvids.com/bff/store/video'
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video_data = self._download_json(f'{self._API_BASE}/{video_id}/private', video_id)['data']
formats, preview_only = [], True
for format_id, path in [ real_url = f'https://www.manyvids.com/video/{video_id}/gtm.js'
('preview', ['teaser', 'filepath']), try:
('transcoded', ['transcodedFilepath']), webpage = self._download_webpage(real_url, video_id)
('filepath', ['filepath']), except Exception:
]: # probably useless fallback
format_url = traverse_obj(video_data, (*path, {url_or_none})) webpage = self._download_webpage(url, video_id)
if not format_url:
info = self._search_regex(
r'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''',
webpage, 'meta details', default='')
info = extract_attributes(info)
player = self._search_regex(
r'''(<div\b[^>]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''',
webpage, 'player details', default='')
player = extract_attributes(player)
video_urls_and_ids = (
(info.get('data-meta-video'), 'video'),
(player.get('data-video-transcoded'), 'transcoded'),
(player.get('data-video-filepath'), 'filepath'),
(self._og_search_video_url(webpage, secure=False, default=None), 'og_video'),
)
def txt_or_none(s, default=None):
return (s.strip() or default) if isinstance(s, str) else default
uploader = txt_or_none(info.get('data-meta-author'))
def mung_title(s):
if uploader:
s = re.sub(rf'^\s*{re.escape(uploader)}\s+[|-]', '', s)
return txt_or_none(s)
title = (
mung_title(info.get('data-meta-title'))
or self._html_search_regex(
(r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
webpage, 'title', default=None)
or self._html_search_meta(
'twitter:title', webpage, 'title', fatal=True))
title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title
if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
title += ' (Preview)'
mv_token = self._search_regex(
r'data-mvtoken=(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
'mv token', default=None, group='value')
if mv_token:
# Sets some cookies
self._download_webpage(
'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
video_id, note='Setting format cookies', fatal=False,
data=urlencode_postdata({
'mvtoken': mv_token,
'vid': video_id,
}), headers={
'Referer': url,
'X-Requested-With': 'XMLHttpRequest',
})
formats = []
for v_url, fmt in video_urls_and_ids:
v_url = url_or_none(v_url)
if not v_url:
continue continue
if determine_ext(format_url) == 'm3u8': if determine_ext(v_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id=format_id)) formats.extend(self._extract_m3u8_formats(
v_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls'))
else: else:
formats.append({ formats.append({
'url': format_url, 'url': v_url,
'format_id': format_id, 'format_id': fmt,
'preference': -10 if format_id == 'preview' else None,
'quality': 10 if format_id == 'filepath' else None,
'height': int_or_none(
self._search_regex(r'_(\d{2,3}[02468])_', format_url, 'height', default=None)),
}) })
if format_id != 'preview':
preview_only = False
metadata = traverse_obj( self._remove_duplicate_formats(formats)
self._download_json(f'{self._API_BASE}/{video_id}', video_id, fatal=False), 'data')
title = traverse_obj(metadata, ('title', {clean_html}))
if preview_only: for f in formats:
title = join_nonempty(title, '(Preview)', delim=' ') if f.get('height') is None:
video_id += '-preview' f['height'] = int_or_none(
self.report_warning( self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None))
f'Only extracting preview. Video may be paid or subscription only. {self._login_hint()}') if '/preview/' in f['url']:
f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview')))
f['preference'] = -10
if 'transcoded' in f['format_id']:
f['preference'] = f.get('preference', -1) - 1
def get_likes():
likes = self._search_regex(
rf'''(<a\b[^>]*\bdata-id\s*=\s*(['"]){video_id}\2[^>]*>)''',
webpage, 'likes', default='')
likes = extract_attributes(likes)
return int_or_none(likes.get('data-likes'))
def get_views():
return str_to_int(self._html_search_regex(
r'''(?s)<span\b[^>]*\bclass\s*=["']views-wrapper\b[^>]+>.+?<span\b[^>]+>\s*(\d[\d,.]*)\s*</span>''',
webpage, 'view count', default=None))
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'formats': formats, 'formats': formats,
**traverse_obj(metadata, { 'description': txt_or_none(info.get('data-meta-description')),
'description': ('description', {clean_html}), 'uploader': txt_or_none(info.get('data-meta-author')),
'uploader': ('model', 'displayName', {clean_html}), 'thumbnail': (
'thumbnail': (('screenshot', 'thumbnail'), {url_or_none}, any), url_or_none(info.get('data-meta-image'))
'view_count': ('views', {parse_count}), or url_or_none(player.get('data-video-screenshot'))),
'like_count': ('likes', {parse_count}), 'view_count': get_views(),
'release_timestamp': ('launchDate', {parse_iso8601}), 'like_count': get_likes(),
'duration': ('videoDuration', {parse_duration}),
'tags': ('tagList', ..., 'label', {str}, filter, all, filter),
}),
} }

View File

@ -14,9 +14,8 @@ from ..utils import (
int_or_none, int_or_none,
parse_qs, parse_qs,
srt_subtitles_timecode, srt_subtitles_timecode,
url_or_none, traverse_obj,
) )
from ..utils.traversal import traverse_obj
class PanoptoBaseIE(InfoExtractor): class PanoptoBaseIE(InfoExtractor):
@ -346,16 +345,21 @@ class PanoptoIE(PanoptoBaseIE):
subtitles = {} subtitles = {}
for stream in streams or []: for stream in streams or []:
stream_formats = [] stream_formats = []
for stream_url in set(traverse_obj(stream, (('StreamHttpUrl', 'StreamUrl'), {url_or_none}))): http_stream_url = stream.get('StreamHttpUrl')
stream_url = stream.get('StreamUrl')
if http_stream_url:
stream_formats.append({'url': http_stream_url})
if stream_url:
media_type = stream.get('ViewerMediaFileTypeName') media_type = stream.get('ViewerMediaFileTypeName')
if media_type in ('hls', ): if media_type in ('hls', ):
fmts, subs = self._extract_m3u8_formats_and_subtitles(stream_url, video_id, m3u8_id='hls', fatal=False) m3u8_formats, stream_subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, video_id)
stream_formats.extend(fmts) stream_formats.extend(m3u8_formats)
self._merge_subtitles(subs, target=subtitles) subtitles = self._merge_subtitles(subtitles, stream_subtitles)
else: else:
stream_formats.append({ stream_formats.append({
'url': stream_url, 'url': stream_url,
'ext': media_type,
}) })
for fmt in stream_formats: for fmt in stream_formats:
fmt.update({ fmt.update({

View File

@ -1,142 +1,35 @@
import base64 import base64
import io import io
import struct import struct
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
clean_html,
determine_ext, determine_ext,
float_or_none, float_or_none,
make_archive_id,
parse_iso8601,
qualities, qualities,
url_or_none, remove_end,
remove_start,
try_get,
) )
from ..utils.traversal import subs_list_to_dict, traverse_obj
class RTVEBaseIE(InfoExtractor): class RTVEALaCartaIE(InfoExtractor):
# Reimplementation of https://js2.rtve.es/pages/app-player/3.5.1/js/pf_video.js
@staticmethod
def _decrypt_url(png):
encrypted_data = io.BytesIO(base64.b64decode(png)[8:])
while True:
length_data = encrypted_data.read(4)
length = struct.unpack('!I', length_data)[0]
chunk_type = encrypted_data.read(4)
if chunk_type == b'IEND':
break
data = encrypted_data.read(length)
if chunk_type == b'tEXt':
data = bytes(filter(None, data))
alphabet_data, _, url_data = data.partition(b'#')
quality_str, _, url_data = url_data.rpartition(b'%%')
quality_str = quality_str.decode() or ''
alphabet = RTVEBaseIE._get_alphabet(alphabet_data)
url = RTVEBaseIE._get_url(alphabet, url_data)
yield quality_str, url
encrypted_data.read(4) # CRC
@staticmethod
def _get_url(alphabet, url_data):
url = ''
f = 0
e = 3
b = 1
for char in url_data.decode('iso-8859-1'):
if f == 0:
l = int(char) * 10
f = 1
else:
if e == 0:
l += int(char)
url += alphabet[l]
e = (b + 3) % 4
f = 0
b += 1
else:
e -= 1
return url
@staticmethod
def _get_alphabet(alphabet_data):
alphabet = []
e = 0
d = 0
for char in alphabet_data.decode('iso-8859-1'):
if d == 0:
alphabet.append(char)
d = e = (e + 1) % 4
else:
d -= 1
return alphabet
def _extract_png_formats_and_subtitles(self, video_id, media_type='videos'):
formats, subtitles = [], {}
q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
for manager in ('rtveplayw', 'default'):
png = self._download_webpage(
f'http://www.rtve.es/ztnr/movil/thumbnail/{manager}/{media_type}/{video_id}.png',
video_id, 'Downloading url information', query={'q': 'v2'}, fatal=False)
if not png:
continue
for quality, video_url in self._decrypt_url(png):
ext = determine_ext(video_url)
if ext == 'm3u8':
fmts, subs = self._extract_m3u8_formats_and_subtitles(
video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
elif ext == 'mpd':
fmts, subs = self._extract_mpd_formats_and_subtitles(
video_url, video_id, 'dash', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
else:
formats.append({
'format_id': quality,
'quality': q(quality),
'url': video_url,
})
return formats, subtitles
def _parse_metadata(self, metadata):
return traverse_obj(metadata, {
'title': ('title', {str.strip}),
'alt_title': ('alt', {str.strip}),
'description': ('description', {clean_html}),
'timestamp': ('dateOfEmission', {parse_iso8601(delimiter=' ')}),
'release_timestamp': ('publicationDate', {parse_iso8601(delimiter=' ')}),
'modified_timestamp': ('modificationDate', {parse_iso8601(delimiter=' ')}),
'thumbnail': (('thumbnail', 'image', 'imageSEO'), {url_or_none}, any),
'duration': ('duration', {float_or_none(scale=1000)}),
'is_live': ('live', {bool}),
'series': (('programTitle', ('programInfo', 'title')), {clean_html}, any),
})
class RTVEALaCartaIE(RTVEBaseIE):
IE_NAME = 'rtve.es:alacarta' IE_NAME = 'rtve.es:alacarta'
IE_DESC = 'RTVE a la carta and Play' IE_DESC = 'RTVE a la carta'
_VALID_URL = [ _VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)'
r'https?://(?:www\.)?rtve\.es/(?:m/)?(?:(?:alacarta|play)/videos|filmoteca)/(?!directo)(?:[^/?#]+/){2}(?P<id>\d+)',
r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/?#]+/video/[^/?#]+/(?P<id>\d+)',
]
_TESTS = [{ _TESTS = [{
'url': 'http://www.rtve.es/alacarta/videos/la-aventura-del-saber/aventuraentornosilla/3088905/', 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
'md5': 'a964547824359a5753aef09d79fe984b', 'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
'info_dict': { 'info_dict': {
'id': '3088905', 'id': '2491869',
'ext': 'mp4', 'ext': 'mp4',
'title': 'En torno a la silla', 'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
'duration': 1216.981, 'duration': 5024.566,
'series': 'La aventura del Saber', 'series': 'Balonmano',
'thumbnail': 'https://img2.rtve.es/v/aventuraentornosilla_3088905.png',
}, },
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
}, { }, {
'note': 'Live stream', 'note': 'Live stream',
'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/', 'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
@ -145,88 +38,140 @@ class RTVEALaCartaIE(RTVEBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'is_live': True, 'is_live': True,
'live_status': 'is_live',
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
}, },
'params': { 'params': {
'skip_download': 'live stream', 'skip_download': 'live stream',
}, },
}, { }, {
'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/', 'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
'md5': 'f3cf0d1902d008c48c793e736706c174', 'md5': 'd850f3c8731ea53952ebab489cf81cbf',
'info_dict': { 'info_dict': {
'id': '4236788', 'id': '4236788',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Episodio 104', 'title': 'Servir y proteger - Capítulo 104',
'duration': 3222.8, 'duration': 3222.0,
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
'series': 'Servir y proteger',
}, },
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
}, { }, {
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve', 'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/', 'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.rtve.es/play/videos/saber-vivir/07-07-24/16177116/',
'md5': 'a5b24fcdfa3ff5cb7908aba53d22d4b6',
'info_dict': {
'id': '16177116',
'ext': 'mp4',
'title': 'Saber vivir - 07/07/24',
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
'duration': 2162.68,
'series': 'Saber vivir',
},
}, {
'url': 'https://www.rtve.es/infantil/serie/agus-lui-churros-crafts/video/gusano/7048976/',
'info_dict': {
'id': '7048976',
'ext': 'mp4',
'title': 'Gusano',
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
'duration': 292.86,
'series': 'Agus & Lui: Churros y Crafts',
'_old_archive_ids': ['rtveinfantil 7048976'],
},
}] }]
def _get_subtitles(self, video_id): def _real_initialize(self):
subtitle_data = self._download_json( user_agent_b64 = base64.b64encode(self.get_param('http_headers')['User-Agent'].encode()).decode('utf-8')
f'https://api2.rtve.es/api/videos/{video_id}/subtitulos.json', video_id, self._manager = self._download_json(
'Downloading subtitles info') 'http://www.rtve.es/odin/loki/' + user_agent_b64,
return traverse_obj(subtitle_data, ('page', 'items', ..., { None, 'Fetching manager info')['manager']
'id': ('lang', {str}),
'url': ('src', {url_or_none}), @staticmethod
}, all, {subs_list_to_dict(lang='es')})) def _decrypt_url(png):
encrypted_data = io.BytesIO(base64.b64decode(png)[8:])
while True:
length = struct.unpack('!I', encrypted_data.read(4))[0]
chunk_type = encrypted_data.read(4)
if chunk_type == b'IEND':
break
data = encrypted_data.read(length)
if chunk_type == b'tEXt':
alphabet_data, text = data.split(b'\0')
quality, url_data = text.split(b'%%')
alphabet = []
e = 0
d = 0
for l in alphabet_data.decode('iso-8859-1'):
if d == 0:
alphabet.append(l)
d = e = (e + 1) % 4
else:
d -= 1
url = ''
f = 0
e = 3
b = 1
for letter in url_data.decode('iso-8859-1'):
if f == 0:
l = int(letter) * 10
f = 1
else:
if e == 0:
l += int(letter)
url += alphabet[l]
e = (b + 3) % 4
f = 0
b += 1
else:
e -= 1
yield quality.decode(), url
encrypted_data.read(4) # CRC
def _extract_png_formats(self, video_id):
png = self._download_webpage(
f'http://www.rtve.es/ztnr/movil/thumbnail/{self._manager}/videos/{video_id}.png',
video_id, 'Downloading url information', query={'q': 'v2'})
q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
formats = []
for quality, video_url in self._decrypt_url(png):
ext = determine_ext(video_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
video_url, video_id, 'dash', fatal=False))
else:
formats.append({
'format_id': quality,
'quality': q(quality),
'url': video_url,
})
return formats
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
metadata = self._download_json( info = self._download_json(
f'http://www.rtve.es/api/videos/{video_id}/config/alacarta_videos.json', f'http://www.rtve.es/api/videos/{video_id}/config/alacarta_videos.json',
video_id)['page']['items'][0] video_id)['page']['items'][0]
if metadata['state'] == 'DESPU': if info['state'] == 'DESPU':
raise ExtractorError('The video is no longer available', expected=True) raise ExtractorError('The video is no longer available', expected=True)
formats, subtitles = self._extract_png_formats_and_subtitles(video_id) title = info['title'].strip()
formats = self._extract_png_formats(video_id)
self._merge_subtitles(self.extract_subtitles(video_id), target=subtitles) subtitles = None
sbt_file = info.get('sbtFile')
if sbt_file:
subtitles = self.extract_subtitles(video_id, sbt_file)
is_infantil = urllib.parse.urlparse(url).path.startswith('/infantil/') is_live = info.get('live') is True
return { return {
'id': video_id, 'id': video_id,
'title': title,
'formats': formats, 'formats': formats,
'thumbnail': info.get('image'),
'subtitles': subtitles, 'subtitles': subtitles,
**self._parse_metadata(metadata), 'duration': float_or_none(info.get('duration'), 1000),
'_old_archive_ids': [make_archive_id('rtveinfantil', video_id)] if is_infantil else None, 'is_live': is_live,
'series': info.get('programTitle'),
} }
def _get_subtitles(self, video_id, sub_file):
subs = self._download_json(
sub_file + '.json', video_id,
'Downloading subtitles info')['page']['items']
return dict(
(s['lang'], [{'ext': 'vtt', 'url': s['src']}])
for s in subs)
class RTVEAudioIE(RTVEBaseIE):
class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'rtve.es:audio' IE_NAME = 'rtve.es:audio'
IE_DESC = 'RTVE audio' IE_DESC = 'RTVE audio'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/(?:[^/?#]+/){2}(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/[^/]+/[^/]+/(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.rtve.es/alacarta/audios/a-hombros-de-gigantes/palabra-ingeniero-codigos-informaticos-27-04-21/5889192/', 'url': 'https://www.rtve.es/alacarta/audios/a-hombros-de-gigantes/palabra-ingeniero-codigos-informaticos-27-04-21/5889192/',
@ -235,11 +180,9 @@ class RTVEAudioIE(RTVEBaseIE):
'id': '5889192', 'id': '5889192',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Códigos informáticos', 'title': 'Códigos informáticos',
'alt_title': 'Códigos informáticos - Escuchar ahora', 'thumbnail': r're:https?://.+/1598856591583.jpg',
'duration': 349.440, 'duration': 349.440,
'series': 'A hombros de gigantes', 'series': 'A hombros de gigantes',
'description': 'md5:72b0d7c1ca20fd327bdfff7ac0171afb',
'thumbnail': 'https://img2.rtve.es/a/palabra-ingeniero-codigos-informaticos-270421_5889192.png',
}, },
}, { }, {
'url': 'https://www.rtve.es/play/audios/en-radio-3/ignatius-farray/5791165/', 'url': 'https://www.rtve.es/play/audios/en-radio-3/ignatius-farray/5791165/',
@ -248,11 +191,9 @@ class RTVEAudioIE(RTVEBaseIE):
'id': '5791165', 'id': '5791165',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Ignatius Farray', 'title': 'Ignatius Farray',
'alt_title': 'En Radio 3 - Ignatius Farray - 13/02/21 - escuchar ahora',
'thumbnail': r're:https?://.+/1613243011863.jpg', 'thumbnail': r're:https?://.+/1613243011863.jpg',
'duration': 3559.559, 'duration': 3559.559,
'series': 'En Radio 3', 'series': 'En Radio 3',
'description': 'md5:124aa60b461e0b1724a380bad3bc4040',
}, },
}, { }, {
'url': 'https://www.rtve.es/play/audios/frankenstein-o-el-moderno-prometeo/capitulo-26-ultimo-muerte-victor-juan-jose-plans-mary-shelley/6082623/', 'url': 'https://www.rtve.es/play/audios/frankenstein-o-el-moderno-prometeo/capitulo-26-ultimo-muerte-victor-juan-jose-plans-mary-shelley/6082623/',
@ -261,101 +202,126 @@ class RTVEAudioIE(RTVEBaseIE):
'id': '6082623', 'id': '6082623',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Capítulo 26 y último: La muerte de Victor', 'title': 'Capítulo 26 y último: La muerte de Victor',
'alt_title': 'Frankenstein o el moderno Prometeo - Capítulo 26 y último: La muerte de Victor',
'thumbnail': r're:https?://.+/1632147445707.jpg', 'thumbnail': r're:https?://.+/1632147445707.jpg',
'duration': 3174.086, 'duration': 3174.086,
'series': 'Frankenstein o el moderno Prometeo', 'series': 'Frankenstein o el moderno Prometeo',
'description': 'md5:4ee6fcb82ebe2e46d267e1d1c1a8f7b5',
}, },
}] }]
def _extract_png_formats(self, audio_id):
"""
This function retrieves media related png thumbnail which obfuscate
valuable information about the media. This information is decrypted
via base class _decrypt_url function providing media quality and
media url
"""
png = self._download_webpage(
f'http://www.rtve.es/ztnr/movil/thumbnail/{self._manager}/audios/{audio_id}.png',
audio_id, 'Downloading url information', query={'q': 'v2'})
q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
formats = []
for quality, audio_url in self._decrypt_url(png):
ext = determine_ext(audio_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
audio_url, audio_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
audio_url, audio_id, 'dash', fatal=False))
else:
formats.append({
'format_id': quality,
'quality': q(quality),
'url': audio_url,
})
return formats
def _real_extract(self, url): def _real_extract(self, url):
audio_id = self._match_id(url) audio_id = self._match_id(url)
metadata = self._download_json( info = self._download_json(
f'https://www.rtve.es/api/audios/{audio_id}.json', audio_id)['page']['items'][0] f'https://www.rtve.es/api/audios/{audio_id}.json',
audio_id)['page']['items'][0]
formats, subtitles = self._extract_png_formats_and_subtitles(audio_id, media_type='audios')
return { return {
'id': audio_id, 'id': audio_id,
'formats': formats, 'title': info['title'].strip(),
'subtitles': subtitles, 'thumbnail': info.get('thumbnail'),
**self._parse_metadata(metadata), 'duration': float_or_none(info.get('duration'), 1000),
'series': try_get(info, lambda x: x['programInfo']['title']),
'formats': self._extract_png_formats(audio_id),
} }
class RTVELiveIE(RTVEBaseIE): class RTVEInfantilIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'rtve.es:infantil'
IE_DESC = 'RTVE infantil'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'
_TESTS = [{
'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
'md5': '5747454717aedf9f9fdf212d1bcfc48d',
'info_dict': {
'id': '3040283',
'ext': 'mp4',
'title': 'Maneras de vivir',
'thumbnail': r're:https?://.+/1426182947956\.JPG',
'duration': 357.958,
},
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
}]
class RTVELiveIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'rtve.es:live' IE_NAME = 'rtve.es:live'
IE_DESC = 'RTVE.es live streams' IE_DESC = 'RTVE.es live streams'
_VALID_URL = [ _VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)',
r'https?://(?:www\.)?rtve\.es/play/videos/directo/[^/?#]+/(?P<id>[a-zA-Z0-9-]+)',
]
_TESTS = [{ _TESTS = [{
'url': 'http://www.rtve.es/directo/la-1/', 'url': 'http://www.rtve.es/directo/la-1/',
'info_dict': { 'info_dict': {
'id': 'la-1', 'id': 'la-1',
'ext': 'mp4', 'ext': 'mp4',
'live_status': 'is_live', 'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'title': str,
'description': str,
'thumbnail': r're:https://img\d\.rtve\.es/resources/thumbslive/\d+\.jpg',
'timestamp': int,
'upload_date': str,
}, },
'params': {'skip_download': 'live stream'}, 'params': {
}, { 'skip_download': 'live stream',
'url': 'https://www.rtve.es/play/videos/directo/deportes/tdp/',
'info_dict': {
'id': 'tdp',
'ext': 'mp4',
'live_status': 'is_live',
'title': str,
'description': str,
'thumbnail': r're:https://img2\d\.rtve\.es/resources/thumbslive/\d+\.jpg',
'timestamp': int,
'upload_date': str,
}, },
'params': {'skip_download': 'live stream'},
}, {
'url': 'http://www.rtve.es/play/videos/directo/canales-lineales/la-1/',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) mobj = self._match_valid_url(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
title = remove_start(title, 'Estoy viendo ')
data_setup = self._search_json( vidplayer_id = self._search_regex(
r'<div[^>]+class="[^"]*videoPlayer[^"]*"[^>]*data-setup=\'', (r'playerId=player([0-9]+)',
webpage, 'data_setup', video_id) r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)',
r'data-id=["\'](\d+)'),
formats, subtitles = self._extract_png_formats_and_subtitles(data_setup['idAsset']) webpage, 'internal video ID')
return { return {
'id': video_id, 'id': video_id,
**self._search_json_ld(webpage, video_id, fatal=False), 'title': title,
'title': self._html_extract_title(webpage), 'formats': self._extract_png_formats(vidplayer_id),
'formats': formats,
'subtitles': subtitles,
'is_live': True, 'is_live': True,
} }
class RTVETelevisionIE(InfoExtractor): class RTVETelevisionIE(InfoExtractor):
IE_NAME = 'rtve.es:television' IE_NAME = 'rtve.es:television'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/?#]+/[^/?#]+/(?P<id>\d+).shtml' _VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml'
_TEST = { _TEST = {
'url': 'https://www.rtve.es/television/20091103/video-inedito-del-8o-programa/299020.shtml', 'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml',
'info_dict': { 'info_dict': {
'id': '572515', 'id': '3069778',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Clase inédita', 'title': 'Documentos TV - La revolución del móvil',
'duration': 335.817, 'duration': 3496.948,
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
'series': 'El coro de la cárcel',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -366,8 +332,11 @@ class RTVETelevisionIE(InfoExtractor):
page_id = self._match_id(url) page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id) webpage = self._download_webpage(url, page_id)
play_url = self._html_search_meta('contentUrl', webpage) alacarta_url = self._search_regex(
if play_url is None: r'data-location="alacarta_videos"[^<]+url&quot;:&quot;(http://www\.rtve\.es/alacarta.+?)&',
raise ExtractorError('The webpage doesn\'t contain any video', expected=True) webpage, 'alacarta url', default=None)
if alacarta_url is None:
raise ExtractorError(
'The webpage doesn\'t contain any video', expected=True)
return self.url_result(play_url, ie=RTVEALaCartaIE.ie_key()) return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key())

View File

@ -513,7 +513,7 @@ class TVPVODBaseIE(InfoExtractor):
class TVPVODVideoIE(TVPVODBaseIE): class TVPVODVideoIE(TVPVODBaseIE):
IE_NAME = 'tvp:vod' IE_NAME = 'tvp:vod'
_VALID_URL = r'https?://vod\.tvp\.pl/(?P<category>[a-z\d-]+,\d+)/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek--?\d+,S-?\d+E-?\d+)?,(?P<id>\d+)/?(?:[?#]|$)' _VALID_URL = r'https?://vod\.tvp\.pl/(?P<category>[a-z\d-]+,\d+)/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek-\d+,S\d+E\d+)?,(?P<id>\d+)/?(?:[?#]|$)'
_TESTS = [{ _TESTS = [{
'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357', 'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357',
@ -568,9 +568,6 @@ class TVPVODVideoIE(TVPVODBaseIE):
'live_status': 'is_live', 'live_status': 'is_live',
'thumbnail': 're:https?://.+', 'thumbnail': 're:https?://.+',
}, },
}, {
'url': 'https://vod.tvp.pl/informacje-i-publicystyka,205/konskie-2025-debata-przedwyborcza-odcinki,2028435/odcinek--1,S01E-1,2028419',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -1,21 +1,21 @@
import json import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import clean_html, extract_attributes, remove_end, unified_timestamp, url_or_none
clean_html, from ..utils.traversal import find_elements, traverse_obj
extract_attributes,
parse_qs,
remove_end,
require,
unified_timestamp,
url_or_none,
)
from ..utils.traversal import find_element, find_elements, traverse_obj
class TvwIE(InfoExtractor): class TvwBaseIE(InfoExtractor):
IE_NAME = 'tvw' def _get_title(self, webpage):
_VALID_URL = r'https?://(?:www\.)?tvw\.org/(?:video|watch)/?(?:\?eventID=)?(?P<id>[^/?#]+)' return remove_end(self._og_search_title(webpage, default=None), ' - TVW')
def _get_description(self, webpage):
return self._og_search_description(webpage, default=None)
class TvwIE(TvwBaseIE):
_VALID_URL = r'https?://(?:www\.)?tvw\.org/video/(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://tvw.org/video/billy-frank-jr-statue-maquette-unveiling-ceremony-2024011211/', 'url': 'https://tvw.org/video/billy-frank-jr-statue-maquette-unveiling-ceremony-2024011211/',
'md5': '9ceb94fe2bb7fd726f74f16356825703', 'md5': '9ceb94fe2bb7fd726f74f16356825703',
@ -75,28 +75,11 @@ class TvwIE(InfoExtractor):
'display_id': 'washington-to-washington-a-new-space-race-2022041111', 'display_id': 'washington-to-washington-a-new-space-race-2022041111',
'categories': ['Washington to Washington', 'General Interest'], 'categories': ['Washington to Washington', 'General Interest'],
}, },
}, {
'url': 'https://tvw.org/watch?eventID=2025041235',
'md5': '7d697c02f110b37d6a47622ea608ca90',
'info_dict': {
'id': '2025041235',
'ext': 'mp4',
'title': 'Legislative Review -- April 18',
'thumbnail': r're:^https?://.*\.(?:jpe?g|png)$',
'description': 'Legislative Review features highlights from Friday\'s legislative activity (4/18/25).',
'timestamp': 1745006400,
'upload_date': '20250418',
'location': 'Hayner Media Center',
'categories': ['Legislative Review'],
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
# Use a newer user agent as the default yt-dlp one triggers the Cloudflare anti-bot challenge webpage = self._download_webpage(url, display_id)
webpage = self._download_webpage(url, display_id, headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:137.0) Gecko/20100101 Firefox/137.0',
})
client_id = self._html_search_meta('clientID', webpage, fatal=True) client_id = self._html_search_meta('clientID', webpage, fatal=True)
video_id = self._html_search_meta('eventID', webpage, fatal=True) video_id = self._html_search_meta('eventID', webpage, fatal=True)
@ -128,8 +111,8 @@ class TvwIE(InfoExtractor):
'display_id': display_id, 'display_id': display_id,
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'title': remove_end(self._og_search_title(webpage, default=None), ' - TVW'), 'title': self._get_title(webpage),
'description': self._og_search_description(webpage, default=None), 'description': self._get_description(webpage),
**traverse_obj(video_data, { **traverse_obj(video_data, {
'title': ('title', {str}), 'title': ('title', {str}),
'description': ('description', {clean_html}), 'description': ('description', {clean_html}),
@ -142,9 +125,9 @@ class TvwIE(InfoExtractor):
} }
class TvwNewsIE(InfoExtractor): class TvwNewsIE(TvwBaseIE):
IE_NAME = 'tvw:News' IE_NAME = 'Tvw:News'
_VALID_URL = r'https?://(?:www\.)?tvw\.org/\d{4}/\d{2}/(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:www\.)?tvw\.org/(\d{4})/(0[1-9]|1[0-2])/(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://tvw.org/2024/01/the-impact-issues-to-watch-in-the-2024-legislative-session/', 'url': 'https://tvw.org/2024/01/the-impact-issues-to-watch-in-the-2024-legislative-session/',
'info_dict': { 'info_dict': {
@ -173,58 +156,11 @@ class TvwNewsIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
playlist_id = self._match_id(url) playlist_id = self._match_id(url)
# Use a newer user agent as the default yt-dlp one triggers the Cloudflare anti-bot challenge webpage = self._download_webpage(url, playlist_id)
webpage = self._download_webpage(url, playlist_id, headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:137.0) Gecko/20100101 Firefox/137.0',
})
video_ids = traverse_obj(webpage, ( video_ids = traverse_obj(webpage, (
{find_elements(cls='invintus-player', html=True)}, ..., {extract_attributes}, 'data-eventid')) {find_elements(cls='invintus-player', html=True)}, ..., {extract_attributes}, 'data-eventid'))
return self.playlist_from_matches( return self.playlist_result(
(f'https://tvw.org/watch?eventID={video_id}' for video_id in video_ids), playlist_id, (self.url_result(f'https://tvw.org/watch?eventID={video_id}') for video_id in video_ids), playlist_id,
playlist_title=remove_end(self._og_search_title(webpage, default=None), ' - TVW'), playlist_title=self._get_title(webpage), playlist_description=self._get_description(webpage))
playlist_description=self._og_search_description(webpage, default=None), ie=TvwIE)
class TvwTvChannelsIE(InfoExtractor):
IE_NAME = 'tvw:tvchannels'
_VALID_URL = r'https?://(?:www\.)?tvw\.org/tvchannels/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://tvw.org/tvchannels/air/',
'info_dict': {
'id': 'air',
'ext': 'mp4',
'title': r're:TVW Cable Channel Live Stream',
'thumbnail': r're:https?://.+/.+\.(?:jpe?g|png)$',
'live_status': 'is_live',
},
}, {
'url': 'https://tvw.org/tvchannels/tvw2/',
'info_dict': {
'id': 'tvw2',
'ext': 'mp4',
'title': r're:TVW-2 Broadcast Channel',
'thumbnail': r're:https?://.+/.+\.(?:jpe?g|png)$',
'live_status': 'is_live',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
# Use a newer user agent as the default yt-dlp one triggers the Cloudflare anti-bot challenge
webpage = self._download_webpage(url, video_id, headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:137.0) Gecko/20100101 Firefox/137.0',
})
m3u8_url = traverse_obj(webpage, (
{find_element(id='invintus-persistent-stream-frame', html=True)}, {extract_attributes},
'src', {parse_qs}, 'encoder', 0, {json.loads}, 'live247URI', {url_or_none}, {require('stream url')}))
return {
'id': video_id,
'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', live=True),
'title': remove_end(self._og_search_title(webpage, default=None), ' - TVW'),
'thumbnail': self._og_search_thumbnail(webpage, default=None),
'is_live': True,
}

View File

@ -524,16 +524,10 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
response = self._extract_response( response = self._extract_response(
item_id=f'{item_id} page {page_num}', item_id=f'{item_id} page {page_num}',
query=continuation, headers=headers, ytcfg=ytcfg, query=continuation, headers=headers, ytcfg=ytcfg,
check_get_keys=( check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
'continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints',
# Playlist recommendations may return with no data - ignore
('responseContext', 'serviceTrackingParams', ..., 'params', ..., lambda k, v: k == 'key' and v == 'GetRecommendedMusicPlaylists_rid'),
))
if not response: if not response:
break break
continuation = None
# Extracting updated visitor data is required to prevent an infinite extraction loop in some cases # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
# See: https://github.com/ytdl-org/youtube-dl/issues/28702 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
visitor_data = self._extract_visitor_data(response) or visitor_data visitor_data = self._extract_visitor_data(response) or visitor_data
@ -570,13 +564,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
yield from func(video_items_renderer) yield from func(video_items_renderer)
continuation = continuation_list[0] or self._extract_continuation(video_items_renderer) continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
# In the case only a continuation is returned, try to follow it. if not video_items_renderer:
# We extract this after trying to extract non-continuation items as otherwise this
# may be prioritized over other continuations.
# see: https://github.com/yt-dlp/yt-dlp/issues/12933
continuation = continuation or self._extract_continuation({'contents': [continuation_item]})
if not continuation and not video_items_renderer:
break break
@staticmethod @staticmethod
@ -1011,14 +999,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 94, 'playlist_mincount': 94,
'info_dict': { 'info_dict': {
'id': 'UCqj7Cz7revf5maW9g5pgNcg', 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
'title': 'Igor Kleiner - Playlists', 'title': 'Igor Kleiner Ph.D. - Playlists',
'description': 'md5:15d7dd9e333cb987907fcb0d604b233a', 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
'uploader': 'Igor Kleiner ', 'uploader': 'Igor Kleiner Ph.D.',
'uploader_id': '@IgorDataScience', 'uploader_id': '@IgorDataScience',
'uploader_url': 'https://www.youtube.com/@IgorDataScience', 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
'channel': 'Igor Kleiner ', 'channel': 'Igor Kleiner Ph.D.',
'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg', 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
'tags': 'count:23', 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg', 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
'channel_follower_count': int, 'channel_follower_count': int,
}, },
@ -1028,19 +1016,18 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 94, 'playlist_mincount': 94,
'info_dict': { 'info_dict': {
'id': 'UCqj7Cz7revf5maW9g5pgNcg', 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
'title': 'Igor Kleiner - Playlists', 'title': 'Igor Kleiner Ph.D. - Playlists',
'description': 'md5:15d7dd9e333cb987907fcb0d604b233a', 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
'uploader': 'Igor Kleiner ', 'uploader': 'Igor Kleiner Ph.D.',
'uploader_id': '@IgorDataScience', 'uploader_id': '@IgorDataScience',
'uploader_url': 'https://www.youtube.com/@IgorDataScience', 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
'tags': 'count:23', 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg', 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
'channel': 'Igor Kleiner ', 'channel': 'Igor Kleiner Ph.D.',
'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg', 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
'channel_follower_count': int, 'channel_follower_count': int,
}, },
}, { }, {
# TODO: fix channel_is_verified extraction
'note': 'playlists, series', 'note': 'playlists, series',
'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3', 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
'playlist_mincount': 5, 'playlist_mincount': 5,
@ -1079,23 +1066,22 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'url': 'https://www.youtube.com/c/ChristophLaimer/playlists', 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
'only_matching': True, 'only_matching': True,
}, { }, {
# TODO: fix availability extraction
'note': 'basic, single video playlist', 'note': 'basic, single video playlist',
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlSLRHmI1qNm0wjyVNWw1pCU', 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
'info_dict': { 'info_dict': {
'id': 'PLt5yu3-wZAlSLRHmI1qNm0wjyVNWw1pCU', 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
'title': 'single video playlist', 'title': 'youtube-dl public playlist',
'description': '', 'description': '',
'tags': [], 'tags': [],
'view_count': int, 'view_count': int,
'modified_date': '20250417', 'modified_date': '20201130',
'channel': 'cole-dlp-test-acc', 'channel': 'Sergey M.',
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA', 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA', 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
'availability': 'public', 'availability': 'public',
'uploader': 'cole-dlp-test-acc', 'uploader': 'Sergey M.',
'uploader_url': 'https://www.youtube.com/@coletdjnz', 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
'uploader_id': '@coletdjnz', 'uploader_id': '@sergeym.6173',
}, },
'playlist_count': 1, 'playlist_count': 1,
}, { }, {
@ -1185,11 +1171,11 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, },
'playlist_mincount': 17, 'playlist_mincount': 17,
}, { }, {
'note': 'Posts tab', 'note': 'Community tab',
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community', 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
'info_dict': { 'info_dict': {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Posts', 'title': 'lex will - Community',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
'channel': 'lex will', 'channel': 'lex will',
'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
@ -1202,14 +1188,30 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, },
'playlist_mincount': 18, 'playlist_mincount': 18,
}, { }, {
# TODO: fix channel_is_verified extraction 'note': 'Channels tab',
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
'info_dict': {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Channels',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
'channel': 'lex will',
'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'tags': ['bible', 'history', 'prophesy'],
'channel_follower_count': int,
'uploader_url': 'https://www.youtube.com/@lexwill718',
'uploader_id': '@lexwill718',
'uploader': 'lex will',
},
'playlist_mincount': 12,
}, {
'note': 'Search tab', 'note': 'Search tab',
'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra', 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
'playlist_mincount': 40, 'playlist_mincount': 40,
'info_dict': { 'info_dict': {
'id': 'UCYO_jab_esuFRV4b17AJtAw', 'id': 'UCYO_jab_esuFRV4b17AJtAw',
'title': '3Blue1Brown - Search - linear algebra', 'title': '3Blue1Brown - Search - linear algebra',
'description': 'md5:602e3789e6a0cb7d9d352186b720e395', 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw', 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
'tags': ['Mathematics'], 'tags': ['Mathematics'],
'channel': '3Blue1Brown', 'channel': '3Blue1Brown',
@ -1230,7 +1232,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
'only_matching': True, 'only_matching': True,
}, { }, {
# TODO: fix availability extraction
'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.', 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
'info_dict': { 'info_dict': {
@ -1293,25 +1294,24 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, },
'playlist_mincount': 21, 'playlist_mincount': 21,
}, { }, {
# TODO: fix availability extraction
'note': 'Playlist with "show unavailable videos" button', 'note': 'Playlist with "show unavailable videos" button',
'url': 'https://www.youtube.com/playlist?list=PLYwq8WOe86_xGmR7FrcJq8Sb7VW8K3Tt2', 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
'info_dict': { 'info_dict': {
'title': 'The Memes Of 2010s.....', 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
'id': 'PLYwq8WOe86_xGmR7FrcJq8Sb7VW8K3Tt2', 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
'view_count': int, 'view_count': int,
'channel': "I'm Not JiNxEd", 'channel': 'Phim Siêu Nhân Nhật Bản',
'tags': [], 'tags': [],
'description': 'md5:44dc3b315ba69394feaafa2f40e7b2a1', 'description': '',
'channel_url': 'https://www.youtube.com/channel/UC5H5H85D1QE5-fuWWQ1hdNg', 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
'channel_id': 'UC5H5H85D1QE5-fuWWQ1hdNg', 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
'modified_date': r're:\d{8}', 'modified_date': r're:\d{8}',
'availability': 'public', 'availability': 'public',
'uploader_url': 'https://www.youtube.com/@imnotjinxed1998', 'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban',
'uploader_id': '@imnotjinxed1998', 'uploader_id': '@phimsieunhannhatban',
'uploader': "I'm Not JiNxEd", 'uploader': 'Phim Siêu Nhân Nhật Bản',
}, },
'playlist_mincount': 150, 'playlist_mincount': 200,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'], 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
}, { }, {
'note': 'Playlist with unavailable videos in page 7', 'note': 'Playlist with unavailable videos in page 7',
@ -1334,7 +1334,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 1000, 'playlist_mincount': 1000,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'], 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
}, { }, {
# TODO: fix availability extraction
'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844', 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
'info_dict': { 'info_dict': {
@ -1385,7 +1384,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, { }, {
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live', 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
'info_dict': { 'info_dict': {
'id': 'YDvsBbKfLPA', # This will keep changing 'id': 'hGkQjiJLjWQ', # This will keep changing
'ext': 'mp4', 'ext': 'mp4',
'title': str, 'title': str,
'upload_date': r're:\d{8}', 'upload_date': r're:\d{8}',
@ -1410,8 +1409,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_id': '@SkyNews', 'uploader_id': '@SkyNews',
'uploader': 'Sky News', 'uploader': 'Sky News',
'channel_is_verified': True, 'channel_is_verified': True,
'media_type': 'livestream',
'timestamp': int,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -1499,7 +1496,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng', 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
'only_matching': True, 'only_matching': True,
}, { }, {
# TODO: fix availability extraction
'note': 'VLPL, should redirect to playlist?list=PL...', 'note': 'VLPL, should redirect to playlist?list=PL...',
'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
'info_dict': { 'info_dict': {
@ -1541,7 +1537,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, { }, {
# Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg) # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
# Treat as a general feed # Treat as a general feed
# TODO: fix extraction
'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg', 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
'info_dict': { 'info_dict': {
'id': 'UCtFRv9O2AHqOZjjynzrv-xg', 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
@ -1565,21 +1560,21 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'expected_warnings': ['YouTube Music is not directly supported'], 'expected_warnings': ['YouTube Music is not directly supported'],
}, { }, {
'note': 'unlisted single video playlist', 'note': 'unlisted single video playlist',
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQLfIN0MMgp0wVV6MP3bM4_', 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
'info_dict': { 'info_dict': {
'id': 'PLt5yu3-wZAlQLfIN0MMgp0wVV6MP3bM4_', 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
'title': 'unlisted playlist', 'title': 'yt-dlp unlisted playlist test',
'availability': 'unlisted', 'availability': 'unlisted',
'tags': [], 'tags': [],
'modified_date': '20250417', 'modified_date': '20220418',
'channel': 'cole-dlp-test-acc', 'channel': 'colethedj',
'view_count': int, 'view_count': int,
'description': '', 'description': '',
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA', 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA', 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
'uploader_url': 'https://www.youtube.com/@coletdjnz', 'uploader_url': 'https://www.youtube.com/@colethedj1894',
'uploader_id': '@coletdjnz', 'uploader_id': '@colethedj1894',
'uploader': 'cole-dlp-test-acc', 'uploader': 'colethedj',
}, },
'playlist': [{ 'playlist': [{
'info_dict': { 'info_dict': {
@ -1601,7 +1596,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_count': 1, 'playlist_count': 1,
'params': {'extract_flat': True}, 'params': {'extract_flat': True},
}, { }, {
# By default, recommended is always empty.
'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData', 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
'url': 'https://www.youtube.com/feed/recommended', 'url': 'https://www.youtube.com/feed/recommended',
'info_dict': { 'info_dict': {
@ -1609,7 +1603,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'title': 'recommended', 'title': 'recommended',
'tags': [], 'tags': [],
}, },
'playlist_count': 0, 'playlist_mincount': 50,
'params': { 'params': {
'skip_download': True, 'skip_download': True,
'extractor_args': {'youtubetab': {'skip': ['webpage']}}, 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
@ -1634,7 +1628,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, },
'skip': 'Query for sorting no longer works', 'skip': 'Query for sorting no longer works',
}, { }, {
# TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos
'note': 'API Fallback: Topic, should redirect to playlist?list=UU...', 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw', 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
'info_dict': { 'info_dict': {
@ -1661,12 +1654,11 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ', 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
'only_matching': True, 'only_matching': True,
}, { }, {
# TODO: fix metadata extraction
'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")', 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6', 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
'info_dict': { 'info_dict': {
'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6', 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
'modified_date': '20250115', 'modified_date': '20220407',
'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q', 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
'tags': [], 'tags': [],
'availability': 'unlisted', 'availability': 'unlisted',
@ -1700,7 +1692,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'expected_warnings': ['Preferring "ja"'], 'expected_warnings': ['Preferring "ja"'],
}, { }, {
# XXX: this should really check flat playlist entries, but the test suite doesn't support that # XXX: this should really check flat playlist entries, but the test suite doesn't support that
# TODO: fix availability extraction
'note': 'preferred lang set with playlist with translated video titles', 'note': 'preferred lang set with playlist with translated video titles',
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0', 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
'info_dict': { 'info_dict': {
@ -1723,7 +1714,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, { }, {
# shorts audio pivot for 2GtVksBMYFM. # shorts audio pivot for 2GtVksBMYFM.
'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==', 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
# TODO: fix extraction
'info_dict': { 'info_dict': {
'id': 'sfv_audio_pivot', 'id': 'sfv_audio_pivot',
'title': 'sfv_audio_pivot', 'title': 'sfv_audio_pivot',
@ -1761,7 +1751,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 8, 'playlist_mincount': 8,
}, { }, {
# Should get three playlists for videos, shorts and streams tabs # Should get three playlists for videos, shorts and streams tabs
# TODO: fix channel_is_verified extraction
'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA', 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
'info_dict': { 'info_dict': {
'id': 'UCK9V2B22uJYu3N7eR_BT9QA', 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
@ -1769,7 +1758,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_follower_count': int, 'channel_follower_count': int,
'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA', 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA', 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
'description': 'md5:01e53f350ab8ad6fcf7c4fedb3c1b99f', 'description': 'md5:49809d8bf9da539bc48ed5d1f83c33f2',
'channel': 'Polka Ch. 尾丸ポルカ', 'channel': 'Polka Ch. 尾丸ポルカ',
'tags': 'count:35', 'tags': 'count:35',
'uploader_url': 'https://www.youtube.com/@OmaruPolka', 'uploader_url': 'https://www.youtube.com/@OmaruPolka',
@ -1780,14 +1769,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_count': 3, 'playlist_count': 3,
}, { }, {
# Shorts tab with channel with handle # Shorts tab with channel with handle
# TODO: fix channel_is_verified extraction # TODO: fix channel description
'url': 'https://www.youtube.com/@NotJustBikes/shorts', 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
'info_dict': { 'info_dict': {
'id': 'UC0intLFzLaudFG-xAvUEO-A', 'id': 'UC0intLFzLaudFG-xAvUEO-A',
'title': 'Not Just Bikes - Shorts', 'title': 'Not Just Bikes - Shorts',
'tags': 'count:10', 'tags': 'count:10',
'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A', 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
'description': 'md5:1d9fc1bad7f13a487299d1fe1712e031', 'description': 'md5:5e82545b3a041345927a92d0585df247',
'channel_follower_count': int, 'channel_follower_count': int,
'channel_id': 'UC0intLFzLaudFG-xAvUEO-A', 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
'channel': 'Not Just Bikes', 'channel': 'Not Just Bikes',
@ -1808,7 +1797,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig', 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
'channel': '中村悠一', 'channel': '中村悠一',
'channel_follower_count': int, 'channel_follower_count': int,
'description': 'md5:e8fd705073a594f27d6d6d020da560dc', 'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura', 'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
'uploader_id': '@Yuichi-Nakamura', 'uploader_id': '@Yuichi-Nakamura',
'uploader': '中村悠一', 'uploader': '中村悠一',
@ -1826,7 +1815,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'only_matching': True, 'only_matching': True,
}, { }, {
# No videos tab but has a shorts tab # No videos tab but has a shorts tab
# TODO: fix metadata extraction
'url': 'https://www.youtube.com/c/TKFShorts', 'url': 'https://www.youtube.com/c/TKFShorts',
'info_dict': { 'info_dict': {
'id': 'UCgJ5_1F6yJhYLnyMszUdmUg', 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
@ -1863,7 +1851,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, { }, {
# Shorts url result in shorts tab # Shorts url result in shorts tab
# TODO: Fix channel id extraction # TODO: Fix channel id extraction
# TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts', 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
'info_dict': { 'info_dict': {
'id': 'UCiu-3thuViMebBjw_5nWYrA', 'id': 'UCiu-3thuViMebBjw_5nWYrA',
@ -1892,7 +1879,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'params': {'extract_flat': True}, 'params': {'extract_flat': True},
}, { }, {
# Live video status should be extracted # Live video status should be extracted
# TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live', 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
'info_dict': { 'info_dict': {
'id': 'UCQvWX73GQygcwXOTSf_VDVg', 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
@ -1921,7 +1907,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 1, 'playlist_mincount': 1,
}, { }, {
# Channel renderer metadata. Contains number of videos on the channel # Channel renderer metadata. Contains number of videos on the channel
# TODO: channels tab removed, change this test to use another page with channel renderer
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels', 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
'info_dict': { 'info_dict': {
'id': 'UCiu-3thuViMebBjw_5nWYrA', 'id': 'UCiu-3thuViMebBjw_5nWYrA',
@ -1955,9 +1940,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, },
}], }],
'params': {'extract_flat': True}, 'params': {'extract_flat': True},
'skip': 'channels tab removed',
}, { }, {
# TODO: fix channel_is_verified extraction
'url': 'https://www.youtube.com/@3blue1brown/about', 'url': 'https://www.youtube.com/@3blue1brown/about',
'info_dict': { 'info_dict': {
'id': '@3blue1brown', 'id': '@3blue1brown',
@ -1967,7 +1950,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_id': 'UCYO_jab_esuFRV4b17AJtAw', 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
'channel': '3Blue1Brown', 'channel': '3Blue1Brown',
'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw', 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
'description': 'md5:602e3789e6a0cb7d9d352186b720e395', 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
'uploader_url': 'https://www.youtube.com/@3blue1brown', 'uploader_url': 'https://www.youtube.com/@3blue1brown',
'uploader_id': '@3blue1brown', 'uploader_id': '@3blue1brown',
'uploader': '3Blue1Brown', 'uploader': '3Blue1Brown',
@ -1993,7 +1976,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_count': 5, 'playlist_count': 5,
}, { }, {
# Releases tab, with rich entry playlistRenderers (same as Podcasts tab) # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
# TODO: fix channel_is_verified extraction
'url': 'https://www.youtube.com/@AHimitsu/releases', 'url': 'https://www.youtube.com/@AHimitsu/releases',
'info_dict': { 'info_dict': {
'id': 'UCgFwu-j5-xNJml2FtTrrB3A', 'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
@ -2033,7 +2015,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 100, 'playlist_mincount': 100,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'], 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
}, { }, {
# TODO: fix channel_is_verified extraction
'note': 'Tags containing spaces', 'note': 'Tags containing spaces',
'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ', 'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
'playlist_count': 3, 'playlist_count': 3,
@ -2054,24 +2035,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'challenges', 'sketches', 'scary games', 'funny games', 'rage games', 'challenges', 'sketches', 'scary games', 'funny games', 'rage games',
'mark fischbach'], 'mark fischbach'],
}, },
}, {
# https://github.com/yt-dlp/yt-dlp/issues/12933
'note': 'streams tab, some scheduled streams. Empty intermediate response with only continuation - must follow',
'url': 'https://www.youtube.com/@sbcitygov/streams',
'playlist_mincount': 150,
'info_dict': {
'id': 'UCH6-qfQwlUgz9SAf05jvc_w',
'channel': 'sbcitygov',
'channel_id': 'UCH6-qfQwlUgz9SAf05jvc_w',
'title': 'sbcitygov - Live',
'channel_follower_count': int,
'description': 'md5:ca1a92059835c071e33b3db52f4a6d67',
'uploader_id': '@sbcitygov',
'uploader_url': 'https://www.youtube.com/@sbcitygov',
'uploader': 'sbcitygov',
'channel_url': 'https://www.youtube.com/channel/UCH6-qfQwlUgz9SAf05jvc_w',
'tags': [],
},
}] }]
@classmethod @classmethod

View File

@ -3646,8 +3646,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if 'sign in' in reason.lower(): if 'sign in' in reason.lower():
reason = remove_end(reason, 'This helps protect our community. Learn more') reason = remove_end(reason, 'This helps protect our community. Learn more')
reason = f'{remove_end(reason.strip(), ".")}. {self._youtube_login_hint}' reason = f'{remove_end(reason.strip(), ".")}. {self._youtube_login_hint}'
elif get_first(playability_statuses, ('errorScreen', 'playerCaptchaViewModel', {dict})):
reason += '. YouTube is requiring a captcha challenge before playback'
self.raise_no_formats(reason, expected=True) self.raise_no_formats(reason, expected=True)
keywords = get_first(video_details, 'keywords', expected_type=list) or [] keywords = get_first(video_details, 'keywords', expected_type=list) or []
@ -3876,7 +3874,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if not traverse_obj(initial_data, 'contents'): if not traverse_obj(initial_data, 'contents'):
self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.') self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
initial_data = None initial_data = None
if not initial_data and 'initial_data' not in self._configuration_arg('player_skip'): if not initial_data:
query = {'videoId': video_id} query = {'videoId': video_id}
query.update(self._get_checkok_params()) query.update(self._get_checkok_params())
initial_data = self._extract_response( initial_data = self._extract_response(

View File

@ -3,7 +3,6 @@ import warnings
from .common import ( from .common import (
HEADRequest, HEADRequest,
PATCHRequest,
PUTRequest, PUTRequest,
Request, Request,
RequestDirector, RequestDirector,

View File

@ -505,7 +505,6 @@ class Request:
HEADRequest = functools.partial(Request, method='HEAD') HEADRequest = functools.partial(Request, method='HEAD')
PATCHRequest = functools.partial(Request, method='PATCH')
PUTRequest = functools.partial(Request, method='PUT') PUTRequest = functools.partial(Request, method='PUT')