Compare commits

...

19 Commits

Author SHA1 Message Date
coletdjnz
72a4a46152
refactor: provider _urlopen into _request_webpage, make pot_request optional 2025-04-18 17:21:26 +12:00
coletdjnz
7b0dd8b2d1
fix: expand webpo visitor id character set support 2025-04-18 16:51:05 +12:00
coletdjnz
4ee46531c9
Merge remote-tracking branch 'upstream/master' into feat/youtube/pot-provider-framework 2025-04-18 16:33:41 +12:00
doe1080
ceab4d5ed6
[networking] Add PATCH request shortcut (#12884)
Authored by: doe1080
2025-04-18 11:46:19 +12:00
leeblackc
ed6c6d7eef
[ie/youtube] Add extractor arg to skip "initial_data" request (#12865)
Closes https://github.com/yt-dlp/yt-dlp/issues/12826

Authored by: leeblackc
2025-04-18 11:42:08 +12:00
coletdjnz
f484c51599
[ie/youtube] Add warning on video captcha challenge (#12939)
Authored by: coletdjnz
2025-04-18 11:40:39 +12:00
coletdjnz
72ba487930
[ie/youtube:tab] Extract continuation from empty page (#12938)
Fixes https://github.com/yt-dlp/yt-dlp/issues/12933 https://github.com/yt-dlp/yt-dlp/issues/8206

Authored by: coletdjnz
2025-04-18 11:34:30 +12:00
Subrat Lima
74e90dd9b8
[ie/LRTRadio] Add extractor (#12801)
Closes #12745
Authored by: subrat-lima
2025-04-06 23:26:44 +00:00
Snack
1d45e30537
[ie/niconico:live] Fix extractor (#12809)
Closes #12365
Authored by: Snack-X
2025-04-06 23:24:58 +00:00
Frank Aurich
3c1c75ecb8
[ie/kika] Add playlist extractor (#12832)
Closes #3658
Authored by: 1100101
2025-04-06 21:04:24 +02:00
J.Luis
7faa18b83d
[ie/ivoox] Add extractor (#12768)
Authored by: NeonMan, seproDev

Co-authored-by: sepro <sepro@sepr0.com>
2025-04-06 20:48:07 +02:00
doe1080
a473e59233
[utils] url_or_none: Support WebSocket URLs (#12848)
Authored by: doe1080
2025-04-06 20:46:08 +02:00
sepro
45f01de00e
[utils] _yield_json_ld: Make function less fatal (#12855)
Authored by: seproDev
2025-04-06 20:31:00 +02:00
WouterGordts
db6d1f145a
[ie/mixcloud] Refactor extractor (#12830)
Authored by: WouterGordts, seproDev

Co-authored-by: sepro <sepro@sepr0.com>
2025-04-06 19:51:08 +02:00
sepro
a3f2b54c25
[ie/dzen.ru] Rework extractors (#12852)
Closes #5523, Closes #10818, Closes #11385, Closes #11470
Authored by: seproDev
2025-04-06 17:41:48 +02:00
LN Liberda
91832111a1
[ie/TokFMPodcast] Fix formats extraction (#12842)
Authored by: selfisekai
2025-04-06 17:05:43 +02:00
Ben Faerber
425017531f
[ie/parti] Add extractors (#12769)
Closes #11434
Authored by: benfaerber
2025-04-05 22:09:53 +02:00
sepro
58d0c83457
[ie/rumble] Improve format extraction (#12838)
Closes #12837
Authored by: seproDev
2025-04-05 20:29:57 +02:00
sepro
4ebf41309d
[ie/CrowdBunker] Make format extraction non-fatal (#12836)
Authored by: seproDev
2025-04-05 19:49:51 +02:00
28 changed files with 659 additions and 258 deletions

View File

@ -1770,7 +1770,7 @@ The following extractors use this feature:
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios` * `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all` * `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`

View File

@ -136,7 +136,7 @@ def _iter_differences(got, expected, field):
return return
if op == 'startswith': if op == 'startswith':
if not val.startswith(got): if not got.startswith(val):
yield field, f'should start with {val!r}, got {got!r}' yield field, f'should start with {val!r}, got {got!r}'
return return

View File

@ -39,6 +39,7 @@ from yt_dlp.cookies import YoutubeDLCookieJar
from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3 from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3
from yt_dlp.networking import ( from yt_dlp.networking import (
HEADRequest, HEADRequest,
PATCHRequest,
PUTRequest, PUTRequest,
Request, Request,
RequestDirector, RequestDirector,
@ -1856,6 +1857,7 @@ class TestRequest:
def test_request_helpers(self): def test_request_helpers(self):
assert HEADRequest('http://example.com').method == 'HEAD' assert HEADRequest('http://example.com').method == 'HEAD'
assert PATCHRequest('http://example.com').method == 'PATCH'
assert PUTRequest('http://example.com').method == 'PUT' assert PUTRequest('http://example.com').method == 'PUT'
def test_headers(self): def test_headers(self):

View File

@ -29,8 +29,8 @@ class TestGetWebPoContentBinding:
assert get_webpo_content_binding(pot_request) == expected assert get_webpo_content_binding(pot_request) == expected
def test_extract_visitor_id(self, pot_request): def test_extract_visitor_id(self, pot_request):
pot_request.visitor_data = 'CgsxMjM0NTY3ODkwMSiA4s-qBg%3D%3D' pot_request.visitor_data = 'CgsxMjNhYmNYWVpfLSiA4s%2DqBg%3D%3D'
assert get_webpo_content_binding(pot_request, bind_to_visitor_id=True) == ('12345678901', ContentBindingType.VISITOR_ID) assert get_webpo_content_binding(pot_request, bind_to_visitor_id=True) == ('123abcXYZ_-', ContentBindingType.VISITOR_ID)
def test_invalid_visitor_id(self, pot_request): def test_invalid_visitor_id(self, pot_request):
# visitor id not alphanumeric (i.e. protobuf extraction failed) # visitor id not alphanumeric (i.e. protobuf extraction failed)

View File

@ -16,7 +16,7 @@ from yt_dlp.extractor.youtube.pot._registry import _pot_pcs_providers
@pytest.fixture() @pytest.fixture()
def pot_request(pot_request) -> PoTokenRequest: def pot_request(pot_request) -> PoTokenRequest:
pot_request.visitor_data = 'CgsxMjM0NTY3ODkwMSiA4s-qBg%3D%3D' # visitor_id=12345678901 pot_request.visitor_data = 'CgsxMjNhYmNYWVpfLSiA4s%2DqBg%3D%3D' # visitor_id=123abcXYZ_-
return pot_request return pot_request
@ -51,13 +51,13 @@ class TestWebPoPCSP:
*[(client, context, is_authenticated, remote_host, source_address, request_proxy, expected) for client in [ *[(client, context, is_authenticated, remote_host, source_address, request_proxy, expected) for client in [
'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER'] 'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER']
for context, is_authenticated, remote_host, source_address, request_proxy, expected in [ for context, is_authenticated, remote_host, source_address, request_proxy, expected in [
(PoTokenContext.GVS, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '12345678901', 'cbt': 'visitor_id'}), (PoTokenContext.GVS, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'visitor_id'}),
(PoTokenContext.PLAYER, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '12345678901', 'cbt': 'video_id'}), (PoTokenContext.PLAYER, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'video_id'}),
(PoTokenContext.GVS, True, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': 'example-data-sync-id', 'cbt': 'datasync_id'}), (PoTokenContext.GVS, True, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': 'example-data-sync-id', 'cbt': 'datasync_id'}),
]], ]],
('WEB_REMIX', PoTokenContext.PLAYER, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '12345678901', 'cbt': 'visitor_id'}), ('WEB_REMIX', PoTokenContext.PLAYER, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'visitor_id'}),
('WEB', PoTokenContext.GVS, False, None, None, None, {'t': 'webpo', 'cb': '12345678901', 'cbt': 'visitor_id', 'ip': None, 'sa': None, 'px': None}), ('WEB', PoTokenContext.GVS, False, None, None, None, {'t': 'webpo', 'cb': '123abcXYZ_-', 'cbt': 'visitor_id', 'ip': None, 'sa': None, 'px': None}),
('TVHTML5', PoTokenContext.PLAYER, False, None, None, 'http://example.com', {'t': 'webpo', 'cb': '12345678901', 'cbt': 'video_id', 'ip': None, 'sa': None, 'px': 'http://example.com'}), ('TVHTML5', PoTokenContext.PLAYER, False, None, None, 'http://example.com', {'t': 'webpo', 'cb': '123abcXYZ_-', 'cbt': 'video_id', 'ip': None, 'sa': None, 'px': 'http://example.com'}),
]) ])
def test_generate_key_bindings(self, ie, logger, pot_request, client_name, context, is_authenticated, remote_host, source_address, request_proxy, expected): def test_generate_key_bindings(self, ie, logger, pot_request, client_name, context, is_authenticated, remote_host, source_address, request_proxy, expected):
@ -68,7 +68,7 @@ class TestWebPoPCSP:
pot_request.innertube_context['client']['remoteHost'] = remote_host pot_request.innertube_context['client']['remoteHost'] = remote_host
pot_request.request_source_address = source_address pot_request.request_source_address = source_address
pot_request.request_proxy = request_proxy pot_request.request_proxy = request_proxy
pot_request.video_id = '12345678901' # same as visitor id to test type pot_request.video_id = '123abcXYZ_-' # same as visitor id to test type
assert pcs.generate_cache_spec(pot_request).key_bindings == expected assert pcs.generate_cache_spec(pot_request).key_bindings == expected
@ -78,7 +78,7 @@ class TestWebPoPCSP:
pot_request.innertube_context['client']['clientName'] = 'WEB' pot_request.innertube_context['client']['clientName'] = 'WEB'
pot_request.context = PoTokenContext.GVS pot_request.context = PoTokenContext.GVS
pot_request.is_authenticated = False pot_request.is_authenticated = False
assert pcs.generate_cache_spec(pot_request).key_bindings == {'t': 'webpo', 'ip': None, 'sa': None, 'px': None, 'cb': 'CgsxMjM0NTY3ODkwMSiA4s-qBg%3D%3D', 'cbt': 'visitor_data'} assert pcs.generate_cache_spec(pot_request).key_bindings == {'t': 'webpo', 'ip': None, 'sa': None, 'px': None, 'cb': 'CgsxMjNhYmNYWVpfLSiA4s%2DqBg%3D%3D', 'cbt': 'visitor_data'}
def test_default_ttl(self, ie, logger, pot_request): def test_default_ttl(self, ie, logger, pot_request):
pcs = WebPoPCSP(ie=ie, logger=logger, settings={}) pcs = WebPoPCSP(ie=ie, logger=logger, settings={})

View File

@ -149,7 +149,7 @@ class TestPoTokenProvider:
with pytest.raises(PoTokenProviderRejectedRequest): with pytest.raises(PoTokenProviderRejectedRequest):
provider.request_pot(pot_request) provider.request_pot(pot_request)
def test_provider_urlopen(self, ie, logger, pot_request): def test_provider_request_webpage(self, ie, logger, pot_request):
provider = ExamplePTP(ie=ie, logger=logger, settings={}) provider = ExamplePTP(ie=ie, logger=logger, settings={})
cookiejar = YoutubeDLCookieJar() cookiejar = YoutubeDLCookieJar()
@ -162,16 +162,17 @@ class TestPoTokenProvider:
ie._downloader.urlopen = mock_urlopen ie._downloader.urlopen = mock_urlopen
sent_request = provider._urlopen(pot_request, Request( sent_request = provider._request_webpage(Request(
'https://example.com', 'https://example.com',
)) ), pot_request=pot_request)
assert sent_request.url == 'https://example.com' assert sent_request.url == 'https://example.com'
assert sent_request.headers['User-Agent'] == 'example-user-agent' assert sent_request.headers['User-Agent'] == 'example-user-agent'
assert sent_request.proxies == {'all': 'socks5://example-proxy.com'} assert sent_request.proxies == {'all': 'socks5://example-proxy.com'}
assert sent_request.extensions['cookiejar'] is cookiejar assert sent_request.extensions['cookiejar'] is cookiejar
assert 'Requesting webpage' in logger.messages['info']
def test_provider_urlopen_override(self, ie, logger, pot_request): def test_provider_request_webpage_override(self, ie, logger, pot_request):
provider = ExamplePTP(ie=ie, logger=logger, settings={}) provider = ExamplePTP(ie=ie, logger=logger, settings={})
cookiejar_request = YoutubeDLCookieJar() cookiejar_request = YoutubeDLCookieJar()
@ -184,17 +185,47 @@ class TestPoTokenProvider:
ie._downloader.urlopen = mock_urlopen ie._downloader.urlopen = mock_urlopen
sent_request = provider._urlopen(pot_request, Request( sent_request = provider._request_webpage(Request(
'https://example.com', 'https://example.com',
headers={'User-Agent': 'override-user-agent-override'}, headers={'User-Agent': 'override-user-agent-override'},
proxies={'http': 'http://example-proxy-override.com'}, proxies={'http': 'http://example-proxy-override.com'},
extensions={'cookiejar': YoutubeDLCookieJar()}, extensions={'cookiejar': YoutubeDLCookieJar()},
)) ), pot_request=pot_request, note='Custom requesting webpage')
assert sent_request.url == 'https://example.com' assert sent_request.url == 'https://example.com'
assert sent_request.headers['User-Agent'] == 'override-user-agent-override' assert sent_request.headers['User-Agent'] == 'override-user-agent-override'
assert sent_request.proxies == {'http': 'http://example-proxy-override.com'} assert sent_request.proxies == {'http': 'http://example-proxy-override.com'}
assert sent_request.extensions['cookiejar'] is not cookiejar_request assert sent_request.extensions['cookiejar'] is not cookiejar_request
assert 'Custom requesting webpage' in logger.messages['info']
def test_provider_request_webpage_no_log(self, ie, logger, pot_request):
    """With ``note=False`` the request is still sent but no info message is logged."""
    provider = ExamplePTP(ie=ie, logger=logger, settings={})

    # Echo the outgoing request back so it can be inspected below.
    ie._downloader.urlopen = lambda request: request

    target = Request(
        'https://example.com',
    )
    echoed = provider._request_webpage(target, note=False)

    assert echoed.url == 'https://example.com'
    # No 'info' bucket should have been created on the logger at all.
    assert 'info' not in logger.messages
def test_provider_request_webpage_no_pot_request(self, ie, logger):
    """``_request_webpage`` accepts ``pot_request=None`` and still sends the request."""
    provider = ExamplePTP(ie=ie, logger=logger, settings={})

    # Echo the outgoing request back so it can be inspected below.
    ie._downloader.urlopen = lambda request: request

    target = Request(
        'https://example.com',
    )
    echoed = provider._request_webpage(target, pot_request=None)

    assert echoed.url == 'https://example.com'
def test_get_config_arg(self, ie, logger): def test_get_config_arg(self, ie, logger):
provider = ExamplePTP(ie=ie, logger=logger, settings={'abc': ['123D'], 'xyz': ['456a', '789B']}) provider = ExamplePTP(ie=ie, logger=logger, settings={'abc': ['123D'], 'xyz': ['456a', '789B']})

View File

@ -659,6 +659,8 @@ class TestUtil(unittest.TestCase):
self.assertEqual(url_or_none('mms://foo.de'), 'mms://foo.de') self.assertEqual(url_or_none('mms://foo.de'), 'mms://foo.de')
self.assertEqual(url_or_none('rtspu://foo.de'), 'rtspu://foo.de') self.assertEqual(url_or_none('rtspu://foo.de'), 'rtspu://foo.de')
self.assertEqual(url_or_none('ftps://foo.de'), 'ftps://foo.de') self.assertEqual(url_or_none('ftps://foo.de'), 'ftps://foo.de')
self.assertEqual(url_or_none('ws://foo.de'), 'ws://foo.de')
self.assertEqual(url_or_none('wss://foo.de'), 'wss://foo.de')
def test_parse_age_limit(self): def test_parse_age_limit(self):
self.assertEqual(parse_age_limit(None), None) self.assertEqual(parse_age_limit(None), None)

View File

@ -85,6 +85,7 @@ class NiconicoLiveFD(FileDownloader):
'quality': live_quality, 'quality': live_quality,
'protocol': 'hls+fmp4', 'protocol': 'hls+fmp4',
'latency': live_latency, 'latency': live_latency,
'accessRightMethod': 'single_cookie',
'chasePlay': False, 'chasePlay': False,
}, },
'room': { 'room': {

View File

@ -903,6 +903,7 @@ from .ivi import (
IviIE, IviIE,
) )
from .ivideon import IvideonIE from .ivideon import IvideonIE
from .ivoox import IvooxIE
from .iwara import ( from .iwara import (
IwaraIE, IwaraIE,
IwaraPlaylistIE, IwaraPlaylistIE,
@ -960,7 +961,10 @@ from .kick import (
) )
from .kicker import KickerIE from .kicker import KickerIE
from .kickstarter import KickStarterIE from .kickstarter import KickStarterIE
from .kika import KikaIE from .kika import (
KikaIE,
KikaPlaylistIE,
)
from .kinja import KinjaEmbedIE from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE from .kinopoisk import KinoPoiskIE
from .kommunetv import KommunetvIE from .kommunetv import KommunetvIE
@ -1061,6 +1065,7 @@ from .loom import (
from .lovehomeporn import LoveHomePornIE from .lovehomeporn import LoveHomePornIE
from .lrt import ( from .lrt import (
LRTVODIE, LRTVODIE,
LRTRadioIE,
LRTStreamIE, LRTStreamIE,
) )
from .lsm import ( from .lsm import (
@ -1493,6 +1498,10 @@ from .paramountplus import (
) )
from .parler import ParlerIE from .parler import ParlerIE
from .parlview import ParlviewIE from .parlview import ParlviewIE
from .parti import (
PartiLivestreamIE,
PartiVideoIE,
)
from .patreon import ( from .patreon import (
PatreonCampaignIE, PatreonCampaignIE,
PatreonIE, PatreonIE,

View File

@ -146,7 +146,7 @@ class TokFMPodcastIE(InfoExtractor):
'url': 'https://audycje.tokfm.pl/podcast/91275,-Systemowy-rasizm-Czy-zamieszki-w-USA-po-morderstwie-w-Minneapolis-doprowadza-do-zmian-w-sluzbach-panstwowych', 'url': 'https://audycje.tokfm.pl/podcast/91275,-Systemowy-rasizm-Czy-zamieszki-w-USA-po-morderstwie-w-Minneapolis-doprowadza-do-zmian-w-sluzbach-panstwowych',
'info_dict': { 'info_dict': {
'id': '91275', 'id': '91275',
'ext': 'aac', 'ext': 'mp3',
'title': 'md5:a9b15488009065556900169fb8061cce', 'title': 'md5:a9b15488009065556900169fb8061cce',
'episode': 'md5:a9b15488009065556900169fb8061cce', 'episode': 'md5:a9b15488009065556900169fb8061cce',
'series': 'Analizy', 'series': 'Analizy',
@ -164,23 +164,20 @@ class TokFMPodcastIE(InfoExtractor):
raise ExtractorError('No such podcast', expected=True) raise ExtractorError('No such podcast', expected=True)
metadata = metadata[0] metadata = metadata[0]
formats = [] mp3_url = self._download_json(
for ext in ('aac', 'mp3'): 'https://api.podcast.radioagora.pl/api4/getSongUrl',
url_data = self._download_json( media_id, 'Downloading podcast mp3 URL', query={
f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}', 'podcast_id': media_id,
media_id, f'Downloading podcast {ext} URL') 'device_id': str(uuid.uuid4()),
# prevents inserting the mp3 (default) multiple times 'ppre': 'false',
if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']: 'audio': 'mp3',
formats.append({ })['link_ssl']
'url': url_data['link_ssl'],
'ext': ext,
'vcodec': 'none',
'acodec': ext,
})
return { return {
'id': media_id, 'id': media_id,
'formats': formats, 'url': mp3_url,
'vcodec': 'none',
'ext': 'mp3',
'title': metadata.get('podcast_name'), 'title': metadata.get('podcast_name'),
'series': metadata.get('series_name'), 'series': metadata.get('series_name'),
'episode': metadata.get('podcast_name'), 'episode': metadata.get('podcast_name'),

View File

@ -1570,6 +1570,8 @@ class InfoExtractor:
"""Yield all json ld objects in the html""" """Yield all json ld objects in the html"""
if default is not NO_DEFAULT: if default is not NO_DEFAULT:
fatal = False fatal = False
if not fatal and not isinstance(html, str):
return
for mobj in re.finditer(JSON_LD_RE, html): for mobj in re.finditer(JSON_LD_RE, html):
json_ld_item = self._parse_json( json_ld_item = self._parse_json(
mobj.group('json_ld'), video_id, fatal=fatal, mobj.group('json_ld'), video_id, fatal=fatal,

View File

@ -5,7 +5,9 @@ from ..utils import (
int_or_none, int_or_none,
try_get, try_get,
unified_strdate, unified_strdate,
url_or_none,
) )
from ..utils.traversal import traverse_obj
class CrowdBunkerIE(InfoExtractor): class CrowdBunkerIE(InfoExtractor):
@ -44,16 +46,15 @@ class CrowdBunkerIE(InfoExtractor):
'url': sub_url, 'url': sub_url,
}) })
mpd_url = try_get(video_json, lambda x: x['dashManifest']['url']) if mpd_url := traverse_obj(video_json, ('dashManifest', 'url', {url_or_none})):
if mpd_url: fmts, subs = self._extract_mpd_formats_and_subtitles(mpd_url, video_id, mpd_id='dash', fatal=False)
fmts, subs = self._extract_mpd_formats_and_subtitles(mpd_url, video_id)
formats.extend(fmts) formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs) self._merge_subtitles(subs, target=subtitles)
m3u8_url = try_get(video_json, lambda x: x['hlsManifest']['url'])
if m3u8_url: if m3u8_url := traverse_obj(video_json, ('hlsManifest', 'url', {url_or_none})):
fmts, subs = self._extract_m3u8_formats_and_subtitles(mpd_url, video_id) fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, m3u8_id='hls', fatal=False)
formats.extend(fmts) formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs) self._merge_subtitles(subs, target=subtitles)
thumbnails = [{ thumbnails = [{
'url': image['url'], 'url': image['url'],

78
yt_dlp/extractor/ivoox.py Normal file
View File

@ -0,0 +1,78 @@
from .common import InfoExtractor
from ..utils import int_or_none, parse_iso8601, url_or_none, urljoin
from ..utils.traversal import traverse_obj
class IvooxIE(InfoExtractor):
    # Matches both the canonical episode pages (optionally prefixed with a
    # two-letter language segment) and the go.ivoox.com short links.
    _VALID_URL = (
        r'https?://(?:www\.)?ivoox\.com/(?:\w{2}/)?[^/?#]+_rf_(?P<id>[0-9]+)_1\.html',
        r'https?://go\.ivoox\.com/rf/(?P<id>[0-9]+)',
    )
    _TESTS = [{
        'url': 'https://www.ivoox.com/dex-08x30-rostros-del-mal-los-asesinos-en-audios-mp3_rf_143594959_1.html',
        'md5': '993f712de5b7d552459fc66aa3726885',
        'info_dict': {
            'id': '143594959',
            'ext': 'mp3',
            'timestamp': 1742731200,
            'channel': 'DIAS EXTRAÑOS con Santiago Camacho',
            'title': 'DEx 08x30 Rostros del mal: Los asesinos en serie que aterrorizaron España',
            'description': 'md5:eae8b4b9740d0216d3871390b056bb08',
            'uploader': 'Santiago Camacho',
            'thumbnail': 'https://static-1.ivoox.com/audios/c/d/5/2/cd52f46783fe735000c33a803dce2554_XXL.jpg',
            'upload_date': '20250323',
            'episode': 'DEx 08x30 Rostros del mal: Los asesinos en serie que aterrorizaron España',
            'duration': 11837,
            'tags': ['españa', 'asesinos en serie', 'arropiero', 'historia criminal', 'mataviejas'],
        },
    }, {
        'url': 'https://go.ivoox.com/rf/143594959',
        'only_matching': True,
    }, {
        'url': 'https://www.ivoox.com/en/campodelgas-28-03-2025-audios-mp3_rf_144036942_1.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract a single iVoox audio episode.

        Formats come from the public download-url API and from the page's
        Nuxt data; metadata is layered from HTML attributes, OpenGraph tags,
        JSON-LD and finally the Nuxt data (later sources override earlier ones).
        """
        media_id = self._match_id(url)
        # The webpage is optional (fatal=False), but a failed download yields
        # False rather than a string. Normalise it to '' so the regex-based
        # helpers below simply find nothing instead of raising TypeError.
        webpage = self._download_webpage(url, media_id, fatal=False) or ''

        data = self._search_nuxt_data(
            webpage, media_id, fatal=False, traverse=('data', 0, 'data', 'audio'))

        direct_download = self._download_json(
            f'https://vcore-web.ivoox.com/v1/public/audios/{media_id}/download-url', media_id, fatal=False,
            note='Fetching direct download link', headers={'Referer': url})

        # A set is used so the same path reported by several sources only
        # produces one format.
        download_paths = {
            *traverse_obj(direct_download, ('data', 'downloadUrl', {str}, filter, all)),
            *traverse_obj(data, (('downloadUrl', 'mediaUrl'), {str}, filter)),
        }

        # Paths are resolved against the site root; the Referer header is sent
        # with the media request as well.
        formats = [{
            'url': urljoin('https://ivoox.com', path),
            'http_headers': {'Referer': url},
        } for path in download_paths]

        return {
            'id': media_id,
            'formats': formats,
            'uploader': self._html_search_regex(r'data-prm-author="([^"]+)"', webpage, 'author', default=None),
            'timestamp': parse_iso8601(
                self._html_search_regex(r'data-prm-pubdate="([^"]+)"', webpage, 'timestamp', default=None)),
            'channel': self._html_search_regex(r'data-prm-podname="([^"]+)"', webpage, 'channel', default=None),
            'title': self._html_search_regex(r'data-prm-title="([^"]+)"', webpage, 'title', default=None),
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'description': self._og_search_description(webpage, default=None),
            **self._search_json_ld(webpage, media_id, default={}),
            **traverse_obj(data, {
                'title': ('title', {str}),
                'description': ('description', {str}),
                'thumbnail': ('image', {url_or_none}),
                'timestamp': ('uploadDate', {parse_iso8601(delimiter=' ')}),
                'duration': ('duration', {int_or_none}),
                'tags': ('tags', ..., 'name', {str}),
            }),
        }

View File

@ -1,3 +1,5 @@
import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
@ -124,3 +126,43 @@ class KikaIE(InfoExtractor):
'vbr': ('bitrateVideo', {int_or_none}, {lambda x: None if x == -1 else x}), 'vbr': ('bitrateVideo', {int_or_none}, {lambda x: None if x == -1 else x}),
}), }),
} }
class KikaPlaylistIE(InfoExtractor):
    # The captured id is the brand slug: lowercase letters/hyphens followed by digits.
    _VALID_URL = r'https?://(?:www\.)?kika\.de/[\w-]+/(?P<id>[a-z-]+\d+)'

    _TESTS = [{
        'url': 'https://www.kika.de/logo/logo-die-welt-und-ich-562',
        'info_dict': {
            'id': 'logo-die-welt-und-ich-562',
            'title': 'logo!',
            'description': 'md5:7b9d7f65561b82fa512f2cfb553c397d',
        },
        'playlist_count': 100,
    }]

    def _entries(self, playlist_url, playlist_id):
        """Yield one url_result per video, following ``links.next`` until it runs out."""
        next_url = playlist_url
        for page_num in itertools.count(1):
            page_data = self._download_json(
                next_url, playlist_id, note=f'Downloading page {page_num}')

            # Only keep content items that carry a valid API URL.
            videos = traverse_obj(
                page_data, ('content', lambda _, v: url_or_none(v['api']['url'])))
            for video in videos:
                metadata = traverse_obj(video, {
                    'id': ('id', {str}),
                    'title': ('title', {str}),
                    'duration': ('duration', {int_or_none}),
                    'timestamp': ('date', {parse_iso8601}),
                })
                yield self.url_result(video['api']['url'], ie=KikaIE, **metadata)

            next_url = traverse_obj(page_data, ('links', 'next', {url_or_none}))
            if not next_url:
                return

    def _real_extract(self, url):
        """Fetch the brand metadata and return a lazily paginated playlist."""
        playlist_id = self._match_id(url)
        brand = self._download_json(
            f'https://www.kika.de/_next-api/proxy/v1/brands/{playlist_id}', playlist_id)
        first_page_url = brand['videoSubchannel']['videosPageUrl']

        return self.playlist_result(
            self._entries(first_page_url, playlist_id),
            playlist_id, title=brand.get('title'), description=brand.get('description'))

View File

@ -2,8 +2,11 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html, clean_html,
merge_dicts, merge_dicts,
str_or_none,
traverse_obj, traverse_obj,
unified_timestamp,
url_or_none, url_or_none,
urljoin,
) )
@ -80,7 +83,7 @@ class LRTVODIE(LRTBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
path, video_id = self._match_valid_url(url).groups() path, video_id = self._match_valid_url(url).group('path', 'id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
media_url = self._extract_js_var(webpage, 'main_url', path) media_url = self._extract_js_var(webpage, 'main_url', path)
@ -106,3 +109,42 @@ class LRTVODIE(LRTBaseIE):
} }
return merge_dicts(clean_info, jw_data, json_ld_data) return merge_dicts(clean_info, jw_data, json_ld_data)
class LRTRadioIE(LRTBaseIE):
    # Captures the numeric recording id and the trailing slug (query string excluded).
    _VALID_URL = r'https?://(?:www\.)?lrt\.lt/radioteka/irasas/(?P<id>\d+)/(?P<path>[^?#/]+)'
    _TESTS = [{
        # m3u8 download
        'url': 'https://www.lrt.lt/radioteka/irasas/2000359728/nemarios-eiles-apie-pragarus-ir-skaistyklas-su-aiste-kiltinaviciute',
        'info_dict': {
            'id': '2000359728',
            'ext': 'm4a',
            'title': 'Nemarios eilės: apie pragarus ir skaistyklas su Aiste Kiltinavičiūte',
            'description': 'md5:5eee9a0e86a55bf547bd67596204625d',
            'timestamp': 1726143120,
            'upload_date': '20240912',
            'tags': 'count:5',
            'thumbnail': r're:https?://.+/.+\.jpe?g',
            'categories': ['Daiktiniai įrodymai'],
        },
    }, {
        'url': 'https://www.lrt.lt/radioteka/irasas/2000304654/vakaras-su-knyga-svetlana-aleksijevic-cernobylio-malda-v-dalis?season=%2Fmediateka%2Faudio%2Fvakaras-su-knyga%2F2023',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Query the radioteka media API for the recording and map it to an info dict."""
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        path = mobj.group('path')

        api_query = {'url': f'/mediateka/irasas/{video_id}/{path}'}
        media = self._download_json(
            'https://www.lrt.lt/radioteka/api/media', video_id,
            query=api_query)

        def dots_to_slashes(date_str):
            # Swap '.' for '/' before handing the value to unified_timestamp
            # (presumably the API separates date parts with dots - confirm).
            return date_str.replace('.', '/')

        def m3u8_formats(manifest_url):
            return self._extract_m3u8_formats(manifest_url, video_id)

        return traverse_obj(media, {
            'id': ('id', {int}, {str_or_none}),
            'title': ('title', {str}),
            'tags': ('tags', ..., 'name', {str}),
            'categories': ('playlist_item', 'category', {str}, filter, all, filter),
            'description': ('content', {clean_html}, {str}),
            'timestamp': ('date', {dots_to_slashes}, {unified_timestamp}),
            'thumbnail': ('playlist_item', 'image', {urljoin('https://www.lrt.lt')}),
            'formats': ('playlist_item', 'file', {m3u8_formats}),
        })

View File

@ -10,7 +10,9 @@ from ..utils import (
parse_iso8601, parse_iso8601,
strip_or_none, strip_or_none,
try_get, try_get,
url_or_none,
) )
from ..utils.traversal import traverse_obj
class MixcloudBaseIE(InfoExtractor): class MixcloudBaseIE(InfoExtractor):
@ -37,7 +39,7 @@ class MixcloudIE(MixcloudBaseIE):
'ext': 'm4a', 'ext': 'm4a',
'title': 'Cryptkeeper', 'title': 'Cryptkeeper',
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', 'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
'uploader': 'Daniel Holbach', 'uploader': 'dholbach',
'uploader_id': 'dholbach', 'uploader_id': 'dholbach',
'thumbnail': r're:https?://.*\.jpg', 'thumbnail': r're:https?://.*\.jpg',
'view_count': int, 'view_count': int,
@ -46,10 +48,11 @@ class MixcloudIE(MixcloudBaseIE):
'uploader_url': 'https://www.mixcloud.com/dholbach/', 'uploader_url': 'https://www.mixcloud.com/dholbach/',
'artist': 'Submorphics & Chino , Telekinesis, Porter Robinson, Enei, Breakage ft Jess Mills', 'artist': 'Submorphics & Chino , Telekinesis, Porter Robinson, Enei, Breakage ft Jess Mills',
'duration': 3723, 'duration': 3723,
'tags': [], 'tags': ['liquid drum and bass', 'drum and bass'],
'comment_count': int, 'comment_count': int,
'repost_count': int, 'repost_count': int,
'like_count': int, 'like_count': int,
'artists': list,
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, { }, {
@ -67,7 +70,7 @@ class MixcloudIE(MixcloudBaseIE):
'upload_date': '20150203', 'upload_date': '20150203',
'uploader_url': 'https://www.mixcloud.com/gillespeterson/', 'uploader_url': 'https://www.mixcloud.com/gillespeterson/',
'duration': 2992, 'duration': 2992,
'tags': [], 'tags': ['jazz', 'soul', 'world music', 'funk'],
'comment_count': int, 'comment_count': int,
'repost_count': int, 'repost_count': int,
'like_count': int, 'like_count': int,
@ -149,8 +152,6 @@ class MixcloudIE(MixcloudBaseIE):
elif reason: elif reason:
raise ExtractorError('Track is restricted', expected=True) raise ExtractorError('Track is restricted', expected=True)
title = cloudcast['name']
stream_info = cloudcast['streamInfo'] stream_info = cloudcast['streamInfo']
formats = [] formats = []
@ -182,47 +183,39 @@ class MixcloudIE(MixcloudBaseIE):
self.raise_login_required(metadata_available=True) self.raise_login_required(metadata_available=True)
comments = [] comments = []
for edge in (try_get(cloudcast, lambda x: x['comments']['edges']) or []): for node in traverse_obj(cloudcast, ('comments', 'edges', ..., 'node', {dict})):
node = edge.get('node') or {}
text = strip_or_none(node.get('comment')) text = strip_or_none(node.get('comment'))
if not text: if not text:
continue continue
user = node.get('user') or {}
comments.append({ comments.append({
'author': user.get('displayName'),
'author_id': user.get('username'),
'text': text, 'text': text,
'timestamp': parse_iso8601(node.get('created')), **traverse_obj(node, {
'author': ('user', 'displayName', {str}),
'author_id': ('user', 'username', {str}),
'timestamp': ('created', {parse_iso8601}),
}),
}) })
tags = []
for t in cloudcast.get('tags'):
tag = try_get(t, lambda x: x['tag']['name'], str)
if not tag:
tags.append(tag)
get_count = lambda x: int_or_none(try_get(cloudcast, lambda y: y[x]['totalCount']))
owner = cloudcast.get('owner') or {}
return { return {
'id': track_id, 'id': track_id,
'title': title,
'formats': formats, 'formats': formats,
'description': cloudcast.get('description'),
'thumbnail': try_get(cloudcast, lambda x: x['picture']['url'], str),
'uploader': owner.get('displayName'),
'timestamp': parse_iso8601(cloudcast.get('publishDate')),
'uploader_id': owner.get('username'),
'uploader_url': owner.get('url'),
'duration': int_or_none(cloudcast.get('audioLength')),
'view_count': int_or_none(cloudcast.get('plays')),
'like_count': get_count('favorites'),
'repost_count': get_count('reposts'),
'comment_count': get_count('comments'),
'comments': comments, 'comments': comments,
'tags': tags, **traverse_obj(cloudcast, {
'artist': ', '.join(cloudcast.get('featuringArtistList') or []) or None, 'title': ('name', {str}),
'description': ('description', {str}),
'thumbnail': ('picture', 'url', {url_or_none}),
'timestamp': ('publishDate', {parse_iso8601}),
'duration': ('audioLength', {int_or_none}),
'uploader': ('owner', 'displayName', {str}),
'uploader_id': ('owner', 'username', {str}),
'uploader_url': ('owner', 'url', {url_or_none}),
'view_count': ('plays', {int_or_none}),
'like_count': ('favorites', 'totalCount', {int_or_none}),
'repost_count': ('reposts', 'totalCount', {int_or_none}),
'comment_count': ('comments', 'totalCount', {int_or_none}),
'tags': ('tags', ..., 'tag', 'name', {str}, filter, all, filter),
'artists': ('featuringArtistList', ..., {str}, filter, all, filter),
}),
} }
@ -295,7 +288,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'url': 'http://www.mixcloud.com/dholbach/', 'url': 'http://www.mixcloud.com/dholbach/',
'info_dict': { 'info_dict': {
'id': 'dholbach_uploads', 'id': 'dholbach_uploads',
'title': 'Daniel Holbach (uploads)', 'title': 'dholbach (uploads)',
'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b', 'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
}, },
'playlist_mincount': 36, 'playlist_mincount': 36,
@ -303,7 +296,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'url': 'http://www.mixcloud.com/dholbach/uploads/', 'url': 'http://www.mixcloud.com/dholbach/uploads/',
'info_dict': { 'info_dict': {
'id': 'dholbach_uploads', 'id': 'dholbach_uploads',
'title': 'Daniel Holbach (uploads)', 'title': 'dholbach (uploads)',
'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b', 'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
}, },
'playlist_mincount': 36, 'playlist_mincount': 36,
@ -311,7 +304,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'url': 'http://www.mixcloud.com/dholbach/favorites/', 'url': 'http://www.mixcloud.com/dholbach/favorites/',
'info_dict': { 'info_dict': {
'id': 'dholbach_favorites', 'id': 'dholbach_favorites',
'title': 'Daniel Holbach (favorites)', 'title': 'dholbach (favorites)',
'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b', 'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
}, },
# 'params': { # 'params': {
@ -337,7 +330,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'title': 'First Ear (stream)', 'title': 'First Ear (stream)',
'description': 'we maraud for ears', 'description': 'we maraud for ears',
}, },
'playlist_mincount': 269, 'playlist_mincount': 267,
}] }]
_TITLE_KEY = 'displayName' _TITLE_KEY = 'displayName'
@ -361,7 +354,7 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
'id': 'maxvibes_jazzcat-on-ness-radio', 'id': 'maxvibes_jazzcat-on-ness-radio',
'title': 'Ness Radio sessions', 'title': 'Ness Radio sessions',
}, },
'playlist_mincount': 59, 'playlist_mincount': 58,
}] }]
_TITLE_KEY = 'name' _TITLE_KEY = 'name'
_DESCRIPTION_KEY = 'description' _DESCRIPTION_KEY = 'description'

View File

@ -27,6 +27,7 @@ from ..utils import (
traverse_obj, traverse_obj,
try_get, try_get,
unescapeHTML, unescapeHTML,
unified_timestamp,
update_url_query, update_url_query,
url_basename, url_basename,
url_or_none, url_or_none,
@ -985,6 +986,7 @@ class NiconicoLiveIE(InfoExtractor):
'quality': 'abr', 'quality': 'abr',
'protocol': 'hls+fmp4', 'protocol': 'hls+fmp4',
'latency': latency, 'latency': latency,
'accessRightMethod': 'single_cookie',
'chasePlay': False, 'chasePlay': False,
}, },
'room': { 'room': {
@ -1005,6 +1007,7 @@ class NiconicoLiveIE(InfoExtractor):
if data.get('type') == 'stream': if data.get('type') == 'stream':
m3u8_url = data['data']['uri'] m3u8_url = data['data']['uri']
qualities = data['data']['availableQualities'] qualities = data['data']['availableQualities']
cookies = data['data']['cookies']
break break
elif data.get('type') == 'disconnect': elif data.get('type') == 'disconnect':
self.write_debug(recv) self.write_debug(recv)
@ -1043,6 +1046,11 @@ class NiconicoLiveIE(InfoExtractor):
**res, **res,
}) })
for cookie in cookies:
self._set_cookie(
cookie['domain'], cookie['name'], cookie['value'],
expire_time=unified_timestamp(cookie['expires']), path=cookie['path'], secure=cookie['secure'])
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True) formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True)
for fmt, q in zip(formats, reversed(qualities[1:])): for fmt, q in zip(formats, reversed(qualities[1:])):
fmt.update({ fmt.update({

101
yt_dlp/extractor/parti.py Normal file
View File

@ -0,0 +1,101 @@
from .common import InfoExtractor
from ..utils import UserNotLive, int_or_none, parse_iso8601, url_or_none, urljoin
from ..utils.traversal import traverse_obj
class PartiBaseIE(InfoExtractor):
    # Common base for the Parti extractors; only provides the API helper below

    def _call_api(self, path, video_id, note=None):
        """Download and return the JSON found at `path` under the Parti profile API root.

        `video_id` and `note` are forwarded unchanged to `_download_json`.
        """
        api_url = f'https://api-backend.parti.com/parti_v2/profile/{path}'
        return self._download_json(api_url, video_id, note)
class PartiVideoIE(PartiBaseIE):
    # Matches parti.com/video/<numeric id>
    IE_NAME = 'parti:video'
    _VALID_URL = r'https?://(?:www\.)?parti\.com/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://parti.com/video/66284',
        'info_dict': {
            'id': '66284',
            'ext': 'mp4',
            'title': 'NOW LIVE ',
            'upload_date': '20250327',
            'categories': ['Gaming'],
            'thumbnail': 'https://assets.parti.com/351424_eb9e5250-2821-484a-9c5f-ca99aa666c87.png',
            'channel': 'ItZTMGG',
            'timestamp': 1743044379,
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Fetch the recording's channel info and build the info dict from it."""
        video_id = self._match_id(url)
        data = self._call_api(f'get_livestream_channel_info/recent/{video_id}', video_id)

        # 'livestream_recording' is mandatory (plain indexing); it is resolved against the watch host
        recording_url = urljoin('https://watch.parti.com', data['livestream_recording'])
        formats = self._extract_m3u8_formats(recording_url, video_id, 'mp4')

        metadata = traverse_obj(data, {
            'title': ('event_title', {str}),
            'channel': ('user_name', {str}),
            'thumbnail': ('event_file', {url_or_none}),
            'categories': ('category_name', {str}, filter, all),
            'timestamp': ('event_start_ts', {int_or_none}),
        })

        return {
            'id': video_id,
            'formats': formats,
            **metadata,
        }
class PartiLivestreamIE(PartiBaseIE):
    # Matches parti.com/creator/<service>/<creator slug>; the slug itself may contain slashes
    IE_NAME = 'parti:livestream'
    _VALID_URL = r'https?://(?:www\.)?parti\.com/creator/(?P<service>[\w]+)/(?P<id>[\w/-]+)'
    _TESTS = [{
        'url': 'https://parti.com/creator/parti/Capt_Robs_Adventures',
        'info_dict': {
            'id': 'Capt_Robs_Adventures',
            'ext': 'mp4',
            'title': r"re:I'm Live on Parti \d{4}-\d{2}-\d{2} \d{2}:\d{2}",
            'view_count': int,
            'thumbnail': r're:https://assets\.parti\.com/.+\.png',
            'timestamp': 1743879776,
            'upload_date': '20250405',
            'live_status': 'is_live',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://parti.com/creator/discord/sazboxgaming/0',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the creator to a user ID, then extract the running livestream.

        Raises UserNotLive when the channel's `is_live` flag is not a true boolean.
        """
        mobj = self._match_valid_url(url)
        service = mobj.group('service')
        creator_slug = mobj.group('id')

        # Slashes inside the slug are sent to the API as '%23'
        encoded_creator_slug = creator_slug.replace('/', '%23')
        creator_id = self._call_api(
            f'get_user_by_social_media/{service}/{encoded_creator_slug}',
            creator_slug, note='Fetching user ID')

        profile_feed = self._call_api(
            f'get_livestream_channel_info/{creator_id}', creator_id,
            note='Fetching user profile feed')
        data = profile_feed['channel_info']

        is_live = traverse_obj(data, ('channel', 'is_live', {bool}))
        if not is_live:
            raise UserNotLive(video_id=creator_id)

        channel_info = data['channel']
        # Look up the playback URL before the token, keeping the original lookup order
        playback_url = channel_info['playback_url']
        playback_query = {
            'token': channel_info['playback_auth_token'],
            'player_version': '1.17.0',
        }
        formats = self._extract_m3u8_formats(
            playback_url, creator_slug, live=True, query=playback_query)

        metadata = traverse_obj(data, {
            'title': ('livestream_event_info', 'event_name', {str}),
            'description': ('livestream_event_info', 'event_description', {str}),
            'thumbnail': ('livestream_event_info', 'livestream_preview_file', {url_or_none}),
            'timestamp': ('stream', 'start_time', {parse_iso8601}),
            'view_count': ('stream', 'viewer_count', {int_or_none}),
        })

        return {
            'id': creator_slug,
            'formats': formats,
            'is_live': True,
            **metadata,
        }

View File

@ -7,7 +7,6 @@ from ..utils import (
ExtractorError, ExtractorError,
UnsupportedError, UnsupportedError,
clean_html, clean_html,
determine_ext,
extract_attributes, extract_attributes,
format_field, format_field,
get_element_by_class, get_element_by_class,
@ -36,7 +35,7 @@ class RumbleEmbedIE(InfoExtractor):
'upload_date': '20191020', 'upload_date': '20191020',
'channel_url': 'https://rumble.com/c/WMAR', 'channel_url': 'https://rumble.com/c/WMAR',
'channel': 'WMAR', 'channel': 'WMAR',
'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.qR4e-small-WMAR-2-News-Latest-Headline.jpg', 'thumbnail': r're:https://.+\.jpg',
'duration': 234, 'duration': 234,
'uploader': 'WMAR', 'uploader': 'WMAR',
'live_status': 'not_live', 'live_status': 'not_live',
@ -52,7 +51,7 @@ class RumbleEmbedIE(InfoExtractor):
'upload_date': '20220217', 'upload_date': '20220217',
'channel_url': 'https://rumble.com/c/CyberTechNews', 'channel_url': 'https://rumble.com/c/CyberTechNews',
'channel': 'CTNews', 'channel': 'CTNews',
'thumbnail': 'https://sp.rmbl.ws/s8/6/7/i/9/h/7i9hd.OvCc.jpg', 'thumbnail': r're:https://.+\.jpg',
'duration': 901, 'duration': 901,
'uploader': 'CTNews', 'uploader': 'CTNews',
'live_status': 'not_live', 'live_status': 'not_live',
@ -114,6 +113,22 @@ class RumbleEmbedIE(InfoExtractor):
'live_status': 'was_live', 'live_status': 'was_live',
}, },
'params': {'skip_download': True}, 'params': {'skip_download': True},
}, {
'url': 'https://rumble.com/embed/v6pezdb',
'info_dict': {
'id': 'v6pezdb',
'ext': 'mp4',
'title': '"Es war einmal ein Mädchen" Ein filmisches Zeitzeugnis aus Leningrad 1944',
'uploader': 'RT DE',
'channel': 'RT DE',
'channel_url': 'https://rumble.com/c/RTDE',
'duration': 309,
'thumbnail': 'https://1a-1791.com/video/fww1/dc/s8/1/n/z/2/y/nz2yy.qR4e-small-Es-war-einmal-ein-Mdchen-Ei.jpg',
'timestamp': 1743703500,
'upload_date': '20250403',
'live_status': 'not_live',
},
'params': {'skip_download': True},
}, { }, {
'url': 'https://rumble.com/embed/ufe9n.v5pv5f', 'url': 'https://rumble.com/embed/ufe9n.v5pv5f',
'only_matching': True, 'only_matching': True,
@ -168,40 +183,42 @@ class RumbleEmbedIE(InfoExtractor):
live_status = None live_status = None
formats = [] formats = []
for ext, ext_info in (video.get('ua') or {}).items(): for format_type, format_info in (video.get('ua') or {}).items():
if isinstance(ext_info, dict): if isinstance(format_info, dict):
for height, video_info in ext_info.items(): for height, video_info in format_info.items():
if not traverse_obj(video_info, ('meta', 'h', {int_or_none})): if not traverse_obj(video_info, ('meta', 'h', {int_or_none})):
video_info.setdefault('meta', {})['h'] = height video_info.setdefault('meta', {})['h'] = height
ext_info = ext_info.values() format_info = format_info.values()
for video_info in ext_info: for video_info in format_info:
meta = video_info.get('meta') or {} meta = video_info.get('meta') or {}
if not video_info.get('url'): if not video_info.get('url'):
continue continue
if ext == 'hls': # With default query params returns m3u8 variants which are duplicates, without returns tar files
if format_type == 'tar':
continue
if format_type == 'hls':
if meta.get('live') is True and video.get('live') == 1: if meta.get('live') is True and video.get('live') == 1:
live_status = 'post_live' live_status = 'post_live'
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
video_info['url'], video_id, video_info['url'], video_id,
ext='mp4', m3u8_id='hls', fatal=False, live=live_status == 'is_live')) ext='mp4', m3u8_id='hls', fatal=False, live=live_status == 'is_live'))
continue continue
timeline = ext == 'timeline' is_timeline = format_type == 'timeline'
if timeline: is_audio = format_type == 'audio'
ext = determine_ext(video_info['url'])
formats.append({ formats.append({
'ext': ext, 'acodec': 'none' if is_timeline else None,
'acodec': 'none' if timeline else None, 'vcodec': 'none' if is_audio else None,
'url': video_info['url'], 'url': video_info['url'],
'format_id': join_nonempty(ext, format_field(meta, 'h', '%sp')), 'format_id': join_nonempty(format_type, format_field(meta, 'h', '%sp')),
'format_note': 'Timeline' if timeline else None, 'format_note': 'Timeline' if is_timeline else None,
'fps': None if timeline else video.get('fps'), 'fps': None if is_timeline or is_audio else video.get('fps'),
**traverse_obj(meta, { **traverse_obj(meta, {
'tbr': 'bitrate', 'tbr': ('bitrate', {int_or_none}),
'filesize': 'size', 'filesize': ('size', {int_or_none}),
'width': 'w', 'width': ('w', {int_or_none}),
'height': 'h', 'height': ('h', {int_or_none}),
}, expected_type=lambda x: int(x) or None), }),
}) })
subtitles = { subtitles = {

View File

@ -2,15 +2,17 @@ import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
bug_reports_message,
determine_ext, determine_ext,
extract_attributes,
int_or_none, int_or_none,
lowercase_escape, lowercase_escape,
parse_qs, parse_qs,
traverse_obj, qualities,
try_get, try_get,
update_url_query,
url_or_none, url_or_none,
) )
from ..utils.traversal import traverse_obj
class YandexVideoIE(InfoExtractor): class YandexVideoIE(InfoExtractor):
@ -186,7 +188,22 @@ class YandexVideoPreviewIE(InfoExtractor):
return self.url_result(data_json['video']['url']) return self.url_result(data_json['video']['url'])
class ZenYandexIE(InfoExtractor): class ZenYandexBaseIE(InfoExtractor):
def _fetch_ssr_data(self, url, video_id):
    """Download the page and return `(video_id, ssr_data)`.

    If the page embeds a redirect object with a `retpath`, that target is
    downloaded instead and `video_id` is replaced by the ID matched from it.
    `ssr_data` is the 'ssrData' member of the page's `_params` JSON.
    """
    webpage = self._download_webpage(url, video_id)

    # A missing redirect object falls back to {} so .get() is always safe
    redirect_info = self._search_json(
        r'(?:var|let|const)\s+it\s*=', webpage, 'redirect', video_id, default={})
    redirect = redirect_info.get('retpath')
    if redirect:
        video_id = self._match_id(redirect)
        webpage = self._download_webpage(redirect, video_id, note='Redirecting')

    params = self._search_json(
        r'(?:var|let|const)\s+_params\s*=\s*\(', webpage, 'metadata', video_id,
        contains_pattern=r'{["\']ssrData.+}')
    return video_id, params['ssrData']
class ZenYandexIE(ZenYandexBaseIE):
IE_NAME = 'dzen.ru'
IE_DESC = 'Дзен (dzen) formerly Яндекс.Дзен (Yandex Zen)'
_VALID_URL = r'https?://(zen\.yandex|dzen)\.ru(?:/video)?/(media|watch)/(?:(?:id/[^/]+/|[^/]+/)(?:[a-z0-9-]+)-)?(?P<id>[a-z0-9-]+)' _VALID_URL = r'https?://(zen\.yandex|dzen)\.ru(?:/video)?/(media|watch)/(?:(?:id/[^/]+/|[^/]+/)(?:[a-z0-9-]+)-)?(?P<id>[a-z0-9-]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://zen.yandex.ru/media/id/606fd806cc13cb3c58c05cf5/vot-eto-focus-dedy-morozy-na-gidrociklah-60c7c443da18892ebfe85ed7', 'url': 'https://zen.yandex.ru/media/id/606fd806cc13cb3c58c05cf5/vot-eto-focus-dedy-morozy-na-gidrociklah-60c7c443da18892ebfe85ed7',
@ -216,6 +233,7 @@ class ZenYandexIE(InfoExtractor):
'timestamp': 1573465585, 'timestamp': 1573465585,
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
'skip': 'The page does not exist',
}, { }, {
'url': 'https://zen.yandex.ru/video/watch/6002240ff8b1af50bb2da5e3', 'url': 'https://zen.yandex.ru/video/watch/6002240ff8b1af50bb2da5e3',
'info_dict': { 'info_dict': {
@ -227,6 +245,9 @@ class ZenYandexIE(InfoExtractor):
'uploader': 'TechInsider', 'uploader': 'TechInsider',
'timestamp': 1611378221, 'timestamp': 1611378221,
'upload_date': '20210123', 'upload_date': '20210123',
'view_count': int,
'duration': 243,
'tags': ['опыт', 'эксперимент', 'огонь'],
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, { }, {
@ -240,6 +261,9 @@ class ZenYandexIE(InfoExtractor):
'uploader': 'TechInsider', 'uploader': 'TechInsider',
'upload_date': '20210123', 'upload_date': '20210123',
'timestamp': 1611378221, 'timestamp': 1611378221,
'view_count': int,
'duration': 243,
'tags': ['опыт', 'эксперимент', 'огонь'],
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, { }, {
@ -252,44 +276,56 @@ class ZenYandexIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) video_id, ssr_data = self._fetch_ssr_data(url, video_id)
redirect = self._search_json(r'var it\s*=', webpage, 'redirect', id, default={}).get('retpath') video_data = ssr_data['videoMetaResponse']
if redirect:
video_id = self._match_id(redirect)
webpage = self._download_webpage(redirect, video_id, note='Redirecting')
data_json = self._search_json(
r'("data"\s*:|data\s*=)', webpage, 'metadata', video_id, contains_pattern=r'{["\']_*serverState_*video.+}')
serverstate = self._search_regex(r'(_+serverState_+video-site_[^_]+_+)', webpage, 'server state')
uploader = self._search_regex(r'(<a\s*class=["\']card-channel-link[^"\']+["\'][^>]+>)',
webpage, 'uploader', default='<a>')
uploader_name = extract_attributes(uploader).get('aria-label')
item_id = traverse_obj(data_json, (serverstate, 'videoViewer', 'openedItemId', {str}))
video_json = traverse_obj(data_json, (serverstate, 'videoViewer', 'items', item_id, {dict})) or {}
formats, subtitles = [], {} formats, subtitles = [], {}
for s_url in traverse_obj(video_json, ('video', 'streams', ..., {url_or_none})): quality = qualities(('4', '0', '1', '2', '3', '5', '6', '7'))
# Deduplicate stream URLs. The "dzen_dash" query parameter is present in some URLs but can be omitted
stream_urls = set(traverse_obj(video_data, (
'video', ('id', ('streams', ...), ('mp4Streams', ..., 'url'), ('oneVideoStreams', ..., 'url')),
{url_or_none}, {update_url_query(query={'dzen_dash': []})})))
for s_url in stream_urls:
ext = determine_ext(s_url) ext = determine_ext(s_url)
if ext == 'mpd': content_type = traverse_obj(parse_qs(s_url), ('ct', 0))
fmts, subs = self._extract_mpd_formats_and_subtitles(s_url, video_id, mpd_id='dash') if ext == 'mpd' or content_type == '6':
elif ext == 'm3u8': fmts, subs = self._extract_mpd_formats_and_subtitles(s_url, video_id, mpd_id='dash', fatal=False)
fmts, subs = self._extract_m3u8_formats_and_subtitles(s_url, video_id, 'mp4') elif ext == 'm3u8' or content_type == '8':
fmts, subs = self._extract_m3u8_formats_and_subtitles(s_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
elif content_type == '0':
format_type = traverse_obj(parse_qs(s_url), ('type', 0))
formats.append({
'url': s_url,
'format_id': format_type,
'ext': 'mp4',
'quality': quality(format_type),
})
continue
else:
self.report_warning(f'Unsupported stream URL: {s_url}{bug_reports_message()}')
continue
formats.extend(fmts) formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs) self._merge_subtitles(subs, target=subtitles)
return { return {
'id': video_id, 'id': video_id,
'title': video_json.get('title') or self._og_search_title(webpage),
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'duration': int_or_none(video_json.get('duration')), **traverse_obj(video_data, {
'view_count': int_or_none(video_json.get('views')), 'title': ('title', {str}),
'timestamp': int_or_none(video_json.get('publicationDate')), 'description': ('description', {str}),
'uploader': uploader_name or data_json.get('authorName') or try_get(data_json, lambda x: x['publisher']['name']), 'thumbnail': ('image', {url_or_none}),
'description': video_json.get('description') or self._og_search_description(webpage), 'duration': ('video', 'duration', {int_or_none}),
'thumbnail': self._og_search_thumbnail(webpage) or try_get(data_json, lambda x: x['og']['imageUrl']), 'view_count': ('video', 'views', {int_or_none}),
'timestamp': ('publicationDate', {int_or_none}),
'tags': ('tags', ..., {str}),
'uploader': ('source', 'title', {str}),
}),
} }
class ZenYandexChannelIE(InfoExtractor): class ZenYandexChannelIE(ZenYandexBaseIE):
IE_NAME = 'dzen.ru:channel'
_VALID_URL = r'https?://(zen\.yandex|dzen)\.ru/(?!media|video)(?:id/)?(?P<id>[a-z0-9-_]+)' _VALID_URL = r'https?://(zen\.yandex|dzen)\.ru/(?!media|video)(?:id/)?(?P<id>[a-z0-9-_]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://zen.yandex.ru/tok_media', 'url': 'https://zen.yandex.ru/tok_media',
@ -323,8 +359,8 @@ class ZenYandexChannelIE(InfoExtractor):
'url': 'https://zen.yandex.ru/jony_me', 'url': 'https://zen.yandex.ru/jony_me',
'info_dict': { 'info_dict': {
'id': 'jony_me', 'id': 'jony_me',
'description': 'md5:ce0a5cad2752ab58701b5497835b2cc5', 'description': 'md5:7c30d11dc005faba8826feae99da3113',
'title': 'JONY ', 'title': 'JONY',
}, },
'playlist_count': 18, 'playlist_count': 18,
}, { }, {
@ -333,9 +369,8 @@ class ZenYandexChannelIE(InfoExtractor):
'url': 'https://zen.yandex.ru/tatyanareva', 'url': 'https://zen.yandex.ru/tatyanareva',
'info_dict': { 'info_dict': {
'id': 'tatyanareva', 'id': 'tatyanareva',
'description': 'md5:40a1e51f174369ec3ba9d657734ac31f', 'description': 'md5:92e56fa730a932ca2483ba5c2186ad96',
'title': 'Татьяна Рева', 'title': 'Татьяна Рева',
'entries': 'maxcount:200',
}, },
'playlist_mincount': 46, 'playlist_mincount': 46,
}, { }, {
@ -348,43 +383,31 @@ class ZenYandexChannelIE(InfoExtractor):
'playlist_mincount': 657, 'playlist_mincount': 657,
}] }]
def _entries(self, item_id, server_state_json, server_settings_json): def _entries(self, feed_data, channel_id):
items = (traverse_obj(server_state_json, ('feed', 'items', ...))
or traverse_obj(server_settings_json, ('exportData', 'items', ...)))
more = (traverse_obj(server_state_json, ('links', 'more'))
or traverse_obj(server_settings_json, ('exportData', 'more', 'link')))
next_page_id = None next_page_id = None
for page in itertools.count(1): for page in itertools.count(1):
for item in items or []: for item in traverse_obj(feed_data, (
if item.get('type') != 'gif': (None, ('items', lambda _, v: v['tab'] in ('shorts', 'longs'))),
continue 'items', lambda _, v: url_or_none(v['link']),
video_id = traverse_obj(item, 'publication_id', 'publicationId') or '' )):
yield self.url_result(item['link'], ZenYandexIE, video_id.split(':')[-1]) yield self.url_result(item['link'], ZenYandexIE, item.get('id'), title=item.get('title'))
more = traverse_obj(feed_data, ('more', 'link', {url_or_none}))
current_page_id = next_page_id current_page_id = next_page_id
next_page_id = traverse_obj(parse_qs(more), ('next_page_id', -1)) next_page_id = traverse_obj(parse_qs(more), ('next_page_id', -1))
if not all((more, items, next_page_id, next_page_id != current_page_id)): if not all((more, next_page_id, next_page_id != current_page_id)):
break break
data = self._download_json(more, item_id, note=f'Downloading Page {page}') feed_data = self._download_json(more, channel_id, note=f'Downloading Page {page}')
items, more = data.get('items'), traverse_obj(data, ('more', 'link'))
def _real_extract(self, url): def _real_extract(self, url):
item_id = self._match_id(url) channel_id = self._match_id(url)
webpage = self._download_webpage(url, item_id) channel_id, ssr_data = self._fetch_ssr_data(url, channel_id)
redirect = self._search_json( channel_data = ssr_data['exportResponse']
r'var it\s*=', webpage, 'redirect', item_id, default={}).get('retpath')
if redirect:
item_id = self._match_id(redirect)
webpage = self._download_webpage(redirect, item_id, note='Redirecting')
data = self._search_json(
r'("data"\s*:|data\s*=)', webpage, 'channel data', item_id, contains_pattern=r'{\"__serverState__.+}')
server_state_json = traverse_obj(data, lambda k, _: k.startswith('__serverState__'), get_all=False)
server_settings_json = traverse_obj(data, lambda k, _: k.startswith('__serverSettings__'), get_all=False)
return self.playlist_result( return self.playlist_result(
self._entries(item_id, server_state_json, server_settings_json), self._entries(channel_data['feedData'], channel_id),
item_id, traverse_obj(server_state_json, ('channel', 'source', 'title')), channel_id, **traverse_obj(channel_data, ('channel', 'source', {
traverse_obj(server_state_json, ('channel', 'source', 'description'))) 'title': ('title', {str}),
'description': ('description', {str}),
})))

View File

@ -524,10 +524,16 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
response = self._extract_response( response = self._extract_response(
item_id=f'{item_id} page {page_num}', item_id=f'{item_id} page {page_num}',
query=continuation, headers=headers, ytcfg=ytcfg, query=continuation, headers=headers, ytcfg=ytcfg,
check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints')) check_get_keys=(
'continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints',
# Playlist recommendations may return with no data - ignore
('responseContext', 'serviceTrackingParams', ..., 'params', ..., lambda k, v: k == 'key' and v == 'GetRecommendedMusicPlaylists_rid'),
))
if not response: if not response:
break break
continuation = None
# Extracting updated visitor data is required to prevent an infinite extraction loop in some cases # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
# See: https://github.com/ytdl-org/youtube-dl/issues/28702 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
visitor_data = self._extract_visitor_data(response) or visitor_data visitor_data = self._extract_visitor_data(response) or visitor_data
@ -564,7 +570,13 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
yield from func(video_items_renderer) yield from func(video_items_renderer)
continuation = continuation_list[0] or self._extract_continuation(video_items_renderer) continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
if not video_items_renderer: # In the case only a continuation is returned, try to follow it.
# We extract this after trying to extract non-continuation items as otherwise this
# may be prioritized over other continuations.
# see: https://github.com/yt-dlp/yt-dlp/issues/12933
continuation = continuation or self._extract_continuation({'contents': [continuation_item]})
if not continuation and not video_items_renderer:
break break
@staticmethod @staticmethod
@ -999,14 +1011,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 94, 'playlist_mincount': 94,
'info_dict': { 'info_dict': {
'id': 'UCqj7Cz7revf5maW9g5pgNcg', 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
'title': 'Igor Kleiner Ph.D. - Playlists', 'title': 'Igor Kleiner - Playlists',
'description': 'md5:15d7dd9e333cb987907fcb0d604b233a', 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
'uploader': 'Igor Kleiner Ph.D.', 'uploader': 'Igor Kleiner ',
'uploader_id': '@IgorDataScience', 'uploader_id': '@IgorDataScience',
'uploader_url': 'https://www.youtube.com/@IgorDataScience', 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
'channel': 'Igor Kleiner Ph.D.', 'channel': 'Igor Kleiner ',
'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg', 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'], 'tags': 'count:23',
'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg', 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
'channel_follower_count': int, 'channel_follower_count': int,
}, },
@ -1016,18 +1028,19 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 94, 'playlist_mincount': 94,
'info_dict': { 'info_dict': {
'id': 'UCqj7Cz7revf5maW9g5pgNcg', 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
'title': 'Igor Kleiner Ph.D. - Playlists', 'title': 'Igor Kleiner - Playlists',
'description': 'md5:15d7dd9e333cb987907fcb0d604b233a', 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
'uploader': 'Igor Kleiner Ph.D.', 'uploader': 'Igor Kleiner ',
'uploader_id': '@IgorDataScience', 'uploader_id': '@IgorDataScience',
'uploader_url': 'https://www.youtube.com/@IgorDataScience', 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'], 'tags': 'count:23',
'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg', 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
'channel': 'Igor Kleiner Ph.D.', 'channel': 'Igor Kleiner ',
'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg', 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
'channel_follower_count': int, 'channel_follower_count': int,
}, },
}, { }, {
# TODO: fix channel_is_verified extraction
'note': 'playlists, series', 'note': 'playlists, series',
'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3', 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
'playlist_mincount': 5, 'playlist_mincount': 5,
@ -1066,22 +1079,23 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'url': 'https://www.youtube.com/c/ChristophLaimer/playlists', 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
'only_matching': True, 'only_matching': True,
}, { }, {
# TODO: fix availability extraction
'note': 'basic, single video playlist', 'note': 'basic, single video playlist',
'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlSLRHmI1qNm0wjyVNWw1pCU',
'info_dict': { 'info_dict': {
'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', 'id': 'PLt5yu3-wZAlSLRHmI1qNm0wjyVNWw1pCU',
'title': 'youtube-dl public playlist', 'title': 'single video playlist',
'description': '', 'description': '',
'tags': [], 'tags': [],
'view_count': int, 'view_count': int,
'modified_date': '20201130', 'modified_date': '20250417',
'channel': 'Sergey M.', 'channel': 'cole-dlp-test-acc',
'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA', 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
'availability': 'public', 'availability': 'public',
'uploader': 'Sergey M.', 'uploader': 'cole-dlp-test-acc',
'uploader_url': 'https://www.youtube.com/@sergeym.6173', 'uploader_url': 'https://www.youtube.com/@coletdjnz',
'uploader_id': '@sergeym.6173', 'uploader_id': '@coletdjnz',
}, },
'playlist_count': 1, 'playlist_count': 1,
}, { }, {
@ -1171,11 +1185,11 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, },
'playlist_mincount': 17, 'playlist_mincount': 17,
}, { }, {
'note': 'Community tab', 'note': 'Posts tab',
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community', 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
'info_dict': { 'info_dict': {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Community', 'title': 'lex will - Posts',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
'channel': 'lex will', 'channel': 'lex will',
'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
@ -1188,30 +1202,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, },
'playlist_mincount': 18, 'playlist_mincount': 18,
}, { }, {
'note': 'Channels tab', # TODO: fix channel_is_verified extraction
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
'info_dict': {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Channels',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
'channel': 'lex will',
'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'tags': ['bible', 'history', 'prophesy'],
'channel_follower_count': int,
'uploader_url': 'https://www.youtube.com/@lexwill718',
'uploader_id': '@lexwill718',
'uploader': 'lex will',
},
'playlist_mincount': 12,
}, {
'note': 'Search tab', 'note': 'Search tab',
'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra', 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
'playlist_mincount': 40, 'playlist_mincount': 40,
'info_dict': { 'info_dict': {
'id': 'UCYO_jab_esuFRV4b17AJtAw', 'id': 'UCYO_jab_esuFRV4b17AJtAw',
'title': '3Blue1Brown - Search - linear algebra', 'title': '3Blue1Brown - Search - linear algebra',
'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9', 'description': 'md5:602e3789e6a0cb7d9d352186b720e395',
'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw', 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
'tags': ['Mathematics'], 'tags': ['Mathematics'],
'channel': '3Blue1Brown', 'channel': '3Blue1Brown',
@ -1232,6 +1230,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
'only_matching': True, 'only_matching': True,
}, { }, {
# TODO: fix availability extraction
'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.', 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
'info_dict': { 'info_dict': {
@ -1294,24 +1293,25 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, },
'playlist_mincount': 21, 'playlist_mincount': 21,
}, { }, {
# TODO: fix availability extraction
'note': 'Playlist with "show unavailable videos" button', 'note': 'Playlist with "show unavailable videos" button',
'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q', 'url': 'https://www.youtube.com/playlist?list=PLYwq8WOe86_xGmR7FrcJq8Sb7VW8K3Tt2',
'info_dict': { 'info_dict': {
'title': 'Uploads from Phim Siêu Nhân Nhật Bản', 'title': 'The Memes Of 2010s.....',
'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q', 'id': 'PLYwq8WOe86_xGmR7FrcJq8Sb7VW8K3Tt2',
'view_count': int, 'view_count': int,
'channel': 'Phim Siêu Nhân Nhật Bản', 'channel': "I'm Not JiNxEd",
'tags': [], 'tags': [],
'description': '', 'description': 'md5:44dc3b315ba69394feaafa2f40e7b2a1',
'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q', 'channel_url': 'https://www.youtube.com/channel/UC5H5H85D1QE5-fuWWQ1hdNg',
'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q', 'channel_id': 'UC5H5H85D1QE5-fuWWQ1hdNg',
'modified_date': r're:\d{8}', 'modified_date': r're:\d{8}',
'availability': 'public', 'availability': 'public',
'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban', 'uploader_url': 'https://www.youtube.com/@imnotjinxed1998',
'uploader_id': '@phimsieunhannhatban', 'uploader_id': '@imnotjinxed1998',
'uploader': 'Phim Siêu Nhân Nhật Bản', 'uploader': "I'm Not JiNxEd",
}, },
'playlist_mincount': 200, 'playlist_mincount': 150,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'], 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
}, { }, {
'note': 'Playlist with unavailable videos in page 7', 'note': 'Playlist with unavailable videos in page 7',
@ -1334,6 +1334,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 1000, 'playlist_mincount': 1000,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'], 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
}, { }, {
# TODO: fix availability extraction
'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844', 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
'info_dict': { 'info_dict': {
@ -1384,7 +1385,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, { }, {
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live', 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
'info_dict': { 'info_dict': {
'id': 'hGkQjiJLjWQ', # This will keep changing 'id': 'YDvsBbKfLPA', # This will keep changing
'ext': 'mp4', 'ext': 'mp4',
'title': str, 'title': str,
'upload_date': r're:\d{8}', 'upload_date': r're:\d{8}',
@ -1409,6 +1410,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_id': '@SkyNews', 'uploader_id': '@SkyNews',
'uploader': 'Sky News', 'uploader': 'Sky News',
'channel_is_verified': True, 'channel_is_verified': True,
'media_type': 'livestream',
'timestamp': int,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -1496,6 +1499,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng', 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
'only_matching': True, 'only_matching': True,
}, { }, {
# TODO: fix availability extraction
'note': 'VLPL, should redirect to playlist?list=PL...', 'note': 'VLPL, should redirect to playlist?list=PL...',
'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
'info_dict': { 'info_dict': {
@ -1537,6 +1541,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, { }, {
# Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg) # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
# Treat as a general feed # Treat as a general feed
# TODO: fix extraction
'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg', 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
'info_dict': { 'info_dict': {
'id': 'UCtFRv9O2AHqOZjjynzrv-xg', 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
@ -1560,21 +1565,21 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'expected_warnings': ['YouTube Music is not directly supported'], 'expected_warnings': ['YouTube Music is not directly supported'],
}, { }, {
'note': 'unlisted single video playlist', 'note': 'unlisted single video playlist',
'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf', 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQLfIN0MMgp0wVV6MP3bM4_',
'info_dict': { 'info_dict': {
'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf', 'id': 'PLt5yu3-wZAlQLfIN0MMgp0wVV6MP3bM4_',
'title': 'yt-dlp unlisted playlist test', 'title': 'unlisted playlist',
'availability': 'unlisted', 'availability': 'unlisted',
'tags': [], 'tags': [],
'modified_date': '20220418', 'modified_date': '20250417',
'channel': 'colethedj', 'channel': 'cole-dlp-test-acc',
'view_count': int, 'view_count': int,
'description': '', 'description': '',
'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q', 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q', 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
'uploader_url': 'https://www.youtube.com/@colethedj1894', 'uploader_url': 'https://www.youtube.com/@coletdjnz',
'uploader_id': '@colethedj1894', 'uploader_id': '@coletdjnz',
'uploader': 'colethedj', 'uploader': 'cole-dlp-test-acc',
}, },
'playlist': [{ 'playlist': [{
'info_dict': { 'info_dict': {
@ -1596,6 +1601,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_count': 1, 'playlist_count': 1,
'params': {'extract_flat': True}, 'params': {'extract_flat': True},
}, { }, {
# By default, recommended is always empty.
'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData', 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
'url': 'https://www.youtube.com/feed/recommended', 'url': 'https://www.youtube.com/feed/recommended',
'info_dict': { 'info_dict': {
@ -1603,7 +1609,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'title': 'recommended', 'title': 'recommended',
'tags': [], 'tags': [],
}, },
'playlist_mincount': 50, 'playlist_count': 0,
'params': { 'params': {
'skip_download': True, 'skip_download': True,
'extractor_args': {'youtubetab': {'skip': ['webpage']}}, 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
@ -1628,6 +1634,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, },
'skip': 'Query for sorting no longer works', 'skip': 'Query for sorting no longer works',
}, { }, {
# TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos
'note': 'API Fallback: Topic, should redirect to playlist?list=UU...', 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw', 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
'info_dict': { 'info_dict': {
@ -1654,11 +1661,12 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ', 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
'only_matching': True, 'only_matching': True,
}, { }, {
# TODO: fix metadata extraction
'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")', 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6', 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
'info_dict': { 'info_dict': {
'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6', 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
'modified_date': '20220407', 'modified_date': '20250115',
'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q', 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
'tags': [], 'tags': [],
'availability': 'unlisted', 'availability': 'unlisted',
@ -1692,6 +1700,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'expected_warnings': ['Preferring "ja"'], 'expected_warnings': ['Preferring "ja"'],
}, { }, {
# XXX: this should really check flat playlist entries, but the test suite doesn't support that # XXX: this should really check flat playlist entries, but the test suite doesn't support that
# TODO: fix availability extraction
'note': 'preferred lang set with playlist with translated video titles', 'note': 'preferred lang set with playlist with translated video titles',
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0', 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
'info_dict': { 'info_dict': {
@ -1714,6 +1723,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, { }, {
# shorts audio pivot for 2GtVksBMYFM. # shorts audio pivot for 2GtVksBMYFM.
'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==', 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
# TODO: fix extraction
'info_dict': { 'info_dict': {
'id': 'sfv_audio_pivot', 'id': 'sfv_audio_pivot',
'title': 'sfv_audio_pivot', 'title': 'sfv_audio_pivot',
@ -1751,6 +1761,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 8, 'playlist_mincount': 8,
}, { }, {
# Should get three playlists for videos, shorts and streams tabs # Should get three playlists for videos, shorts and streams tabs
# TODO: fix channel_is_verified extraction
'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA', 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
'info_dict': { 'info_dict': {
'id': 'UCK9V2B22uJYu3N7eR_BT9QA', 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
@ -1758,7 +1769,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_follower_count': int, 'channel_follower_count': int,
'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA', 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA', 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
'description': 'md5:49809d8bf9da539bc48ed5d1f83c33f2', 'description': 'md5:01e53f350ab8ad6fcf7c4fedb3c1b99f',
'channel': 'Polka Ch. 尾丸ポルカ', 'channel': 'Polka Ch. 尾丸ポルカ',
'tags': 'count:35', 'tags': 'count:35',
'uploader_url': 'https://www.youtube.com/@OmaruPolka', 'uploader_url': 'https://www.youtube.com/@OmaruPolka',
@ -1769,14 +1780,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_count': 3, 'playlist_count': 3,
}, { }, {
# Shorts tab with channel with handle # Shorts tab with channel with handle
# TODO: fix channel description # TODO: fix channel_is_verified extraction
'url': 'https://www.youtube.com/@NotJustBikes/shorts', 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
'info_dict': { 'info_dict': {
'id': 'UC0intLFzLaudFG-xAvUEO-A', 'id': 'UC0intLFzLaudFG-xAvUEO-A',
'title': 'Not Just Bikes - Shorts', 'title': 'Not Just Bikes - Shorts',
'tags': 'count:10', 'tags': 'count:10',
'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A', 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
'description': 'md5:5e82545b3a041345927a92d0585df247', 'description': 'md5:1d9fc1bad7f13a487299d1fe1712e031',
'channel_follower_count': int, 'channel_follower_count': int,
'channel_id': 'UC0intLFzLaudFG-xAvUEO-A', 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
'channel': 'Not Just Bikes', 'channel': 'Not Just Bikes',
@ -1797,7 +1808,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig', 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
'channel': '中村悠一', 'channel': '中村悠一',
'channel_follower_count': int, 'channel_follower_count': int,
'description': 'md5:e744f6c93dafa7a03c0c6deecb157300', 'description': 'md5:e8fd705073a594f27d6d6d020da560dc',
'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura', 'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
'uploader_id': '@Yuichi-Nakamura', 'uploader_id': '@Yuichi-Nakamura',
'uploader': '中村悠一', 'uploader': '中村悠一',
@ -1815,6 +1826,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'only_matching': True, 'only_matching': True,
}, { }, {
# No videos tab but has a shorts tab # No videos tab but has a shorts tab
# TODO: fix metadata extraction
'url': 'https://www.youtube.com/c/TKFShorts', 'url': 'https://www.youtube.com/c/TKFShorts',
'info_dict': { 'info_dict': {
'id': 'UCgJ5_1F6yJhYLnyMszUdmUg', 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
@ -1851,6 +1863,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, { }, {
# Shorts url result in shorts tab # Shorts url result in shorts tab
# TODO: Fix channel id extraction # TODO: Fix channel id extraction
# TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts', 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
'info_dict': { 'info_dict': {
'id': 'UCiu-3thuViMebBjw_5nWYrA', 'id': 'UCiu-3thuViMebBjw_5nWYrA',
@ -1879,6 +1892,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'params': {'extract_flat': True}, 'params': {'extract_flat': True},
}, { }, {
# Live video status should be extracted # Live video status should be extracted
# TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live', 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
'info_dict': { 'info_dict': {
'id': 'UCQvWX73GQygcwXOTSf_VDVg', 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
@ -1907,6 +1921,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 1, 'playlist_mincount': 1,
}, { }, {
# Channel renderer metadata. Contains number of videos on the channel # Channel renderer metadata. Contains number of videos on the channel
# TODO: channels tab removed, change this test to use another page with channel renderer
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels', 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
'info_dict': { 'info_dict': {
'id': 'UCiu-3thuViMebBjw_5nWYrA', 'id': 'UCiu-3thuViMebBjw_5nWYrA',
@ -1940,7 +1955,9 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, },
}], }],
'params': {'extract_flat': True}, 'params': {'extract_flat': True},
'skip': 'channels tab removed',
}, { }, {
# TODO: fix channel_is_verified extraction
'url': 'https://www.youtube.com/@3blue1brown/about', 'url': 'https://www.youtube.com/@3blue1brown/about',
'info_dict': { 'info_dict': {
'id': '@3blue1brown', 'id': '@3blue1brown',
@ -1950,7 +1967,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_id': 'UCYO_jab_esuFRV4b17AJtAw', 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
'channel': '3Blue1Brown', 'channel': '3Blue1Brown',
'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw', 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9', 'description': 'md5:602e3789e6a0cb7d9d352186b720e395',
'uploader_url': 'https://www.youtube.com/@3blue1brown', 'uploader_url': 'https://www.youtube.com/@3blue1brown',
'uploader_id': '@3blue1brown', 'uploader_id': '@3blue1brown',
'uploader': '3Blue1Brown', 'uploader': '3Blue1Brown',
@ -1976,6 +1993,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_count': 5, 'playlist_count': 5,
}, { }, {
# Releases tab, with rich entry playlistRenderers (same as Podcasts tab) # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
# TODO: fix channel_is_verified extraction
'url': 'https://www.youtube.com/@AHimitsu/releases', 'url': 'https://www.youtube.com/@AHimitsu/releases',
'info_dict': { 'info_dict': {
'id': 'UCgFwu-j5-xNJml2FtTrrB3A', 'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
@ -2015,6 +2033,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 100, 'playlist_mincount': 100,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'], 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
}, { }, {
# TODO: fix channel_is_verified extraction
'note': 'Tags containing spaces', 'note': 'Tags containing spaces',
'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ', 'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
'playlist_count': 3, 'playlist_count': 3,
@ -2035,6 +2054,24 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'challenges', 'sketches', 'scary games', 'funny games', 'rage games', 'challenges', 'sketches', 'scary games', 'funny games', 'rage games',
'mark fischbach'], 'mark fischbach'],
}, },
}, {
# https://github.com/yt-dlp/yt-dlp/issues/12933
'note': 'streams tab, some scheduled streams. Empty intermediate response with only continuation - must follow',
'url': 'https://www.youtube.com/@sbcitygov/streams',
'playlist_mincount': 150,
'info_dict': {
'id': 'UCH6-qfQwlUgz9SAf05jvc_w',
'channel': 'sbcitygov',
'channel_id': 'UCH6-qfQwlUgz9SAf05jvc_w',
'title': 'sbcitygov - Live',
'channel_follower_count': int,
'description': 'md5:ca1a92059835c071e33b3db52f4a6d67',
'uploader_id': '@sbcitygov',
'uploader_url': 'https://www.youtube.com/@sbcitygov',
'uploader': 'sbcitygov',
'channel_url': 'https://www.youtube.com/channel/UCH6-qfQwlUgz9SAf05jvc_w',
'tags': [],
},
}] }]
@classmethod @classmethod

View File

@ -2943,7 +2943,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self.write_debug(f'{kwargs.get("video_id")}: No {pot_request.context.value} PO Token available for {client} client') self.write_debug(f'{kwargs.get("video_id")}: No {pot_request.context.value} PO Token available for {client} client')
return return
self.write_debug(f'{kwargs.get("video_id")}: Fetched a {pot_request.context.value} PO Token for {client} client') self.write_debug(f'{kwargs.get("video_id")}: Retrieved a {pot_request.context.value} PO Token for {client} client')
return po_token return po_token
@staticmethod @staticmethod
@ -3712,6 +3712,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if 'sign in' in reason.lower(): if 'sign in' in reason.lower():
reason = remove_end(reason, 'This helps protect our community. Learn more') reason = remove_end(reason, 'This helps protect our community. Learn more')
reason = f'{remove_end(reason.strip(), ".")}. {self._youtube_login_hint}' reason = f'{remove_end(reason.strip(), ".")}. {self._youtube_login_hint}'
elif get_first(playability_statuses, ('errorScreen', 'playerCaptchaViewModel', {dict})):
reason += '. YouTube is requiring a captcha challenge before playback'
self.raise_no_formats(reason, expected=True) self.raise_no_formats(reason, expected=True)
keywords = get_first(video_details, 'keywords', expected_type=list) or [] keywords = get_first(video_details, 'keywords', expected_type=list) or []
@ -3940,7 +3942,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if not traverse_obj(initial_data, 'contents'): if not traverse_obj(initial_data, 'contents'):
self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.') self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
initial_data = None initial_data = None
if not initial_data: if not initial_data and 'initial_data' not in self._configuration_arg('player_skip'):
query = {'videoId': video_id} query = {'videoId': video_id}
query.update(self._get_checkok_params()) query.update(self._get_checkok_params())
initial_data = self._extract_response( initial_data = self._extract_response(

View File

@ -90,15 +90,15 @@ class MyPoTokenProviderPTP(PoTokenProvider): # Provider name must end with "PTP
# For this example, the extractor arg would be `--extractor-args "youtubepot-mypotokenprovider:url=https://custom.example.com/get_pot"` # For this example, the extractor arg would be `--extractor-args "youtubepot-mypotokenprovider:url=https://custom.example.com/get_pot"`
external_provider_url = self._configuration_arg('url', default=['https://provider.example.com/get_pot'])[0] external_provider_url = self._configuration_arg('url', default=['https://provider.example.com/get_pot'])[0]
# See below for logging guidelines
self.logger.trace(f'Using external provider URL: {external_provider_url}')
# You should use the internal HTTP client to make requests where possible, # You should use the internal HTTP client to make requests where possible,
# as it will handle cookies and other networking settings passed to yt-dlp. # as it will handle cookies and other networking settings passed to yt-dlp.
try: try:
# See below for logging guidelines # See docstring in _request_webpage method for request tips
self.logger.info(f'Requesting {request.context.value} PO Token for {request.internal_client_name} client from external provider') response = self._request_webpage(
Request(external_provider_url, data=json.dumps({
# See docstring in _urlopen method for request tips
response = self._urlopen(
request, Request(external_provider_url, data=json.dumps({
'content_binding': get_webpo_content_binding(request), 'content_binding': get_webpo_content_binding(request),
'proxy': request.request_proxy, 'proxy': request.request_proxy,
'headers': request.request_headers, 'headers': request.request_headers,
@ -107,7 +107,10 @@ class MyPoTokenProviderPTP(PoTokenProvider): # Provider name must end with "PTP
# Important: If your provider has its own caching, please respect `bypass_cache`. # Important: If your provider has its own caching, please respect `bypass_cache`.
# This may be used in the future to request a fresh PO Token if required. # This may be used in the future to request a fresh PO Token if required.
'do_not_cache': request.bypass_cache, 'do_not_cache': request.bypass_cache,
}).encode(), proxies={'all': None})) }).encode(), proxies={'all': None}),
pot_request=request,
note=f'Requesting {request.context.value} PO Token for {request.internal_client_name} client from external provider',
)
except RequestError as e: except RequestError as e:
# If there is an error, raise PoTokenProviderError. # If there is an error, raise PoTokenProviderError.

View File

@ -17,7 +17,7 @@ from yt_dlp.extractor.youtube.pot._provider import (
register_provider_generic, register_provider_generic,
) )
from yt_dlp.extractor.youtube.pot._registry import _pot_providers, _ptp_preferences from yt_dlp.extractor.youtube.pot._registry import _pot_providers, _ptp_preferences
from yt_dlp.networking import Request from yt_dlp.networking import Request, Response
from yt_dlp.utils import traverse_obj from yt_dlp.utils import traverse_obj
from yt_dlp.utils.networking import HTTPHeaderDict from yt_dlp.utils.networking import HTTPHeaderDict
@ -135,27 +135,34 @@ class PoTokenProvider(IEContentProvider, abc.ABC, suffix='PTP'):
# Helper functions # Helper functions
def _urlopen(self, pot_request: PoTokenRequest, http_request: Request): def _request_webpage(self, request: Request, pot_request: PoTokenRequest | None = None, note=None, **kwargs) -> Response:
"""Make a request using the request parameters from the PoTokenRequest. """Make a request using the internal HTTP Client.
Use this instead of calling requests, urllib3 or other HTTP client libraries directly!! Use this instead of calling requests, urllib3 or other HTTP client libraries directly!
YouTube cookies will be automatically applied if this request is made to YouTube. YouTube cookies will be automatically applied if this request is made to YouTube.
@param request: The request to make
@param pot_request: The PoTokenRequest to use. Request parameters will be merged from it.
@param note: Custom log message to display when making the request. Set to `False` to disable logging.
Tips: Tips:
- Disable proxy (e.g. if calling local service): Request(..., proxies={'all': None}) - Disable proxy (e.g. if calling local service): Request(..., proxies={'all': None})
- Set request timeout: Request(..., extensions={'timeout': 5.0}) - Set request timeout: Request(..., extensions={'timeout': 5.0})
""" """
req = http_request.copy() req = request.copy()
# Merge some ctx request settings into the request # Merge some ctx request settings into the request
# Most of these will already be used by the configured ydl instance, # Most of these will already be used by the configured ydl instance,
# however, the YouTube extractor may override some. # however, the YouTube extractor may override some.
if pot_request is not None:
req.headers = HTTPHeaderDict(pot_request.request_headers, req.headers) req.headers = HTTPHeaderDict(pot_request.request_headers, req.headers)
req.proxies = req.proxies or ({'all': pot_request.request_proxy} if pot_request.request_proxy else {}) req.proxies = req.proxies or ({'all': pot_request.request_proxy} if pot_request.request_proxy else {})
if pot_request.request_cookiejar is not None: if pot_request.request_cookiejar is not None:
req.extensions['cookiejar'] = req.extensions.get('cookiejar', pot_request.request_cookiejar) req.extensions['cookiejar'] = req.extensions.get('cookiejar', pot_request.request_cookiejar)
if note is not False:
self.logger.info(str(note) if note else 'Requesting webpage')
return self.ie._downloader.urlopen(req) return self.ie._downloader.urlopen(req)

View File

@ -5,6 +5,7 @@ from __future__ import annotations
import base64 import base64
import contextlib import contextlib
import enum import enum
import re
import urllib.parse import urllib.parse
from yt_dlp.extractor.youtube.pot.provider import PoTokenContext, PoTokenRequest from yt_dlp.extractor.youtube.pot.provider import PoTokenContext, PoTokenRequest
@ -58,7 +59,7 @@ def _extract_visitor_id(visitor_data):
with contextlib.suppress(Exception): with contextlib.suppress(Exception):
visitor_id = base64.urlsafe_b64decode(urllib.parse.unquote_plus(visitor_data))[2:13].decode() visitor_id = base64.urlsafe_b64decode(urllib.parse.unquote_plus(visitor_data))[2:13].decode()
# check that visitor id is all letters and numbers # check that visitor id is all letters and numbers
if visitor_id.isalnum() and len(visitor_id) == 11: if re.fullmatch(r'[A-Za-z0-9_-]{11}', visitor_id):
return visitor_id return visitor_id
return None return None

View File

@ -3,6 +3,7 @@ import warnings
from .common import ( from .common import (
HEADRequest, HEADRequest,
PATCHRequest,
PUTRequest, PUTRequest,
Request, Request,
RequestDirector, RequestDirector,

View File

@ -505,6 +505,7 @@ class Request:
HEADRequest = functools.partial(Request, method='HEAD') HEADRequest = functools.partial(Request, method='HEAD')
PATCHRequest = functools.partial(Request, method='PATCH')
PUTRequest = functools.partial(Request, method='PUT') PUTRequest = functools.partial(Request, method='PUT')

View File

@ -2044,7 +2044,7 @@ def url_or_none(url):
if not url or not isinstance(url, str): if not url or not isinstance(url, str):
return None return None
url = url.strip() url = url.strip()
return url if re.match(r'(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None return url if re.match(r'(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?|wss?):)?//', url) else None
def strftime_or_none(timestamp, date_format='%Y%m%d', default=None): def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):