mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-04-30 04:26:03 +00:00
Compare commits
19 Commits
d14c0fe223
...
72a4a46152
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
72a4a46152 | ||
|
|
7b0dd8b2d1 | ||
|
|
4ee46531c9 | ||
|
|
ceab4d5ed6 | ||
|
|
ed6c6d7eef | ||
|
|
f484c51599 | ||
|
|
72ba487930 | ||
|
|
74e90dd9b8 | ||
|
|
1d45e30537 | ||
|
|
3c1c75ecb8 | ||
|
|
7faa18b83d | ||
|
|
a473e59233 | ||
|
|
45f01de00e | ||
|
|
db6d1f145a | ||
|
|
a3f2b54c25 | ||
|
|
91832111a1 | ||
|
|
425017531f | ||
|
|
58d0c83457 | ||
|
|
4ebf41309d |
@ -1770,7 +1770,7 @@ The following extractors use this feature:
|
|||||||
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
|
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
|
||||||
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
|
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
|
||||||
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
|
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
|
||||||
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
|
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details
|
||||||
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
|
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
|
||||||
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
|
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
|
||||||
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`
|
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`
|
||||||
|
|||||||
@ -136,7 +136,7 @@ def _iter_differences(got, expected, field):
|
|||||||
return
|
return
|
||||||
|
|
||||||
if op == 'startswith':
|
if op == 'startswith':
|
||||||
if not val.startswith(got):
|
if not got.startswith(val):
|
||||||
yield field, f'should start with {val!r}, got {got!r}'
|
yield field, f'should start with {val!r}, got {got!r}'
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|||||||
@ -39,6 +39,7 @@ from yt_dlp.cookies import YoutubeDLCookieJar
|
|||||||
from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3
|
from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3
|
||||||
from yt_dlp.networking import (
|
from yt_dlp.networking import (
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
|
PATCHRequest,
|
||||||
PUTRequest,
|
PUTRequest,
|
||||||
Request,
|
Request,
|
||||||
RequestDirector,
|
RequestDirector,
|
||||||
@ -1856,6 +1857,7 @@ class TestRequest:
|
|||||||
|
|
||||||
def test_request_helpers(self):
|
def test_request_helpers(self):
|
||||||
assert HEADRequest('http://example.com').method == 'HEAD'
|
assert HEADRequest('http://example.com').method == 'HEAD'
|
||||||
|
assert PATCHRequest('http://example.com').method == 'PATCH'
|
||||||
assert PUTRequest('http://example.com').method == 'PUT'
|
assert PUTRequest('http://example.com').method == 'PUT'
|
||||||
|
|
||||||
def test_headers(self):
|
def test_headers(self):
|
||||||
|
|||||||
@ -29,8 +29,8 @@ class TestGetWebPoContentBinding:
|
|||||||
assert get_webpo_content_binding(pot_request) == expected
|
assert get_webpo_content_binding(pot_request) == expected
|
||||||
|
|
||||||
def test_extract_visitor_id(self, pot_request):
|
def test_extract_visitor_id(self, pot_request):
|
||||||
pot_request.visitor_data = 'CgsxMjM0NTY3ODkwMSiA4s-qBg%3D%3D'
|
pot_request.visitor_data = 'CgsxMjNhYmNYWVpfLSiA4s%2DqBg%3D%3D'
|
||||||
assert get_webpo_content_binding(pot_request, bind_to_visitor_id=True) == ('12345678901', ContentBindingType.VISITOR_ID)
|
assert get_webpo_content_binding(pot_request, bind_to_visitor_id=True) == ('123abcXYZ_-', ContentBindingType.VISITOR_ID)
|
||||||
|
|
||||||
def test_invalid_visitor_id(self, pot_request):
|
def test_invalid_visitor_id(self, pot_request):
|
||||||
# visitor id not alphanumeric (i.e. protobuf extraction failed)
|
# visitor id not alphanumeric (i.e. protobuf extraction failed)
|
||||||
|
|||||||
@ -16,7 +16,7 @@ from yt_dlp.extractor.youtube.pot._registry import _pot_pcs_providers
|
|||||||
|
|
||||||
@pytest.fixture()
|
@pytest.fixture()
|
||||||
def pot_request(pot_request) -> PoTokenRequest:
|
def pot_request(pot_request) -> PoTokenRequest:
|
||||||
pot_request.visitor_data = 'CgsxMjM0NTY3ODkwMSiA4s-qBg%3D%3D' # visitor_id=12345678901
|
pot_request.visitor_data = 'CgsxMjNhYmNYWVpfLSiA4s%2DqBg%3D%3D' # visitor_id=123abcXYZ_-
|
||||||
return pot_request
|
return pot_request
|
||||||
|
|
||||||
|
|
||||||
@ -51,13 +51,13 @@ class TestWebPoPCSP:
|
|||||||
*[(client, context, is_authenticated, remote_host, source_address, request_proxy, expected) for client in [
|
*[(client, context, is_authenticated, remote_host, source_address, request_proxy, expected) for client in [
|
||||||
'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER']
|
'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER']
|
||||||
for context, is_authenticated, remote_host, source_address, request_proxy, expected in [
|
for context, is_authenticated, remote_host, source_address, request_proxy, expected in [
|
||||||
(PoTokenContext.GVS, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '12345678901', 'cbt': 'visitor_id'}),
|
(PoTokenContext.GVS, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'visitor_id'}),
|
||||||
(PoTokenContext.PLAYER, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '12345678901', 'cbt': 'video_id'}),
|
(PoTokenContext.PLAYER, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'video_id'}),
|
||||||
(PoTokenContext.GVS, True, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': 'example-data-sync-id', 'cbt': 'datasync_id'}),
|
(PoTokenContext.GVS, True, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': 'example-data-sync-id', 'cbt': 'datasync_id'}),
|
||||||
]],
|
]],
|
||||||
('WEB_REMIX', PoTokenContext.PLAYER, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '12345678901', 'cbt': 'visitor_id'}),
|
('WEB_REMIX', PoTokenContext.PLAYER, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'visitor_id'}),
|
||||||
('WEB', PoTokenContext.GVS, False, None, None, None, {'t': 'webpo', 'cb': '12345678901', 'cbt': 'visitor_id', 'ip': None, 'sa': None, 'px': None}),
|
('WEB', PoTokenContext.GVS, False, None, None, None, {'t': 'webpo', 'cb': '123abcXYZ_-', 'cbt': 'visitor_id', 'ip': None, 'sa': None, 'px': None}),
|
||||||
('TVHTML5', PoTokenContext.PLAYER, False, None, None, 'http://example.com', {'t': 'webpo', 'cb': '12345678901', 'cbt': 'video_id', 'ip': None, 'sa': None, 'px': 'http://example.com'}),
|
('TVHTML5', PoTokenContext.PLAYER, False, None, None, 'http://example.com', {'t': 'webpo', 'cb': '123abcXYZ_-', 'cbt': 'video_id', 'ip': None, 'sa': None, 'px': 'http://example.com'}),
|
||||||
|
|
||||||
])
|
])
|
||||||
def test_generate_key_bindings(self, ie, logger, pot_request, client_name, context, is_authenticated, remote_host, source_address, request_proxy, expected):
|
def test_generate_key_bindings(self, ie, logger, pot_request, client_name, context, is_authenticated, remote_host, source_address, request_proxy, expected):
|
||||||
@ -68,7 +68,7 @@ class TestWebPoPCSP:
|
|||||||
pot_request.innertube_context['client']['remoteHost'] = remote_host
|
pot_request.innertube_context['client']['remoteHost'] = remote_host
|
||||||
pot_request.request_source_address = source_address
|
pot_request.request_source_address = source_address
|
||||||
pot_request.request_proxy = request_proxy
|
pot_request.request_proxy = request_proxy
|
||||||
pot_request.video_id = '12345678901' # same as visitor id to test type
|
pot_request.video_id = '123abcXYZ_-' # same as visitor id to test type
|
||||||
|
|
||||||
assert pcs.generate_cache_spec(pot_request).key_bindings == expected
|
assert pcs.generate_cache_spec(pot_request).key_bindings == expected
|
||||||
|
|
||||||
@ -78,7 +78,7 @@ class TestWebPoPCSP:
|
|||||||
pot_request.innertube_context['client']['clientName'] = 'WEB'
|
pot_request.innertube_context['client']['clientName'] = 'WEB'
|
||||||
pot_request.context = PoTokenContext.GVS
|
pot_request.context = PoTokenContext.GVS
|
||||||
pot_request.is_authenticated = False
|
pot_request.is_authenticated = False
|
||||||
assert pcs.generate_cache_spec(pot_request).key_bindings == {'t': 'webpo', 'ip': None, 'sa': None, 'px': None, 'cb': 'CgsxMjM0NTY3ODkwMSiA4s-qBg%3D%3D', 'cbt': 'visitor_data'}
|
assert pcs.generate_cache_spec(pot_request).key_bindings == {'t': 'webpo', 'ip': None, 'sa': None, 'px': None, 'cb': 'CgsxMjNhYmNYWVpfLSiA4s%2DqBg%3D%3D', 'cbt': 'visitor_data'}
|
||||||
|
|
||||||
def test_default_ttl(self, ie, logger, pot_request):
|
def test_default_ttl(self, ie, logger, pot_request):
|
||||||
pcs = WebPoPCSP(ie=ie, logger=logger, settings={})
|
pcs = WebPoPCSP(ie=ie, logger=logger, settings={})
|
||||||
|
|||||||
@ -149,7 +149,7 @@ class TestPoTokenProvider:
|
|||||||
with pytest.raises(PoTokenProviderRejectedRequest):
|
with pytest.raises(PoTokenProviderRejectedRequest):
|
||||||
provider.request_pot(pot_request)
|
provider.request_pot(pot_request)
|
||||||
|
|
||||||
def test_provider_urlopen(self, ie, logger, pot_request):
|
def test_provider_request_webpage(self, ie, logger, pot_request):
|
||||||
provider = ExamplePTP(ie=ie, logger=logger, settings={})
|
provider = ExamplePTP(ie=ie, logger=logger, settings={})
|
||||||
|
|
||||||
cookiejar = YoutubeDLCookieJar()
|
cookiejar = YoutubeDLCookieJar()
|
||||||
@ -162,16 +162,17 @@ class TestPoTokenProvider:
|
|||||||
|
|
||||||
ie._downloader.urlopen = mock_urlopen
|
ie._downloader.urlopen = mock_urlopen
|
||||||
|
|
||||||
sent_request = provider._urlopen(pot_request, Request(
|
sent_request = provider._request_webpage(Request(
|
||||||
'https://example.com',
|
'https://example.com',
|
||||||
))
|
), pot_request=pot_request)
|
||||||
|
|
||||||
assert sent_request.url == 'https://example.com'
|
assert sent_request.url == 'https://example.com'
|
||||||
assert sent_request.headers['User-Agent'] == 'example-user-agent'
|
assert sent_request.headers['User-Agent'] == 'example-user-agent'
|
||||||
assert sent_request.proxies == {'all': 'socks5://example-proxy.com'}
|
assert sent_request.proxies == {'all': 'socks5://example-proxy.com'}
|
||||||
assert sent_request.extensions['cookiejar'] is cookiejar
|
assert sent_request.extensions['cookiejar'] is cookiejar
|
||||||
|
assert 'Requesting webpage' in logger.messages['info']
|
||||||
|
|
||||||
def test_provider_urlopen_override(self, ie, logger, pot_request):
|
def test_provider_request_webpage_override(self, ie, logger, pot_request):
|
||||||
provider = ExamplePTP(ie=ie, logger=logger, settings={})
|
provider = ExamplePTP(ie=ie, logger=logger, settings={})
|
||||||
|
|
||||||
cookiejar_request = YoutubeDLCookieJar()
|
cookiejar_request = YoutubeDLCookieJar()
|
||||||
@ -184,17 +185,47 @@ class TestPoTokenProvider:
|
|||||||
|
|
||||||
ie._downloader.urlopen = mock_urlopen
|
ie._downloader.urlopen = mock_urlopen
|
||||||
|
|
||||||
sent_request = provider._urlopen(pot_request, Request(
|
sent_request = provider._request_webpage(Request(
|
||||||
'https://example.com',
|
'https://example.com',
|
||||||
headers={'User-Agent': 'override-user-agent-override'},
|
headers={'User-Agent': 'override-user-agent-override'},
|
||||||
proxies={'http': 'http://example-proxy-override.com'},
|
proxies={'http': 'http://example-proxy-override.com'},
|
||||||
extensions={'cookiejar': YoutubeDLCookieJar()},
|
extensions={'cookiejar': YoutubeDLCookieJar()},
|
||||||
))
|
), pot_request=pot_request, note='Custom requesting webpage')
|
||||||
|
|
||||||
assert sent_request.url == 'https://example.com'
|
assert sent_request.url == 'https://example.com'
|
||||||
assert sent_request.headers['User-Agent'] == 'override-user-agent-override'
|
assert sent_request.headers['User-Agent'] == 'override-user-agent-override'
|
||||||
assert sent_request.proxies == {'http': 'http://example-proxy-override.com'}
|
assert sent_request.proxies == {'http': 'http://example-proxy-override.com'}
|
||||||
assert sent_request.extensions['cookiejar'] is not cookiejar_request
|
assert sent_request.extensions['cookiejar'] is not cookiejar_request
|
||||||
|
assert 'Custom requesting webpage' in logger.messages['info']
|
||||||
|
|
||||||
|
def test_provider_request_webpage_no_log(self, ie, logger, pot_request):
|
||||||
|
provider = ExamplePTP(ie=ie, logger=logger, settings={})
|
||||||
|
|
||||||
|
def mock_urlopen(request):
|
||||||
|
return request
|
||||||
|
|
||||||
|
ie._downloader.urlopen = mock_urlopen
|
||||||
|
|
||||||
|
sent_request = provider._request_webpage(Request(
|
||||||
|
'https://example.com',
|
||||||
|
), note=False)
|
||||||
|
|
||||||
|
assert sent_request.url == 'https://example.com'
|
||||||
|
assert 'info' not in logger.messages
|
||||||
|
|
||||||
|
def test_provider_request_webpage_no_pot_request(self, ie, logger):
|
||||||
|
provider = ExamplePTP(ie=ie, logger=logger, settings={})
|
||||||
|
|
||||||
|
def mock_urlopen(request):
|
||||||
|
return request
|
||||||
|
|
||||||
|
ie._downloader.urlopen = mock_urlopen
|
||||||
|
|
||||||
|
sent_request = provider._request_webpage(Request(
|
||||||
|
'https://example.com',
|
||||||
|
), pot_request=None)
|
||||||
|
|
||||||
|
assert sent_request.url == 'https://example.com'
|
||||||
|
|
||||||
def test_get_config_arg(self, ie, logger):
|
def test_get_config_arg(self, ie, logger):
|
||||||
provider = ExamplePTP(ie=ie, logger=logger, settings={'abc': ['123D'], 'xyz': ['456a', '789B']})
|
provider = ExamplePTP(ie=ie, logger=logger, settings={'abc': ['123D'], 'xyz': ['456a', '789B']})
|
||||||
|
|||||||
@ -659,6 +659,8 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(url_or_none('mms://foo.de'), 'mms://foo.de')
|
self.assertEqual(url_or_none('mms://foo.de'), 'mms://foo.de')
|
||||||
self.assertEqual(url_or_none('rtspu://foo.de'), 'rtspu://foo.de')
|
self.assertEqual(url_or_none('rtspu://foo.de'), 'rtspu://foo.de')
|
||||||
self.assertEqual(url_or_none('ftps://foo.de'), 'ftps://foo.de')
|
self.assertEqual(url_or_none('ftps://foo.de'), 'ftps://foo.de')
|
||||||
|
self.assertEqual(url_or_none('ws://foo.de'), 'ws://foo.de')
|
||||||
|
self.assertEqual(url_or_none('wss://foo.de'), 'wss://foo.de')
|
||||||
|
|
||||||
def test_parse_age_limit(self):
|
def test_parse_age_limit(self):
|
||||||
self.assertEqual(parse_age_limit(None), None)
|
self.assertEqual(parse_age_limit(None), None)
|
||||||
|
|||||||
@ -85,6 +85,7 @@ class NiconicoLiveFD(FileDownloader):
|
|||||||
'quality': live_quality,
|
'quality': live_quality,
|
||||||
'protocol': 'hls+fmp4',
|
'protocol': 'hls+fmp4',
|
||||||
'latency': live_latency,
|
'latency': live_latency,
|
||||||
|
'accessRightMethod': 'single_cookie',
|
||||||
'chasePlay': False,
|
'chasePlay': False,
|
||||||
},
|
},
|
||||||
'room': {
|
'room': {
|
||||||
|
|||||||
@ -903,6 +903,7 @@ from .ivi import (
|
|||||||
IviIE,
|
IviIE,
|
||||||
)
|
)
|
||||||
from .ivideon import IvideonIE
|
from .ivideon import IvideonIE
|
||||||
|
from .ivoox import IvooxIE
|
||||||
from .iwara import (
|
from .iwara import (
|
||||||
IwaraIE,
|
IwaraIE,
|
||||||
IwaraPlaylistIE,
|
IwaraPlaylistIE,
|
||||||
@ -960,7 +961,10 @@ from .kick import (
|
|||||||
)
|
)
|
||||||
from .kicker import KickerIE
|
from .kicker import KickerIE
|
||||||
from .kickstarter import KickStarterIE
|
from .kickstarter import KickStarterIE
|
||||||
from .kika import KikaIE
|
from .kika import (
|
||||||
|
KikaIE,
|
||||||
|
KikaPlaylistIE,
|
||||||
|
)
|
||||||
from .kinja import KinjaEmbedIE
|
from .kinja import KinjaEmbedIE
|
||||||
from .kinopoisk import KinoPoiskIE
|
from .kinopoisk import KinoPoiskIE
|
||||||
from .kommunetv import KommunetvIE
|
from .kommunetv import KommunetvIE
|
||||||
@ -1061,6 +1065,7 @@ from .loom import (
|
|||||||
from .lovehomeporn import LoveHomePornIE
|
from .lovehomeporn import LoveHomePornIE
|
||||||
from .lrt import (
|
from .lrt import (
|
||||||
LRTVODIE,
|
LRTVODIE,
|
||||||
|
LRTRadioIE,
|
||||||
LRTStreamIE,
|
LRTStreamIE,
|
||||||
)
|
)
|
||||||
from .lsm import (
|
from .lsm import (
|
||||||
@ -1493,6 +1498,10 @@ from .paramountplus import (
|
|||||||
)
|
)
|
||||||
from .parler import ParlerIE
|
from .parler import ParlerIE
|
||||||
from .parlview import ParlviewIE
|
from .parlview import ParlviewIE
|
||||||
|
from .parti import (
|
||||||
|
PartiLivestreamIE,
|
||||||
|
PartiVideoIE,
|
||||||
|
)
|
||||||
from .patreon import (
|
from .patreon import (
|
||||||
PatreonCampaignIE,
|
PatreonCampaignIE,
|
||||||
PatreonIE,
|
PatreonIE,
|
||||||
|
|||||||
@ -146,7 +146,7 @@ class TokFMPodcastIE(InfoExtractor):
|
|||||||
'url': 'https://audycje.tokfm.pl/podcast/91275,-Systemowy-rasizm-Czy-zamieszki-w-USA-po-morderstwie-w-Minneapolis-doprowadza-do-zmian-w-sluzbach-panstwowych',
|
'url': 'https://audycje.tokfm.pl/podcast/91275,-Systemowy-rasizm-Czy-zamieszki-w-USA-po-morderstwie-w-Minneapolis-doprowadza-do-zmian-w-sluzbach-panstwowych',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '91275',
|
'id': '91275',
|
||||||
'ext': 'aac',
|
'ext': 'mp3',
|
||||||
'title': 'md5:a9b15488009065556900169fb8061cce',
|
'title': 'md5:a9b15488009065556900169fb8061cce',
|
||||||
'episode': 'md5:a9b15488009065556900169fb8061cce',
|
'episode': 'md5:a9b15488009065556900169fb8061cce',
|
||||||
'series': 'Analizy',
|
'series': 'Analizy',
|
||||||
@ -164,23 +164,20 @@ class TokFMPodcastIE(InfoExtractor):
|
|||||||
raise ExtractorError('No such podcast', expected=True)
|
raise ExtractorError('No such podcast', expected=True)
|
||||||
metadata = metadata[0]
|
metadata = metadata[0]
|
||||||
|
|
||||||
formats = []
|
mp3_url = self._download_json(
|
||||||
for ext in ('aac', 'mp3'):
|
'https://api.podcast.radioagora.pl/api4/getSongUrl',
|
||||||
url_data = self._download_json(
|
media_id, 'Downloading podcast mp3 URL', query={
|
||||||
f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}',
|
'podcast_id': media_id,
|
||||||
media_id, f'Downloading podcast {ext} URL')
|
'device_id': str(uuid.uuid4()),
|
||||||
# prevents inserting the mp3 (default) multiple times
|
'ppre': 'false',
|
||||||
if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']:
|
'audio': 'mp3',
|
||||||
formats.append({
|
})['link_ssl']
|
||||||
'url': url_data['link_ssl'],
|
|
||||||
'ext': ext,
|
|
||||||
'vcodec': 'none',
|
|
||||||
'acodec': ext,
|
|
||||||
})
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': media_id,
|
'id': media_id,
|
||||||
'formats': formats,
|
'url': mp3_url,
|
||||||
|
'vcodec': 'none',
|
||||||
|
'ext': 'mp3',
|
||||||
'title': metadata.get('podcast_name'),
|
'title': metadata.get('podcast_name'),
|
||||||
'series': metadata.get('series_name'),
|
'series': metadata.get('series_name'),
|
||||||
'episode': metadata.get('podcast_name'),
|
'episode': metadata.get('podcast_name'),
|
||||||
|
|||||||
@ -1570,6 +1570,8 @@ class InfoExtractor:
|
|||||||
"""Yield all json ld objects in the html"""
|
"""Yield all json ld objects in the html"""
|
||||||
if default is not NO_DEFAULT:
|
if default is not NO_DEFAULT:
|
||||||
fatal = False
|
fatal = False
|
||||||
|
if not fatal and not isinstance(html, str):
|
||||||
|
return
|
||||||
for mobj in re.finditer(JSON_LD_RE, html):
|
for mobj in re.finditer(JSON_LD_RE, html):
|
||||||
json_ld_item = self._parse_json(
|
json_ld_item = self._parse_json(
|
||||||
mobj.group('json_ld'), video_id, fatal=fatal,
|
mobj.group('json_ld'), video_id, fatal=fatal,
|
||||||
|
|||||||
@ -5,7 +5,9 @@ from ..utils import (
|
|||||||
int_or_none,
|
int_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class CrowdBunkerIE(InfoExtractor):
|
class CrowdBunkerIE(InfoExtractor):
|
||||||
@ -44,16 +46,15 @@ class CrowdBunkerIE(InfoExtractor):
|
|||||||
'url': sub_url,
|
'url': sub_url,
|
||||||
})
|
})
|
||||||
|
|
||||||
mpd_url = try_get(video_json, lambda x: x['dashManifest']['url'])
|
if mpd_url := traverse_obj(video_json, ('dashManifest', 'url', {url_or_none})):
|
||||||
if mpd_url:
|
fmts, subs = self._extract_mpd_formats_and_subtitles(mpd_url, video_id, mpd_id='dash', fatal=False)
|
||||||
fmts, subs = self._extract_mpd_formats_and_subtitles(mpd_url, video_id)
|
|
||||||
formats.extend(fmts)
|
formats.extend(fmts)
|
||||||
subtitles = self._merge_subtitles(subtitles, subs)
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
m3u8_url = try_get(video_json, lambda x: x['hlsManifest']['url'])
|
|
||||||
if m3u8_url:
|
if m3u8_url := traverse_obj(video_json, ('hlsManifest', 'url', {url_or_none})):
|
||||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(mpd_url, video_id)
|
fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, m3u8_id='hls', fatal=False)
|
||||||
formats.extend(fmts)
|
formats.extend(fmts)
|
||||||
subtitles = self._merge_subtitles(subtitles, subs)
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
|
|
||||||
thumbnails = [{
|
thumbnails = [{
|
||||||
'url': image['url'],
|
'url': image['url'],
|
||||||
|
|||||||
78
yt_dlp/extractor/ivoox.py
Normal file
78
yt_dlp/extractor/ivoox.py
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import int_or_none, parse_iso8601, url_or_none, urljoin
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class IvooxIE(InfoExtractor):
|
||||||
|
_VALID_URL = (
|
||||||
|
r'https?://(?:www\.)?ivoox\.com/(?:\w{2}/)?[^/?#]+_rf_(?P<id>[0-9]+)_1\.html',
|
||||||
|
r'https?://go\.ivoox\.com/rf/(?P<id>[0-9]+)',
|
||||||
|
)
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.ivoox.com/dex-08x30-rostros-del-mal-los-asesinos-en-audios-mp3_rf_143594959_1.html',
|
||||||
|
'md5': '993f712de5b7d552459fc66aa3726885',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '143594959',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'timestamp': 1742731200,
|
||||||
|
'channel': 'DIAS EXTRAÑOS con Santiago Camacho',
|
||||||
|
'title': 'DEx 08x30 Rostros del mal: Los asesinos en serie que aterrorizaron España',
|
||||||
|
'description': 'md5:eae8b4b9740d0216d3871390b056bb08',
|
||||||
|
'uploader': 'Santiago Camacho',
|
||||||
|
'thumbnail': 'https://static-1.ivoox.com/audios/c/d/5/2/cd52f46783fe735000c33a803dce2554_XXL.jpg',
|
||||||
|
'upload_date': '20250323',
|
||||||
|
'episode': 'DEx 08x30 Rostros del mal: Los asesinos en serie que aterrorizaron España',
|
||||||
|
'duration': 11837,
|
||||||
|
'tags': ['españa', 'asesinos en serie', 'arropiero', 'historia criminal', 'mataviejas'],
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://go.ivoox.com/rf/143594959',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.ivoox.com/en/campodelgas-28-03-2025-audios-mp3_rf_144036942_1.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
media_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, media_id, fatal=False)
|
||||||
|
|
||||||
|
data = self._search_nuxt_data(
|
||||||
|
webpage, media_id, fatal=False, traverse=('data', 0, 'data', 'audio'))
|
||||||
|
|
||||||
|
direct_download = self._download_json(
|
||||||
|
f'https://vcore-web.ivoox.com/v1/public/audios/{media_id}/download-url', media_id, fatal=False,
|
||||||
|
note='Fetching direct download link', headers={'Referer': url})
|
||||||
|
|
||||||
|
download_paths = {
|
||||||
|
*traverse_obj(direct_download, ('data', 'downloadUrl', {str}, filter, all)),
|
||||||
|
*traverse_obj(data, (('downloadUrl', 'mediaUrl'), {str}, filter)),
|
||||||
|
}
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for path in download_paths:
|
||||||
|
formats.append({
|
||||||
|
'url': urljoin('https://ivoox.com', path),
|
||||||
|
'http_headers': {'Referer': url},
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': media_id,
|
||||||
|
'formats': formats,
|
||||||
|
'uploader': self._html_search_regex(r'data-prm-author="([^"]+)"', webpage, 'author', default=None),
|
||||||
|
'timestamp': parse_iso8601(
|
||||||
|
self._html_search_regex(r'data-prm-pubdate="([^"]+)"', webpage, 'timestamp', default=None)),
|
||||||
|
'channel': self._html_search_regex(r'data-prm-podname="([^"]+)"', webpage, 'channel', default=None),
|
||||||
|
'title': self._html_search_regex(r'data-prm-title="([^"]+)"', webpage, 'title', default=None),
|
||||||
|
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||||
|
'description': self._og_search_description(webpage, default=None),
|
||||||
|
**self._search_json_ld(webpage, media_id, default={}),
|
||||||
|
**traverse_obj(data, {
|
||||||
|
'title': ('title', {str}),
|
||||||
|
'description': ('description', {str}),
|
||||||
|
'thumbnail': ('image', {url_or_none}),
|
||||||
|
'timestamp': ('uploadDate', {parse_iso8601(delimiter=' ')}),
|
||||||
|
'duration': ('duration', {int_or_none}),
|
||||||
|
'tags': ('tags', ..., 'name', {str}),
|
||||||
|
}),
|
||||||
|
}
|
||||||
@ -1,3 +1,5 @@
|
|||||||
|
import itertools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
@ -124,3 +126,43 @@ class KikaIE(InfoExtractor):
|
|||||||
'vbr': ('bitrateVideo', {int_or_none}, {lambda x: None if x == -1 else x}),
|
'vbr': ('bitrateVideo', {int_or_none}, {lambda x: None if x == -1 else x}),
|
||||||
}),
|
}),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class KikaPlaylistIE(InfoExtractor):
    # Playlist extractor for kika.de brand pages; each listed video is handed off to KikaIE.
    _VALID_URL = r'https?://(?:www\.)?kika\.de/[\w-]+/(?P<id>[a-z-]+\d+)'

    _TESTS = [{
        'url': 'https://www.kika.de/logo/logo-die-welt-und-ich-562',
        'info_dict': {
            'id': 'logo-die-welt-und-ich-562',
            'title': 'logo!',
            'description': 'md5:7b9d7f65561b82fa512f2cfb553c397d',
        },
        'playlist_count': 100,
    }]

    def _entries(self, playlist_url, playlist_id):
        """Yield a KikaIE url_result for every listed video, following the 'next' links.

        Only items whose ``api.url`` passes ``url_or_none`` are yielded. Paging stops
        as soon as a page carries no usable ``links.next`` URL.
        """
        page_url = playlist_url
        for page_num in itertools.count(1):
            page_data = self._download_json(
                page_url, playlist_id, note=f'Downloading page {page_num}')

            videos = traverse_obj(
                page_data, ('content', lambda _, v: url_or_none(v['api']['url'])))
            for video in videos:
                video_url = video['api']['url']
                metadata = traverse_obj(video, {
                    'id': ('id', {str}),
                    'title': ('title', {str}),
                    'duration': ('duration', {int_or_none}),
                    'timestamp': ('date', {parse_iso8601}),
                })
                yield self.url_result(video_url, ie=KikaIE, **metadata)

            page_url = traverse_obj(page_data, ('links', 'next', {url_or_none}))
            if not page_url:
                return

    def _real_extract(self, url):
        """Fetch the brand metadata and return a playlist over its videos page."""
        playlist_id = self._match_id(url)
        brand_api_url = f'https://www.kika.de/_next-api/proxy/v1/brands/{playlist_id}'
        brand = self._download_json(brand_api_url, playlist_id)

        # Resolved eagerly (as a mandatory key) before the lazy entries generator is built
        videos_page_url = brand['videoSubchannel']['videosPageUrl']

        return self.playlist_result(
            self._entries(videos_page_url, playlist_id), playlist_id,
            title=brand.get('title'), description=brand.get('description'))
|
||||||
|
|||||||
@ -2,8 +2,11 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
|
str_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
|
unified_timestamp,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -80,7 +83,7 @@ class LRTVODIE(LRTBaseIE):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
path, video_id = self._match_valid_url(url).groups()
|
path, video_id = self._match_valid_url(url).group('path', 'id')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
media_url = self._extract_js_var(webpage, 'main_url', path)
|
media_url = self._extract_js_var(webpage, 'main_url', path)
|
||||||
@ -106,3 +109,42 @@ class LRTVODIE(LRTBaseIE):
|
|||||||
}
|
}
|
||||||
|
|
||||||
return merge_dicts(clean_info, jw_data, json_ld_data)
|
return merge_dicts(clean_info, jw_data, json_ld_data)
|
||||||
|
|
||||||
|
|
||||||
|
class LRTRadioIE(LRTBaseIE):
    # Extractor for lrt.lt "radioteka" recordings, backed by the radioteka media JSON API.
    _VALID_URL = r'https?://(?:www\.)?lrt\.lt/radioteka/irasas/(?P<id>\d+)/(?P<path>[^?#/]+)'
    _TESTS = [{
        # m3u8 download
        'url': 'https://www.lrt.lt/radioteka/irasas/2000359728/nemarios-eiles-apie-pragarus-ir-skaistyklas-su-aiste-kiltinaviciute',
        'info_dict': {
            'id': '2000359728',
            'ext': 'm4a',
            'title': 'Nemarios eilės: apie pragarus ir skaistyklas su Aiste Kiltinavičiūte',
            'description': 'md5:5eee9a0e86a55bf547bd67596204625d',
            'timestamp': 1726143120,
            'upload_date': '20240912',
            'tags': 'count:5',
            'thumbnail': r're:https?://.+/.+\.jpe?g',
            'categories': ['Daiktiniai įrodymai'],
        },
    }, {
        'url': 'https://www.lrt.lt/radioteka/irasas/2000304654/vakaras-su-knyga-svetlana-aleksijevic-cernobylio-malda-v-dalis?season=%2Fmediateka%2Faudio%2Fvakaras-su-knyga%2F2023',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Query the media API for the recording and map its JSON onto info-dict fields."""
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        path = mobj.group('path')

        # The API is keyed by the "/mediateka/..." form of the page path
        api_query = {'url': f'/mediateka/irasas/{video_id}/{path}'}
        media = self._download_json(
            'https://www.lrt.lt/radioteka/api/media', video_id, query=api_query)

        def dots_to_slashes(date_str):
            # Dots in the 'date' value are swapped for slashes before timestamp parsing
            return date_str.replace('.', '/')

        def extract_formats(m3u8_url):
            return self._extract_m3u8_formats(m3u8_url, video_id)

        field_map = {
            'id': ('id', {int}, {str_or_none}),
            'title': ('title', {str}),
            'tags': ('tags', ..., 'name', {str}),
            'categories': ('playlist_item', 'category', {str}, filter, all, filter),
            'description': ('content', {clean_html}, {str}),
            'timestamp': ('date', {dots_to_slashes}, {unified_timestamp}),
            'thumbnail': ('playlist_item', 'image', {urljoin('https://www.lrt.lt')}),
            'formats': ('playlist_item', 'file', {extract_formats}),
        }
        return traverse_obj(media, field_map)
|
||||||
|
|||||||
@ -10,7 +10,9 @@ from ..utils import (
|
|||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class MixcloudBaseIE(InfoExtractor):
|
class MixcloudBaseIE(InfoExtractor):
|
||||||
@ -37,7 +39,7 @@ class MixcloudIE(MixcloudBaseIE):
|
|||||||
'ext': 'm4a',
|
'ext': 'm4a',
|
||||||
'title': 'Cryptkeeper',
|
'title': 'Cryptkeeper',
|
||||||
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
|
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
|
||||||
'uploader': 'Daniel Holbach',
|
'uploader': 'dholbach',
|
||||||
'uploader_id': 'dholbach',
|
'uploader_id': 'dholbach',
|
||||||
'thumbnail': r're:https?://.*\.jpg',
|
'thumbnail': r're:https?://.*\.jpg',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
@ -46,10 +48,11 @@ class MixcloudIE(MixcloudBaseIE):
|
|||||||
'uploader_url': 'https://www.mixcloud.com/dholbach/',
|
'uploader_url': 'https://www.mixcloud.com/dholbach/',
|
||||||
'artist': 'Submorphics & Chino , Telekinesis, Porter Robinson, Enei, Breakage ft Jess Mills',
|
'artist': 'Submorphics & Chino , Telekinesis, Porter Robinson, Enei, Breakage ft Jess Mills',
|
||||||
'duration': 3723,
|
'duration': 3723,
|
||||||
'tags': [],
|
'tags': ['liquid drum and bass', 'drum and bass'],
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
|
'artists': list,
|
||||||
},
|
},
|
||||||
'params': {'skip_download': 'm3u8'},
|
'params': {'skip_download': 'm3u8'},
|
||||||
}, {
|
}, {
|
||||||
@ -67,7 +70,7 @@ class MixcloudIE(MixcloudBaseIE):
|
|||||||
'upload_date': '20150203',
|
'upload_date': '20150203',
|
||||||
'uploader_url': 'https://www.mixcloud.com/gillespeterson/',
|
'uploader_url': 'https://www.mixcloud.com/gillespeterson/',
|
||||||
'duration': 2992,
|
'duration': 2992,
|
||||||
'tags': [],
|
'tags': ['jazz', 'soul', 'world music', 'funk'],
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
@ -149,8 +152,6 @@ class MixcloudIE(MixcloudBaseIE):
|
|||||||
elif reason:
|
elif reason:
|
||||||
raise ExtractorError('Track is restricted', expected=True)
|
raise ExtractorError('Track is restricted', expected=True)
|
||||||
|
|
||||||
title = cloudcast['name']
|
|
||||||
|
|
||||||
stream_info = cloudcast['streamInfo']
|
stream_info = cloudcast['streamInfo']
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
@ -182,47 +183,39 @@ class MixcloudIE(MixcloudBaseIE):
|
|||||||
self.raise_login_required(metadata_available=True)
|
self.raise_login_required(metadata_available=True)
|
||||||
|
|
||||||
comments = []
|
comments = []
|
||||||
for edge in (try_get(cloudcast, lambda x: x['comments']['edges']) or []):
|
for node in traverse_obj(cloudcast, ('comments', 'edges', ..., 'node', {dict})):
|
||||||
node = edge.get('node') or {}
|
|
||||||
text = strip_or_none(node.get('comment'))
|
text = strip_or_none(node.get('comment'))
|
||||||
if not text:
|
if not text:
|
||||||
continue
|
continue
|
||||||
user = node.get('user') or {}
|
|
||||||
comments.append({
|
comments.append({
|
||||||
'author': user.get('displayName'),
|
|
||||||
'author_id': user.get('username'),
|
|
||||||
'text': text,
|
'text': text,
|
||||||
'timestamp': parse_iso8601(node.get('created')),
|
**traverse_obj(node, {
|
||||||
|
'author': ('user', 'displayName', {str}),
|
||||||
|
'author_id': ('user', 'username', {str}),
|
||||||
|
'timestamp': ('created', {parse_iso8601}),
|
||||||
|
}),
|
||||||
})
|
})
|
||||||
|
|
||||||
tags = []
|
|
||||||
for t in cloudcast.get('tags'):
|
|
||||||
tag = try_get(t, lambda x: x['tag']['name'], str)
|
|
||||||
if not tag:
|
|
||||||
tags.append(tag)
|
|
||||||
|
|
||||||
get_count = lambda x: int_or_none(try_get(cloudcast, lambda y: y[x]['totalCount']))
|
|
||||||
|
|
||||||
owner = cloudcast.get('owner') or {}
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': track_id,
|
'id': track_id,
|
||||||
'title': title,
|
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'description': cloudcast.get('description'),
|
|
||||||
'thumbnail': try_get(cloudcast, lambda x: x['picture']['url'], str),
|
|
||||||
'uploader': owner.get('displayName'),
|
|
||||||
'timestamp': parse_iso8601(cloudcast.get('publishDate')),
|
|
||||||
'uploader_id': owner.get('username'),
|
|
||||||
'uploader_url': owner.get('url'),
|
|
||||||
'duration': int_or_none(cloudcast.get('audioLength')),
|
|
||||||
'view_count': int_or_none(cloudcast.get('plays')),
|
|
||||||
'like_count': get_count('favorites'),
|
|
||||||
'repost_count': get_count('reposts'),
|
|
||||||
'comment_count': get_count('comments'),
|
|
||||||
'comments': comments,
|
'comments': comments,
|
||||||
'tags': tags,
|
**traverse_obj(cloudcast, {
|
||||||
'artist': ', '.join(cloudcast.get('featuringArtistList') or []) or None,
|
'title': ('name', {str}),
|
||||||
|
'description': ('description', {str}),
|
||||||
|
'thumbnail': ('picture', 'url', {url_or_none}),
|
||||||
|
'timestamp': ('publishDate', {parse_iso8601}),
|
||||||
|
'duration': ('audioLength', {int_or_none}),
|
||||||
|
'uploader': ('owner', 'displayName', {str}),
|
||||||
|
'uploader_id': ('owner', 'username', {str}),
|
||||||
|
'uploader_url': ('owner', 'url', {url_or_none}),
|
||||||
|
'view_count': ('plays', {int_or_none}),
|
||||||
|
'like_count': ('favorites', 'totalCount', {int_or_none}),
|
||||||
|
'repost_count': ('reposts', 'totalCount', {int_or_none}),
|
||||||
|
'comment_count': ('comments', 'totalCount', {int_or_none}),
|
||||||
|
'tags': ('tags', ..., 'tag', 'name', {str}, filter, all, filter),
|
||||||
|
'artists': ('featuringArtistList', ..., {str}, filter, all, filter),
|
||||||
|
}),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -295,7 +288,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
|
|||||||
'url': 'http://www.mixcloud.com/dholbach/',
|
'url': 'http://www.mixcloud.com/dholbach/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'dholbach_uploads',
|
'id': 'dholbach_uploads',
|
||||||
'title': 'Daniel Holbach (uploads)',
|
'title': 'dholbach (uploads)',
|
||||||
'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
|
'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 36,
|
'playlist_mincount': 36,
|
||||||
@ -303,7 +296,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
|
|||||||
'url': 'http://www.mixcloud.com/dholbach/uploads/',
|
'url': 'http://www.mixcloud.com/dholbach/uploads/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'dholbach_uploads',
|
'id': 'dholbach_uploads',
|
||||||
'title': 'Daniel Holbach (uploads)',
|
'title': 'dholbach (uploads)',
|
||||||
'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
|
'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 36,
|
'playlist_mincount': 36,
|
||||||
@ -311,7 +304,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
|
|||||||
'url': 'http://www.mixcloud.com/dholbach/favorites/',
|
'url': 'http://www.mixcloud.com/dholbach/favorites/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'dholbach_favorites',
|
'id': 'dholbach_favorites',
|
||||||
'title': 'Daniel Holbach (favorites)',
|
'title': 'dholbach (favorites)',
|
||||||
'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
|
'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
|
||||||
},
|
},
|
||||||
# 'params': {
|
# 'params': {
|
||||||
@ -337,7 +330,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
|
|||||||
'title': 'First Ear (stream)',
|
'title': 'First Ear (stream)',
|
||||||
'description': 'we maraud for ears',
|
'description': 'we maraud for ears',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 269,
|
'playlist_mincount': 267,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_TITLE_KEY = 'displayName'
|
_TITLE_KEY = 'displayName'
|
||||||
@ -361,7 +354,7 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
|
|||||||
'id': 'maxvibes_jazzcat-on-ness-radio',
|
'id': 'maxvibes_jazzcat-on-ness-radio',
|
||||||
'title': 'Ness Radio sessions',
|
'title': 'Ness Radio sessions',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 59,
|
'playlist_mincount': 58,
|
||||||
}]
|
}]
|
||||||
_TITLE_KEY = 'name'
|
_TITLE_KEY = 'name'
|
||||||
_DESCRIPTION_KEY = 'description'
|
_DESCRIPTION_KEY = 'description'
|
||||||
|
|||||||
@ -27,6 +27,7 @@ from ..utils import (
|
|||||||
traverse_obj,
|
traverse_obj,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
unified_timestamp,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
url_basename,
|
url_basename,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
@ -985,6 +986,7 @@ class NiconicoLiveIE(InfoExtractor):
|
|||||||
'quality': 'abr',
|
'quality': 'abr',
|
||||||
'protocol': 'hls+fmp4',
|
'protocol': 'hls+fmp4',
|
||||||
'latency': latency,
|
'latency': latency,
|
||||||
|
'accessRightMethod': 'single_cookie',
|
||||||
'chasePlay': False,
|
'chasePlay': False,
|
||||||
},
|
},
|
||||||
'room': {
|
'room': {
|
||||||
@ -1005,6 +1007,7 @@ class NiconicoLiveIE(InfoExtractor):
|
|||||||
if data.get('type') == 'stream':
|
if data.get('type') == 'stream':
|
||||||
m3u8_url = data['data']['uri']
|
m3u8_url = data['data']['uri']
|
||||||
qualities = data['data']['availableQualities']
|
qualities = data['data']['availableQualities']
|
||||||
|
cookies = data['data']['cookies']
|
||||||
break
|
break
|
||||||
elif data.get('type') == 'disconnect':
|
elif data.get('type') == 'disconnect':
|
||||||
self.write_debug(recv)
|
self.write_debug(recv)
|
||||||
@ -1043,6 +1046,11 @@ class NiconicoLiveIE(InfoExtractor):
|
|||||||
**res,
|
**res,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
for cookie in cookies:
|
||||||
|
self._set_cookie(
|
||||||
|
cookie['domain'], cookie['name'], cookie['value'],
|
||||||
|
expire_time=unified_timestamp(cookie['expires']), path=cookie['path'], secure=cookie['secure'])
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True)
|
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True)
|
||||||
for fmt, q in zip(formats, reversed(qualities[1:])):
|
for fmt, q in zip(formats, reversed(qualities[1:])):
|
||||||
fmt.update({
|
fmt.update({
|
||||||
|
|||||||
101
yt_dlp/extractor/parti.py
Normal file
101
yt_dlp/extractor/parti.py
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import UserNotLive, int_or_none, parse_iso8601, url_or_none, urljoin
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class PartiBaseIE(InfoExtractor):
    # Shared helper base for the parti.com extractors.

    def _call_api(self, path, video_id, note=None):
        """Download and return the JSON at ``path`` under the parti_v2 profile API."""
        api_url = f'https://api-backend.parti.com/parti_v2/profile/{path}'
        return self._download_json(api_url, video_id, note)
|
||||||
|
|
||||||
|
|
||||||
|
class PartiVideoIE(PartiBaseIE):
    # Extractor for recorded parti.com videos (/video/<numeric id>).
    IE_NAME = 'parti:video'
    _VALID_URL = r'https?://(?:www\.)?parti\.com/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://parti.com/video/66284',
        'info_dict': {
            'id': '66284',
            'ext': 'mp4',
            'title': 'NOW LIVE ',
            'upload_date': '20250327',
            'categories': ['Gaming'],
            'thumbnail': 'https://assets.parti.com/351424_eb9e5250-2821-484a-9c5f-ca99aa666c87.png',
            'channel': 'ItZTMGG',
            'timestamp': 1743044379,
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Look up the recording via the API and build its info dict."""
        video_id = self._match_id(url)
        data = self._call_api(f'get_livestream_channel_info/recent/{video_id}', video_id)

        # 'livestream_recording' is mandatory; it is joined onto the watch host
        recording_url = urljoin('https://watch.parti.com', data['livestream_recording'])
        formats = self._extract_m3u8_formats(recording_url, video_id, 'mp4')

        metadata = traverse_obj(data, {
            'title': ('event_title', {str}),
            'channel': ('user_name', {str}),
            'thumbnail': ('event_file', {url_or_none}),
            'categories': ('category_name', {str}, filter, all),
            'timestamp': ('event_start_ts', {int_or_none}),
        })

        return {
            'id': video_id,
            'formats': formats,
            **metadata,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class PartiLivestreamIE(PartiBaseIE):
    # Extractor for live parti.com creator pages (/creator/<service>/<slug>).
    IE_NAME = 'parti:livestream'
    _VALID_URL = r'https?://(?:www\.)?parti\.com/creator/(?P<service>[\w]+)/(?P<id>[\w/-]+)'
    _TESTS = [{
        'url': 'https://parti.com/creator/parti/Capt_Robs_Adventures',
        'info_dict': {
            'id': 'Capt_Robs_Adventures',
            'ext': 'mp4',
            'title': r"re:I'm Live on Parti \d{4}-\d{2}-\d{2} \d{2}:\d{2}",
            'view_count': int,
            'thumbnail': r're:https://assets\.parti\.com/.+\.png',
            'timestamp': 1743879776,
            'upload_date': '20250405',
            'live_status': 'is_live',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://parti.com/creator/discord/sazboxgaming/0',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the creator to a user ID, then extract their current livestream.

        Raises UserNotLive when the channel info does not report ``is_live`` as true.
        """
        mobj = self._match_valid_url(url)
        service = mobj.group('service')
        creator_slug = mobj.group('id')

        # Slashes inside the slug are sent to the API as '%23'
        encoded_creator_slug = creator_slug.replace('/', '%23')
        creator_id = self._call_api(
            f'get_user_by_social_media/{service}/{encoded_creator_slug}',
            creator_slug, note='Fetching user ID')

        profile_feed = self._call_api(
            f'get_livestream_channel_info/{creator_id}', creator_id,
            note='Fetching user profile feed')
        data = profile_feed['channel_info']

        is_live = traverse_obj(data, ('channel', 'is_live', {bool}))
        if not is_live:
            raise UserNotLive(video_id=creator_id)

        channel_info = data['channel']
        playback_query = {
            'token': channel_info['playback_auth_token'],
            'player_version': '1.17.0',
        }
        formats = self._extract_m3u8_formats(
            channel_info['playback_url'], creator_slug, live=True, query=playback_query)

        metadata = traverse_obj(data, {
            'title': ('livestream_event_info', 'event_name', {str}),
            'description': ('livestream_event_info', 'event_description', {str}),
            'thumbnail': ('livestream_event_info', 'livestream_preview_file', {url_or_none}),
            'timestamp': ('stream', 'start_time', {parse_iso8601}),
            'view_count': ('stream', 'viewer_count', {int_or_none}),
        })

        return {
            'id': creator_slug,
            'formats': formats,
            'is_live': True,
            **metadata,
        }
|
||||||
@ -7,7 +7,6 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
UnsupportedError,
|
UnsupportedError,
|
||||||
clean_html,
|
clean_html,
|
||||||
determine_ext,
|
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
format_field,
|
format_field,
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
@ -36,7 +35,7 @@ class RumbleEmbedIE(InfoExtractor):
|
|||||||
'upload_date': '20191020',
|
'upload_date': '20191020',
|
||||||
'channel_url': 'https://rumble.com/c/WMAR',
|
'channel_url': 'https://rumble.com/c/WMAR',
|
||||||
'channel': 'WMAR',
|
'channel': 'WMAR',
|
||||||
'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.qR4e-small-WMAR-2-News-Latest-Headline.jpg',
|
'thumbnail': r're:https://.+\.jpg',
|
||||||
'duration': 234,
|
'duration': 234,
|
||||||
'uploader': 'WMAR',
|
'uploader': 'WMAR',
|
||||||
'live_status': 'not_live',
|
'live_status': 'not_live',
|
||||||
@ -52,7 +51,7 @@ class RumbleEmbedIE(InfoExtractor):
|
|||||||
'upload_date': '20220217',
|
'upload_date': '20220217',
|
||||||
'channel_url': 'https://rumble.com/c/CyberTechNews',
|
'channel_url': 'https://rumble.com/c/CyberTechNews',
|
||||||
'channel': 'CTNews',
|
'channel': 'CTNews',
|
||||||
'thumbnail': 'https://sp.rmbl.ws/s8/6/7/i/9/h/7i9hd.OvCc.jpg',
|
'thumbnail': r're:https://.+\.jpg',
|
||||||
'duration': 901,
|
'duration': 901,
|
||||||
'uploader': 'CTNews',
|
'uploader': 'CTNews',
|
||||||
'live_status': 'not_live',
|
'live_status': 'not_live',
|
||||||
@ -114,6 +113,22 @@ class RumbleEmbedIE(InfoExtractor):
|
|||||||
'live_status': 'was_live',
|
'live_status': 'was_live',
|
||||||
},
|
},
|
||||||
'params': {'skip_download': True},
|
'params': {'skip_download': True},
|
||||||
|
}, {
|
||||||
|
'url': 'https://rumble.com/embed/v6pezdb',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'v6pezdb',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '"Es war einmal ein Mädchen" – Ein filmisches Zeitzeugnis aus Leningrad 1944',
|
||||||
|
'uploader': 'RT DE',
|
||||||
|
'channel': 'RT DE',
|
||||||
|
'channel_url': 'https://rumble.com/c/RTDE',
|
||||||
|
'duration': 309,
|
||||||
|
'thumbnail': 'https://1a-1791.com/video/fww1/dc/s8/1/n/z/2/y/nz2yy.qR4e-small-Es-war-einmal-ein-Mdchen-Ei.jpg',
|
||||||
|
'timestamp': 1743703500,
|
||||||
|
'upload_date': '20250403',
|
||||||
|
'live_status': 'not_live',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': True},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://rumble.com/embed/ufe9n.v5pv5f',
|
'url': 'https://rumble.com/embed/ufe9n.v5pv5f',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -168,40 +183,42 @@ class RumbleEmbedIE(InfoExtractor):
|
|||||||
live_status = None
|
live_status = None
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for ext, ext_info in (video.get('ua') or {}).items():
|
for format_type, format_info in (video.get('ua') or {}).items():
|
||||||
if isinstance(ext_info, dict):
|
if isinstance(format_info, dict):
|
||||||
for height, video_info in ext_info.items():
|
for height, video_info in format_info.items():
|
||||||
if not traverse_obj(video_info, ('meta', 'h', {int_or_none})):
|
if not traverse_obj(video_info, ('meta', 'h', {int_or_none})):
|
||||||
video_info.setdefault('meta', {})['h'] = height
|
video_info.setdefault('meta', {})['h'] = height
|
||||||
ext_info = ext_info.values()
|
format_info = format_info.values()
|
||||||
|
|
||||||
for video_info in ext_info:
|
for video_info in format_info:
|
||||||
meta = video_info.get('meta') or {}
|
meta = video_info.get('meta') or {}
|
||||||
if not video_info.get('url'):
|
if not video_info.get('url'):
|
||||||
continue
|
continue
|
||||||
if ext == 'hls':
|
# 'tar' URLs return duplicate m3u8 variants with the default query params, and tar files without them
|
||||||
|
if format_type == 'tar':
|
||||||
|
continue
|
||||||
|
if format_type == 'hls':
|
||||||
if meta.get('live') is True and video.get('live') == 1:
|
if meta.get('live') is True and video.get('live') == 1:
|
||||||
live_status = 'post_live'
|
live_status = 'post_live'
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
video_info['url'], video_id,
|
video_info['url'], video_id,
|
||||||
ext='mp4', m3u8_id='hls', fatal=False, live=live_status == 'is_live'))
|
ext='mp4', m3u8_id='hls', fatal=False, live=live_status == 'is_live'))
|
||||||
continue
|
continue
|
||||||
timeline = ext == 'timeline'
|
is_timeline = format_type == 'timeline'
|
||||||
if timeline:
|
is_audio = format_type == 'audio'
|
||||||
ext = determine_ext(video_info['url'])
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'ext': ext,
|
'acodec': 'none' if is_timeline else None,
|
||||||
'acodec': 'none' if timeline else None,
|
'vcodec': 'none' if is_audio else None,
|
||||||
'url': video_info['url'],
|
'url': video_info['url'],
|
||||||
'format_id': join_nonempty(ext, format_field(meta, 'h', '%sp')),
|
'format_id': join_nonempty(format_type, format_field(meta, 'h', '%sp')),
|
||||||
'format_note': 'Timeline' if timeline else None,
|
'format_note': 'Timeline' if is_timeline else None,
|
||||||
'fps': None if timeline else video.get('fps'),
|
'fps': None if is_timeline or is_audio else video.get('fps'),
|
||||||
**traverse_obj(meta, {
|
**traverse_obj(meta, {
|
||||||
'tbr': 'bitrate',
|
'tbr': ('bitrate', {int_or_none}),
|
||||||
'filesize': 'size',
|
'filesize': ('size', {int_or_none}),
|
||||||
'width': 'w',
|
'width': ('w', {int_or_none}),
|
||||||
'height': 'h',
|
'height': ('h', {int_or_none}),
|
||||||
}, expected_type=lambda x: int(x) or None),
|
}),
|
||||||
})
|
})
|
||||||
|
|
||||||
subtitles = {
|
subtitles = {
|
||||||
|
|||||||
@ -2,15 +2,17 @@ import itertools
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
bug_reports_message,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
extract_attributes,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
lowercase_escape,
|
lowercase_escape,
|
||||||
parse_qs,
|
parse_qs,
|
||||||
traverse_obj,
|
qualities,
|
||||||
try_get,
|
try_get,
|
||||||
|
update_url_query,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class YandexVideoIE(InfoExtractor):
|
class YandexVideoIE(InfoExtractor):
|
||||||
@ -186,7 +188,22 @@ class YandexVideoPreviewIE(InfoExtractor):
|
|||||||
return self.url_result(data_json['video']['url'])
|
return self.url_result(data_json['video']['url'])
|
||||||
|
|
||||||
|
|
||||||
class ZenYandexIE(InfoExtractor):
|
class ZenYandexBaseIE(InfoExtractor):
    # Shared base for the dzen.ru / zen.yandex.ru extractors.

    def _fetch_ssr_data(self, url, video_id):
        """Download the page, follow an embedded redirect if present, and return its ssrData.

        Returns a ``(video_id, ssr_data)`` tuple; ``video_id`` is re-derived from the
        redirect target when the first page carries a ``retpath``.
        """
        webpage = self._download_webpage(url, video_id)

        # Some pages only carry a JS object `it` whose 'retpath' is the real URL
        redirect_info = self._search_json(
            r'(?:var|let|const)\s+it\s*=', webpage, 'redirect', video_id, default={})
        retpath = redirect_info.get('retpath')
        if retpath:
            video_id = self._match_id(retpath)
            webpage = self._download_webpage(retpath, video_id, note='Redirecting')

        params = self._search_json(
            r'(?:var|let|const)\s+_params\s*=\s*\(', webpage, 'metadata', video_id,
            contains_pattern=r'{["\']ssrData.+}')
        return video_id, params['ssrData']
|
||||||
|
|
||||||
|
|
||||||
|
class ZenYandexIE(ZenYandexBaseIE):
|
||||||
|
IE_NAME = 'dzen.ru'
|
||||||
|
IE_DESC = 'Дзен (dzen) formerly Яндекс.Дзен (Yandex Zen)'
|
||||||
_VALID_URL = r'https?://(zen\.yandex|dzen)\.ru(?:/video)?/(media|watch)/(?:(?:id/[^/]+/|[^/]+/)(?:[a-z0-9-]+)-)?(?P<id>[a-z0-9-]+)'
|
_VALID_URL = r'https?://(zen\.yandex|dzen)\.ru(?:/video)?/(media|watch)/(?:(?:id/[^/]+/|[^/]+/)(?:[a-z0-9-]+)-)?(?P<id>[a-z0-9-]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://zen.yandex.ru/media/id/606fd806cc13cb3c58c05cf5/vot-eto-focus-dedy-morozy-na-gidrociklah-60c7c443da18892ebfe85ed7',
|
'url': 'https://zen.yandex.ru/media/id/606fd806cc13cb3c58c05cf5/vot-eto-focus-dedy-morozy-na-gidrociklah-60c7c443da18892ebfe85ed7',
|
||||||
@ -216,6 +233,7 @@ class ZenYandexIE(InfoExtractor):
|
|||||||
'timestamp': 1573465585,
|
'timestamp': 1573465585,
|
||||||
},
|
},
|
||||||
'params': {'skip_download': 'm3u8'},
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
'skip': 'The page does not exist',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://zen.yandex.ru/video/watch/6002240ff8b1af50bb2da5e3',
|
'url': 'https://zen.yandex.ru/video/watch/6002240ff8b1af50bb2da5e3',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -227,6 +245,9 @@ class ZenYandexIE(InfoExtractor):
|
|||||||
'uploader': 'TechInsider',
|
'uploader': 'TechInsider',
|
||||||
'timestamp': 1611378221,
|
'timestamp': 1611378221,
|
||||||
'upload_date': '20210123',
|
'upload_date': '20210123',
|
||||||
|
'view_count': int,
|
||||||
|
'duration': 243,
|
||||||
|
'tags': ['опыт', 'эксперимент', 'огонь'],
|
||||||
},
|
},
|
||||||
'params': {'skip_download': 'm3u8'},
|
'params': {'skip_download': 'm3u8'},
|
||||||
}, {
|
}, {
|
||||||
@ -240,6 +261,9 @@ class ZenYandexIE(InfoExtractor):
|
|||||||
'uploader': 'TechInsider',
|
'uploader': 'TechInsider',
|
||||||
'upload_date': '20210123',
|
'upload_date': '20210123',
|
||||||
'timestamp': 1611378221,
|
'timestamp': 1611378221,
|
||||||
|
'view_count': int,
|
||||||
|
'duration': 243,
|
||||||
|
'tags': ['опыт', 'эксперимент', 'огонь'],
|
||||||
},
|
},
|
||||||
'params': {'skip_download': 'm3u8'},
|
'params': {'skip_download': 'm3u8'},
|
||||||
}, {
|
}, {
|
||||||
@ -252,44 +276,56 @@ class ZenYandexIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
video_id, ssr_data = self._fetch_ssr_data(url, video_id)
|
||||||
redirect = self._search_json(r'var it\s*=', webpage, 'redirect', id, default={}).get('retpath')
|
video_data = ssr_data['videoMetaResponse']
|
||||||
if redirect:
|
|
||||||
video_id = self._match_id(redirect)
|
|
||||||
webpage = self._download_webpage(redirect, video_id, note='Redirecting')
|
|
||||||
data_json = self._search_json(
|
|
||||||
r'("data"\s*:|data\s*=)', webpage, 'metadata', video_id, contains_pattern=r'{["\']_*serverState_*video.+}')
|
|
||||||
serverstate = self._search_regex(r'(_+serverState_+video-site_[^_]+_+)', webpage, 'server state')
|
|
||||||
uploader = self._search_regex(r'(<a\s*class=["\']card-channel-link[^"\']+["\'][^>]+>)',
|
|
||||||
webpage, 'uploader', default='<a>')
|
|
||||||
uploader_name = extract_attributes(uploader).get('aria-label')
|
|
||||||
item_id = traverse_obj(data_json, (serverstate, 'videoViewer', 'openedItemId', {str}))
|
|
||||||
video_json = traverse_obj(data_json, (serverstate, 'videoViewer', 'items', item_id, {dict})) or {}
|
|
||||||
|
|
||||||
formats, subtitles = [], {}
|
formats, subtitles = [], {}
|
||||||
for s_url in traverse_obj(video_json, ('video', 'streams', ..., {url_or_none})):
|
quality = qualities(('4', '0', '1', '2', '3', '5', '6', '7'))
|
||||||
|
# Deduplicate stream URLs. The "dzen_dash" query parameter is present in some URLs but can be omitted
|
||||||
|
stream_urls = set(traverse_obj(video_data, (
|
||||||
|
'video', ('id', ('streams', ...), ('mp4Streams', ..., 'url'), ('oneVideoStreams', ..., 'url')),
|
||||||
|
{url_or_none}, {update_url_query(query={'dzen_dash': []})})))
|
||||||
|
for s_url in stream_urls:
|
||||||
ext = determine_ext(s_url)
|
ext = determine_ext(s_url)
|
||||||
if ext == 'mpd':
|
content_type = traverse_obj(parse_qs(s_url), ('ct', 0))
|
||||||
fmts, subs = self._extract_mpd_formats_and_subtitles(s_url, video_id, mpd_id='dash')
|
if ext == 'mpd' or content_type == '6':
|
||||||
elif ext == 'm3u8':
|
fmts, subs = self._extract_mpd_formats_and_subtitles(s_url, video_id, mpd_id='dash', fatal=False)
|
||||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(s_url, video_id, 'mp4')
|
elif ext == 'm3u8' or content_type == '8':
|
||||||
|
fmts, subs = self._extract_m3u8_formats_and_subtitles(s_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||||
|
elif content_type == '0':
|
||||||
|
format_type = traverse_obj(parse_qs(s_url), ('type', 0))
|
||||||
|
formats.append({
|
||||||
|
'url': s_url,
|
||||||
|
'format_id': format_type,
|
||||||
|
'ext': 'mp4',
|
||||||
|
'quality': quality(format_type),
|
||||||
|
})
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
self.report_warning(f'Unsupported stream URL: {s_url}{bug_reports_message()}')
|
||||||
|
continue
|
||||||
formats.extend(fmts)
|
formats.extend(fmts)
|
||||||
subtitles = self._merge_subtitles(subtitles, subs)
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_json.get('title') or self._og_search_title(webpage),
|
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'duration': int_or_none(video_json.get('duration')),
|
**traverse_obj(video_data, {
|
||||||
'view_count': int_or_none(video_json.get('views')),
|
'title': ('title', {str}),
|
||||||
'timestamp': int_or_none(video_json.get('publicationDate')),
|
'description': ('description', {str}),
|
||||||
'uploader': uploader_name or data_json.get('authorName') or try_get(data_json, lambda x: x['publisher']['name']),
|
'thumbnail': ('image', {url_or_none}),
|
||||||
'description': video_json.get('description') or self._og_search_description(webpage),
|
'duration': ('video', 'duration', {int_or_none}),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage) or try_get(data_json, lambda x: x['og']['imageUrl']),
|
'view_count': ('video', 'views', {int_or_none}),
|
||||||
|
'timestamp': ('publicationDate', {int_or_none}),
|
||||||
|
'tags': ('tags', ..., {str}),
|
||||||
|
'uploader': ('source', 'title', {str}),
|
||||||
|
}),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class ZenYandexChannelIE(InfoExtractor):
|
class ZenYandexChannelIE(ZenYandexBaseIE):
|
||||||
|
IE_NAME = 'dzen.ru:channel'
|
||||||
_VALID_URL = r'https?://(zen\.yandex|dzen)\.ru/(?!media|video)(?:id/)?(?P<id>[a-z0-9-_]+)'
|
_VALID_URL = r'https?://(zen\.yandex|dzen)\.ru/(?!media|video)(?:id/)?(?P<id>[a-z0-9-_]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://zen.yandex.ru/tok_media',
|
'url': 'https://zen.yandex.ru/tok_media',
|
||||||
@ -323,8 +359,8 @@ class ZenYandexChannelIE(InfoExtractor):
|
|||||||
'url': 'https://zen.yandex.ru/jony_me',
|
'url': 'https://zen.yandex.ru/jony_me',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'jony_me',
|
'id': 'jony_me',
|
||||||
'description': 'md5:ce0a5cad2752ab58701b5497835b2cc5',
|
'description': 'md5:7c30d11dc005faba8826feae99da3113',
|
||||||
'title': 'JONY ',
|
'title': 'JONY',
|
||||||
},
|
},
|
||||||
'playlist_count': 18,
|
'playlist_count': 18,
|
||||||
}, {
|
}, {
|
||||||
@ -333,9 +369,8 @@ class ZenYandexChannelIE(InfoExtractor):
|
|||||||
'url': 'https://zen.yandex.ru/tatyanareva',
|
'url': 'https://zen.yandex.ru/tatyanareva',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'tatyanareva',
|
'id': 'tatyanareva',
|
||||||
'description': 'md5:40a1e51f174369ec3ba9d657734ac31f',
|
'description': 'md5:92e56fa730a932ca2483ba5c2186ad96',
|
||||||
'title': 'Татьяна Рева',
|
'title': 'Татьяна Рева',
|
||||||
'entries': 'maxcount:200',
|
|
||||||
},
|
},
|
||||||
'playlist_mincount': 46,
|
'playlist_mincount': 46,
|
||||||
}, {
|
}, {
|
||||||
@ -348,43 +383,31 @@ class ZenYandexChannelIE(InfoExtractor):
|
|||||||
'playlist_mincount': 657,
|
'playlist_mincount': 657,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _entries(self, item_id, server_state_json, server_settings_json):
|
def _entries(self, feed_data, channel_id):
|
||||||
items = (traverse_obj(server_state_json, ('feed', 'items', ...))
|
|
||||||
or traverse_obj(server_settings_json, ('exportData', 'items', ...)))
|
|
||||||
|
|
||||||
more = (traverse_obj(server_state_json, ('links', 'more'))
|
|
||||||
or traverse_obj(server_settings_json, ('exportData', 'more', 'link')))
|
|
||||||
|
|
||||||
next_page_id = None
|
next_page_id = None
|
||||||
for page in itertools.count(1):
|
for page in itertools.count(1):
|
||||||
for item in items or []:
|
for item in traverse_obj(feed_data, (
|
||||||
if item.get('type') != 'gif':
|
(None, ('items', lambda _, v: v['tab'] in ('shorts', 'longs'))),
|
||||||
continue
|
'items', lambda _, v: url_or_none(v['link']),
|
||||||
video_id = traverse_obj(item, 'publication_id', 'publicationId') or ''
|
)):
|
||||||
yield self.url_result(item['link'], ZenYandexIE, video_id.split(':')[-1])
|
yield self.url_result(item['link'], ZenYandexIE, item.get('id'), title=item.get('title'))
|
||||||
|
|
||||||
|
more = traverse_obj(feed_data, ('more', 'link', {url_or_none}))
|
||||||
current_page_id = next_page_id
|
current_page_id = next_page_id
|
||||||
next_page_id = traverse_obj(parse_qs(more), ('next_page_id', -1))
|
next_page_id = traverse_obj(parse_qs(more), ('next_page_id', -1))
|
||||||
if not all((more, items, next_page_id, next_page_id != current_page_id)):
|
if not all((more, next_page_id, next_page_id != current_page_id)):
|
||||||
break
|
break
|
||||||
|
|
||||||
data = self._download_json(more, item_id, note=f'Downloading Page {page}')
|
feed_data = self._download_json(more, channel_id, note=f'Downloading Page {page}')
|
||||||
items, more = data.get('items'), traverse_obj(data, ('more', 'link'))
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
item_id = self._match_id(url)
|
channel_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, item_id)
|
channel_id, ssr_data = self._fetch_ssr_data(url, channel_id)
|
||||||
redirect = self._search_json(
|
channel_data = ssr_data['exportResponse']
|
||||||
r'var it\s*=', webpage, 'redirect', item_id, default={}).get('retpath')
|
|
||||||
if redirect:
|
|
||||||
item_id = self._match_id(redirect)
|
|
||||||
webpage = self._download_webpage(redirect, item_id, note='Redirecting')
|
|
||||||
data = self._search_json(
|
|
||||||
r'("data"\s*:|data\s*=)', webpage, 'channel data', item_id, contains_pattern=r'{\"__serverState__.+}')
|
|
||||||
server_state_json = traverse_obj(data, lambda k, _: k.startswith('__serverState__'), get_all=False)
|
|
||||||
server_settings_json = traverse_obj(data, lambda k, _: k.startswith('__serverSettings__'), get_all=False)
|
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
self._entries(item_id, server_state_json, server_settings_json),
|
self._entries(channel_data['feedData'], channel_id),
|
||||||
item_id, traverse_obj(server_state_json, ('channel', 'source', 'title')),
|
channel_id, **traverse_obj(channel_data, ('channel', 'source', {
|
||||||
traverse_obj(server_state_json, ('channel', 'source', 'description')))
|
'title': ('title', {str}),
|
||||||
|
'description': ('description', {str}),
|
||||||
|
})))
|
||||||
|
|||||||
@ -524,10 +524,16 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
|||||||
response = self._extract_response(
|
response = self._extract_response(
|
||||||
item_id=f'{item_id} page {page_num}',
|
item_id=f'{item_id} page {page_num}',
|
||||||
query=continuation, headers=headers, ytcfg=ytcfg,
|
query=continuation, headers=headers, ytcfg=ytcfg,
|
||||||
check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
|
check_get_keys=(
|
||||||
|
'continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints',
|
||||||
|
# Playlist recommendations may return with no data - ignore
|
||||||
|
('responseContext', 'serviceTrackingParams', ..., 'params', ..., lambda k, v: k == 'key' and v == 'GetRecommendedMusicPlaylists_rid'),
|
||||||
|
))
|
||||||
|
|
||||||
if not response:
|
if not response:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
continuation = None
|
||||||
# Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
|
# Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
|
||||||
# See: https://github.com/ytdl-org/youtube-dl/issues/28702
|
# See: https://github.com/ytdl-org/youtube-dl/issues/28702
|
||||||
visitor_data = self._extract_visitor_data(response) or visitor_data
|
visitor_data = self._extract_visitor_data(response) or visitor_data
|
||||||
@ -564,7 +570,13 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
|||||||
yield from func(video_items_renderer)
|
yield from func(video_items_renderer)
|
||||||
continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
|
continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
|
||||||
|
|
||||||
if not video_items_renderer:
|
# In the case only a continuation is returned, try to follow it.
|
||||||
|
# We extract this after trying to extract non-continuation items as otherwise this
|
||||||
|
# may be prioritized over other continuations.
|
||||||
|
# see: https://github.com/yt-dlp/yt-dlp/issues/12933
|
||||||
|
continuation = continuation or self._extract_continuation({'contents': [continuation_item]})
|
||||||
|
|
||||||
|
if not continuation and not video_items_renderer:
|
||||||
break
|
break
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -999,14 +1011,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
'playlist_mincount': 94,
|
'playlist_mincount': 94,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
|
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
|
||||||
'title': 'Igor Kleiner Ph.D. - Playlists',
|
'title': 'Igor Kleiner - Playlists',
|
||||||
'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
|
'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
|
||||||
'uploader': 'Igor Kleiner Ph.D.',
|
'uploader': 'Igor Kleiner ',
|
||||||
'uploader_id': '@IgorDataScience',
|
'uploader_id': '@IgorDataScience',
|
||||||
'uploader_url': 'https://www.youtube.com/@IgorDataScience',
|
'uploader_url': 'https://www.youtube.com/@IgorDataScience',
|
||||||
'channel': 'Igor Kleiner Ph.D.',
|
'channel': 'Igor Kleiner ',
|
||||||
'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
|
'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
|
||||||
'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
|
'tags': 'count:23',
|
||||||
'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
|
'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
|
||||||
'channel_follower_count': int,
|
'channel_follower_count': int,
|
||||||
},
|
},
|
||||||
@ -1016,18 +1028,19 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
'playlist_mincount': 94,
|
'playlist_mincount': 94,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
|
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
|
||||||
'title': 'Igor Kleiner Ph.D. - Playlists',
|
'title': 'Igor Kleiner - Playlists',
|
||||||
'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
|
'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
|
||||||
'uploader': 'Igor Kleiner Ph.D.',
|
'uploader': 'Igor Kleiner ',
|
||||||
'uploader_id': '@IgorDataScience',
|
'uploader_id': '@IgorDataScience',
|
||||||
'uploader_url': 'https://www.youtube.com/@IgorDataScience',
|
'uploader_url': 'https://www.youtube.com/@IgorDataScience',
|
||||||
'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
|
'tags': 'count:23',
|
||||||
'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
|
'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
|
||||||
'channel': 'Igor Kleiner Ph.D.',
|
'channel': 'Igor Kleiner ',
|
||||||
'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
|
'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
|
||||||
'channel_follower_count': int,
|
'channel_follower_count': int,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
|
# TODO: fix channel_is_verified extraction
|
||||||
'note': 'playlists, series',
|
'note': 'playlists, series',
|
||||||
'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
|
'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
|
||||||
'playlist_mincount': 5,
|
'playlist_mincount': 5,
|
||||||
@ -1066,22 +1079,23 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
|
'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
|
# TODO: fix availability extraction
|
||||||
'note': 'basic, single video playlist',
|
'note': 'basic, single video playlist',
|
||||||
'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
|
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlSLRHmI1qNm0wjyVNWw1pCU',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
|
'id': 'PLt5yu3-wZAlSLRHmI1qNm0wjyVNWw1pCU',
|
||||||
'title': 'youtube-dl public playlist',
|
'title': 'single video playlist',
|
||||||
'description': '',
|
'description': '',
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'modified_date': '20201130',
|
'modified_date': '20250417',
|
||||||
'channel': 'Sergey M.',
|
'channel': 'cole-dlp-test-acc',
|
||||||
'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
|
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||||
'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
|
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
|
||||||
'availability': 'public',
|
'availability': 'public',
|
||||||
'uploader': 'Sergey M.',
|
'uploader': 'cole-dlp-test-acc',
|
||||||
'uploader_url': 'https://www.youtube.com/@sergeym.6173',
|
'uploader_url': 'https://www.youtube.com/@coletdjnz',
|
||||||
'uploader_id': '@sergeym.6173',
|
'uploader_id': '@coletdjnz',
|
||||||
},
|
},
|
||||||
'playlist_count': 1,
|
'playlist_count': 1,
|
||||||
}, {
|
}, {
|
||||||
@ -1171,11 +1185,11 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
},
|
},
|
||||||
'playlist_mincount': 17,
|
'playlist_mincount': 17,
|
||||||
}, {
|
}, {
|
||||||
'note': 'Community tab',
|
'note': 'Posts tab',
|
||||||
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
|
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
|
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
|
||||||
'title': 'lex will - Community',
|
'title': 'lex will - Posts',
|
||||||
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
|
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
|
||||||
'channel': 'lex will',
|
'channel': 'lex will',
|
||||||
'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
|
'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
|
||||||
@ -1188,30 +1202,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
},
|
},
|
||||||
'playlist_mincount': 18,
|
'playlist_mincount': 18,
|
||||||
}, {
|
}, {
|
||||||
'note': 'Channels tab',
|
# TODO: fix channel_is_verified extraction
|
||||||
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
|
|
||||||
'title': 'lex will - Channels',
|
|
||||||
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
|
|
||||||
'channel': 'lex will',
|
|
||||||
'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
|
|
||||||
'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
|
|
||||||
'tags': ['bible', 'history', 'prophesy'],
|
|
||||||
'channel_follower_count': int,
|
|
||||||
'uploader_url': 'https://www.youtube.com/@lexwill718',
|
|
||||||
'uploader_id': '@lexwill718',
|
|
||||||
'uploader': 'lex will',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 12,
|
|
||||||
}, {
|
|
||||||
'note': 'Search tab',
|
'note': 'Search tab',
|
||||||
'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
|
'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
|
||||||
'playlist_mincount': 40,
|
'playlist_mincount': 40,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'UCYO_jab_esuFRV4b17AJtAw',
|
'id': 'UCYO_jab_esuFRV4b17AJtAw',
|
||||||
'title': '3Blue1Brown - Search - linear algebra',
|
'title': '3Blue1Brown - Search - linear algebra',
|
||||||
'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
|
'description': 'md5:602e3789e6a0cb7d9d352186b720e395',
|
||||||
'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
|
'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
|
||||||
'tags': ['Mathematics'],
|
'tags': ['Mathematics'],
|
||||||
'channel': '3Blue1Brown',
|
'channel': '3Blue1Brown',
|
||||||
@ -1232,6 +1230,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
|
'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
|
# TODO: fix availability extraction
|
||||||
'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
|
'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
|
||||||
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
|
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -1294,24 +1293,25 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
},
|
},
|
||||||
'playlist_mincount': 21,
|
'playlist_mincount': 21,
|
||||||
}, {
|
}, {
|
||||||
|
# TODO: fix availability extraction
|
||||||
'note': 'Playlist with "show unavailable videos" button',
|
'note': 'Playlist with "show unavailable videos" button',
|
||||||
'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
|
'url': 'https://www.youtube.com/playlist?list=PLYwq8WOe86_xGmR7FrcJq8Sb7VW8K3Tt2',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
|
'title': 'The Memes Of 2010s.....',
|
||||||
'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
|
'id': 'PLYwq8WOe86_xGmR7FrcJq8Sb7VW8K3Tt2',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'channel': 'Phim Siêu Nhân Nhật Bản',
|
'channel': "I'm Not JiNxEd",
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'description': '',
|
'description': 'md5:44dc3b315ba69394feaafa2f40e7b2a1',
|
||||||
'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
|
'channel_url': 'https://www.youtube.com/channel/UC5H5H85D1QE5-fuWWQ1hdNg',
|
||||||
'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
|
'channel_id': 'UC5H5H85D1QE5-fuWWQ1hdNg',
|
||||||
'modified_date': r're:\d{8}',
|
'modified_date': r're:\d{8}',
|
||||||
'availability': 'public',
|
'availability': 'public',
|
||||||
'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban',
|
'uploader_url': 'https://www.youtube.com/@imnotjinxed1998',
|
||||||
'uploader_id': '@phimsieunhannhatban',
|
'uploader_id': '@imnotjinxed1998',
|
||||||
'uploader': 'Phim Siêu Nhân Nhật Bản',
|
'uploader': "I'm Not JiNxEd",
|
||||||
},
|
},
|
||||||
'playlist_mincount': 200,
|
'playlist_mincount': 150,
|
||||||
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
|
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
|
||||||
}, {
|
}, {
|
||||||
'note': 'Playlist with unavailable videos in page 7',
|
'note': 'Playlist with unavailable videos in page 7',
|
||||||
@ -1334,6 +1334,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
'playlist_mincount': 1000,
|
'playlist_mincount': 1000,
|
||||||
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
|
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
|
||||||
}, {
|
}, {
|
||||||
|
# TODO: fix availability extraction
|
||||||
'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
|
'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
|
||||||
'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
|
'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -1384,7 +1385,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
|
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'hGkQjiJLjWQ', # This will keep changing
|
'id': 'YDvsBbKfLPA', # This will keep changing
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': str,
|
'title': str,
|
||||||
'upload_date': r're:\d{8}',
|
'upload_date': r're:\d{8}',
|
||||||
@ -1409,6 +1410,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
'uploader_id': '@SkyNews',
|
'uploader_id': '@SkyNews',
|
||||||
'uploader': 'Sky News',
|
'uploader': 'Sky News',
|
||||||
'channel_is_verified': True,
|
'channel_is_verified': True,
|
||||||
|
'media_type': 'livestream',
|
||||||
|
'timestamp': int,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -1496,6 +1499,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
|
'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
|
# TODO: fix availability extraction
|
||||||
'note': 'VLPL, should redirect to playlist?list=PL...',
|
'note': 'VLPL, should redirect to playlist?list=PL...',
|
||||||
'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
|
'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -1537,6 +1541,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
# Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
|
# Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
|
||||||
# Treat as a general feed
|
# Treat as a general feed
|
||||||
|
# TODO: fix extraction
|
||||||
'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
|
'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
|
'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
|
||||||
@ -1560,21 +1565,21 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
'expected_warnings': ['YouTube Music is not directly supported'],
|
'expected_warnings': ['YouTube Music is not directly supported'],
|
||||||
}, {
|
}, {
|
||||||
'note': 'unlisted single video playlist',
|
'note': 'unlisted single video playlist',
|
||||||
'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
|
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQLfIN0MMgp0wVV6MP3bM4_',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
|
'id': 'PLt5yu3-wZAlQLfIN0MMgp0wVV6MP3bM4_',
|
||||||
'title': 'yt-dlp unlisted playlist test',
|
'title': 'unlisted playlist',
|
||||||
'availability': 'unlisted',
|
'availability': 'unlisted',
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'modified_date': '20220418',
|
'modified_date': '20250417',
|
||||||
'channel': 'colethedj',
|
'channel': 'cole-dlp-test-acc',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'description': '',
|
'description': '',
|
||||||
'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
|
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||||
'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
|
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
|
||||||
'uploader_url': 'https://www.youtube.com/@colethedj1894',
|
'uploader_url': 'https://www.youtube.com/@coletdjnz',
|
||||||
'uploader_id': '@colethedj1894',
|
'uploader_id': '@coletdjnz',
|
||||||
'uploader': 'colethedj',
|
'uploader': 'cole-dlp-test-acc',
|
||||||
},
|
},
|
||||||
'playlist': [{
|
'playlist': [{
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -1596,6 +1601,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
'playlist_count': 1,
|
'playlist_count': 1,
|
||||||
'params': {'extract_flat': True},
|
'params': {'extract_flat': True},
|
||||||
}, {
|
}, {
|
||||||
|
# By default, recommended is always empty.
|
||||||
'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
|
'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
|
||||||
'url': 'https://www.youtube.com/feed/recommended',
|
'url': 'https://www.youtube.com/feed/recommended',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -1603,7 +1609,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
'title': 'recommended',
|
'title': 'recommended',
|
||||||
'tags': [],
|
'tags': [],
|
||||||
},
|
},
|
||||||
'playlist_mincount': 50,
|
'playlist_count': 0,
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
'extractor_args': {'youtubetab': {'skip': ['webpage']}},
|
'extractor_args': {'youtubetab': {'skip': ['webpage']}},
|
||||||
@ -1628,6 +1634,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
},
|
},
|
||||||
'skip': 'Query for sorting no longer works',
|
'skip': 'Query for sorting no longer works',
|
||||||
}, {
|
}, {
|
||||||
|
# TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos
|
||||||
'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
|
'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
|
||||||
'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
|
'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -1654,11 +1661,12 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
|
'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
|
# TODO: fix metadata extraction
|
||||||
'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
|
'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
|
||||||
'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
|
'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
|
'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
|
||||||
'modified_date': '20220407',
|
'modified_date': '20250115',
|
||||||
'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
|
'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'availability': 'unlisted',
|
'availability': 'unlisted',
|
||||||
@ -1692,6 +1700,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
'expected_warnings': ['Preferring "ja"'],
|
'expected_warnings': ['Preferring "ja"'],
|
||||||
}, {
|
}, {
|
||||||
# XXX: this should really check flat playlist entries, but the test suite doesn't support that
|
# XXX: this should really check flat playlist entries, but the test suite doesn't support that
|
||||||
|
# TODO: fix availability extraction
|
||||||
'note': 'preferred lang set with playlist with translated video titles',
|
'note': 'preferred lang set with playlist with translated video titles',
|
||||||
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
|
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -1714,6 +1723,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
# shorts audio pivot for 2GtVksBMYFM.
|
# shorts audio pivot for 2GtVksBMYFM.
|
||||||
'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
|
'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
|
||||||
|
# TODO: fix extraction
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'sfv_audio_pivot',
|
'id': 'sfv_audio_pivot',
|
||||||
'title': 'sfv_audio_pivot',
|
'title': 'sfv_audio_pivot',
|
||||||
@ -1751,6 +1761,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
'playlist_mincount': 8,
|
'playlist_mincount': 8,
|
||||||
}, {
|
}, {
|
||||||
# Should get three playlists for videos, shorts and streams tabs
|
# Should get three playlists for videos, shorts and streams tabs
|
||||||
|
# TODO: fix channel_is_verified extraction
|
||||||
'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
|
'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
|
'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
|
||||||
@ -1758,7 +1769,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
'channel_follower_count': int,
|
'channel_follower_count': int,
|
||||||
'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
|
'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
|
||||||
'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
|
'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
|
||||||
'description': 'md5:49809d8bf9da539bc48ed5d1f83c33f2',
|
'description': 'md5:01e53f350ab8ad6fcf7c4fedb3c1b99f',
|
||||||
'channel': 'Polka Ch. 尾丸ポルカ',
|
'channel': 'Polka Ch. 尾丸ポルカ',
|
||||||
'tags': 'count:35',
|
'tags': 'count:35',
|
||||||
'uploader_url': 'https://www.youtube.com/@OmaruPolka',
|
'uploader_url': 'https://www.youtube.com/@OmaruPolka',
|
||||||
@ -1769,14 +1780,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
'playlist_count': 3,
|
'playlist_count': 3,
|
||||||
}, {
|
}, {
|
||||||
# Shorts tab with channel with handle
|
# Shorts tab with channel with handle
|
||||||
# TODO: fix channel description
|
# TODO: fix channel_is_verified extraction
|
||||||
'url': 'https://www.youtube.com/@NotJustBikes/shorts',
|
'url': 'https://www.youtube.com/@NotJustBikes/shorts',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'UC0intLFzLaudFG-xAvUEO-A',
|
'id': 'UC0intLFzLaudFG-xAvUEO-A',
|
||||||
'title': 'Not Just Bikes - Shorts',
|
'title': 'Not Just Bikes - Shorts',
|
||||||
'tags': 'count:10',
|
'tags': 'count:10',
|
||||||
'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
|
'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
|
||||||
'description': 'md5:5e82545b3a041345927a92d0585df247',
|
'description': 'md5:1d9fc1bad7f13a487299d1fe1712e031',
|
||||||
'channel_follower_count': int,
|
'channel_follower_count': int,
|
||||||
'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
|
'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
|
||||||
'channel': 'Not Just Bikes',
|
'channel': 'Not Just Bikes',
|
||||||
@ -1797,7 +1808,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
|
'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
|
||||||
'channel': '中村悠一',
|
'channel': '中村悠一',
|
||||||
'channel_follower_count': int,
|
'channel_follower_count': int,
|
||||||
'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
|
'description': 'md5:e8fd705073a594f27d6d6d020da560dc',
|
||||||
'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
|
'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
|
||||||
'uploader_id': '@Yuichi-Nakamura',
|
'uploader_id': '@Yuichi-Nakamura',
|
||||||
'uploader': '中村悠一',
|
'uploader': '中村悠一',
|
||||||
@ -1815,6 +1826,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# No videos tab but has a shorts tab
|
# No videos tab but has a shorts tab
|
||||||
|
# TODO: fix metadata extraction
|
||||||
'url': 'https://www.youtube.com/c/TKFShorts',
|
'url': 'https://www.youtube.com/c/TKFShorts',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
|
'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
|
||||||
@ -1851,6 +1863,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
# Shorts url result in shorts tab
|
# Shorts url result in shorts tab
|
||||||
# TODO: Fix channel id extraction
|
# TODO: Fix channel id extraction
|
||||||
|
# TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
|
||||||
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
|
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'UCiu-3thuViMebBjw_5nWYrA',
|
'id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||||
@ -1879,6 +1892,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
'params': {'extract_flat': True},
|
'params': {'extract_flat': True},
|
||||||
}, {
|
}, {
|
||||||
# Live video status should be extracted
|
# Live video status should be extracted
|
||||||
|
# TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
|
||||||
'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
|
'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'UCQvWX73GQygcwXOTSf_VDVg',
|
'id': 'UCQvWX73GQygcwXOTSf_VDVg',
|
||||||
@ -1907,6 +1921,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
'playlist_mincount': 1,
|
'playlist_mincount': 1,
|
||||||
}, {
|
}, {
|
||||||
# Channel renderer metadata. Contains number of videos on the channel
|
# Channel renderer metadata. Contains number of videos on the channel
|
||||||
|
# TODO: channels tab removed, change this test to use another page with channel renderer
|
||||||
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
|
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'UCiu-3thuViMebBjw_5nWYrA',
|
'id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||||
@ -1940,7 +1955,9 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
'params': {'extract_flat': True},
|
'params': {'extract_flat': True},
|
||||||
|
'skip': 'channels tab removed',
|
||||||
}, {
|
}, {
|
||||||
|
# TODO: fix channel_is_verified extraction
|
||||||
'url': 'https://www.youtube.com/@3blue1brown/about',
|
'url': 'https://www.youtube.com/@3blue1brown/about',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '@3blue1brown',
|
'id': '@3blue1brown',
|
||||||
@ -1950,7 +1967,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
|
'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
|
||||||
'channel': '3Blue1Brown',
|
'channel': '3Blue1Brown',
|
||||||
'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
|
'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
|
||||||
'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
|
'description': 'md5:602e3789e6a0cb7d9d352186b720e395',
|
||||||
'uploader_url': 'https://www.youtube.com/@3blue1brown',
|
'uploader_url': 'https://www.youtube.com/@3blue1brown',
|
||||||
'uploader_id': '@3blue1brown',
|
'uploader_id': '@3blue1brown',
|
||||||
'uploader': '3Blue1Brown',
|
'uploader': '3Blue1Brown',
|
||||||
@ -1976,6 +1993,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
'playlist_count': 5,
|
'playlist_count': 5,
|
||||||
}, {
|
}, {
|
||||||
# Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
|
# Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
|
||||||
|
# TODO: fix channel_is_verified extraction
|
||||||
'url': 'https://www.youtube.com/@AHimitsu/releases',
|
'url': 'https://www.youtube.com/@AHimitsu/releases',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
|
'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
|
||||||
@ -2015,6 +2033,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
'playlist_mincount': 100,
|
'playlist_mincount': 100,
|
||||||
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
|
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
|
||||||
}, {
|
}, {
|
||||||
|
# TODO: fix channel_is_verified extraction
|
||||||
'note': 'Tags containing spaces',
|
'note': 'Tags containing spaces',
|
||||||
'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
|
'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
|
||||||
'playlist_count': 3,
|
'playlist_count': 3,
|
||||||
@ -2035,6 +2054,24 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||||||
'challenges', 'sketches', 'scary games', 'funny games', 'rage games',
|
'challenges', 'sketches', 'scary games', 'funny games', 'rage games',
|
||||||
'mark fischbach'],
|
'mark fischbach'],
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# https://github.com/yt-dlp/yt-dlp/issues/12933
|
||||||
|
'note': 'streams tab, some scheduled streams. Empty intermediate response with only continuation - must follow',
|
||||||
|
'url': 'https://www.youtube.com/@sbcitygov/streams',
|
||||||
|
'playlist_mincount': 150,
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'UCH6-qfQwlUgz9SAf05jvc_w',
|
||||||
|
'channel': 'sbcitygov',
|
||||||
|
'channel_id': 'UCH6-qfQwlUgz9SAf05jvc_w',
|
||||||
|
'title': 'sbcitygov - Live',
|
||||||
|
'channel_follower_count': int,
|
||||||
|
'description': 'md5:ca1a92059835c071e33b3db52f4a6d67',
|
||||||
|
'uploader_id': '@sbcitygov',
|
||||||
|
'uploader_url': 'https://www.youtube.com/@sbcitygov',
|
||||||
|
'uploader': 'sbcitygov',
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UCH6-qfQwlUgz9SAf05jvc_w',
|
||||||
|
'tags': [],
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@ -2943,7 +2943,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
self.write_debug(f'{kwargs.get("video_id")}: No {pot_request.context.value} PO Token available for {client} client')
|
self.write_debug(f'{kwargs.get("video_id")}: No {pot_request.context.value} PO Token available for {client} client')
|
||||||
return
|
return
|
||||||
|
|
||||||
self.write_debug(f'{kwargs.get("video_id")}: Fetched a {pot_request.context.value} PO Token for {client} client')
|
self.write_debug(f'{kwargs.get("video_id")}: Retrieved a {pot_request.context.value} PO Token for {client} client')
|
||||||
return po_token
|
return po_token
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -3712,6 +3712,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
if 'sign in' in reason.lower():
|
if 'sign in' in reason.lower():
|
||||||
reason = remove_end(reason, 'This helps protect our community. Learn more')
|
reason = remove_end(reason, 'This helps protect our community. Learn more')
|
||||||
reason = f'{remove_end(reason.strip(), ".")}. {self._youtube_login_hint}'
|
reason = f'{remove_end(reason.strip(), ".")}. {self._youtube_login_hint}'
|
||||||
|
elif get_first(playability_statuses, ('errorScreen', 'playerCaptchaViewModel', {dict})):
|
||||||
|
reason += '. YouTube is requiring a captcha challenge before playback'
|
||||||
self.raise_no_formats(reason, expected=True)
|
self.raise_no_formats(reason, expected=True)
|
||||||
|
|
||||||
keywords = get_first(video_details, 'keywords', expected_type=list) or []
|
keywords = get_first(video_details, 'keywords', expected_type=list) or []
|
||||||
@ -3940,7 +3942,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
if not traverse_obj(initial_data, 'contents'):
|
if not traverse_obj(initial_data, 'contents'):
|
||||||
self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
|
self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
|
||||||
initial_data = None
|
initial_data = None
|
||||||
if not initial_data:
|
if not initial_data and 'initial_data' not in self._configuration_arg('player_skip'):
|
||||||
query = {'videoId': video_id}
|
query = {'videoId': video_id}
|
||||||
query.update(self._get_checkok_params())
|
query.update(self._get_checkok_params())
|
||||||
initial_data = self._extract_response(
|
initial_data = self._extract_response(
|
||||||
|
|||||||
@ -89,16 +89,16 @@ class MyPoTokenProviderPTP(PoTokenProvider): # Provider name must end with "PTP
|
|||||||
# ℹ️ Settings are pulled from extractor args passed to yt-dlp with the key `youtubepot-<PROVIDER_KEY>`.
|
# ℹ️ Settings are pulled from extractor args passed to yt-dlp with the key `youtubepot-<PROVIDER_KEY>`.
|
||||||
# For this example, the extractor arg would be `--extractor-args "youtubepot-mypotokenprovider:url=https://custom.example.com/get_pot"`
|
# For this example, the extractor arg would be `--extractor-args "youtubepot-mypotokenprovider:url=https://custom.example.com/get_pot"`
|
||||||
external_provider_url = self._configuration_arg('url', default=['https://provider.example.com/get_pot'])[0]
|
external_provider_url = self._configuration_arg('url', default=['https://provider.example.com/get_pot'])[0]
|
||||||
|
|
||||||
|
# See below for logging guidelines
|
||||||
|
self.logger.trace(f'Using external provider URL: {external_provider_url}')
|
||||||
|
|
||||||
# You should use the internal HTTP client to make requests where possible,
|
# You should use the internal HTTP client to make requests where possible,
|
||||||
# as it will handle cookies and other networking settings passed to yt-dlp.
|
# as it will handle cookies and other networking settings passed to yt-dlp.
|
||||||
try:
|
try:
|
||||||
# See below for logging guidelines
|
# See docstring in _request_webpage method for request tips
|
||||||
self.logger.info(f'Requesting {request.context.value} PO Token for {request.internal_client_name} client from external provider')
|
response = self._request_webpage(
|
||||||
|
Request(external_provider_url, data=json.dumps({
|
||||||
# See docstring in _urlopen method for request tips
|
|
||||||
response = self._urlopen(
|
|
||||||
request, Request(external_provider_url, data=json.dumps({
|
|
||||||
'content_binding': get_webpo_content_binding(request),
|
'content_binding': get_webpo_content_binding(request),
|
||||||
'proxy': request.request_proxy,
|
'proxy': request.request_proxy,
|
||||||
'headers': request.request_headers,
|
'headers': request.request_headers,
|
||||||
@ -107,7 +107,10 @@ class MyPoTokenProviderPTP(PoTokenProvider): # Provider name must end with "PTP
|
|||||||
# Important: If your provider has its own caching, please respect `bypass_cache`.
|
# Important: If your provider has its own caching, please respect `bypass_cache`.
|
||||||
# This may be used in the future to request a fresh PO Token if required.
|
# This may be used in the future to request a fresh PO Token if required.
|
||||||
'do_not_cache': request.bypass_cache,
|
'do_not_cache': request.bypass_cache,
|
||||||
}).encode(), proxies={'all': None}))
|
}).encode(), proxies={'all': None}),
|
||||||
|
pot_request=request,
|
||||||
|
note=f'Requesting {request.context.value} PO Token for {request.internal_client_name} client from external provider',
|
||||||
|
)
|
||||||
|
|
||||||
except RequestError as e:
|
except RequestError as e:
|
||||||
# ℹ️ If there is an error, raise PoTokenProviderError.
|
# ℹ️ If there is an error, raise PoTokenProviderError.
|
||||||
|
|||||||
@ -17,7 +17,7 @@ from yt_dlp.extractor.youtube.pot._provider import (
|
|||||||
register_provider_generic,
|
register_provider_generic,
|
||||||
)
|
)
|
||||||
from yt_dlp.extractor.youtube.pot._registry import _pot_providers, _ptp_preferences
|
from yt_dlp.extractor.youtube.pot._registry import _pot_providers, _ptp_preferences
|
||||||
from yt_dlp.networking import Request
|
from yt_dlp.networking import Request, Response
|
||||||
from yt_dlp.utils import traverse_obj
|
from yt_dlp.utils import traverse_obj
|
||||||
from yt_dlp.utils.networking import HTTPHeaderDict
|
from yt_dlp.utils.networking import HTTPHeaderDict
|
||||||
|
|
||||||
@ -135,27 +135,34 @@ class PoTokenProvider(IEContentProvider, abc.ABC, suffix='PTP'):
|
|||||||
|
|
||||||
# Helper functions
|
# Helper functions
|
||||||
|
|
||||||
def _urlopen(self, pot_request: PoTokenRequest, http_request: Request):
|
def _request_webpage(self, request: Request, pot_request: PoTokenRequest | None = None, note=None, **kwargs) -> Response:
|
||||||
"""Make a request using the request parameters from the PoTokenRequest.
|
"""Make a request using the internal HTTP Client.
|
||||||
Use this instead of calling requests, urllib3 or other HTTP client libraries directly!!
|
Use this instead of calling requests, urllib3 or other HTTP client libraries directly!
|
||||||
|
|
||||||
YouTube cookies will be automatically applied if this request is made to YouTube.
|
YouTube cookies will be automatically applied if this request is made to YouTube.
|
||||||
|
|
||||||
|
@param request: The request to make
|
||||||
|
@param pot_request: The PoTokenRequest to use. Request parameters will be merged from it.
|
||||||
|
@param note: Custom log message to display when making the request. Set to `False` to disable logging.
|
||||||
|
|
||||||
Tips:
|
Tips:
|
||||||
- Disable proxy (e.g. if calling local service): Request(..., proxies={'all': None})
|
- Disable proxy (e.g. if calling local service): Request(..., proxies={'all': None})
|
||||||
- Set request timeout: Request(..., extensions={'timeout': 5.0})
|
- Set request timeout: Request(..., extensions={'timeout': 5.0})
|
||||||
"""
|
"""
|
||||||
req = http_request.copy()
|
req = request.copy()
|
||||||
|
|
||||||
# Merge some ctx request settings into the request
|
# Merge some ctx request settings into the request
|
||||||
# Most of these will already be used by the configured ydl instance,
|
# Most of these will already be used by the configured ydl instance,
|
||||||
# however, the YouTube extractor may override some.
|
# however, the YouTube extractor may override some.
|
||||||
req.headers = HTTPHeaderDict(pot_request.request_headers, req.headers)
|
if pot_request is not None:
|
||||||
req.proxies = req.proxies or ({'all': pot_request.request_proxy} if pot_request.request_proxy else {})
|
req.headers = HTTPHeaderDict(pot_request.request_headers, req.headers)
|
||||||
|
req.proxies = req.proxies or ({'all': pot_request.request_proxy} if pot_request.request_proxy else {})
|
||||||
|
|
||||||
if pot_request.request_cookiejar is not None:
|
if pot_request.request_cookiejar is not None:
|
||||||
req.extensions['cookiejar'] = req.extensions.get('cookiejar', pot_request.request_cookiejar)
|
req.extensions['cookiejar'] = req.extensions.get('cookiejar', pot_request.request_cookiejar)
|
||||||
|
|
||||||
|
if note is not False:
|
||||||
|
self.logger.info(str(note) if note else 'Requesting webpage')
|
||||||
return self.ie._downloader.urlopen(req)
|
return self.ie._downloader.urlopen(req)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -5,6 +5,7 @@ from __future__ import annotations
|
|||||||
import base64
|
import base64
|
||||||
import contextlib
|
import contextlib
|
||||||
import enum
|
import enum
|
||||||
|
import re
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
from yt_dlp.extractor.youtube.pot.provider import PoTokenContext, PoTokenRequest
|
from yt_dlp.extractor.youtube.pot.provider import PoTokenContext, PoTokenRequest
|
||||||
@ -58,7 +59,7 @@ def _extract_visitor_id(visitor_data):
|
|||||||
with contextlib.suppress(Exception):
|
with contextlib.suppress(Exception):
|
||||||
visitor_id = base64.urlsafe_b64decode(urllib.parse.unquote_plus(visitor_data))[2:13].decode()
|
visitor_id = base64.urlsafe_b64decode(urllib.parse.unquote_plus(visitor_data))[2:13].decode()
|
||||||
# check that visitor id is all letters and numbers
|
# check that visitor id is all letters and numbers
|
||||||
if visitor_id.isalnum() and len(visitor_id) == 11:
|
if re.fullmatch(r'[A-Za-z0-9_-]{11}', visitor_id):
|
||||||
return visitor_id
|
return visitor_id
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|||||||
@ -3,6 +3,7 @@ import warnings
|
|||||||
|
|
||||||
from .common import (
|
from .common import (
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
|
PATCHRequest,
|
||||||
PUTRequest,
|
PUTRequest,
|
||||||
Request,
|
Request,
|
||||||
RequestDirector,
|
RequestDirector,
|
||||||
|
|||||||
@ -505,6 +505,7 @@ class Request:
|
|||||||
|
|
||||||
|
|
||||||
HEADRequest = functools.partial(Request, method='HEAD')
|
HEADRequest = functools.partial(Request, method='HEAD')
|
||||||
|
PATCHRequest = functools.partial(Request, method='PATCH')
|
||||||
PUTRequest = functools.partial(Request, method='PUT')
|
PUTRequest = functools.partial(Request, method='PUT')
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -2044,7 +2044,7 @@ def url_or_none(url):
|
|||||||
if not url or not isinstance(url, str):
|
if not url or not isinstance(url, str):
|
||||||
return None
|
return None
|
||||||
url = url.strip()
|
url = url.strip()
|
||||||
return url if re.match(r'(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
|
return url if re.match(r'(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?|wss?):)?//', url) else None
|
||||||
|
|
||||||
|
|
||||||
def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
|
def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user