mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-04-27 11:05:54 +00:00
fix: expand webpo visitor id character set support
This commit is contained in:
parent
4ee46531c9
commit
7b0dd8b2d1
@ -29,8 +29,8 @@ class TestGetWebPoContentBinding:
|
|||||||
assert get_webpo_content_binding(pot_request) == expected
|
assert get_webpo_content_binding(pot_request) == expected
|
||||||
|
|
||||||
def test_extract_visitor_id(self, pot_request):
|
def test_extract_visitor_id(self, pot_request):
|
||||||
pot_request.visitor_data = 'CgsxMjM0NTY3ODkwMSiA4s-qBg%3D%3D'
|
pot_request.visitor_data = 'CgsxMjNhYmNYWVpfLSiA4s%2DqBg%3D%3D'
|
||||||
assert get_webpo_content_binding(pot_request, bind_to_visitor_id=True) == ('12345678901', ContentBindingType.VISITOR_ID)
|
assert get_webpo_content_binding(pot_request, bind_to_visitor_id=True) == ('123abcXYZ_-', ContentBindingType.VISITOR_ID)
|
||||||
|
|
||||||
def test_invalid_visitor_id(self, pot_request):
|
def test_invalid_visitor_id(self, pot_request):
|
||||||
# visitor id not alphanumeric (i.e. protobuf extraction failed)
|
# visitor id not alphanumeric (i.e. protobuf extraction failed)
|
||||||
|
|||||||
@ -16,7 +16,7 @@ from yt_dlp.extractor.youtube.pot._registry import _pot_pcs_providers
|
|||||||
|
|
||||||
@pytest.fixture()
|
@pytest.fixture()
|
||||||
def pot_request(pot_request) -> PoTokenRequest:
|
def pot_request(pot_request) -> PoTokenRequest:
|
||||||
pot_request.visitor_data = 'CgsxMjM0NTY3ODkwMSiA4s-qBg%3D%3D' # visitor_id=12345678901
|
pot_request.visitor_data = 'CgsxMjNhYmNYWVpfLSiA4s%2DqBg%3D%3D' # visitor_id=123abcXYZ_-
|
||||||
return pot_request
|
return pot_request
|
||||||
|
|
||||||
|
|
||||||
@ -51,13 +51,13 @@ class TestWebPoPCSP:
|
|||||||
*[(client, context, is_authenticated, remote_host, source_address, request_proxy, expected) for client in [
|
*[(client, context, is_authenticated, remote_host, source_address, request_proxy, expected) for client in [
|
||||||
'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER']
|
'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER']
|
||||||
for context, is_authenticated, remote_host, source_address, request_proxy, expected in [
|
for context, is_authenticated, remote_host, source_address, request_proxy, expected in [
|
||||||
(PoTokenContext.GVS, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '12345678901', 'cbt': 'visitor_id'}),
|
(PoTokenContext.GVS, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'visitor_id'}),
|
||||||
(PoTokenContext.PLAYER, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '12345678901', 'cbt': 'video_id'}),
|
(PoTokenContext.PLAYER, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'video_id'}),
|
||||||
(PoTokenContext.GVS, True, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': 'example-data-sync-id', 'cbt': 'datasync_id'}),
|
(PoTokenContext.GVS, True, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': 'example-data-sync-id', 'cbt': 'datasync_id'}),
|
||||||
]],
|
]],
|
||||||
('WEB_REMIX', PoTokenContext.PLAYER, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '12345678901', 'cbt': 'visitor_id'}),
|
('WEB_REMIX', PoTokenContext.PLAYER, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'visitor_id'}),
|
||||||
('WEB', PoTokenContext.GVS, False, None, None, None, {'t': 'webpo', 'cb': '12345678901', 'cbt': 'visitor_id', 'ip': None, 'sa': None, 'px': None}),
|
('WEB', PoTokenContext.GVS, False, None, None, None, {'t': 'webpo', 'cb': '123abcXYZ_-', 'cbt': 'visitor_id', 'ip': None, 'sa': None, 'px': None}),
|
||||||
('TVHTML5', PoTokenContext.PLAYER, False, None, None, 'http://example.com', {'t': 'webpo', 'cb': '12345678901', 'cbt': 'video_id', 'ip': None, 'sa': None, 'px': 'http://example.com'}),
|
('TVHTML5', PoTokenContext.PLAYER, False, None, None, 'http://example.com', {'t': 'webpo', 'cb': '123abcXYZ_-', 'cbt': 'video_id', 'ip': None, 'sa': None, 'px': 'http://example.com'}),
|
||||||
|
|
||||||
])
|
])
|
||||||
def test_generate_key_bindings(self, ie, logger, pot_request, client_name, context, is_authenticated, remote_host, source_address, request_proxy, expected):
|
def test_generate_key_bindings(self, ie, logger, pot_request, client_name, context, is_authenticated, remote_host, source_address, request_proxy, expected):
|
||||||
@ -68,7 +68,7 @@ class TestWebPoPCSP:
|
|||||||
pot_request.innertube_context['client']['remoteHost'] = remote_host
|
pot_request.innertube_context['client']['remoteHost'] = remote_host
|
||||||
pot_request.request_source_address = source_address
|
pot_request.request_source_address = source_address
|
||||||
pot_request.request_proxy = request_proxy
|
pot_request.request_proxy = request_proxy
|
||||||
pot_request.video_id = '12345678901' # same as visitor id to test type
|
pot_request.video_id = '123abcXYZ_-' # same as visitor id to test type
|
||||||
|
|
||||||
assert pcs.generate_cache_spec(pot_request).key_bindings == expected
|
assert pcs.generate_cache_spec(pot_request).key_bindings == expected
|
||||||
|
|
||||||
@ -78,7 +78,7 @@ class TestWebPoPCSP:
|
|||||||
pot_request.innertube_context['client']['clientName'] = 'WEB'
|
pot_request.innertube_context['client']['clientName'] = 'WEB'
|
||||||
pot_request.context = PoTokenContext.GVS
|
pot_request.context = PoTokenContext.GVS
|
||||||
pot_request.is_authenticated = False
|
pot_request.is_authenticated = False
|
||||||
assert pcs.generate_cache_spec(pot_request).key_bindings == {'t': 'webpo', 'ip': None, 'sa': None, 'px': None, 'cb': 'CgsxMjM0NTY3ODkwMSiA4s-qBg%3D%3D', 'cbt': 'visitor_data'}
|
assert pcs.generate_cache_spec(pot_request).key_bindings == {'t': 'webpo', 'ip': None, 'sa': None, 'px': None, 'cb': 'CgsxMjNhYmNYWVpfLSiA4s%2DqBg%3D%3D', 'cbt': 'visitor_data'}
|
||||||
|
|
||||||
def test_default_ttl(self, ie, logger, pot_request):
|
def test_default_ttl(self, ie, logger, pot_request):
|
||||||
pcs = WebPoPCSP(ie=ie, logger=logger, settings={})
|
pcs = WebPoPCSP(ie=ie, logger=logger, settings={})
|
||||||
|
|||||||
@ -5,6 +5,7 @@ from __future__ import annotations
|
|||||||
import base64
|
import base64
|
||||||
import contextlib
|
import contextlib
|
||||||
import enum
|
import enum
|
||||||
|
import re
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
from yt_dlp.extractor.youtube.pot.provider import PoTokenContext, PoTokenRequest
|
from yt_dlp.extractor.youtube.pot.provider import PoTokenContext, PoTokenRequest
|
||||||
@ -58,7 +59,7 @@ def _extract_visitor_id(visitor_data):
|
|||||||
with contextlib.suppress(Exception):
|
with contextlib.suppress(Exception):
|
||||||
visitor_id = base64.urlsafe_b64decode(urllib.parse.unquote_plus(visitor_data))[2:13].decode()
|
visitor_id = base64.urlsafe_b64decode(urllib.parse.unquote_plus(visitor_data))[2:13].decode()
|
||||||
# check that visitor id is all letters and numbers
|
# check that visitor id is exactly 11 characters, each a letter, number, '_' or '-'
|
||||||
if visitor_id.isalnum() and len(visitor_id) == 11:
|
if re.fullmatch(r'[A-Za-z0-9_-]{11}', visitor_id):
|
||||||
return visitor_id
|
return visitor_id
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user