From 72a4a46152ebb559eb603c0a2f50b940df31c67a Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Fri, 18 Apr 2025 17:21:26 +1200 Subject: [PATCH] refactor: provider _urlopen into _request_webpage, make pot_request optional --- test/test_pot/test_pot_framework.py | 43 ++++++++++++++++++++---- yt_dlp/extractor/youtube/_video.py | 2 +- yt_dlp/extractor/youtube/pot/README.md | 19 ++++++----- yt_dlp/extractor/youtube/pot/provider.py | 25 +++++++++----- 4 files changed, 65 insertions(+), 24 deletions(-) diff --git a/test/test_pot/test_pot_framework.py b/test/test_pot/test_pot_framework.py index fc78e996a0..fabfc92c09 100644 --- a/test/test_pot/test_pot_framework.py +++ b/test/test_pot/test_pot_framework.py @@ -149,7 +149,7 @@ class TestPoTokenProvider: with pytest.raises(PoTokenProviderRejectedRequest): provider.request_pot(pot_request) - def test_provider_urlopen(self, ie, logger, pot_request): + def test_provider_request_webpage(self, ie, logger, pot_request): provider = ExamplePTP(ie=ie, logger=logger, settings={}) cookiejar = YoutubeDLCookieJar() @@ -162,16 +162,17 @@ class TestPoTokenProvider: ie._downloader.urlopen = mock_urlopen - sent_request = provider._urlopen(pot_request, Request( + sent_request = provider._request_webpage(Request( 'https://example.com', - )) + ), pot_request=pot_request) assert sent_request.url == 'https://example.com' assert sent_request.headers['User-Agent'] == 'example-user-agent' assert sent_request.proxies == {'all': 'socks5://example-proxy.com'} assert sent_request.extensions['cookiejar'] is cookiejar + assert 'Requesting webpage' in logger.messages['info'] - def test_provider_urlopen_override(self, ie, logger, pot_request): + def test_provider_request_webpage_override(self, ie, logger, pot_request): provider = ExamplePTP(ie=ie, logger=logger, settings={}) cookiejar_request = YoutubeDLCookieJar() @@ -184,17 +185,47 @@ class TestPoTokenProvider: ie._downloader.urlopen = mock_urlopen - sent_request = provider._urlopen(pot_request, Request( + sent_request = provider._request_webpage(Request( 'https://example.com', headers={'User-Agent': 'override-user-agent-override'}, proxies={'http': 'http://example-proxy-override.com'}, extensions={'cookiejar': YoutubeDLCookieJar()}, - )) + ), pot_request=pot_request, note='Custom requesting webpage') assert sent_request.url == 'https://example.com' assert sent_request.headers['User-Agent'] == 'override-user-agent-override' assert sent_request.proxies == {'http': 'http://example-proxy-override.com'} assert sent_request.extensions['cookiejar'] is not cookiejar_request + assert 'Custom requesting webpage' in logger.messages['info'] + + def test_provider_request_webpage_no_log(self, ie, logger, pot_request): + provider = ExamplePTP(ie=ie, logger=logger, settings={}) + + def mock_urlopen(request): + return request + + ie._downloader.urlopen = mock_urlopen + + sent_request = provider._request_webpage(Request( + 'https://example.com', + ), note=False) + + assert sent_request.url == 'https://example.com' + assert 'info' not in logger.messages + + def test_provider_request_webpage_no_pot_request(self, ie, logger): + provider = ExamplePTP(ie=ie, logger=logger, settings={}) + + def mock_urlopen(request): + return request + + ie._downloader.urlopen = mock_urlopen + + sent_request = provider._request_webpage(Request( + 'https://example.com', + ), pot_request=None) + + assert sent_request.url == 'https://example.com' def test_get_config_arg(self, ie, logger): provider = ExamplePTP(ie=ie, logger=logger, settings={'abc': ['123D'], 'xyz': ['456a', '789B']}) diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index c391f64d48..4013898c48 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -2943,7 +2943,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self.write_debug(f'{kwargs.get("video_id")}: No {pot_request.context.value} PO Token available for {client} client') return - self.write_debug(f'{kwargs.get("video_id")}: Fetched a {pot_request.context.value} PO Token for {client} client') + self.write_debug(f'{kwargs.get("video_id")}: Retrieved a {pot_request.context.value} PO Token for {client} client') return po_token @staticmethod diff --git a/yt_dlp/extractor/youtube/pot/README.md b/yt_dlp/extractor/youtube/pot/README.md index 033acc35a6..19de1eebcb 100644 --- a/yt_dlp/extractor/youtube/pot/README.md +++ b/yt_dlp/extractor/youtube/pot/README.md @@ -89,16 +89,16 @@ class MyPoTokenProviderPTP(PoTokenProvider): # Provider name must end with "PTP # ℹ️ Settings are pulled from extractor args passed to yt-dlp with the key `youtubepot-`. # For this example, the extractor arg would be `--extractor-args "youtubepot-mypotokenprovider:url=https://custom.example.com/get_pot"` external_provider_url = self._configuration_arg('url', default=['https://provider.example.com/get_pot'])[0] - + + # See below for logging guidelines + self.logger.trace(f'Using external provider URL: {external_provider_url}') + # You should use the internal HTTP client to make requests where possible, # as it will handle cookies and other networking settings passed to yt-dlp. try: - # See below for logging guidelines - self.logger.info(f'Requesting {request.context.value} PO Token for {request.internal_client_name} client from external provider') - - # See docstring in _urlopen method for request tips - response = self._urlopen( - request, Request(external_provider_url, data=json.dumps({ + # See docstring in _request_webpage method for request tips + response = self._request_webpage( + Request(external_provider_url, data=json.dumps({ 'content_binding': get_webpo_content_binding(request), 'proxy': request.request_proxy, 'headers': request.request_headers, @@ -107,7 +107,10 @@ class MyPoTokenProviderPTP(PoTokenProvider): # Provider name must end with "PTP # Important: If your provider has its own caching, please respect `bypass_cache`. # This may be used in the future to request a fresh PO Token if required. 'do_not_cache': request.bypass_cache, - }).encode(), proxies={'all': None})) + }).encode(), proxies={'all': None}), + pot_request=request, + note=f'Requesting {request.context.value} PO Token for {request.internal_client_name} client from external provider', + ) except RequestError as e: # ℹ️ If there is an error, raise PoTokenProviderError. diff --git a/yt_dlp/extractor/youtube/pot/provider.py b/yt_dlp/extractor/youtube/pot/provider.py index 94ed30dcd5..6cddb46a5d 100644 --- a/yt_dlp/extractor/youtube/pot/provider.py +++ b/yt_dlp/extractor/youtube/pot/provider.py @@ -17,7 +17,7 @@ from yt_dlp.extractor.youtube.pot._provider import ( register_provider_generic, ) from yt_dlp.extractor.youtube.pot._registry import _pot_providers, _ptp_preferences -from yt_dlp.networking import Request +from yt_dlp.networking import Request, Response from yt_dlp.utils import traverse_obj from yt_dlp.utils.networking import HTTPHeaderDict @@ -135,27 +135,34 @@ class PoTokenProvider(IEContentProvider, abc.ABC, suffix='PTP'): # Helper functions - def _urlopen(self, pot_request: PoTokenRequest, http_request: Request): - """Make a request using the request parameters from the PoTokenRequest. - Use this instead of calling requests, urllib3 or other HTTP client libraries directly!! + def _request_webpage(self, request: Request, pot_request: PoTokenRequest | None = None, note=None, **kwargs) -> Response: + """Make a request using the internal HTTP Client. + Use this instead of calling requests, urllib3 or other HTTP client libraries directly! YouTube cookies will be automatically applied if this request is made to YouTube. + @param request: The request to make + @param pot_request: The PoTokenRequest to use. Request parameters will be merged from it. + @param note: Custom log message to display when making the request. Set to `False` to disable logging. + Tips: - Disable proxy (e.g. if calling local service): Request(..., proxies={'all': None}) - Set request timeout: Request(..., extensions={'timeout': 5.0}) """ - req = http_request.copy() + req = request.copy() # Merge some ctx request settings into the request # Most of these will already be used by the configured ydl instance, # however, the YouTube extractor may override some. - req.headers = HTTPHeaderDict(pot_request.request_headers, req.headers) - req.proxies = req.proxies or ({'all': pot_request.request_proxy} if pot_request.request_proxy else {}) + if pot_request is not None: + req.headers = HTTPHeaderDict(pot_request.request_headers, req.headers) + req.proxies = req.proxies or ({'all': pot_request.request_proxy} if pot_request.request_proxy else {}) - if pot_request.request_cookiejar is not None: - req.extensions['cookiejar'] = req.extensions.get('cookiejar', pot_request.request_cookiejar) + if pot_request.request_cookiejar is not None: + req.extensions['cookiejar'] = req.extensions.get('cookiejar', pot_request.request_cookiejar) + if note is not False: + self.logger.info(str(note) if note else 'Requesting webpage') return self.ie._downloader.urlopen(req)