Compare commits

...

2 Commits

Author SHA1 Message Date
Simon Sawicki
abade83f8d
[cleanup] Bump ruff to 0.15.x (#15951)
Authored by: Grub4K
2026-02-16 20:11:02 +00:00
bashonly
43229d1d5f
[cookies] Ignore cookies with control characters (#15862)
http.cookies.Morsel was patched in Python 3.14.3 and 3.13.12
to raise a CookieError if the cookie name, value or any attribute
of its input contains a control character.

yt_dlp.cookies.LenientSimpleCookie now preemptively discards
any cookies containing control characters, which is consistent
with its more lenient parsing.

Ref: https://github.com/python/cpython/issues/143919

Closes #15849
Authored by: bashonly, syphyr

Co-authored-by: syphyr <syphyr@gmail.com>
2026-02-16 19:59:34 +00:00
9 changed files with 82 additions and 26 deletions

View File

@ -85,7 +85,7 @@ dev = [
]
static-analysis = [
"autopep8~=2.0",
"ruff~=0.14.0",
"ruff~=0.15.0",
]
test = [
"pytest~=8.1",

View File

@ -294,7 +294,7 @@ def expect_info_dict(self, got_dict, expected_dict):
missing_keys = sorted(
test_info_dict.keys() - expected_dict.keys(),
key=lambda x: ALLOWED_KEYS_SORT_ORDER.index(x))
key=ALLOWED_KEYS_SORT_ORDER.index)
if missing_keys:
def _repr(v):
if isinstance(v, str):

View File

@ -205,8 +205,8 @@ class TestLenientSimpleCookie(unittest.TestCase):
),
(
'Test quoted cookie',
'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"',
{'keebler': 'E=mc2; L="Loves"; fudge=\012;'},
'keebler="E=mc2; L=\\"Loves\\"; fudge=;"',
{'keebler': 'E=mc2; L="Loves"; fudge=;'},
),
(
"Allow '=' in an unquoted value",
@ -328,4 +328,30 @@ class TestLenientSimpleCookie(unittest.TestCase):
'Key=Value; [Invalid]=Value; Another=Value',
{'Key': 'Value', 'Another': 'Value'},
),
# Ref: https://github.com/python/cpython/issues/143919
(
'Test invalid cookie name w/ control character',
'foo\012=bar;',
{},
),
(
'Test invalid cookie name w/ control character 2',
'foo\015baz=bar',
{},
),
(
'Test invalid cookie name w/ control character followed by valid cookie',
'foo\015=bar; x=y;',
{'x': 'y'},
),
(
'Test invalid cookie value w/ control character',
'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"',
{},
),
(
'Test invalid quoted attribute value w/ control character',
'Customer="WILE_E_COYOTE"; Version="1\\012"; Path="/acme"',
{},
),
)

View File

@ -1004,6 +1004,7 @@ class TestUrllibRequestHandler(TestRequestHandlerBase):
@pytest.mark.parametrize('handler', ['Requests'], indirect=True)
class TestRequestsRequestHandler(TestRequestHandlerBase):
# ruff: disable[PLW0108] `requests` and/or `urllib3` may not be available
@pytest.mark.parametrize('raised,expected', [
(lambda: requests.exceptions.ConnectTimeout(), TransportError),
(lambda: requests.exceptions.ReadTimeout(), TransportError),
@ -1017,8 +1018,10 @@ class TestRequestsRequestHandler(TestRequestHandlerBase):
# catch-all: https://github.com/psf/requests/blob/main/src/requests/adapters.py#L535
(lambda: urllib3.exceptions.HTTPError(), TransportError),
(lambda: requests.exceptions.RequestException(), RequestError),
# (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object
# Needs a response object
# (lambda: requests.exceptions.TooManyRedirects(), HTTPError),
])
# ruff: enable[PLW0108]
def test_request_error_mapping(self, handler, monkeypatch, raised, expected):
with handler() as rh:
def mock_get_instance(*args, **kwargs):
@ -1034,6 +1037,7 @@ class TestRequestsRequestHandler(TestRequestHandlerBase):
assert exc_info.type is expected
# ruff: disable[PLW0108] `urllib3` may not be available
@pytest.mark.parametrize('raised,expected,match', [
(lambda: urllib3.exceptions.SSLError(), SSLError, None),
(lambda: urllib3.exceptions.TimeoutError(), TransportError, None),
@ -1052,6 +1056,7 @@ class TestRequestsRequestHandler(TestRequestHandlerBase):
'3 bytes read, 5 more expected',
),
])
# ruff: enable[PLW0108]
def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
from requests.models import Response as RequestsResponse
from urllib3.response import HTTPResponse as Urllib3Response

View File

@ -239,6 +239,7 @@ class TestTraversal:
'accept matching `expected_type` type'
assert traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int) is None, \
'reject non matching `expected_type` type'
# ruff: noqa: PLW0108 `type`s get special treatment, so wrap in lambda
assert traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)) == '0', \
'transform type using type function'
assert traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0) is None, \

View File

@ -448,6 +448,7 @@ def create_fake_ws_connection(raised):
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
class TestWebsocketsRequestHandler:
# ruff: disable[PLW0108] `websockets` may not be available
@pytest.mark.parametrize('raised,expected', [
# https://websockets.readthedocs.io/en/stable/reference/exceptions.html
(lambda: websockets.exceptions.InvalidURI(msg='test', uri='test://'), RequestError),
@ -459,13 +460,14 @@ class TestWebsocketsRequestHandler:
(lambda: websockets.exceptions.NegotiationError(), TransportError),
# Catch-all
(lambda: websockets.exceptions.WebSocketException(), TransportError),
(lambda: TimeoutError(), TransportError),
(TimeoutError, TransportError),
# These may be raised by our create_connection implementation, which should also be caught
(lambda: OSError(), TransportError),
(lambda: ssl.SSLError(), SSLError),
(lambda: ssl.SSLCertVerificationError(), CertificateVerifyError),
(lambda: socks.ProxyError(), ProxyError),
(OSError, TransportError),
(ssl.SSLError, SSLError),
(ssl.SSLCertVerificationError, CertificateVerifyError),
(socks.ProxyError, ProxyError),
])
# ruff: enable[PLW0108]
def test_request_error_mapping(self, handler, monkeypatch, raised, expected):
import websockets.sync.client
@ -482,11 +484,12 @@ class TestWebsocketsRequestHandler:
@pytest.mark.parametrize('raised,expected,match', [
# https://websockets.readthedocs.io/en/stable/reference/sync/client.html#websockets.sync.client.ClientConnection.send
(lambda: websockets.exceptions.ConnectionClosed(None, None), TransportError, None),
(lambda: RuntimeError(), TransportError, None),
(lambda: TimeoutError(), TransportError, None),
(lambda: TypeError(), RequestError, None),
(lambda: socks.ProxyError(), ProxyError, None),
(RuntimeError, TransportError, None),
(TimeoutError, TransportError, None),
(TypeError, RequestError, None),
(socks.ProxyError, ProxyError, None),
# Catch-all
# ruff: noqa: PLW0108 `websockets` may not be available
(lambda: websockets.exceptions.WebSocketException(), TransportError, None),
])
def test_ws_send_error_mapping(self, handler, monkeypatch, raised, expected, match):
@ -499,10 +502,11 @@ class TestWebsocketsRequestHandler:
@pytest.mark.parametrize('raised,expected,match', [
# https://websockets.readthedocs.io/en/stable/reference/sync/client.html#websockets.sync.client.ClientConnection.recv
(lambda: websockets.exceptions.ConnectionClosed(None, None), TransportError, None),
(lambda: RuntimeError(), TransportError, None),
(lambda: TimeoutError(), TransportError, None),
(lambda: socks.ProxyError(), ProxyError, None),
(RuntimeError, TransportError, None),
(TimeoutError, TransportError, None),
(socks.ProxyError, ProxyError, None),
# Catch-all
# ruff: noqa: PLW0108 `websockets` may not be available
(lambda: websockets.exceptions.WebSocketException(), TransportError, None),
])
def test_ws_recv_error_mapping(self, handler, monkeypatch, raised, expected, match):

View File

@ -1168,6 +1168,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
# We use Morsel's legal key chars to avoid errors on setting values
_LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
_LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')
_LEGAL_KEY_RE = re.compile(rf'[{_LEGAL_KEY_CHARS}]+', re.ASCII)
_RESERVED = {
'expires',
@ -1185,17 +1186,17 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
# Added 'bad' group to catch the remaining value
_COOKIE_PATTERN = re.compile(r'''
\s* # Optional whitespace at start of cookie
[ ]* # Optional whitespace at start of cookie
(?P<key> # Start of group 'key'
[''' + _LEGAL_KEY_CHARS + r''']+?# Any word of at least one letter
[^ =;]+ # Match almost anything here for now and validate later
) # End of group 'key'
( # Optional group: there may not be a value.
\s*=\s* # Equal Sign
[ ]*=[ ]* # Equal Sign
( # Start of potential value
(?P<val> # Start of group 'val'
"(?:[^\\"]|\\.)*" # Any doublequoted string
| # or
\w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
\w{3},\ [\w\d -]{9,11}\ [\d:]{8}\ GMT # Special case for "expires" attr
| # or
[''' + _LEGAL_VALUE_CHARS + r''']* # Any word or empty string
) # End of group 'val'
@ -1203,10 +1204,14 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
(?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values
) # End of potential value
)? # End of optional value group
\s* # Any number of spaces.
(\s+|;|$) # Ending either at space, semicolon, or EOS.
[ ]* # Any number of spaces.
([ ]+|;|$) # Ending either at space, semicolon, or EOS.
''', re.ASCII | re.VERBOSE)
# http.cookies.Morsel raises on values w/ control characters in Python 3.14.3+ & 3.13.12+
# Ref: https://github.com/python/cpython/issues/143919
_CONTROL_CHARACTER_RE = re.compile(r'[\x00-\x1F\x7F]')
def load(self, data):
# Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
if not isinstance(data, str):
@ -1219,6 +1224,9 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
continue
key, value = match.group('key', 'val')
if not self._LEGAL_KEY_RE.fullmatch(key):
morsel = None
continue
is_attribute = False
if key.startswith('$'):
@ -1237,6 +1245,14 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
value = True
else:
value, _ = self.value_decode(value)
# Guard against control characters in quoted attribute values
if self._CONTROL_CHARACTER_RE.search(value):
# While discarding the entire morsel is not very lenient,
# it's better than http.cookies.Morsel raising a CookieError
# and it's probably better to err on the side of caution
self.pop(morsel.key, None)
morsel = None
continue
morsel[key] = value
@ -1246,6 +1262,10 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
elif value is not None:
morsel = self.get(key, http.cookies.Morsel())
real_value, coded_value = self.value_decode(value)
# Guard against control characters in quoted cookie values
if self._CONTROL_CHARACTER_RE.search(real_value):
morsel = None
continue
morsel.set(key, real_value, coded_value)
self[key] = morsel

View File

@ -124,7 +124,7 @@ class BilibiliBaseIE(InfoExtractor):
**traverse_obj(play_info, {
'quality': ('quality', {int_or_none}),
'format_id': ('quality', {str_or_none}),
'format_note': ('quality', {lambda x: format_names.get(x)}),
'format_note': ('quality', {format_names.get}),
'duration': ('timelength', {float_or_none(scale=1000)}),
}),
**parse_resolution(format_names.get(play_info.get('quality'))),

View File

@ -146,8 +146,8 @@ class SBSIE(InfoExtractor):
'release_year': ('releaseYear', {int_or_none}),
'duration': ('duration', ({float_or_none}, {parse_duration})),
'is_live': ('liveStream', {bool}),
'age_limit': (('classificationID', 'contentRating'), {str.upper}, {
lambda x: self._AUS_TV_PARENTAL_GUIDELINES.get(x)}), # dict.get is unhashable in py3.7
'age_limit': (
('classificationID', 'contentRating'), {str.upper}, {self._AUS_TV_PARENTAL_GUIDELINES.get}),
}, get_all=False),
**traverse_obj(media, {
'categories': (('genres', ...), ('taxonomy', ('genre', 'subgenre'), 'name'), {str}),