Compare commits

...

3 Commits

Author SHA1 Message Date
Simon Sawicki
c316416b97
[rh:requests] Do not allocate 2GB on read (#13603)
Fixes c2ff2dbaec7929015373fe002e9bd4849931a4ce

Authored by: Grub4K
2025-07-02 01:42:00 +02:00
Simon Sawicki
e99c0b838a
[ie] Detect invalid m3u8 playlist data (#13601)
Authored by: Grub4K
2025-07-02 00:32:32 +02:00
Simon Sawicki
c2ff2dbaec
[rh:requests] Work around partial read dropping data (#13599)
Authored by: Grub4K
2025-07-02 00:12:43 +02:00
4 changed files with 93 additions and 15 deletions

View File

@ -36,6 +36,18 @@ class InfoExtractorTestRequestHandler(http.server.BaseHTTPRequestHandler):
self.send_header('Content-Type', 'text/html; charset=utf-8')
self.end_headers()
self.wfile.write(TEAPOT_RESPONSE_BODY.encode())
elif self.path == '/fake.m3u8':
self.send_response(200)
self.send_header('Content-Length', '1024')
self.end_headers()
self.wfile.write(1024 * b'\x00')
elif self.path == '/bipbop.m3u8':
with open('test/testdata/m3u8/bipbop_16x9.m3u8', 'rb') as f:
data = f.read()
self.send_response(200)
self.send_header('Content-Length', str(len(data)))
self.end_headers()
self.wfile.write(data)
else:
assert False
@ -2079,5 +2091,45 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
self.ie._search_nuxt_json(HTML_TMPL.format(data), None, default=DEFAULT), DEFAULT)
class TestInfoExtractorNetwork(unittest.TestCase):
def setUp(self, /):
self.httpd = http.server.HTTPServer(
('127.0.0.1', 0), InfoExtractorTestRequestHandler)
self.port = http_server_port(self.httpd)
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
self.server_thread.daemon = True
self.server_thread.start()
self.called = False
def require_warning(*args, **kwargs):
self.called = True
self.ydl = FakeYDL()
self.ydl.report_warning = require_warning
self.ie = DummyIE(self.ydl)
def tearDown(self, /):
self.ydl.close()
self.httpd.shutdown()
self.httpd.server_close()
self.server_thread.join(1)
def test_extract_m3u8_formats(self):
formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles(
f'http://127.0.0.1:{self.port}/bipbop.m3u8', None, fatal=False)
self.assertFalse(self.called)
self.assertTrue(formats)
self.assertTrue(subtitles)
def test_extract_m3u8_formats_warning(self):
formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles(
f'http://127.0.0.1:{self.port}/fake.m3u8', None, fatal=False)
self.assertTrue(self.called, 'Warning was not issued for binary m3u8 file')
self.assertFalse(formats)
self.assertFalse(subtitles)
if __name__ == '__main__':
unittest.main()

View File

@ -22,7 +22,6 @@ import ssl
import tempfile
import threading
import time
import urllib.error
import urllib.request
import warnings
import zlib
@ -223,10 +222,7 @@ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
if encoding == 'br' and brotli:
payload = brotli.compress(payload)
elif encoding == 'gzip':
buf = io.BytesIO()
with gzip.GzipFile(fileobj=buf, mode='wb') as f:
f.write(payload)
payload = buf.getvalue()
payload = gzip.compress(payload, mtime=0)
elif encoding == 'deflate':
payload = zlib.compress(payload)
elif encoding == 'unsupported':
@ -729,6 +725,17 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
assert 'X-test-heaDer: test' in res
def test_partial_read_then_full_read(self, handler):
with handler() as rh:
for encoding in ('', 'gzip', 'deflate'):
res = validate_and_send(rh, Request(
f'http://127.0.0.1:{self.http_port}/content-encoding',
headers={'ytdl-encoding': encoding}))
assert res.headers.get('Content-Encoding') == encoding
assert res.read(6) == b'<html>'
assert res.read(0) == b''
assert res.read() == b'<video src="/vid.mp4" /></html>'
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
class TestClientCertificate:

View File

@ -1,5 +1,6 @@
import base64
import collections
import contextlib
import functools
import getpass
import http.client
@ -2129,21 +2130,33 @@ class InfoExtractor:
raise ExtractorError(errnote, video_id=video_id)
self.report_warning(f'{errnote}{bug_reports_message()}')
return [], {}
res = self._download_webpage_handle(
m3u8_url, video_id,
note='Downloading m3u8 information' if note is None else note,
errnote='Failed to download m3u8 information' if errnote is None else errnote,
if note is None:
note = 'Downloading m3u8 information'
if errnote is None:
errnote = 'Failed to download m3u8 information'
response = self._request_webpage(
m3u8_url, video_id, note=note, errnote=errnote,
fatal=fatal, data=data, headers=headers, query=query)
if res is False:
if response is False:
return [], {}
m3u8_doc, urlh = res
m3u8_url = urlh.url
with contextlib.closing(response):
prefix = response.read(512)
if not prefix.startswith(b'#EXTM3U'):
msg = 'Response data has no m3u header'
if fatal:
raise ExtractorError(msg, video_id=video_id)
self.report_warning(f'{msg}{bug_reports_message()}', video_id=video_id)
return [], {}
content = self._webpage_read_content(
response, m3u8_url, video_id, note=note, errnote=errnote,
fatal=fatal, prefix=prefix, data=data)
if content is False:
return [], {}
return self._parse_m3u8_formats_and_subtitles(
m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
content, response.url, ext=ext, entry_protocol=entry_protocol,
preference=preference, quality=quality, m3u8_id=m3u8_id,
note=note, errnote=errnote, fatal=fatal, live=live, data=data,
headers=headers, query=query, video_id=video_id)

View File

@ -140,6 +140,12 @@ class RequestsResponseAdapter(Response):
def read(self, amt: int | None = None):
try:
# Work around issue with `.read(amt)` then `.read()`
# See: https://github.com/urllib3/urllib3/issues/3636
if amt is None:
# Python 3.9 preallocates the whole read buffer, read in chunks
read_chunk = functools.partial(self.fp.read, 1 << 20, decode_content=True)
return b''.join(iter(read_chunk, b''))
# Interact with urllib3 response directly.
return self.fp.read(amt, decode_content=True)