Compare commits

...

6 Commits

Author SHA1 Message Date
7x11x13
5c1f6e7329 Remove --filesystem-encoding 2025-01-08 15:06:14 -05:00
7x11x13
8063b142d9 Remove --filesystem-encoding option 2025-01-08 15:00:02 -05:00
7x11x13
ac69f9474b Refactor, update tests 2025-01-08 13:29:18 -05:00
7x11x13
25dca22cb7 Trim all path parts 2025-01-08 13:18:43 -05:00
7x11x13
8949a4fef0 Refactor, make default notrim 2025-01-08 13:03:15 -05:00
gavin
ba9cfb23f2
Update yt_dlp/YoutubeDL.py
Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com>
2025-01-08 11:57:48 -05:00
5 changed files with 29 additions and 54 deletions

View File

@ -2,6 +2,7 @@
# Allow direct execution # Allow direct execution
import os import os
import platform
import sys import sys
import unittest import unittest
from unittest.mock import patch from unittest.mock import patch
@ -714,7 +715,7 @@ class TestYoutubeDL(unittest.TestCase):
def test_prepare_outtmpl_and_filename(self): def test_prepare_outtmpl_and_filename(self):
def test(tmpl, expected, *, info=None, **params): def test(tmpl, expected, *, info=None, **params):
if 'trim_file_name' not in params: if 'trim_file_name' not in params:
params['trim_file_name'] = 0 # disable trimming params['trim_file_name'] = 'notrim' # disable trimming
params['outtmpl'] = tmpl params['outtmpl'] = tmpl
ydl = FakeYDL(params) ydl = FakeYDL(params)
ydl._num_downloads = 1 ydl._num_downloads = 1
@ -930,12 +931,16 @@ class TestYoutubeDL(unittest.TestCase):
# --trim-filenames # --trim-filenames
test('%(title6)s.%(ext)s', 'あ' * 10 + '.mp4') test('%(title6)s.%(ext)s', 'あ' * 10 + '.mp4')
test('%(title6)s.%(ext)s', 'あ' * 3 + '.mp4', trim_file_name='3c') test('%(title6)s.%(ext)s', 'あ' * 3 + '.mp4', trim_file_name='3c')
test('%(title6)s.%(ext)s', 'あ' * 3 + '.mp4', trim_file_name='9b', filesystem_encoding='utf-8') if sys.getfilesystemencoding() == 'utf-8' and platform.system() != 'Windows':
test('%(title6)s.%(ext)s', 'あ' * 3 + '.mp4', trim_file_name='10b', filesystem_encoding='utf-8') test('%(title6)s.%(ext)s', 'あ' * 3 + '.mp4', trim_file_name='9b')
test('%(title6)s.%(ext)s', 'あ' * 3 + '.mp4', trim_file_name='11b', filesystem_encoding='utf-8') test('%(title6)s.%(ext)s', 'あ' * 3 + '.mp4', trim_file_name='10b')
test('%(title6)s.%(ext)s', 'あ' * 4 + '.mp4', trim_file_name='12b', filesystem_encoding='utf-8') test('%(title6)s.%(ext)s', 'あ' * 3 + '.mp4', trim_file_name='11b')
test('%(title6)s.%(ext)s', 'あ' * 6 + '.mp4', trim_file_name='12b', filesystem_encoding='utf-16le') test('%(title6)s.%(ext)s', 'あ' * 4 + '.mp4', trim_file_name='12b')
test('folder/%(title6)s.%(ext)s', f'folder{os.path.sep}あああ.mp4', trim_file_name='3c') elif platform.system() == 'Windows':
test('%(title6)s.%(ext)s', 'あ' * 4 + '.mp4', trim_file_name='8b')
test('%(title6)s.%(ext)s', 'あ' * 4 + '.mp4', trim_file_name='9b')
test('%(title6)s.%(ext)s', 'あ' * 5 + '.mp4', trim_file_name='10b')
test('folder/%(title6)s.%(ext)s', f'fol{os.path.sep}あああ.mp4', trim_file_name='3c')
def test_format_note(self): def test_format_note(self):
ydl = YoutubeDL() ydl = YoutubeDL()

View File

@ -12,6 +12,8 @@ import json
import locale import locale
import operator import operator
import os import os
from pathlib import Path
import platform
import random import random
import re import re
import shutil import shutil
@ -70,7 +72,6 @@ from .update import (
) )
from .utils import ( from .utils import (
DEFAULT_OUTTMPL, DEFAULT_OUTTMPL,
DEFAULT_MAX_FILE_NAME,
IDENTITY, IDENTITY,
LINK_TEMPLATES, LINK_TEMPLATES,
MEDIA_EXTENSIONS, MEDIA_EXTENSIONS,
@ -267,7 +268,6 @@ class YoutubeDL:
outtmpl_na_placeholder: Placeholder for unavailable meta fields. outtmpl_na_placeholder: Placeholder for unavailable meta fields.
restrictfilenames: Do not allow "&" and spaces in file names restrictfilenames: Do not allow "&" and spaces in file names
trim_file_name: Limit length of filename (extension excluded) trim_file_name: Limit length of filename (extension excluded)
filesystem_encoding: Encoding to use when calculating filename length in bytes
windowsfilenames: True: Force filenames to be Windows compatible windowsfilenames: True: Force filenames to be Windows compatible
False: Sanitize filenames only minimally False: Sanitize filenames only minimally
This option has no effect when running on Windows This option has no effect when running on Windows
@ -1430,7 +1430,7 @@ class YoutubeDL:
if not trim_filename: if not trim_filename:
return self.escape_outtmpl(outtmpl) % info_dict return self.escape_outtmpl(outtmpl) % info_dict
ext_suffix = '.%(ext\x00s)s' # not sure why this has null char ext_suffix = '.%(ext\0s)s'
suffix = '' suffix = ''
if outtmpl.endswith(ext_suffix): if outtmpl.endswith(ext_suffix):
outtmpl = outtmpl[:-len(ext_suffix)] outtmpl = outtmpl[:-len(ext_suffix)]
@ -1438,49 +1438,28 @@ class YoutubeDL:
outtmpl = self.escape_outtmpl(outtmpl) outtmpl = self.escape_outtmpl(outtmpl)
filename = outtmpl % info_dict filename = outtmpl % info_dict
def parse_max_file_name(max_file_name: str): def parse_trim_file_name(trim_file_name):
# old --trim-filenames format if trim_file_name is None or trim_file_name == 'notrim':
try: return 0, None
return 'c', int(max_file_name) mobj = re.match(r'(?:(?P<length>\d+)(?P<mode>b|c)?|notrim)', trim_file_name)
except ValueError: return int(mobj.group('length')), mobj.group('mode') or 'c'
pass
try: max_file_name, mode = parse_trim_file_name(self.params.get('trim_file_name'))
max_length = int(max_file_name[:-1])
except ValueError:
raise ValueError('Invalid --trim-filenames specified')
if max_file_name[-1].lower() == 'c':
return 'c', max_length
elif max_file_name[-1].lower() == 'b':
return 'b', max_length
else:
raise ValueError("--trim-filenames must end with 'b' or 'c'")
max_file_name = self.params.get('trim_file_name')
if max_file_name is None:
max_file_name = DEFAULT_MAX_FILE_NAME
mode, max_file_name = parse_max_file_name(max_file_name)
if max_file_name < 0:
raise ValueError('Invalid --trim-filenames specified')
if max_file_name == 0: if max_file_name == 0:
# no maximum # no maximum
return filename + suffix return filename + suffix
encoding = self.params.get('filesystem_encoding') or sys.getfilesystemencoding() encoding = sys.getfilesystemencoding() if platform.system() != 'Windows' else 'utf-16-le'
def trim_filename(name: str, length: int): def trim_filename(name: str):
if mode == 'b': if mode == 'b':
name = name.encode(encoding) name = name.encode(encoding)
name = name[:length] name = name[:max_file_name]
return name.decode(encoding, 'ignore') return name.decode(encoding, 'ignore')
else: else:
return name[:length] return name[:max_file_name]
# only trim last component of path - assume the directories are valid names filename = os.path.join(*map(trim_filename, Path(filename).parts))
head, tail = os.path.split(filename)
tail = trim_filename(tail, max_file_name)
filename = os.path.join(head, tail)
return filename + suffix return filename + suffix
@_catch_unsafe_extension_error @_catch_unsafe_extension_error

View File

@ -429,6 +429,8 @@ def validate_options(opts):
} }
# Other options # Other options
validate_regex('trim filenames', opts.trim_file_name, r'(?:\d+[bc]?|notrim)')
if opts.playlist_items is not None: if opts.playlist_items is not None:
try: try:
tuple(PlaylistEntries.parse_playlist_items(opts.playlist_items)) tuple(PlaylistEntries.parse_playlist_items(opts.playlist_items))
@ -886,7 +888,6 @@ def parse_options(argv=None):
'max_downloads': opts.max_downloads, 'max_downloads': opts.max_downloads,
'prefer_free_formats': opts.prefer_free_formats, 'prefer_free_formats': opts.prefer_free_formats,
'trim_file_name': opts.trim_file_name, 'trim_file_name': opts.trim_file_name,
'filesystem_encoding': opts.filesystem_encoding,
'verbose': opts.verbose, 'verbose': opts.verbose,
'dump_intermediate_pages': opts.dump_intermediate_pages, 'dump_intermediate_pages': opts.dump_intermediate_pages,
'write_pages': opts.write_pages, 'write_pages': opts.write_pages,

View File

@ -1378,12 +1378,8 @@ def create_parser():
help='Sanitize filenames only minimally') help='Sanitize filenames only minimally')
filesystem.add_option( filesystem.add_option(
'--trim-filenames', '--trim-file-names', metavar='LENGTH', '--trim-filenames', '--trim-file-names', metavar='LENGTH',
dest='trim_file_name', dest='trim_file_name', default='notrim',
help='Limit the filename length (excluding extension) to the specified number of characters or bytes') help='Limit the filename length (excluding extension) to the specified number of characters or bytes')
filesystem.add_option(
'--filesystem-encoding', metavar='ENCODING',
dest='filesystem_encoding',
help='Override filesystem encoding used when calculating filename length in bytes')
filesystem.add_option( filesystem.add_option(
'-w', '--no-overwrites', '-w', '--no-overwrites',
action='store_false', dest='overwrites', default=None, action='store_false', dest='overwrites', default=None,

View File

@ -2852,12 +2852,6 @@ OUTTMPL_TYPES = {
'pl_infojson': 'info.json', 'pl_infojson': 'info.json',
} }
# https://en.m.wikipedia.org/wiki/Comparison_of_file_systems#Limits
if platform.system() in ('Darwin', 'Windows'):
DEFAULT_MAX_FILE_NAME = f'{255 - len(".annotations.xml")}c'
else:
DEFAULT_MAX_FILE_NAME = f'{255 - len(".annotations.xml".encode(sys.getfilesystemencoding()))}b'
# As of [1] format syntax is: # As of [1] format syntax is:
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting