Compare commits

..

No commits in common. "5c1f6e732927765d1463de991dab52837d2fc393" and "200d27682d8c624c308eb3a3ad814fe499dbe71f" have entirely different histories.

5 changed files with 54 additions and 29 deletions

View File

@ -2,7 +2,6 @@
# Allow direct execution
import os
import platform
import sys
import unittest
from unittest.mock import patch
@ -715,7 +714,7 @@ class TestYoutubeDL(unittest.TestCase):
def test_prepare_outtmpl_and_filename(self):
def test(tmpl, expected, *, info=None, **params):
if 'trim_file_name' not in params:
params['trim_file_name'] = 'notrim' # disable trimming
params['trim_file_name'] = 0 # disable trimming
params['outtmpl'] = tmpl
ydl = FakeYDL(params)
ydl._num_downloads = 1
@ -931,16 +930,12 @@ class TestYoutubeDL(unittest.TestCase):
# --trim-filenames
test('%(title6)s.%(ext)s', '' * 10 + '.mp4')
test('%(title6)s.%(ext)s', '' * 3 + '.mp4', trim_file_name='3c')
if sys.getfilesystemencoding() == 'utf-8' and platform.system() != 'Windows':
test('%(title6)s.%(ext)s', '' * 3 + '.mp4', trim_file_name='9b')
test('%(title6)s.%(ext)s', '' * 3 + '.mp4', trim_file_name='10b')
test('%(title6)s.%(ext)s', '' * 3 + '.mp4', trim_file_name='11b')
test('%(title6)s.%(ext)s', '' * 4 + '.mp4', trim_file_name='12b')
elif platform.system() == 'Windows':
test('%(title6)s.%(ext)s', '' * 4 + '.mp4', trim_file_name='8b')
test('%(title6)s.%(ext)s', '' * 4 + '.mp4', trim_file_name='9b')
test('%(title6)s.%(ext)s', '' * 5 + '.mp4', trim_file_name='10b')
test('folder/%(title6)s.%(ext)s', f'fol{os.path.sep}あああ.mp4', trim_file_name='3c')
test('%(title6)s.%(ext)s', '' * 3 + '.mp4', trim_file_name='9b', filesystem_encoding='utf-8')
test('%(title6)s.%(ext)s', '' * 3 + '.mp4', trim_file_name='10b', filesystem_encoding='utf-8')
test('%(title6)s.%(ext)s', '' * 3 + '.mp4', trim_file_name='11b', filesystem_encoding='utf-8')
test('%(title6)s.%(ext)s', '' * 4 + '.mp4', trim_file_name='12b', filesystem_encoding='utf-8')
test('%(title6)s.%(ext)s', '' * 6 + '.mp4', trim_file_name='12b', filesystem_encoding='utf-16le')
test('folder/%(title6)s.%(ext)s', f'folder{os.path.sep}あああ.mp4', trim_file_name='3c')
def test_format_note(self):
ydl = YoutubeDL()

View File

@ -12,8 +12,6 @@ import json
import locale
import operator
import os
from pathlib import Path
import platform
import random
import re
import shutil
@ -72,6 +70,7 @@ from .update import (
)
from .utils import (
DEFAULT_OUTTMPL,
DEFAULT_MAX_FILE_NAME,
IDENTITY,
LINK_TEMPLATES,
MEDIA_EXTENSIONS,
@ -268,6 +267,7 @@ class YoutubeDL:
outtmpl_na_placeholder: Placeholder for unavailable meta fields.
restrictfilenames: Do not allow "&" and spaces in file names
trim_file_name: Limit length of filename (extension excluded)
filesystem_encoding: Encoding to use when calculating filename length in bytes
windowsfilenames: True: Force filenames to be Windows compatible
False: Sanitize filenames only minimally
This option has no effect when running on Windows
@ -1430,7 +1430,7 @@ class YoutubeDL:
if not trim_filename:
return self.escape_outtmpl(outtmpl) % info_dict
ext_suffix = '.%(ext\0s)s'
ext_suffix = '.%(ext\x00s)s' # not sure why this has null char
suffix = ''
if outtmpl.endswith(ext_suffix):
outtmpl = outtmpl[:-len(ext_suffix)]
@ -1438,28 +1438,49 @@ class YoutubeDL:
outtmpl = self.escape_outtmpl(outtmpl)
filename = outtmpl % info_dict
def parse_trim_file_name(trim_file_name):
    """Parse a ``--trim-filenames`` value into a ``(length, mode)`` pair.

    Args:
        trim_file_name: ``None``, the literal ``'notrim'``, or a string made
            of decimal digits optionally followed by ``b`` or ``c``.

    Returns:
        ``(0, None)`` for ``None`` / ``'notrim'``; otherwise ``(length, mode)``
        where ``mode`` is the trailing letter, defaulting to ``'c'`` when the
        value carries none.

    Raises:
        ValueError: if the value does not start with a decimal length.
    """
    if trim_file_name is None or trim_file_name == 'notrim':
        return 0, None

    # re.match anchors only at the start, exactly as before, so text after the
    # length/mode is still ignored rather than rejected.
    mobj = re.match(r'(?P<length>\d+)(?P<mode>[bc])?', trim_file_name)
    if mobj is None:
        # Previously this path crashed with AttributeError (no match) or
        # TypeError (only the 'notrim' prefix matched, leaving length unset).
        raise ValueError(f'Invalid --trim-filenames value: {trim_file_name!r}')
    return int(mobj.group('length')), mobj.group('mode') or 'c'
# Parse a --trim-filenames value into a (mode, length) pair.
# mode is 'c' or 'b', taken from the value's final character (lower-cased);
# a value that int() accepts as-is is returned with mode 'c'.
# Raises ValueError when the part before the final character is not an
# integer, or when the final character is neither 'b' nor 'c'.
def parse_max_file_name(max_file_name: str):
# old --trim-filenames format
try:
return 'c', int(max_file_name)
except ValueError:
pass
# NOTE(review): the next line calls parse_trim_file_name and reads self.params
# in the middle of this helper; it looks like a line from the other side of
# the diff shown alongside the new code - confirm before relying on it.
max_file_name, mode = parse_trim_file_name(self.params.get('trim_file_name'))
try:
# everything except the final character must be the numeric length
max_length = int(max_file_name[:-1])
except ValueError:
raise ValueError('Invalid --trim-filenames specified')
# the final character selects the mode, case-insensitively
if max_file_name[-1].lower() == 'c':
return 'c', max_length
elif max_file_name[-1].lower() == 'b':
return 'b', max_length
else:
raise ValueError("--trim-filenames must end with 'b' or 'c'")
max_file_name = self.params.get('trim_file_name')
if max_file_name is None:
max_file_name = DEFAULT_MAX_FILE_NAME
mode, max_file_name = parse_max_file_name(max_file_name)
if max_file_name < 0:
raise ValueError('Invalid --trim-filenames specified')
if max_file_name == 0:
# no maximum
return filename + suffix
encoding = sys.getfilesystemencoding() if platform.system() != 'Windows' else 'utf-16-le'
encoding = self.params.get('filesystem_encoding') or sys.getfilesystemencoding()
# Cut `name` down to a maximum length and return it as str.
# When mode is 'b' the name is encoded with `encoding`, sliced as bytes and
# decoded again; in any other mode the str itself is sliced.
# NOTE(review): two signatures and two slice bounds (max_file_name vs. length)
# appear back to back below - presumably the before/after sides of the diff
# shown together; confirm which one applies.
def trim_filename(name: str):
def trim_filename(name: str, length: int):
if mode == 'b':
name = name.encode(encoding)
name = name[:max_file_name]
name = name[:length]
# 'ignore' discards bytes that no longer decode after the cut
return name.decode(encoding, 'ignore')
else:
return name[:max_file_name]
return name[:length]
filename = os.path.join(*map(trim_filename, Path(filename).parts))
# only trim last component of path - assume the directories are valid names
head, tail = os.path.split(filename)
tail = trim_filename(tail, max_file_name)
filename = os.path.join(head, tail)
return filename + suffix
@_catch_unsafe_extension_error

View File

@ -429,8 +429,6 @@ def validate_options(opts):
}
# Other options
validate_regex('trim filenames', opts.trim_file_name, r'(?:\d+[bc]?|notrim)')
if opts.playlist_items is not None:
try:
tuple(PlaylistEntries.parse_playlist_items(opts.playlist_items))
@ -888,6 +886,7 @@ def parse_options(argv=None):
'max_downloads': opts.max_downloads,
'prefer_free_formats': opts.prefer_free_formats,
'trim_file_name': opts.trim_file_name,
'filesystem_encoding': opts.filesystem_encoding,
'verbose': opts.verbose,
'dump_intermediate_pages': opts.dump_intermediate_pages,
'write_pages': opts.write_pages,

View File

@ -1378,8 +1378,12 @@ def create_parser():
help='Sanitize filenames only minimally')
filesystem.add_option(
'--trim-filenames', '--trim-file-names', metavar='LENGTH',
dest='trim_file_name', default='notrim',
dest='trim_file_name',
help='Limit the filename length (excluding extension) to the specified number of characters or bytes')
filesystem.add_option(
'--filesystem-encoding', metavar='ENCODING',
dest='filesystem_encoding',
help='Override filesystem encoding used when calculating filename length in bytes')
filesystem.add_option(
'-w', '--no-overwrites',
action='store_false', dest='overwrites', default=None,

View File

@ -2852,6 +2852,12 @@ OUTTMPL_TYPES = {
'pl_infojson': 'info.json',
}
# https://en.m.wikipedia.org/wiki/Comparison_of_file_systems#Limits
# Default filename length limit, written in --trim-filenames syntax
# ("<number>c" or "<number>b"): 255 minus the length of ".annotations.xml",
# counted in characters on macOS ('Darwin') and Windows, and in
# filesystem-encoded bytes everywhere else.
# NOTE(review): ".annotations.xml" is presumably the longest suffix appended
# to a trimmed name - confirm.
if platform.system() in ('Darwin', 'Windows'):
    DEFAULT_MAX_FILE_NAME = f'{255 - len(".annotations.xml")}c'
else:
    DEFAULT_MAX_FILE_NAME = f'{255 - len(".annotations.xml".encode(sys.getfilesystemencoding()))}b'
# As of [1] format syntax is:
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting