[ie/youtube] Fix extract_relative_time for abbreviated units (#16687)

Authored by: dialmaster
This commit is contained in:
Christopher Dial 2026-06-27 14:32:42 -07:00 committed by GitHub
parent d23e6f5a38
commit 6a24c96f7f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 57 additions and 4 deletions

View File

@ -9,6 +9,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from yt_dlp.extractor import YoutubeIE
from yt_dlp.extractor.youtube._base import YoutubeBaseInfoExtractor
class TestYoutubeMisc(unittest.TestCase):
@ -21,6 +22,42 @@ class TestYoutubeMisc(unittest.TestCase):
assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
def test_extract_relative_time(self):
    """Exercise extract_relative_time with abbreviated and long-form units."""
    parse = YoutubeBaseInfoExtractor.extract_relative_time

    # Pairs of spellings that must resolve to the same value: abbreviated
    # vs. long form, and singular vs. plural.
    equivalent_spellings = (
        ('5d ago', '5 days ago'),
        ('1mo ago', '1 month ago'),
        ('2mo ago', '2 months ago'),
        ('1y ago', '1 year ago'),
        ('1yr ago', '1 year ago'),
        ('3w ago', '3 weeks ago'),
        ('3wk ago', '3 weeks ago'),
        ('5 days ago', '5 day ago'),
    )
    for left, right in equivalent_spellings:
        self.assertEqual(parse(left), parse(right))

    # Inputs that are only required to parse to *something*: sub-day
    # abbreviations, the bare keywords, and strings with surrounding text.
    must_parse = (
        '30s ago',
        '30sec ago',
        '10min ago',
        '5h ago',
        '5hr ago',
        'today',
        'yesterday',
        'now',
        'streamed 6 days ago',
        '5 seconds ago (edited)',
        'updated today',
        '8 yr ago',
    )
    for text in must_parse:
        self.assertIsNotNone(parse(text))

    # Inputs with no recognisable relative time must yield None.
    for text in ('not a date string', ''):
        self.assertIsNone(parse(text))

    # Guard against the unit map and the parser drifting apart: every key
    # in the map must be accepted as a unit on its own.
    for unit in YoutubeBaseInfoExtractor._RELATIVE_TIME_UNIT_MAP:
        self.assertIsNotNone(parse(f'1 {unit} ago'), f'unit {unit!r} did not parse')
if __name__ == '__main__':
unittest.main()

View File

@ -1184,8 +1184,20 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
})
return thumbnails
@staticmethod
def extract_relative_time(relative_time_text):
# Lookup table from every accepted relative-time unit spelling (abbreviated
# or long-form) to the long-form unit name that datetime_from_str()
# understands. Each row below lists one long-form unit and its spellings.
_RELATIVE_TIME_UNIT_MAP = {
    spelling: canonical
    for canonical, spellings in (
        ('second', ('s', 'sec', 'second')),
        ('minute', ('min', 'minute')),
        ('hour', ('h', 'hr', 'hour')),
        ('day', ('d', 'day')),
        ('week', ('w', 'wk', 'week')),
        ('month', ('mo', 'month')),
        ('year', ('y', 'yr', 'year')),
    )
    for spelling in spellings
}
@classmethod
def extract_relative_time(cls, relative_time_text):
"""
Extracts a relative time from string and converts to dt object
e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'
@ -1195,15 +1207,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
# The relative time text strings are roughly the same as what
# Javascript's Intl.RelativeTimeFormat function generates.
# See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
# Sort longest-first: regex alternation matches left-to-right, so short
# keys like 's' must come after 'sec'/'second' to avoid premature matches.
units = '|'.join(map(re.escape, sorted(cls._RELATIVE_TIME_UNIT_MAP, key=len, reverse=True)))
mobj = re.search(
r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>sec(?:ond)?|s|min(?:ute)?|h(?:our|r)?|d(?:ay)?|w(?:eek|k)?|mo(?:nth)?|y(?:ear|r)?)s?\s*ago',
rf'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>{units})s?\s*ago',
relative_time_text)
if mobj:
start = mobj.group('start')
if start:
return datetime_from_str(start)
unit = cls._RELATIVE_TIME_UNIT_MAP[mobj.group('unit')]
try:
return datetime_from_str('now-{}{}'.format(mobj.group('time'), mobj.group('unit')))
return datetime_from_str(f'now-{mobj.group("time")}{unit}')
except ValueError:
return None