From 6a24c96f7f61e5e651466cc3d4c6a30982318efe Mon Sep 17 00:00:00 2001 From: Christopher Dial Date: Sat, 27 Jun 2026 14:32:42 -0700 Subject: [PATCH] [ie/youtube] Fix `extract_relative_time` for abbreviated units (#16687) Authored by: dialmaster --- test/test_youtube_misc.py | 37 +++++++++++++++++++++++++++++++ yt_dlp/extractor/youtube/_base.py | 24 ++++++++++++++++---- 2 files changed, 57 insertions(+), 4 deletions(-) diff --git a/test/test_youtube_misc.py b/test/test_youtube_misc.py index 81b116217a..ad3c498a66 100644 --- a/test/test_youtube_misc.py +++ b/test/test_youtube_misc.py @@ -9,6 +9,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from yt_dlp.extractor import YoutubeIE +from yt_dlp.extractor.youtube._base import YoutubeBaseInfoExtractor class TestYoutubeMisc(unittest.TestCase): @@ -21,6 +22,42 @@ class TestYoutubeMisc(unittest.TestCase): assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc') assertExtractId('BaW_jenozKc', 'BaW_jenozKc') + def test_extract_relative_time(self): + ert = YoutubeBaseInfoExtractor.extract_relative_time + + # Abbreviated forms must equal their long-form equivalents. + self.assertEqual(ert('5d ago'), ert('5 days ago')) + self.assertEqual(ert('1mo ago'), ert('1 month ago')) + self.assertEqual(ert('2mo ago'), ert('2 months ago')) + self.assertEqual(ert('1y ago'), ert('1 year ago')) + self.assertEqual(ert('1yr ago'), ert('1 year ago')) + self.assertEqual(ert('3w ago'), ert('3 weeks ago')) + self.assertEqual(ert('3wk ago'), ert('3 weeks ago')) + + self.assertIsNotNone(ert('30s ago')) + self.assertIsNotNone(ert('30sec ago')) + self.assertIsNotNone(ert('10min ago')) + self.assertIsNotNone(ert('5h ago')) + self.assertIsNotNone(ert('5hr ago')) + + self.assertIsNotNone(ert('today')) + self.assertIsNotNone(ert('yesterday')) + self.assertIsNotNone(ert('now')) + + self.assertEqual(ert('5 days ago'), ert('5 day ago')) + + self.assertIsNotNone(ert('streamed 6 days ago')) + self.assertIsNotNone(ert('5 seconds ago (edited)')) + self.assertIsNotNone(ert('updated today')) + self.assertIsNotNone(ert('8 yr ago')) + + self.assertIsNone(ert('not a date string')) + self.assertIsNone(ert('')) + + # Small safety check to prevent "drift". + for unit in YoutubeBaseInfoExtractor._RELATIVE_TIME_UNIT_MAP: + self.assertIsNotNone(ert(f'1 {unit} ago'), f'unit {unit!r} did not parse') + if __name__ == '__main__': unittest.main() diff --git a/yt_dlp/extractor/youtube/_base.py b/yt_dlp/extractor/youtube/_base.py index 61611e55e7..0a123f0c1d 100644 --- a/yt_dlp/extractor/youtube/_base.py +++ b/yt_dlp/extractor/youtube/_base.py @@ -1184,8 +1184,20 @@ class YoutubeBaseInfoExtractor(InfoExtractor): }) return thumbnails - @staticmethod - def extract_relative_time(relative_time_text): + # Map abbreviated relative-time units to the long-form unit names that + # datetime_from_str() understands. + _RELATIVE_TIME_UNIT_MAP = { + 's': 'second', 'sec': 'second', 'second': 'second', + 'min': 'minute', 'minute': 'minute', + 'h': 'hour', 'hr': 'hour', 'hour': 'hour', + 'd': 'day', 'day': 'day', + 'w': 'week', 'wk': 'week', 'week': 'week', + 'mo': 'month', 'month': 'month', + 'y': 'year', 'yr': 'year', 'year': 'year', + } + + @classmethod + def extract_relative_time(cls, relative_time_text): """ Extracts a relative time from string and converts to dt object e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago' @@ -1195,15 +1207,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor): # The relative time text strings are roughly the same as what # Javascript's Intl.RelativeTimeFormat function generates. # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat + # Sort longest-first: regex alternation matches left-to-right, so short + # keys like 's' must come after 'sec'/'second' to avoid premature matches. + units = '|'.join(map(re.escape, sorted(cls._RELATIVE_TIME_UNIT_MAP, key=len, reverse=True))) mobj = re.search( - r'(?Ptoday|yesterday|now)|(?P