mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-07-02 16:28:52 +00:00
[ie/youtube] Fix extract_relative_time for abbreviated units (#16687)
Authored by: dialmaster
This commit is contained in:
parent
d23e6f5a38
commit
6a24c96f7f
@ -9,6 +9,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
from yt_dlp.extractor import YoutubeIE
|
||||
from yt_dlp.extractor.youtube._base import YoutubeBaseInfoExtractor
|
||||
|
||||
|
||||
class TestYoutubeMisc(unittest.TestCase):
|
||||
@ -21,6 +22,42 @@ class TestYoutubeMisc(unittest.TestCase):
|
||||
assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
|
||||
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
|
||||
|
||||
def test_extract_relative_time(self):
    """Exercise extract_relative_time with long-form, abbreviated and special inputs."""
    ert = YoutubeBaseInfoExtractor.extract_relative_time

    # Abbreviated forms must equal their long-form equivalents.
    self.assertEqual(ert('5d ago'), ert('5 days ago'))
    self.assertEqual(ert('1mo ago'), ert('1 month ago'))
    self.assertEqual(ert('2mo ago'), ert('2 months ago'))
    self.assertEqual(ert('1y ago'), ert('1 year ago'))
    self.assertEqual(ert('1yr ago'), ert('1 year ago'))
    self.assertEqual(ert('3w ago'), ert('3 weeks ago'))
    self.assertEqual(ert('3wk ago'), ert('3 weeks ago'))

    # Sub-day abbreviations only need to parse; no long-form comparison is made.
    self.assertIsNotNone(ert('30s ago'))
    self.assertIsNotNone(ert('30sec ago'))
    self.assertIsNotNone(ert('10min ago'))
    self.assertIsNotNone(ert('5h ago'))
    self.assertIsNotNone(ert('5hr ago'))

    # Keyword inputs that carry no number/unit pair.
    self.assertIsNotNone(ert('today'))
    self.assertIsNotNone(ert('yesterday'))
    self.assertIsNotNone(ert('now'))

    # The plural 's' is optional.
    self.assertEqual(ert('5 days ago'), ert('5 day ago'))

    # The examples listed in the extract_relative_time docstring.
    self.assertIsNotNone(ert('streamed 6 days ago'))
    self.assertIsNotNone(ert('5 seconds ago (edited)'))
    self.assertIsNotNone(ert('updated today'))
    self.assertIsNotNone(ert('8 yr ago'))

    # Text without a recognisable relative time yields None.
    self.assertIsNone(ert('not a date string'))
    self.assertIsNone(ert(''))

    # Small safety check to prevent "drift": every key of the unit map must
    # be parseable. subTest reports each failing unit, not just the first.
    for unit in YoutubeBaseInfoExtractor._RELATIVE_TIME_UNIT_MAP:
        with self.subTest(unit=unit):
            self.assertIsNotNone(ert(f'1 {unit} ago'), f'unit {unit!r} did not parse')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@ -1184,8 +1184,20 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
})
|
||||
return thumbnails
|
||||
|
||||
@staticmethod
|
||||
def extract_relative_time(relative_time_text):
|
||||
# Map abbreviated relative-time units to the long-form unit names that
# datetime_from_str() understands.
# Keys are singular; extract_relative_time() joins them into a regex
# alternation and allows an optional trailing plural 's' itself.
_RELATIVE_TIME_UNIT_MAP = {
    's': 'second', 'sec': 'second', 'second': 'second',
    'min': 'minute', 'minute': 'minute',
    'h': 'hour', 'hr': 'hour', 'hour': 'hour',
    'd': 'day', 'day': 'day',
    'w': 'week', 'wk': 'week', 'week': 'week',
    'mo': 'month', 'month': 'month',
    'y': 'year', 'yr': 'year', 'year': 'year',
}
|
||||
|
||||
@classmethod
def extract_relative_time(cls, relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'

    Returns the result of datetime_from_str(), or None when the text holds
    no recognisable relative time or datetime_from_str() raises ValueError.
    """
    # The relative time text strings are roughly the same as what
    # Javascript's Intl.RelativeTimeFormat function generates.
    # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
    # Sort longest-first: regex alternation matches left-to-right, so short
    # keys like 's' must come after 'sec'/'second' to avoid premature matches.
    units = '|'.join(map(re.escape, sorted(cls._RELATIVE_TIME_UNIT_MAP, key=len, reverse=True)))
    mobj = re.search(
        rf'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>{units})s?\s*ago',
        relative_time_text)
    if not mobj:
        return None

    # 'today' / 'yesterday' / 'now' are handed to datetime_from_str() as-is.
    start = mobj.group('start')
    if start:
        return datetime_from_str(start)

    # Normalise abbreviations ('yr', 'mo', ...) to the long-form unit name
    # before building the 'now-<N><unit>' expression.
    unit = cls._RELATIVE_TIME_UNIT_MAP[mobj.group('unit')]
    try:
        return datetime_from_str(f'now-{mobj.group("time")}{unit}')
    except ValueError:
        return None
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user