mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-07-03 00:39:41 +00:00
[ie/youtube] Fix extract_relative_time for abbreviated units (#16687)
Authored by: dialmaster
This commit is contained in:
parent
d23e6f5a38
commit
6a24c96f7f
@ -9,6 +9,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|||||||
|
|
||||||
|
|
||||||
from yt_dlp.extractor import YoutubeIE
|
from yt_dlp.extractor import YoutubeIE
|
||||||
|
from yt_dlp.extractor.youtube._base import YoutubeBaseInfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class TestYoutubeMisc(unittest.TestCase):
|
class TestYoutubeMisc(unittest.TestCase):
|
||||||
@ -21,6 +22,42 @@ class TestYoutubeMisc(unittest.TestCase):
|
|||||||
assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
|
assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
|
||||||
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
|
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
|
||||||
|
|
||||||
|
def test_extract_relative_time(self):
|
||||||
|
ert = YoutubeBaseInfoExtractor.extract_relative_time
|
||||||
|
|
||||||
|
# Abbreviated forms must equal their long-form equivalents.
|
||||||
|
self.assertEqual(ert('5d ago'), ert('5 days ago'))
|
||||||
|
self.assertEqual(ert('1mo ago'), ert('1 month ago'))
|
||||||
|
self.assertEqual(ert('2mo ago'), ert('2 months ago'))
|
||||||
|
self.assertEqual(ert('1y ago'), ert('1 year ago'))
|
||||||
|
self.assertEqual(ert('1yr ago'), ert('1 year ago'))
|
||||||
|
self.assertEqual(ert('3w ago'), ert('3 weeks ago'))
|
||||||
|
self.assertEqual(ert('3wk ago'), ert('3 weeks ago'))
|
||||||
|
|
||||||
|
self.assertIsNotNone(ert('30s ago'))
|
||||||
|
self.assertIsNotNone(ert('30sec ago'))
|
||||||
|
self.assertIsNotNone(ert('10min ago'))
|
||||||
|
self.assertIsNotNone(ert('5h ago'))
|
||||||
|
self.assertIsNotNone(ert('5hr ago'))
|
||||||
|
|
||||||
|
self.assertIsNotNone(ert('today'))
|
||||||
|
self.assertIsNotNone(ert('yesterday'))
|
||||||
|
self.assertIsNotNone(ert('now'))
|
||||||
|
|
||||||
|
self.assertEqual(ert('5 days ago'), ert('5 day ago'))
|
||||||
|
|
||||||
|
self.assertIsNotNone(ert('streamed 6 days ago'))
|
||||||
|
self.assertIsNotNone(ert('5 seconds ago (edited)'))
|
||||||
|
self.assertIsNotNone(ert('updated today'))
|
||||||
|
self.assertIsNotNone(ert('8 yr ago'))
|
||||||
|
|
||||||
|
self.assertIsNone(ert('not a date string'))
|
||||||
|
self.assertIsNone(ert(''))
|
||||||
|
|
||||||
|
# Small safety check to prevent "drift".
|
||||||
|
for unit in YoutubeBaseInfoExtractor._RELATIVE_TIME_UNIT_MAP:
|
||||||
|
self.assertIsNotNone(ert(f'1 {unit} ago'), f'unit {unit!r} did not parse')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@ -1184,8 +1184,20 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
})
|
})
|
||||||
return thumbnails
|
return thumbnails
|
||||||
|
|
||||||
@staticmethod
|
# Map abbreviated relative-time units to the long-form unit names that
|
||||||
def extract_relative_time(relative_time_text):
|
# datetime_from_str() understands.
|
||||||
|
_RELATIVE_TIME_UNIT_MAP = {
|
||||||
|
's': 'second', 'sec': 'second', 'second': 'second',
|
||||||
|
'min': 'minute', 'minute': 'minute',
|
||||||
|
'h': 'hour', 'hr': 'hour', 'hour': 'hour',
|
||||||
|
'd': 'day', 'day': 'day',
|
||||||
|
'w': 'week', 'wk': 'week', 'week': 'week',
|
||||||
|
'mo': 'month', 'month': 'month',
|
||||||
|
'y': 'year', 'yr': 'year', 'year': 'year',
|
||||||
|
}
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def extract_relative_time(cls, relative_time_text):
|
||||||
"""
|
"""
|
||||||
Extracts a relative time from string and converts to dt object
|
Extracts a relative time from string and converts to dt object
|
||||||
e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'
|
e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'
|
||||||
@ -1195,15 +1207,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
# The relative time text strings are roughly the same as what
|
# The relative time text strings are roughly the same as what
|
||||||
# Javascript's Intl.RelativeTimeFormat function generates.
|
# Javascript's Intl.RelativeTimeFormat function generates.
|
||||||
# See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
|
# See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
|
||||||
|
# Sort longest-first: regex alternation matches left-to-right, so short
|
||||||
|
# keys like 's' must come after 'sec'/'second' to avoid premature matches.
|
||||||
|
units = '|'.join(map(re.escape, sorted(cls._RELATIVE_TIME_UNIT_MAP, key=len, reverse=True)))
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>sec(?:ond)?|s|min(?:ute)?|h(?:our|r)?|d(?:ay)?|w(?:eek|k)?|mo(?:nth)?|y(?:ear|r)?)s?\s*ago',
|
rf'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>{units})s?\s*ago',
|
||||||
relative_time_text)
|
relative_time_text)
|
||||||
if mobj:
|
if mobj:
|
||||||
start = mobj.group('start')
|
start = mobj.group('start')
|
||||||
if start:
|
if start:
|
||||||
return datetime_from_str(start)
|
return datetime_from_str(start)
|
||||||
|
unit = cls._RELATIVE_TIME_UNIT_MAP[mobj.group('unit')]
|
||||||
try:
|
try:
|
||||||
return datetime_from_str('now-{}{}'.format(mobj.group('time'), mobj.group('unit')))
|
return datetime_from_str(f'now-{mobj.group("time")}{unit}')
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user