mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-04-23 00:56:02 +00:00
fix extractor harder
Authored by: bashonly
This commit is contained in:
parent
b91afadd1a
commit
d470a4c4fb
@ -1,5 +1,6 @@
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .ninecninemedia import NineCNineMediaIE
|
from .ninecninemedia import NineCNineMediaIE
|
||||||
@ -15,7 +16,8 @@ class CTVNewsIE(InfoExtractor):
|
|||||||
rf'{_BASE_REGEX}video/c{_VIDEO_ID_RE}',
|
rf'{_BASE_REGEX}video/c{_VIDEO_ID_RE}',
|
||||||
rf'{_BASE_REGEX}video(?:-gallery)?/?\?clipId={_VIDEO_ID_RE}',
|
rf'{_BASE_REGEX}video(?:-gallery)?/?\?clipId={_VIDEO_ID_RE}',
|
||||||
rf'{_BASE_REGEX}video/?\?(?:playlist|bin)Id={_PLAYLIST_ID_RE}',
|
rf'{_BASE_REGEX}video/?\?(?:playlist|bin)Id={_PLAYLIST_ID_RE}',
|
||||||
rf'{_BASE_REGEX}(?!video/)[^?#]*?{_PLAYLIST_ID_RE}',
|
rf'{_BASE_REGEX}(?!video/)[^?#]*?{_PLAYLIST_ID_RE}/?(?:$|[?#])',
|
||||||
|
rf'{_BASE_REGEX}(?!video/)[^?#]+\?binId={_PLAYLIST_ID_RE}',
|
||||||
]
|
]
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.ctvnews.ca/video?clipId=901995',
|
'url': 'http://www.ctvnews.ca/video?clipId=901995',
|
||||||
@ -114,6 +116,27 @@ class CTVNewsIE(InfoExtractor):
|
|||||||
'tags': [],
|
'tags': [],
|
||||||
'upload_date': '20150401',
|
'upload_date': '20150401',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://ottawa.ctvnews.ca/features/regional-contact/regional-contact-archive?binId=1.1164587#3023759',
|
||||||
|
'md5': 'a14c0603557decc6531260791c23cc5e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3023759',
|
||||||
|
'ext': 'flv',
|
||||||
|
'season_number': 2024,
|
||||||
|
'timestamp': 1731798000,
|
||||||
|
'season': '2024',
|
||||||
|
'episode': 'Episode 125',
|
||||||
|
'description': 'CTV News Ottawa at Six',
|
||||||
|
'duration': 2712.076,
|
||||||
|
'episode_number': 125,
|
||||||
|
'upload_date': '20241116',
|
||||||
|
'title': 'CTV News Ottawa at Six for Saturday, November 16, 2024',
|
||||||
|
'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
|
||||||
|
'categories': [],
|
||||||
|
'tags': [],
|
||||||
|
'series': 'CTV News Ottawa at Six',
|
||||||
|
'season_id': '92667',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ctvnews.ca/1.810401',
|
'url': 'http://www.ctvnews.ca/1.810401',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -131,7 +154,10 @@ class CTVNewsIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
page_id = self._match_id(url)
|
page_id = self._match_id(url)
|
||||||
|
|
||||||
if page_id.isdecimal():
|
if mobj := re.fullmatch(self._VIDEO_ID_RE, urllib.parse.urlparse(url).fragment):
|
||||||
|
page_id = mobj.group('id')
|
||||||
|
|
||||||
|
if re.fullmatch(self._VIDEO_ID_RE, page_id):
|
||||||
return self._ninecninemedia_url_result(page_id)
|
return self._ninecninemedia_url_result(page_id)
|
||||||
|
|
||||||
webpage = self._download_webpage(f'https://www.ctvnews.ca/{page_id}', page_id, query={
|
webpage = self._download_webpage(f'https://www.ctvnews.ca/{page_id}', page_id, query={
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user