mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-05-12 10:26:14 +00:00
Compare commits
No commits in common. "1c739bf53e673e06d2a43feddb5a31ee8496fa6e" and "f8b3fe33f68495ade453602a201b33e3aa69ed1f" have entirely different histories.
1c739bf53e
...
f8b3fe33f6
@ -564,10 +564,7 @@ from .eroprofile import (
|
|||||||
EroProfileAlbumIE,
|
EroProfileAlbumIE,
|
||||||
EroProfileIE,
|
EroProfileIE,
|
||||||
)
|
)
|
||||||
from .err import (
|
from .err import ERRJupiterIE
|
||||||
ERRArhiivIE,
|
|
||||||
ERRJupiterIE,
|
|
||||||
)
|
|
||||||
from .ertgr import (
|
from .ertgr import (
|
||||||
ERTFlixCodenameIE,
|
ERTFlixCodenameIE,
|
||||||
ERTFlixIE,
|
ERTFlixIE,
|
||||||
@ -2363,11 +2360,7 @@ from .voicy import (
|
|||||||
VoicyChannelIE,
|
VoicyChannelIE,
|
||||||
VoicyIE,
|
VoicyIE,
|
||||||
)
|
)
|
||||||
from .volejtv import (
|
from .volejtv import VolejTVIE
|
||||||
VolejTVCategoryPlaylistIE,
|
|
||||||
VolejTVClubPlaylistIE,
|
|
||||||
VolejTVIE,
|
|
||||||
)
|
|
||||||
from .voxmedia import (
|
from .voxmedia import (
|
||||||
VoxMediaIE,
|
VoxMediaIE,
|
||||||
VoxMediaVolumeIE,
|
VoxMediaVolumeIE,
|
||||||
|
|||||||
@ -21,44 +21,21 @@ class BoostyIE(InfoExtractor):
|
|||||||
'url': 'https://boosty.to/kuplinov/posts/e55d050c-e3bb-4873-a7db-ac7a49b40c38',
|
'url': 'https://boosty.to/kuplinov/posts/e55d050c-e3bb-4873-a7db-ac7a49b40c38',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'd7473824-352e-48e2-ae53-d4aa39459968',
|
'id': 'd7473824-352e-48e2-ae53-d4aa39459968',
|
||||||
'title': 'Бан? А! Бан! (Phasmophobia)',
|
'title': 'phasma_3',
|
||||||
'alt_title': 'Бан? А! Бан! (Phasmophobia)',
|
|
||||||
'channel': 'Kuplinov',
|
'channel': 'Kuplinov',
|
||||||
'channel_id': '7958701',
|
'channel_id': '7958701',
|
||||||
'timestamp': 1655031975,
|
'timestamp': 1655031975,
|
||||||
'upload_date': '20220612',
|
'upload_date': '20220612',
|
||||||
'release_timestamp': 1655049000,
|
'release_timestamp': 1655049000,
|
||||||
'release_date': '20220612',
|
'release_date': '20220612',
|
||||||
'modified_timestamp': 1743328648,
|
'modified_timestamp': 1668680993,
|
||||||
'modified_date': '20250330',
|
'modified_date': '20221117',
|
||||||
'tags': ['куплинов', 'phasmophobia'],
|
'tags': ['куплинов', 'phasmophobia'],
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'duration': 105,
|
'duration': 105,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'thumbnail': r're:^https://iv\.okcdn\.ru/videoPreview\?',
|
'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
|
||||||
},
|
|
||||||
}, {
|
|
||||||
# single ok_video with truncated title
|
|
||||||
'url': 'https://boosty.to/kuplinov/posts/cc09b7f9-121e-40b8-9392-4a075ef2ce53',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'fb5ea762-6303-4557-9a17-157947326810',
|
|
||||||
'title': 'Какая там активность была? Не слышу! Повтори еще пару раз! (Phas',
|
|
||||||
'alt_title': 'Какая там активность была? Не слышу! Повтори еще пару раз! (Phasmophobia)',
|
|
||||||
'channel': 'Kuplinov',
|
|
||||||
'channel_id': '7958701',
|
|
||||||
'timestamp': 1655031930,
|
|
||||||
'upload_date': '20220612',
|
|
||||||
'release_timestamp': 1655048400,
|
|
||||||
'release_date': '20220612',
|
|
||||||
'modified_timestamp': 1743328616,
|
|
||||||
'modified_date': '20250330',
|
|
||||||
'tags': ['куплинов', 'phasmophobia'],
|
|
||||||
'like_count': int,
|
|
||||||
'ext': 'mp4',
|
|
||||||
'duration': 39,
|
|
||||||
'view_count': int,
|
|
||||||
'thumbnail': r're:^https://iv\.okcdn\.ru/videoPreview\?',
|
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# multiple ok_video
|
# multiple ok_video
|
||||||
@ -132,41 +109,36 @@ class BoostyIE(InfoExtractor):
|
|||||||
'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
|
'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
|
||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
'skip': 'post has been deleted',
|
|
||||||
}, {
|
}, {
|
||||||
# single external video (youtube)
|
# single external video (youtube)
|
||||||
'url': 'https://boosty.to/futuremusicproduction/posts/32a8cae2-3252-49da-b285-0e014bc6e565',
|
'url': 'https://boosty.to/denischuzhoy/posts/6094a487-bcec-4cf8-a453-43313b463c38',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '-37FW_YQ3B4',
|
'id': 'EXelTnve5lY',
|
||||||
'title': 'Afro | Deep House FREE FLP',
|
'title': 'Послание Президента Федеральному Собранию | Класс народа',
|
||||||
'media_type': 'video',
|
'upload_date': '20210425',
|
||||||
'upload_date': '20250829',
|
'channel': 'Денис Чужой',
|
||||||
'timestamp': 1756466005,
|
'tags': 'count:10',
|
||||||
'channel': 'Future Music Production',
|
|
||||||
'tags': 'count:0',
|
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'ext': 'm4a',
|
'ext': 'mp4',
|
||||||
'duration': 170,
|
'duration': 816,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'thumbnail': r're:^https://i\.ytimg\.com/',
|
'thumbnail': r're:^https://i\.ytimg\.com/',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'availability': 'public',
|
'availability': 'public',
|
||||||
'categories': list,
|
'categories': list,
|
||||||
'channel_follower_count': int,
|
'channel_follower_count': int,
|
||||||
'channel_id': 'UCKVYrFBYmci1e-T8NeHw2qg',
|
'channel_id': 'UCCzVNbWZfYpBfyofCCUD_0w',
|
||||||
|
'channel_is_verified': bool,
|
||||||
'channel_url': r're:^https://www\.youtube\.com/',
|
'channel_url': r're:^https://www\.youtube\.com/',
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'description': str,
|
'description': str,
|
||||||
|
'heatmap': 'count:100',
|
||||||
'live_status': str,
|
'live_status': str,
|
||||||
'playable_in_embed': bool,
|
'playable_in_embed': bool,
|
||||||
'uploader': str,
|
'uploader': str,
|
||||||
'uploader_id': str,
|
'uploader_id': str,
|
||||||
'uploader_url': r're:^https://www\.youtube\.com/',
|
'uploader_url': r're:^https://www\.youtube\.com/',
|
||||||
},
|
},
|
||||||
'expected_warnings': [
|
|
||||||
'Remote components challenge solver script',
|
|
||||||
'n challenge solving failed',
|
|
||||||
],
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_MP4_TYPES = ('tiny', 'lowest', 'low', 'medium', 'high', 'full_hd', 'quad_hd', 'ultra_hd')
|
_MP4_TYPES = ('tiny', 'lowest', 'low', 'medium', 'high', 'full_hd', 'quad_hd', 'ultra_hd')
|
||||||
@ -235,14 +207,13 @@ class BoostyIE(InfoExtractor):
|
|||||||
video_id = item.get('id') or post_id
|
video_id = item.get('id') or post_id
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'alt_title': post_title,
|
|
||||||
'formats': self._extract_formats(item.get('playerUrls'), video_id),
|
'formats': self._extract_formats(item.get('playerUrls'), video_id),
|
||||||
**common_metadata,
|
**common_metadata,
|
||||||
**traverse_obj(item, {
|
**traverse_obj(item, {
|
||||||
'title': ('title', {str}),
|
'title': ('title', {str}),
|
||||||
'duration': ('duration', {int_or_none}),
|
'duration': ('duration', {int_or_none}),
|
||||||
'view_count': ('viewsCounter', {int_or_none}),
|
'view_count': ('viewsCounter', {int_or_none}),
|
||||||
'thumbnail': (('preview', 'defaultPreview'), {url_or_none}),
|
'thumbnail': (('previewUrl', 'defaultPreview'), {url_or_none}),
|
||||||
}, get_all=False)})
|
}, get_all=False)})
|
||||||
|
|
||||||
if not entries and not post.get('hasAccess'):
|
if not entries and not post.get('hasAccess'):
|
||||||
|
|||||||
@ -366,7 +366,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _generate_blockbuster_headers():
|
def _generate_blockbuster_headers():
|
||||||
"""Randomize our HTTP header fingerprint to bust the HTTP Error 403 block"""
|
# Randomize our HTTP header fingerprint to bust the HTTP Error 403 block
|
||||||
|
# See https://github.com/yt-dlp/yt-dlp/issues/15526
|
||||||
|
|
||||||
def random_letters(minimum, maximum):
|
def random_letters(minimum, maximum):
|
||||||
# Omit vowels so we don't generate valid header names like 'authorization', etc
|
# Omit vowels so we don't generate valid header names like 'authorization', etc
|
||||||
@ -377,43 +378,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
for _ in range(random.randint(2, 8))
|
for _ in range(random.randint(2, 8))
|
||||||
}
|
}
|
||||||
|
|
||||||
def _extract_dailymotion_m3u8_formats_and_subtitles(self, media_url, video_id, live=False):
|
|
||||||
"""See https://github.com/yt-dlp/yt-dlp/issues/15526"""
|
|
||||||
|
|
||||||
ERROR_NOTE = 'Unable to download m3u8 information'
|
|
||||||
last_error = None
|
|
||||||
|
|
||||||
for note, kwargs in (
|
|
||||||
('Downloading m3u8 information', {}),
|
|
||||||
('Retrying m3u8 download with randomized headers', {
|
|
||||||
'headers': self._generate_blockbuster_headers(),
|
|
||||||
}),
|
|
||||||
('Retrying m3u8 download with Chrome impersonation', {
|
|
||||||
'impersonate': 'chrome',
|
|
||||||
'require_impersonation': True,
|
|
||||||
}),
|
|
||||||
('Retrying m3u8 download with Firefox impersonation', {
|
|
||||||
'impersonate': 'firefox',
|
|
||||||
'require_impersonation': True,
|
|
||||||
}),
|
|
||||||
):
|
|
||||||
try:
|
|
||||||
m3u8_doc = self._download_webpage(media_url, video_id, note, ERROR_NOTE, **kwargs)
|
|
||||||
break
|
|
||||||
except ExtractorError as e:
|
|
||||||
last_error = e.orig_msg
|
|
||||||
self.write_debug(f'{video_id}: {last_error}')
|
|
||||||
else:
|
|
||||||
if 'impersonation' not in last_error:
|
|
||||||
self.report_warning(last_error, video_id=video_id)
|
|
||||||
last_error = None
|
|
||||||
return [], {}, last_error
|
|
||||||
|
|
||||||
formats, subtitles = self._parse_m3u8_formats_and_subtitles(
|
|
||||||
m3u8_doc, media_url, 'mp4', m3u8_id='hls', live=live, fatal=False)
|
|
||||||
|
|
||||||
return formats, subtitles, last_error
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url, smuggled_data = unsmuggle_url(url)
|
url, smuggled_data = unsmuggle_url(url)
|
||||||
video_id, is_playlist, playlist_id = self._match_valid_url(url).group('id', 'is_playlist', 'playlist_id')
|
video_id, is_playlist, playlist_id = self._match_valid_url(url).group('id', 'is_playlist', 'playlist_id')
|
||||||
@ -467,7 +431,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
is_live = media.get('isOnAir')
|
is_live = media.get('isOnAir')
|
||||||
formats = []
|
formats = []
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
expected_error = None
|
|
||||||
|
|
||||||
for quality, media_list in metadata['qualities'].items():
|
for quality, media_list in metadata['qualities'].items():
|
||||||
for m in media_list:
|
for m in media_list:
|
||||||
@ -476,8 +439,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
if not media_url or media_type == 'application/vnd.lumberjack.manifest':
|
if not media_url or media_type == 'application/vnd.lumberjack.manifest':
|
||||||
continue
|
continue
|
||||||
if media_type == 'application/x-mpegURL':
|
if media_type == 'application/x-mpegURL':
|
||||||
fmt, subs, expected_error = self._extract_dailymotion_m3u8_formats_and_subtitles(
|
fmt, subs = self._extract_m3u8_formats_and_subtitles(
|
||||||
media_url, video_id, live=is_live)
|
media_url, video_id, 'mp4', live=is_live, m3u8_id='hls',
|
||||||
|
fatal=False, headers=self._generate_blockbuster_headers())
|
||||||
formats.extend(fmt)
|
formats.extend(fmt)
|
||||||
self._merge_subtitles(subs, target=subtitles)
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
else:
|
else:
|
||||||
@ -494,10 +458,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
'width': width,
|
'width': width,
|
||||||
})
|
})
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
|
|
||||||
if not formats and expected_error:
|
|
||||||
self.raise_no_formats(expected_error, expected=True)
|
|
||||||
|
|
||||||
for f in formats:
|
for f in formats:
|
||||||
f['url'] = f['url'].split('#')[0]
|
f['url'] = f['url'].split('#')[0]
|
||||||
if not f.get('fps') and f['format_id'].endswith('@60'):
|
if not f.get('fps') and f['format_id'].endswith('@60'):
|
||||||
|
|||||||
@ -2,7 +2,6 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
|
||||||
str_or_none,
|
str_or_none,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
@ -223,70 +222,3 @@ class ERRJupiterIE(InfoExtractor):
|
|||||||
'episode_id': ('id', {str_or_none}),
|
'episode_id': ('id', {str_or_none}),
|
||||||
}) if data.get('type') == 'episode' else {}),
|
}) if data.get('type') == 'episode' else {}),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class ERRArhiivIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https://arhiiv\.err\.ee/video/(?:vaata/)?(?P<id>[^/?#]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://arhiiv.err.ee/video/kontsertpalad',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'kontsertpalad',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Kontsertpalad: 255 | L. Beethoveni sonaat c-moll, "Pateetiline"',
|
|
||||||
'description': 'md5:a70f4ff23c3618f3be63f704bccef063',
|
|
||||||
'series': 'Kontsertpalad',
|
|
||||||
'episode_id': 255,
|
|
||||||
'timestamp': 1666152162,
|
|
||||||
'upload_date': '20221019',
|
|
||||||
'release_year': 1970,
|
|
||||||
'modified_timestamp': 1718620982,
|
|
||||||
'modified_date': '20240617',
|
|
||||||
},
|
|
||||||
'params': {'skip_download': 'm3u8'},
|
|
||||||
}, {
|
|
||||||
'url': 'https://arhiiv.err.ee/video/vaata/koalitsioonileppe-allkirjastamine',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'koalitsioonileppe-allkirjastamine',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Koalitsioonileppe allkirjastamine',
|
|
||||||
'timestamp': 1710728222,
|
|
||||||
'upload_date': '20240318',
|
|
||||||
'release_timestamp': 1611532800,
|
|
||||||
'release_date': '20210125',
|
|
||||||
},
|
|
||||||
'params': {'skip_download': 'm3u8'},
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
data = self._download_json(
|
|
||||||
f'https://arhiiv.err.ee/api/v1/content/video/{video_id}', video_id)
|
|
||||||
|
|
||||||
formats, subtitles = [], {}
|
|
||||||
if hls_url := traverse_obj(data, ('media', 'src', 'hls', {url_or_none})):
|
|
||||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
|
||||||
hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
|
||||||
formats.extend(fmts)
|
|
||||||
self._merge_subtitles(subs, target=subtitles)
|
|
||||||
if dash_url := traverse_obj(data, ('media', 'src', 'dash', {url_or_none})):
|
|
||||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
|
||||||
dash_url, video_id, mpd_id='dash', fatal=False)
|
|
||||||
formats.extend(fmts)
|
|
||||||
self._merge_subtitles(subs, target=subtitles)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'formats': formats,
|
|
||||||
'subtitles': subtitles,
|
|
||||||
**traverse_obj(data, ('info', {
|
|
||||||
'title': ('title', {str}),
|
|
||||||
'series': ('seriesTitle', {str}, filter),
|
|
||||||
'series_id': ('seriesId', {str}, filter),
|
|
||||||
'episode_id': ('episode', {int_or_none}),
|
|
||||||
'description': ('synopsis', {str}, filter),
|
|
||||||
'timestamp': ('uploadDate', {parse_iso8601}),
|
|
||||||
'modified_timestamp': ('dateModified', {parse_iso8601}),
|
|
||||||
'release_timestamp': ('date', {parse_iso8601}),
|
|
||||||
'release_year': ('year', {int_or_none}),
|
|
||||||
})),
|
|
||||||
}
|
|
||||||
|
|||||||
@ -371,16 +371,15 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
||||||
IE_NAME = 'franceinfo'
|
IE_NAME = 'francetvinfo.fr'
|
||||||
IE_DESC = 'franceinfo.fr (formerly francetvinfo.fr)'
|
_VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&.]+)'
|
||||||
_VALID_URL = r'https?://(?:www|mobile|france3-regions)\.france(?:tv)?info.fr/(?:[^/?#]+/)*(?P<id>[^/?#&.]+)'
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-jeudi-22-aout-2019_3561461.html',
|
'url': 'https://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-jeudi-22-aout-2019_3561461.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'd12458ee-5062-48fe-bfdd-a30d6a01b793',
|
'id': 'd12458ee-5062-48fe-bfdd-a30d6a01b793',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Soir 3 - Émission du jeudi 22 août 2019',
|
'title': 'Soir 3',
|
||||||
'upload_date': '20190822',
|
'upload_date': '20190822',
|
||||||
'timestamp': 1566510730,
|
'timestamp': 1566510730,
|
||||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||||
@ -399,7 +398,7 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '7d204c9e-a2d3-11eb-9e4c-000d3a23d482',
|
'id': '7d204c9e-a2d3-11eb-9e4c-000d3a23d482',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Journal 20h00 - Covid-19 : une situation catastrophique à New Dehli',
|
'title': 'Covid-19 : une situation catastrophique à New Dehli - Édition du mercredi 21 avril 2021',
|
||||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||||
'duration': 76,
|
'duration': 76,
|
||||||
'timestamp': 1619028518,
|
'timestamp': 1619028518,
|
||||||
@ -439,18 +438,6 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
|||||||
'thumbnail': r're:https://[^/?#]+/v/[^/?#]+/x1080',
|
'thumbnail': r're:https://[^/?#]+/v/[^/?#]+/x1080',
|
||||||
},
|
},
|
||||||
'add_ie': ['Dailymotion'],
|
'add_ie': ['Dailymotion'],
|
||||||
'skip': 'Broken Dailymotion link',
|
|
||||||
}, {
|
|
||||||
'url': 'https://www.franceinfo.fr/monde/usa/presidentielle/donald-trump/etats-unis-un-risque-d-embrasement-apres-la-mort-d-un-manifestant_7764542.html',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'f920fcc2-fa20-11f0-ac98-57a09c50f7ce',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Affaires sensibles - Manifestant tué Le risque d\'embrasement',
|
|
||||||
'duration': 118,
|
|
||||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
|
||||||
'timestamp': 1769367756,
|
|
||||||
'upload_date': '20260125',
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
|
'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -458,9 +445,6 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
|||||||
# "<figure id=" pattern (#28792)
|
# "<figure id=" pattern (#28792)
|
||||||
'url': 'https://www.francetvinfo.fr/culture/patrimoine/incendie-de-notre-dame-de-paris/notre-dame-de-paris-de-l-incendie-de-la-cathedrale-a-sa-reconstruction_4372291.html',
|
'url': 'https://www.francetvinfo.fr/culture/patrimoine/incendie-de-notre-dame-de-paris/notre-dame-de-paris-de-l-incendie-de-la-cathedrale-a-sa-reconstruction_4372291.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
|
||||||
'url': 'https://www.franceinfo.fr/replay-jt/france-2/20-heures/robert-de-niro-portrait-d-un-monument-du-cinema_7245456.html',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -476,7 +460,7 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
|||||||
|
|
||||||
video_id = (
|
video_id = (
|
||||||
traverse_obj(webpage, (
|
traverse_obj(webpage, (
|
||||||
{find_element(tag='(button|div)', attr='data-cy', value='francetv-player-wrapper', html=True, regex=True)},
|
{find_element(tag='button', attr='data-cy', value='francetv-player-wrapper', html=True)},
|
||||||
{extract_attributes}, 'id'))
|
{extract_attributes}, 'id'))
|
||||||
or self._search_regex(
|
or self._search_regex(
|
||||||
(r'player\.load[^;]+src:\s*["\']([^"\']+)',
|
(r'player\.load[^;]+src:\s*["\']([^"\']+)',
|
||||||
|
|||||||
@ -104,9 +104,9 @@ class FrontroGroupBaseIE(FrontoBaseIE):
|
|||||||
class TheChosenIE(FrontroVideoBaseIE):
|
class TheChosenIE(FrontroVideoBaseIE):
|
||||||
_CHANNEL_ID = '12884901895'
|
_CHANNEL_ID = '12884901895'
|
||||||
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/watch/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/video/(?P<id>[0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://watch.thechosen.tv/watch/184683594325',
|
'url': 'https://watch.thechosen.tv/video/184683594325',
|
||||||
'md5': '3f878b689588c71b38ec9943c54ff5b0',
|
'md5': '3f878b689588c71b38ec9943c54ff5b0',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '184683594325',
|
'id': '184683594325',
|
||||||
@ -124,7 +124,7 @@ class TheChosenIE(FrontroVideoBaseIE):
|
|||||||
'modified_date': str,
|
'modified_date': str,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://watch.thechosen.tv/watch/184683596189',
|
'url': 'https://watch.thechosen.tv/video/184683596189',
|
||||||
'md5': 'd581562f9d29ce82f5b7770415334151',
|
'md5': 'd581562f9d29ce82f5b7770415334151',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '184683596189',
|
'id': '184683596189',
|
||||||
@ -147,7 +147,7 @@ class TheChosenIE(FrontroVideoBaseIE):
|
|||||||
class TheChosenGroupIE(FrontroGroupBaseIE):
|
class TheChosenGroupIE(FrontroGroupBaseIE):
|
||||||
_CHANNEL_ID = '12884901895'
|
_CHANNEL_ID = '12884901895'
|
||||||
_VIDEO_EXTRACTOR = TheChosenIE
|
_VIDEO_EXTRACTOR = TheChosenIE
|
||||||
_VIDEO_URL_TMPL = 'https://watch.thechosen.tv/watch/%s'
|
_VIDEO_URL_TMPL = 'https://watch.thechosen.tv/video/%s'
|
||||||
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/group/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/group/(?P<id>[0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
|||||||
@ -156,36 +156,18 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
|||||||
'id': '17241424',
|
'id': '17241424',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Opus 28',
|
'title': 'Opus 28',
|
||||||
'upload_date': '20060912',
|
'upload_date': '20080211',
|
||||||
'timestamp': 1158076800,
|
'timestamp': 1202745600,
|
||||||
'duration': 263,
|
'duration': 263,
|
||||||
'thumbnail': r're:^http.*\.jpg',
|
'thumbnail': r're:^http.*\.jpg',
|
||||||
'album': 'Piano Solos, Vol. 2',
|
'album': 'Piano Solos Vol. 2',
|
||||||
'album_artist': 'Dustin O\'Halloran',
|
'album_artist': 'Dustin O\'Halloran',
|
||||||
'average_rating': int,
|
'average_rating': int,
|
||||||
'description': 'md5:b566b92c55ca348df65d206c5d689576',
|
'description': '[00:05.00]纯音乐,请欣赏\n',
|
||||||
'album_artists': ['Dustin O\'Halloran'],
|
'album_artists': ['Dustin O\'Halloran'],
|
||||||
'creators': ['Dustin O\'Halloran'],
|
'creators': ['Dustin O\'Halloran'],
|
||||||
'subtitles': {'lyrics': [{'ext': 'lrc'}]},
|
'subtitles': {'lyrics': [{'ext': 'lrc'}]},
|
||||||
},
|
},
|
||||||
}, {
|
|
||||||
'url': 'https://music.163.com/#/song?id=2755669231',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '2755669231',
|
|
||||||
'ext': 'mp3',
|
|
||||||
'title': '十二月-Departure',
|
|
||||||
'upload_date': '20251111',
|
|
||||||
'timestamp': 1762876800,
|
|
||||||
'duration': 188,
|
|
||||||
'thumbnail': r're:^http.*\.jpg',
|
|
||||||
'album': '円',
|
|
||||||
'album_artist': 'ひとひら',
|
|
||||||
'average_rating': int,
|
|
||||||
'description': 'md5:deee249c8c9c3e2c54ecdab36e87d174',
|
|
||||||
'album_artists': ['ひとひら'],
|
|
||||||
'creators': ['ひとひら'],
|
|
||||||
'subtitles': {'lyrics': [{'ext': 'lrc', 'data': 'md5:d32b4425a5d6c9fa249ca6e803dd0401'}]},
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
|
'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
|
||||||
'md5': 'b896be78d8d34bd7bb665b26710913ff',
|
'md5': 'b896be78d8d34bd7bb665b26710913ff',
|
||||||
@ -259,16 +241,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
|||||||
'lyrics': [{'data': original, 'ext': 'lrc'}],
|
'lyrics': [{'data': original, 'ext': 'lrc'}],
|
||||||
}
|
}
|
||||||
|
|
||||||
def collect_lyrics(lrc):
|
lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)'
|
||||||
lyrics_expr = r'\[([0-9]{2}):([0-9]{2})[:.]([0-9]{2,})\]([^\n]+)'
|
original_ts_texts = re.findall(lyrics_expr, original)
|
||||||
matches = re.findall(lyrics_expr, lrc)
|
translation_ts_dict = dict(re.findall(lyrics_expr, translated))
|
||||||
return (
|
|
||||||
(f'[{minute}:{sec}.{msec}]', text)
|
|
||||||
for minute, sec, msec, text in matches
|
|
||||||
)
|
|
||||||
|
|
||||||
original_ts_texts = collect_lyrics(original)
|
|
||||||
translation_ts_dict = dict(collect_lyrics(translated))
|
|
||||||
|
|
||||||
merged = '\n'.join(
|
merged = '\n'.join(
|
||||||
join_nonempty(f'{timestamp}{text}', translation_ts_dict.get(timestamp, ''), delim=' / ')
|
join_nonempty(f'{timestamp}{text}', translation_ts_dict.get(timestamp, ''), delim=' / ')
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
import functools
|
import functools
|
||||||
import itertools
|
import itertools
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .sproutvideo import VidsIoIE
|
from .sproutvideo import VidsIoIE
|
||||||
@ -10,23 +11,15 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
clean_html,
|
clean_html,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
extract_attributes,
|
|
||||||
float_or_none,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
update_url_query,
|
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
from ..utils.traversal import (
|
from ..utils.traversal import require, traverse_obj, value
|
||||||
find_elements,
|
|
||||||
require,
|
|
||||||
traverse_obj,
|
|
||||||
value,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class PatreonBaseIE(InfoExtractor):
|
class PatreonBaseIE(InfoExtractor):
|
||||||
@ -128,7 +121,6 @@ class PatreonIE(PatreonBaseIE):
|
|||||||
'channel_is_verified': True,
|
'channel_is_verified': True,
|
||||||
'chapters': 'count:4',
|
'chapters': 'count:4',
|
||||||
'timestamp': 1423689666,
|
'timestamp': 1423689666,
|
||||||
'media_type': 'video',
|
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'noplaylist': True,
|
'noplaylist': True,
|
||||||
@ -169,7 +161,7 @@ class PatreonIE(PatreonBaseIE):
|
|||||||
'uploader_url': 'https://www.patreon.com/loish',
|
'uploader_url': 'https://www.patreon.com/loish',
|
||||||
'description': 'md5:e2693e97ee299c8ece47ffdb67e7d9d2',
|
'description': 'md5:e2693e97ee299c8ece47ffdb67e7d9d2',
|
||||||
'title': 'VIDEO // sketchbook flipthrough',
|
'title': 'VIDEO // sketchbook flipthrough',
|
||||||
'uploader': 'Loish',
|
'uploader': 'Loish ',
|
||||||
'tags': ['sketchbook', 'video'],
|
'tags': ['sketchbook', 'video'],
|
||||||
'channel_id': '1641751',
|
'channel_id': '1641751',
|
||||||
'channel_url': 'https://www.patreon.com/loish',
|
'channel_url': 'https://www.patreon.com/loish',
|
||||||
@ -282,73 +274,8 @@ class PatreonIE(PatreonBaseIE):
|
|||||||
'channel_id': '9346307',
|
'channel_id': '9346307',
|
||||||
},
|
},
|
||||||
'params': {'getcomments': True},
|
'params': {'getcomments': True},
|
||||||
}, {
|
|
||||||
# Inlined media in post; uses _extract_from_media_api
|
|
||||||
'url': 'https://www.patreon.com/posts/scottfalco-146966245',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '146966245',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'scottfalco 1080',
|
|
||||||
'description': 'md5:a3f29bbd0a46b4821ec3400957c98aa2',
|
|
||||||
'uploader': 'Insanimate',
|
|
||||||
'uploader_id': '2828146',
|
|
||||||
'uploader_url': 'https://www.patreon.com/Insanimate',
|
|
||||||
'channel_id': '6260877',
|
|
||||||
'channel_url': 'https://www.patreon.com/Insanimate',
|
|
||||||
'channel_follower_count': int,
|
|
||||||
'comment_count': int,
|
|
||||||
'like_count': int,
|
|
||||||
'duration': 7.833333,
|
|
||||||
'timestamp': 1767061800,
|
|
||||||
'upload_date': '20251230',
|
|
||||||
},
|
|
||||||
}]
|
}]
|
||||||
_RETURN_TYPE = 'video'
|
_RETURN_TYPE = 'video'
|
||||||
_HTTP_HEADERS = {
|
|
||||||
# Must be all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, and Vimeo.
|
|
||||||
# patreon.com URLs redirect to www.patreon.com; this matters when requesting mux.com m3u8s
|
|
||||||
'referer': 'https://www.patreon.com/',
|
|
||||||
}
|
|
||||||
|
|
||||||
def _extract_from_media_api(self, media_id):
|
|
||||||
attributes = traverse_obj(
|
|
||||||
self._call_api(f'media/{media_id}', media_id, fatal=False),
|
|
||||||
('data', 'attributes', {dict}))
|
|
||||||
if not attributes:
|
|
||||||
return None
|
|
||||||
|
|
||||||
info_dict = traverse_obj(attributes, {
|
|
||||||
'title': ('file_name', {lambda x: x.rpartition('.')[0]}),
|
|
||||||
'timestamp': ('created_at', {parse_iso8601}),
|
|
||||||
'duration': ('display', 'duration', {float_or_none}),
|
|
||||||
})
|
|
||||||
info_dict['id'] = media_id
|
|
||||||
|
|
||||||
playback_url = traverse_obj(
|
|
||||||
attributes, ('display', (None, 'viewer_playback_data'), 'url', {url_or_none}, any))
|
|
||||||
download_url = traverse_obj(attributes, ('download_url', {url_or_none}))
|
|
||||||
|
|
||||||
if playback_url and mimetype2ext(attributes.get('mimetype')) == 'm3u8':
|
|
||||||
info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(
|
|
||||||
playback_url, media_id, 'mp4', fatal=False, headers=self._HTTP_HEADERS)
|
|
||||||
for f in info_dict['formats']:
|
|
||||||
f['http_headers'] = self._HTTP_HEADERS
|
|
||||||
if transcript_url := traverse_obj(attributes, ('display', 'transcript_url', {url_or_none})):
|
|
||||||
info_dict['subtitles'].setdefault('en', []).append({
|
|
||||||
'url': transcript_url,
|
|
||||||
'ext': 'vtt',
|
|
||||||
})
|
|
||||||
elif playback_url or download_url:
|
|
||||||
info_dict['formats'] = [{
|
|
||||||
# If playback_url is available, download_url is a duplicate lower resolution format
|
|
||||||
'url': playback_url or download_url,
|
|
||||||
'vcodec': 'none' if attributes.get('media_type') != 'video' else None,
|
|
||||||
}]
|
|
||||||
|
|
||||||
if not info_dict.get('formats'):
|
|
||||||
return None
|
|
||||||
|
|
||||||
return info_dict
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
@ -372,7 +299,6 @@ class PatreonIE(PatreonBaseIE):
|
|||||||
'comment_count': ('comment_count', {int_or_none}),
|
'comment_count': ('comment_count', {int_or_none}),
|
||||||
})
|
})
|
||||||
|
|
||||||
seen_media_ids = set()
|
|
||||||
entries = []
|
entries = []
|
||||||
idx = 0
|
idx = 0
|
||||||
for include in traverse_obj(post, ('included', lambda _, v: v['type'])):
|
for include in traverse_obj(post, ('included', lambda _, v: v['type'])):
|
||||||
@ -394,8 +320,6 @@ class PatreonIE(PatreonBaseIE):
|
|||||||
'url': download_url,
|
'url': download_url,
|
||||||
'alt_title': traverse_obj(media_attributes, ('file_name', {str})),
|
'alt_title': traverse_obj(media_attributes, ('file_name', {str})),
|
||||||
})
|
})
|
||||||
if media_id := traverse_obj(include, ('id', {str})):
|
|
||||||
seen_media_ids.add(media_id)
|
|
||||||
|
|
||||||
elif include_type == 'user':
|
elif include_type == 'user':
|
||||||
info.update(traverse_obj(include, {
|
info.update(traverse_obj(include, {
|
||||||
@ -416,29 +340,34 @@ class PatreonIE(PatreonBaseIE):
|
|||||||
'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
|
'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
|
||||||
}))
|
}))
|
||||||
|
|
||||||
if embed_url := traverse_obj(attributes, ('embed', 'url', {url_or_none})):
|
# Must be all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, and Vimeo.
|
||||||
# Convert useless vimeo.com URLs to useful player.vimeo.com embed URLs
|
# patreon.com URLs redirect to www.patreon.com; this matters when requesting mux.com m3u8s
|
||||||
vimeo_id, vimeo_hash = self._search_regex(
|
headers = {'referer': 'https://www.patreon.com/'}
|
||||||
r'//vimeo\.com/(\d+)(?:/([\da-f]+))?', embed_url,
|
|
||||||
'vimeo id', group=(1, 2), default=(None, None))
|
|
||||||
if vimeo_id:
|
|
||||||
embed_url = update_url_query(
|
|
||||||
f'https://player.vimeo.com/video/{vimeo_id}',
|
|
||||||
{'h': vimeo_hash or []})
|
|
||||||
if VimeoIE.suitable(embed_url):
|
|
||||||
entry = self.url_result(
|
|
||||||
VimeoIE._smuggle_referrer(embed_url, self._HTTP_HEADERS['referer']),
|
|
||||||
VimeoIE, url_transparent=True)
|
|
||||||
else:
|
|
||||||
entry = self.url_result(smuggle_url(embed_url, self._HTTP_HEADERS))
|
|
||||||
|
|
||||||
if urlh := self._request_webpage(
|
# handle Vimeo embeds
|
||||||
embed_url, video_id, 'Checking embed URL', headers=self._HTTP_HEADERS,
|
if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
|
||||||
fatal=False, errnote=False, expected_status=(403, 429), # Ignore Vimeo 429's
|
v_url = urllib.parse.unquote(self._html_search_regex(
|
||||||
):
|
r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
|
||||||
# Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie
|
traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '')
|
||||||
if VidsIoIE.suitable(embed_url) or urlh.status != 403:
|
if url_or_none(v_url) and self._request_webpage(
|
||||||
entries.append(entry)
|
v_url, video_id, 'Checking Vimeo embed URL', headers=headers,
|
||||||
|
fatal=False, errnote=False, expected_status=429): # 429 is TLS fingerprint rejection
|
||||||
|
entries.append(self.url_result(
|
||||||
|
VimeoIE._smuggle_referrer(v_url, headers['referer']),
|
||||||
|
VimeoIE, url_transparent=True))
|
||||||
|
|
||||||
|
embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
|
||||||
|
if embed_url and (urlh := self._request_webpage(
|
||||||
|
embed_url, video_id, 'Checking embed URL', headers=headers,
|
||||||
|
fatal=False, errnote=False, expected_status=403)):
|
||||||
|
# Vimeo's Cloudflare anti-bot protection will return HTTP status 200 for 404, so we need
|
||||||
|
# to check for "Sorry, we couldn&rsquo;t find that page" in the meta description tag
|
||||||
|
meta_description = clean_html(self._html_search_meta(
|
||||||
|
'description', self._webpage_read_content(urlh, embed_url, video_id, fatal=False), default=None))
|
||||||
|
# Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie
|
||||||
|
if ((urlh.status != 403 and meta_description != 'Sorry, we couldn’t find that page')
|
||||||
|
or VidsIoIE.suitable(embed_url)):
|
||||||
|
entries.append(self.url_result(smuggle_url(embed_url, headers)))
|
||||||
|
|
||||||
post_file = traverse_obj(attributes, ('post_file', {dict}))
|
post_file = traverse_obj(attributes, ('post_file', {dict}))
|
||||||
if post_file:
|
if post_file:
|
||||||
@ -452,27 +381,13 @@ class PatreonIE(PatreonBaseIE):
|
|||||||
})
|
})
|
||||||
elif name == 'video' or determine_ext(post_file.get('url')) == 'm3u8':
|
elif name == 'video' or determine_ext(post_file.get('url')) == 'm3u8':
|
||||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||||
post_file['url'], video_id, headers=self._HTTP_HEADERS)
|
post_file['url'], video_id, headers=headers)
|
||||||
for f in formats:
|
|
||||||
f['http_headers'] = self._HTTP_HEADERS
|
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
'http_headers': headers,
|
||||||
})
|
})
|
||||||
if media_id := traverse_obj(post_file, ('media_id', {int}, {str_or_none})):
|
|
||||||
seen_media_ids.add(media_id)
|
|
||||||
|
|
||||||
for media_id in traverse_obj(attributes, (
|
|
||||||
'content', {find_elements(attr='data-media-id', value=r'\d+', regex=True, html=True)},
|
|
||||||
..., {extract_attributes}, 'data-media-id',
|
|
||||||
)):
|
|
||||||
# Inlined media may be duplicates of what was extracted above
|
|
||||||
if media_id in seen_media_ids:
|
|
||||||
continue
|
|
||||||
if media := self._extract_from_media_api(media_id):
|
|
||||||
entries.append(media)
|
|
||||||
seen_media_ids.add(media_id)
|
|
||||||
|
|
||||||
can_view_post = traverse_obj(attributes, 'current_user_can_view')
|
can_view_post = traverse_obj(attributes, 'current_user_can_view')
|
||||||
comments = None
|
comments = None
|
||||||
|
|||||||
@ -453,23 +453,6 @@ class PBSIE(InfoExtractor):
|
|||||||
'url': 'https://player.pbs.org/portalplayer/3004638221/?uid=',
|
'url': 'https://player.pbs.org/portalplayer/3004638221/?uid=',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
# Next.js v13+, see https://github.com/yt-dlp/yt-dlp/issues/13299
|
|
||||||
'url': 'https://www.pbs.org/video/caregiving',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '3101776876',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Caregiving - Caregiving',
|
|
||||||
'description': 'A documentary revealing America’s caregiving crisis through intimate stories and expert insight.',
|
|
||||||
'display_id': 'caregiving',
|
|
||||||
'duration': 6783,
|
|
||||||
'thumbnail': 'https://image.pbs.org/video-assets/BSrSkcc-asset-mezzanine-16x9-nlcxQts.jpg',
|
|
||||||
'chapters': [],
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
]
|
]
|
||||||
_ERRORS = {
|
_ERRORS = {
|
||||||
101: 'We\'re sorry, but this video is not yet available.',
|
101: 'We\'re sorry, but this video is not yet available.',
|
||||||
@ -523,7 +506,6 @@ class PBSIE(InfoExtractor):
|
|||||||
r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',",
|
r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',",
|
||||||
r'<div[^>]+\bdata-cove-id=["\'](\d+)"', # http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/
|
r'<div[^>]+\bdata-cove-id=["\'](\d+)"', # http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/
|
||||||
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//video\.pbs\.org/widget/partnerplayer/(\d+)', # https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/
|
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//video\.pbs\.org/widget/partnerplayer/(\d+)', # https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/
|
||||||
r'\\"videoTPMediaId\\":\\\"(\d+)\\"', # Next.js v13, e.g. https://www.pbs.org/video/caregiving
|
|
||||||
r'\bhttps?://player\.pbs\.org/[\w-]+player/(\d+)', # last pattern to avoid false positives
|
r'\bhttps?://player\.pbs\.org/[\w-]+player/(\d+)', # last pattern to avoid false positives
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@ -1,167 +1,40 @@
|
|||||||
import functools
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
|
||||||
InAdvancePagedList,
|
|
||||||
int_or_none,
|
|
||||||
join_nonempty,
|
|
||||||
orderedSet,
|
|
||||||
str_or_none,
|
|
||||||
strftime_or_none,
|
|
||||||
unified_timestamp,
|
|
||||||
url_or_none,
|
|
||||||
)
|
|
||||||
from ..utils.traversal import (
|
|
||||||
require,
|
|
||||||
traverse_obj,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class VolejTVBaseIE(InfoExtractor):
|
class VolejTVIE(InfoExtractor):
|
||||||
TBR_HEIGHT_MAPPING = {
|
_VALID_URL = r'https?://volej\.tv/video/(?P<id>\d+)'
|
||||||
'6000': 1080,
|
|
||||||
'2400': 720,
|
|
||||||
'1500': 480,
|
|
||||||
'800': 360,
|
|
||||||
}
|
|
||||||
|
|
||||||
def _call_api(self, endpoint, display_id, query=None):
|
|
||||||
return self._download_json(
|
|
||||||
f'https://api-volejtv-prod.apps.okd4.devopsie.cloud/api/{endpoint}',
|
|
||||||
display_id, query=query)
|
|
||||||
|
|
||||||
|
|
||||||
class VolejTVIE(VolejTVBaseIE):
|
|
||||||
IE_NAME = 'volejtv:match'
|
|
||||||
_VALID_URL = r'https?://volej\.tv/match/(?P<id>\d+)'
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://volej.tv/match/270579',
|
'url': 'https://volej.tv/video/725742/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '270579',
|
'id': '725742',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'SWE-CZE (2024-06-16)',
|
'description': 'Zápas VK Královo Pole vs VK Prostějov 10.12.2022 v 19:00 na Volej.TV',
|
||||||
'categories': ['ženy'],
|
'thumbnail': 'https://volej.tv/images/og/16/17186/og.png',
|
||||||
'series': 'ZLATÁ EVROPSKÁ VOLEJBALOVÁ LIGA',
|
'title': 'VK Královo Pole vs VK Prostějov',
|
||||||
'season': '2023-2024',
|
|
||||||
'timestamp': 1718553600,
|
|
||||||
'upload_date': '20240616',
|
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://volej.tv/match/487520',
|
'url': 'https://volej.tv/video/725605/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '487520',
|
'id': '725605',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'thumbnail': r're:https://.+\.(png|jpeg)',
|
'thumbnail': 'https://volej.tv/images/og/15/17185/og.png',
|
||||||
'title': 'FRA-CZE (2024-09-06)',
|
'title': 'VK Lvi Praha vs VK Euro Sitex Příbram',
|
||||||
'categories': ['mládež'],
|
'description': 'Zápas VK Lvi Praha vs VK Euro Sitex Příbram 11.12.2022 v 19:00 na Volej.TV',
|
||||||
'series': 'Mistrovství Evropy do 20 let',
|
|
||||||
'season': '2024-2025',
|
|
||||||
'timestamp': 1725627600,
|
|
||||||
'upload_date': '20240906',
|
|
||||||
|
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
json_data = self._call_api(f'match/{video_id}', video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
json_data = self._search_json(
|
||||||
formats = []
|
r'<\s*!\[CDATA[^=]+=', webpage, 'CDATA', video_id)
|
||||||
for video in traverse_obj(json_data, ('videos', 0, 'qualities', lambda _, v: url_or_none(v['cloud_front_path']))):
|
formats, subtitle = self._extract_m3u8_formats_and_subtitles(
|
||||||
formats.append(traverse_obj(video, {
|
json_data['urls']['hls'], video_id)
|
||||||
'url': 'cloud_front_path',
|
return {
|
||||||
'tbr': ('quality', {int_or_none}),
|
|
||||||
'format_id': ('id', {str_or_none}),
|
|
||||||
'height': ('quality', {self.TBR_HEIGHT_MAPPING.get}),
|
|
||||||
}))
|
|
||||||
|
|
||||||
data = {
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
**traverse_obj(json_data, {
|
'title': self._html_search_meta(['og:title', 'twitter:title'], webpage),
|
||||||
'series': ('competition_name', {str}),
|
'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage),
|
||||||
'season': ('season', {str}),
|
'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage),
|
||||||
'timestamp': ('match_time', {unified_timestamp}),
|
|
||||||
'categories': ('category', ('title'), {str}, filter, all, filter),
|
|
||||||
'thumbnail': ('poster', {url_or_none}),
|
|
||||||
}),
|
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'subtitles': subtitle,
|
||||||
}
|
}
|
||||||
|
|
||||||
teams = orderedSet(traverse_obj(json_data, ('teams', ..., 'shortcut', {str})))
|
|
||||||
if len(teams) > 2 and 'FIN' in teams:
|
|
||||||
teams.remove('FIN')
|
|
||||||
|
|
||||||
data['title'] = join_nonempty(
|
|
||||||
join_nonempty(*teams, delim='-'),
|
|
||||||
strftime_or_none(data.get('timestamp'), '(%Y-%m-%d)'),
|
|
||||||
delim=' ')
|
|
||||||
|
|
||||||
return data
|
|
||||||
|
|
||||||
|
|
||||||
class VolejTVPlaylistBaseIE(VolejTVBaseIE):
|
|
||||||
"""Subclasses must set _API_FILTER, _PAGE_SIZE"""
|
|
||||||
|
|
||||||
def _get_page(self, playlist_id, page):
|
|
||||||
return self._call_api(
|
|
||||||
f'match/{self._API_FILTER}/{playlist_id}', playlist_id,
|
|
||||||
query={'page': page + 1, 'take': self._PAGE_SIZE, 'order': 'DESC'})
|
|
||||||
|
|
||||||
def _entries(self, playlist_id, first_page_data, page):
|
|
||||||
entries = first_page_data if page == 0 else self._get_page(playlist_id, page)
|
|
||||||
for match_id in traverse_obj(entries, ('data', ..., 'id')):
|
|
||||||
yield self.url_result(f'https://volej.tv/match/{match_id}', VolejTVIE)
|
|
||||||
|
|
||||||
|
|
||||||
class VolejTVClubPlaylistIE(VolejTVPlaylistBaseIE):
|
|
||||||
IE_NAME = 'volejtv:club'
|
|
||||||
_VALID_URL = r'https?://volej\.tv/klub/(?P<id>\d+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://volej.tv/klub/1173',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '1173',
|
|
||||||
'title': 'VK Jihostroj České Budějovice',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 30,
|
|
||||||
}]
|
|
||||||
_API_FILTER = 'by-team-id-paginated'
|
|
||||||
_PAGE_SIZE = 6
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
playlist_id = self._match_id(url)
|
|
||||||
title = self._call_api(f'team/show/{playlist_id}', playlist_id)['title']
|
|
||||||
first_page_data = self._get_page(playlist_id, 0)
|
|
||||||
total_pages = traverse_obj(first_page_data, ('meta', 'pageCount', {int}, {require('page count')}))
|
|
||||||
return self.playlist_result(InAdvancePagedList(
|
|
||||||
functools.partial(self._entries, playlist_id, first_page_data),
|
|
||||||
total_pages, self._PAGE_SIZE), playlist_id, title)
|
|
||||||
|
|
||||||
|
|
||||||
class VolejTVCategoryPlaylistIE(VolejTVPlaylistBaseIE):
|
|
||||||
IE_NAME = 'volejtv:category'
|
|
||||||
_VALID_URL = r'https?://volej\.tv/kategorie/(?P<id>[^/$?]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://volej.tv/kategorie/chance-cesky-pohar',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'chance-cesky-pohar',
|
|
||||||
'title': 'Chance Český pohár',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 30,
|
|
||||||
}]
|
|
||||||
_API_FILTER = 'by-category-id-paginated'
|
|
||||||
_PAGE_SIZE = 10
|
|
||||||
|
|
||||||
def _get_category(self, playlist_id):
|
|
||||||
categories = self._call_api('category', playlist_id)
|
|
||||||
for category in traverse_obj(categories, (lambda _, v: v['slug'] and v['id'] and v['title'])):
|
|
||||||
if category['slug'] == playlist_id:
|
|
||||||
return category['id'], category['title']
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
playlist_id = self._match_id(url)
|
|
||||||
category_id, title = self._get_category(playlist_id)
|
|
||||||
first_page_data = self._get_page(category_id, 0)
|
|
||||||
total_pages = traverse_obj(first_page_data, ('meta', 'pageCount', {int}, {require('page count')}))
|
|
||||||
return self.playlist_result(InAdvancePagedList(
|
|
||||||
functools.partial(self._entries, category_id, first_page_data),
|
|
||||||
total_pages, self._PAGE_SIZE), playlist_id, title)
|
|
||||||
|
|||||||
@ -76,7 +76,7 @@ class WatIE(InfoExtractor):
|
|||||||
if error_code == 'GEOBLOCKED':
|
if error_code == 'GEOBLOCKED':
|
||||||
self.raise_geo_restricted(error_desc, video_info.get('geoList'))
|
self.raise_geo_restricted(error_desc, video_info.get('geoList'))
|
||||||
elif error_code == 'DELIVERY_ERROR':
|
elif error_code == 'DELIVERY_ERROR':
|
||||||
if traverse_obj(video_data, ('delivery', 'code')) in (403, 500):
|
if traverse_obj(video_data, ('delivery', 'code')) == 500:
|
||||||
self.report_drm(video_id)
|
self.report_drm(video_id)
|
||||||
error_desc = join_nonempty(
|
error_desc = join_nonempty(
|
||||||
error_desc, traverse_obj(video_data, ('delivery', 'error', {str})), delim=': ')
|
error_desc, traverse_obj(video_data, ('delivery', 'error', {str})), delim=': ')
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user