Compare commits

..

3 Commits

Author SHA1 Message Date
Link
a13a336aa6
[ie/bilibili] Fix subtitles and chapters extraction (#11708)
Authored by: xiaomac
2024-12-03 04:08:46 +00:00
N/Ame
dc16876480
[ie/bilibili] Always try to extract HD formats (#10559)
Closes #10554
Authored by: grqz
2024-12-03 03:44:03 +00:00
N/Ame
f05a1cd149
[ie/bilibili] Fix supporter-only video extraction (#11711)
Fix bug in 239f5f36fe04603bec59c8b975f6a792f10246db
Closes #11702
Authored by: grqz, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-12-03 01:19:22 +00:00

View File

@ -62,7 +62,7 @@ class BilibiliBaseIE(InfoExtractor):
'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ') 'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ')
if missing_formats: if missing_formats:
self.to_screen( self.to_screen(
f'Format(s) {missing_formats} are missing; you have to login or ' f'Format(s) {missing_formats} are missing; you have to '
f'become a premium member to download them. {self._login_hint()}') f'become a premium member to download them. {self._login_hint()}')
def extract_formats(self, play_info): def extract_formats(self, play_info):
@ -164,14 +164,18 @@ class BilibiliBaseIE(InfoExtractor):
params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest() params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
return params return params
def _download_playinfo(self, bvid, cid, headers=None, qn=None): def _download_playinfo(self, bvid, cid, headers=None, query=None):
params = {'bvid': bvid, 'cid': cid, 'fnval': 4048} params = {'bvid': bvid, 'cid': cid, 'fnval': 4048, **(query or {})}
if qn: if self.is_logged_in:
params['qn'] = qn params.pop('try_look', None)
if qn := params.get('qn'):
note = f'Downloading video format {qn} for cid {cid}'
else:
note = f'Downloading video formats for cid {cid}'
return self._download_json( return self._download_json(
'https://api.bilibili.com/x/player/wbi/playurl', bvid, 'https://api.bilibili.com/x/player/wbi/playurl', bvid,
query=self._sign_wbi(params, bvid), headers=headers, query=self._sign_wbi(params, bvid), headers=headers, note=note)['data']
note=f'Downloading video formats for cid {cid} {qn or ""}')['data']
def json2srt(self, json_data): def json2srt(self, json_data):
srt_data = '' srt_data = ''
@ -190,7 +194,7 @@ class BilibiliBaseIE(InfoExtractor):
} }
video_info = self._download_json( video_info = self._download_json(
'https://api.bilibili.com/x/player/v2', video_id, 'https://api.bilibili.com/x/player/wbi/v2', video_id,
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid}, query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
note=f'Extracting subtitle info {cid}', headers=self._HEADERS) note=f'Extracting subtitle info {cid}', headers=self._HEADERS)
if traverse_obj(video_info, ('data', 'need_login_subtitle')): if traverse_obj(video_info, ('data', 'need_login_subtitle')):
@ -206,7 +210,7 @@ class BilibiliBaseIE(InfoExtractor):
def _get_chapters(self, aid, cid): def _get_chapters(self, aid, cid):
chapters = aid and cid and self._download_json( chapters = aid and cid and self._download_json(
'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid}, 'https://api.bilibili.com/x/player/wbi/v2', aid, query={'aid': aid, 'cid': cid},
note='Extracting chapters', fatal=False, headers=self._HEADERS) note='Extracting chapters', fatal=False, headers=self._HEADERS)
return traverse_obj(chapters, ('data', 'view_points', ..., { return traverse_obj(chapters, ('data', 'view_points', ..., {
'title': 'content', 'title': 'content',
@ -285,7 +289,7 @@ class BilibiliBaseIE(InfoExtractor):
('data', 'interaction', 'graph_version', {int_or_none})) ('data', 'interaction', 'graph_version', {int_or_none}))
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1) cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
for cid, edges in cid_edges.items(): for cid, edges in cid_edges.items():
play_info = self._download_playinfo(video_id, cid, headers=headers) play_info = self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1})
yield { yield {
**metainfo, **metainfo,
'id': f'{video_id}_{cid}', 'id': f'{video_id}_{cid}',
@ -652,13 +656,6 @@ class BiliBiliIE(BilibiliBaseIE):
else: else:
video_data = initial_state['videoData'] video_data = initial_state['videoData']
if video_data.get('is_upower_exclusive'):
high_level = traverse_obj(initial_state, ('elecFullInfo', 'show_info', 'high_level', {dict})) or {}
raise ExtractorError(
'This is a supporter-only video: '
f'{join_nonempty("title", "sub_title", from_dict=high_level, delim="")}. '
f'{self._login_hint()}', expected=True)
video_id, title = video_data['bvid'], video_data.get('title') video_id, title = video_data['bvid'], video_data.get('title')
# Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself. # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
@ -688,7 +685,7 @@ class BiliBiliIE(BilibiliBaseIE):
traverse_obj( traverse_obj(
self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id, default=None), self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id, default=None),
('data', {dict})) ('data', {dict}))
or self._download_playinfo(video_id, cid, headers=headers)) or self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1}))
festival_info = {} festival_info = {}
if is_festival: if is_festival:
@ -726,15 +723,25 @@ class BiliBiliIE(BilibiliBaseIE):
self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo, self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo,
duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})), duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
__post_extractor=self.extract_comments(aid)) __post_extractor=self.extract_comments(aid))
else:
formats = self.extract_formats(play_info) formats = self.extract_formats(play_info)
if not traverse_obj(play_info, ('dash')): if video_data.get('is_upower_exclusive'):
high_level = traverse_obj(initial_state, ('elecFullInfo', 'show_info', 'high_level', {dict})) or {}
msg = f'{join_nonempty("title", "sub_title", from_dict=high_level, delim="")}. {self._login_hint()}'
if not formats:
raise ExtractorError(f'This is a supporter-only video: {msg}', expected=True)
if '试看' in traverse_obj(play_info, ('accept_description', ..., {str})):
self.report_warning(
f'This is a supporter-only video, only the preview will be extracted: {msg}',
video_id=video_id)
if not traverse_obj(play_info, 'dash'):
# we only have legacy formats and need additional work # we only have legacy formats and need additional work
has_qn = lambda x: x in traverse_obj(formats, (..., 'quality')) has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})): for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
formats.extend(traverse_obj( formats.extend(traverse_obj(
self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)), self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, query={'qn': qn})),
lambda _, v: not has_qn(v['quality']))) lambda _, v: not has_qn(v['quality'])))
self._check_missing_formats(play_info, formats) self._check_missing_formats(play_info, formats)
flv_formats = traverse_obj(formats, lambda _, v: v['fragments']) flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
@ -773,7 +780,7 @@ class BiliBiliIE(BilibiliBaseIE):
} for idx, fragment in enumerate(formats[0]['fragments'])], } for idx, fragment in enumerate(formats[0]['fragments'])],
'duration': float_or_none(play_info.get('timelength'), scale=1000), 'duration': float_or_none(play_info.get('timelength'), scale=1000),
} }
else:
return { return {
**metainfo, **metainfo,
'formats': formats, 'formats': formats,
@ -859,10 +866,16 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
self.raise_login_required('This video is for premium members only') self.raise_login_required('This video is for premium members only')
headers['Referer'] = url headers['Referer'] = url
play_info = self._download_json(
play_info = (
self._search_json(
r'playurlSSRData\s*=', webpage, 'embedded page info', episode_id,
end_pattern='\n', default=None)
or self._download_json(
'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id, 'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id}, 'Extracting episode', query={'fnval': 12240, 'ep_id': episode_id},
headers=headers) headers=headers))
premium_only = play_info.get('code') == -10403 premium_only = play_info.get('code') == -10403
play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {} play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}