mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-04-22 00:29:50 +00:00
Merge branch 'master' into pr/11915
This commit is contained in:
commit
4ce2a4fa0b
@ -1326,7 +1326,7 @@ class YoutubeDL:
|
|||||||
elif (sys.platform != 'win32' and not self.params.get('restrictfilenames')
|
elif (sys.platform != 'win32' and not self.params.get('restrictfilenames')
|
||||||
and self.params.get('windowsfilenames') is False):
|
and self.params.get('windowsfilenames') is False):
|
||||||
def sanitize(key, value):
|
def sanitize(key, value):
|
||||||
return value.replace('/', '\u29F8').replace('\0', '')
|
return str(value).replace('/', '\u29F8').replace('\0', '')
|
||||||
else:
|
else:
|
||||||
def sanitize(key, value):
|
def sanitize(key, value):
|
||||||
return filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames'))
|
return filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames'))
|
||||||
|
|||||||
@ -261,9 +261,11 @@ def validate_options(opts):
|
|||||||
elif value in ('inf', 'infinite'):
|
elif value in ('inf', 'infinite'):
|
||||||
return float('inf')
|
return float('inf')
|
||||||
try:
|
try:
|
||||||
return int(value)
|
int_value = int(value)
|
||||||
except (TypeError, ValueError):
|
except (TypeError, ValueError):
|
||||||
validate(False, f'{name} retry count', value)
|
validate(False, f'{name} retry count', value)
|
||||||
|
validate_positive(f'{name} retry count', int_value)
|
||||||
|
return int_value
|
||||||
|
|
||||||
opts.retries = parse_retries('download', opts.retries)
|
opts.retries = parse_retries('download', opts.retries)
|
||||||
opts.fragment_retries = parse_retries('fragment', opts.fragment_retries)
|
opts.fragment_retries = parse_retries('fragment', opts.fragment_retries)
|
||||||
|
|||||||
@ -1551,6 +1551,7 @@ from .pluralsight import (
|
|||||||
PluralsightIE,
|
PluralsightIE,
|
||||||
)
|
)
|
||||||
from .plutotv import PlutoTVIE
|
from .plutotv import PlutoTVIE
|
||||||
|
from .plvideo import PlVideoIE
|
||||||
from .podbayfm import (
|
from .podbayfm import (
|
||||||
PodbayFMChannelIE,
|
PodbayFMChannelIE,
|
||||||
PodbayFMIE,
|
PodbayFMIE,
|
||||||
|
|||||||
130
yt_dlp/extractor/plvideo.py
Normal file
130
yt_dlp/extractor/plvideo.py
Normal file
@ -0,0 +1,130 @@
|
|||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
parse_resolution,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class PlVideoIE(InfoExtractor):
    # Extractor for plvideo.ru watch pages (`/watch?v=<id>`) and shorts (`/shorts/<id>`).
    IE_DESC = 'Платформа'
    _VALID_URL = r'https?://(?:www\.)?plvideo\.ru/(?:watch\?(?:[^#]+&)?v=|shorts/)(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://plvideo.ru/watch?v=Y5JzUzkcQTMK',
        'md5': 'fe8e18aca892b3b31f3bf492169f8a26',
        'info_dict': {
            'id': 'Y5JzUzkcQTMK',
            'ext': 'mp4',
            'thumbnail': 'https://img.plvideo.ru/images/fp-2024-images/v/cover/37/dd/37dd00a4c96c77436ab737e85947abd7/original663a4a3bb713e5.33151959.jpg',
            'title': 'Presidente de Cuba llega a Moscú en una visita de trabajo',
            'channel': 'RT en Español',
            'channel_id': 'ZH4EKqunVDvo',
            'media_type': 'video',
            'comment_count': int,
            'tags': ['rusia', 'cuba', 'russia', 'miguel díaz-canel'],
            'description': 'md5:a1a395d900d77a86542a91ee0826c115',
            'released_timestamp': 1715096124,
            'channel_is_verified': True,
            'like_count': int,
            'timestamp': 1715095911,
            'duration': 44320,
            'view_count': int,
            'dislike_count': int,
            'upload_date': '20240507',
            'modified_date': '20240701',
            'channel_follower_count': int,
            'modified_timestamp': 1719824073,
        },
    }, {
        'url': 'https://plvideo.ru/shorts/S3Uo9c-VLwFX',
        'md5': '7d8fa2279406c69d2fd2a6fc548a9805',
        'info_dict': {
            'id': 'S3Uo9c-VLwFX',
            'ext': 'mp4',
            'channel': 'Romaatom',
            'tags': 'count:22',
            'dislike_count': int,
            'upload_date': '20241130',
            'description': 'md5:452e6de219bf2f32bb95806c51c3b364',
            'duration': 58433,
            'modified_date': '20241130',
            'thumbnail': 'https://img.plvideo.ru/images/fp-2024-11-cover/S3Uo9c-VLwFX/f9318999-a941-482b-b700-2102a7049366.jpg',
            'media_type': 'shorts',
            'like_count': int,
            'modified_timestamp': 1732961458,
            'channel_is_verified': True,
            'channel_id': 'erJyyTIbmUd1',
            'timestamp': 1732961355,
            'comment_count': int,
            'title': 'Белоусов отменил приказы о кадровом резерве на гражданской службе',
            'channel_follower_count': int,
            'view_count': int,
            'released_timestamp': 1732961458,
        },
    }]

    def _real_extract(self, url):
        """
        Fetch the video JSON from the plvideo API and build the info dict.

        @param url: page URL matching _VALID_URL
        @return: info dict with formats, subtitles, automatic captions and metadata
        """
        video_id = self._match_id(url)

        video_data = self._download_json(
            f'https://api.g1.plvideo.ru/v1/videos/{video_id}?Aud=18', video_id)

        is_live = False
        formats = []
        subtitles = {}
        automatic_captions = {}

        # One format per entry of item.profiles that carries a valid 'hls' URL;
        # the profile key doubles as format_id and is fed to parse_resolution()
        for quality, data in traverse_obj(video_data, ('item', 'profiles', {dict.items}, lambda _, v: url_or_none(v[1]['hls']))):
            formats.append({
                'format_id': quality,
                'ext': 'mp4',
                'protocol': 'm3u8_native',
                **traverse_obj(data, {
                    'url': 'hls',
                    'fps': ('fps', {float_or_none}),
                    'aspect_ratio': ('aspectRatio', {float_or_none}),
                }),
                **parse_resolution(quality),
            })

        # A livestream URL marks the item as live and contributes its own m3u8 formats
        if livestream_url := traverse_obj(video_data, ('item', 'livestream', 'url', {url_or_none})):
            is_live = True
            formats.extend(self._extract_m3u8_formats(livestream_url, video_id, 'mp4', live=True))

        # Language keys ending in '-auto' are filed as automatic captions under
        # the key with that suffix removed; everything else is a regular subtitle.
        # `sub_url` is used so the `url` parameter is not shadowed.
        for lang, sub_url in traverse_obj(video_data, ('item', 'subtitles', {dict.items}, lambda _, v: url_or_none(v[1]))):
            if lang.endswith('-auto'):
                automatic_captions.setdefault(lang[:-5], []).append({
                    'url': sub_url,
                })
            else:
                subtitles.setdefault(lang, []).append({
                    'url': sub_url,
                })

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'automatic_captions': automatic_captions,
            'is_live': is_live,
            **traverse_obj(video_data, ('item', {
                'id': ('id', {str}),
                'title': ('title', {str}),
                'description': ('description', {str}),
                'thumbnail': ('cover', 'paths', 'original', 'src', {url_or_none}),
                # NOTE(review): the test values (44320, 58433) suggest videoDuration
                # may be in milliseconds rather than seconds - confirm against the API
                'duration': ('uploadFile', 'videoDuration', {int_or_none}),
                'channel': ('channel', 'name', {str}),
                'channel_id': ('channel', 'id', {str}),
                'channel_follower_count': ('channel', 'stats', 'subscribers', {int_or_none}),
                'channel_is_verified': ('channel', 'verified', {bool}),
                'tags': ('tags', ..., {str}),
                'timestamp': ('createdAt', {parse_iso8601}),
                'released_timestamp': ('publishedAt', {parse_iso8601}),
                'modified_timestamp': ('updatedAt', {parse_iso8601}),
                'view_count': ('stats', 'viewTotalCount', {int_or_none}),
                'like_count': ('stats', 'likeCount', {int_or_none}),
                'dislike_count': ('stats', 'dislikeCount', {int_or_none}),
                'comment_count': ('stats', 'commentCount', {int_or_none}),
                'media_type': ('type', {str}),
            })),
        }
|
||||||
@ -10,7 +10,7 @@ from ..utils.traversal import traverse_obj
|
|||||||
|
|
||||||
|
|
||||||
class XiaoHongShuIE(InfoExtractor):
|
class XiaoHongShuIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.xiaohongshu\.com/explore/(?P<id>[\da-f]+)'
|
_VALID_URL = r'https?://www\.xiaohongshu\.com/(?:explore|discovery/item)/(?P<id>[\da-f]+)'
|
||||||
IE_DESC = '小红书'
|
IE_DESC = '小红书'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.xiaohongshu.com/explore/6411cf99000000001300b6d9',
|
'url': 'https://www.xiaohongshu.com/explore/6411cf99000000001300b6d9',
|
||||||
@ -25,6 +25,18 @@ class XiaoHongShuIE(InfoExtractor):
|
|||||||
'duration': 101.726,
|
'duration': 101.726,
|
||||||
'thumbnail': r're:https?://sns-webpic-qc\.xhscdn\.com/\d+/[a-z0-9]+/[\w]+',
|
'thumbnail': r're:https?://sns-webpic-qc\.xhscdn\.com/\d+/[a-z0-9]+/[\w]+',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.xiaohongshu.com/discovery/item/674051740000000007027a15?xsec_token=CBgeL8Dxd1ZWBhwqRd568gAZ_iwG-9JIf9tnApNmteU2E=',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '674051740000000007027a15',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '相互喜欢就可以了',
|
||||||
|
'uploader_id': '63439913000000001901f49a',
|
||||||
|
'duration': 28.073,
|
||||||
|
'description': '#广州[话题]# #深圳[话题]# #香港[话题]# #街头采访[话题]# #是你喜欢的类型[话题]#',
|
||||||
|
'thumbnail': r're:https?://sns-webpic-qc\.xhscdn\.com/\d+/[\da-f]+/[^/]+',
|
||||||
|
'tags': ['广州', '深圳', '香港', '街头采访', '是你喜欢的类型'],
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|||||||
@ -32,7 +32,6 @@ from ..utils import (
|
|||||||
classproperty,
|
classproperty,
|
||||||
clean_html,
|
clean_html,
|
||||||
datetime_from_str,
|
datetime_from_str,
|
||||||
dict_get,
|
|
||||||
filesize_from_tbr,
|
filesize_from_tbr,
|
||||||
filter_dict,
|
filter_dict,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
@ -256,11 +255,12 @@ INNERTUBE_CLIENTS = {
|
|||||||
'client': {
|
'client': {
|
||||||
'clientName': 'MWEB',
|
'clientName': 'MWEB',
|
||||||
'clientVersion': '2.20241202.07.00',
|
'clientVersion': '2.20241202.07.00',
|
||||||
# mweb does not require PO Token with this UA
|
# mweb previously did not require PO Token with this UA
|
||||||
'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
|
'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
|
'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
|
||||||
|
'REQUIRE_PO_TOKEN': True,
|
||||||
'SUPPORTS_COOKIES': True,
|
'SUPPORTS_COOKIES': True,
|
||||||
},
|
},
|
||||||
'tv': {
|
'tv': {
|
||||||
@ -567,9 +567,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})
|
pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})
|
||||||
self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
|
self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
|
||||||
|
|
||||||
|
def _initialize_cookie_auth(self):
    """Write a debug message if any of the values returned by _get_sid_cookies() is set"""
    yt_sapisid, yt_1papisid, yt_3papisid = self._get_sid_cookies()
    if yt_sapisid or yt_1papisid or yt_3papisid:
        self.write_debug('Found YouTube account cookies')
|
||||||
|
|
||||||
def _real_initialize(self):
    """Run the initialization steps in order: pref, consent, cookie auth, then the login-required check"""
    self._initialize_pref()
    self._initialize_consent()
    self._initialize_cookie_auth()
    self._check_login_required()
|
||||||
|
|
||||||
def _perform_login(self, username, password):
|
def _perform_login(self, username, password):
|
||||||
@ -627,32 +633,63 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
client_context.update({'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
|
client_context.update({'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
|
||||||
return context
|
return context
|
||||||
|
|
||||||
_SAPISID = None
|
@staticmethod
def _make_sid_authorization(scheme, sid, origin, additional_parts):
    """
    Build a single "<scheme> <timestamp>_<sha1>[_<flags>]" authorization value.

    The SHA-1 input is the space-joined sequence
    [<additional values joined by ':'>,] timestamp, sid, origin.

    @param scheme: Authorization scheme name placed before the value
    @param sid: Session ID cookie value that is hashed
    @param origin: Origin URL that is hashed
    @param additional_parts: dict of extra parts; its values go into the hash,
                             its keys are concatenated into the trailing flags segment
    @return: Authorization value string
    """
    timestamp = str(round(time.time()))

    hash_parts = []
    if additional_parts:
        hash_parts.append(':'.join(additional_parts.values()))
    hash_parts.extend([timestamp, sid, origin])
    sidhash = hashlib.sha1(' '.join(hash_parts).encode()).hexdigest()

    parts = [timestamp, sidhash]
    if additional_parts:
        # Iterating the dict yields its keys, so only the key names end up here
        parts.append(''.join(additional_parts))

    return f'{scheme} {"_".join(parts)}'
|
||||||
# SAPISID cookie is required if not already present
|
|
||||||
if not yt_cookies.get('SAPISID'):
|
def _get_sid_cookies(self):
    """
    Get the SAPISID, __Secure-1PAPISID and __Secure-3PAPISID cookie values
    from the cookies for https://www.youtube.com

    @returns sapisid, 1papisid, 3papisid
             (sapisid falls back to the __Secure-3PAPISID value when SAPISID is unset)
    """
    yt_cookies = self._get_cookies('https://www.youtube.com')
    # try_call presumably yields None when the lookup fails (cookie absent) - verify against utils
    yt_sapisid = try_call(lambda: yt_cookies['SAPISID'].value)
    yt_3papisid = try_call(lambda: yt_cookies['__Secure-3PAPISID'].value)
    yt_1papisid = try_call(lambda: yt_cookies['__Secure-1PAPISID'].value)

    # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
    # YouTube also falls back to __Secure-3PAPISID if SAPISID is missing.
    # See: https://github.com/yt-dlp/yt-dlp/issues/393

    return yt_sapisid or yt_3papisid, yt_1papisid, yt_3papisid
|
||||||
|
|
||||||
|
def _get_sid_authorization_header(self, origin='https://www.youtube.com', user_session_id=None):
    """
    Generate API Session ID Authorization for Innertube requests. Assumes all requests are secure (https).
    @param origin: Origin URL; an explicit None is treated like the default origin
    @param user_session_id: Optional User Session ID
    @return: Authorization header value, or None if no session ID cookie is available
    """
    if origin is None:
        # Callers may forward an unset origin; str.join() in the hash input
        # would raise TypeError on None, so fall back to the default origin
        origin = 'https://www.youtube.com'

    authorizations = []
    additional_parts = {}
    if user_session_id:
        additional_parts['u'] = user_session_id

    yt_sapisid, yt_1papisid, yt_3papisid = self._get_sid_cookies()

    # One authorization value per available cookie, joined by spaces below
    for scheme, sid in (('SAPISIDHASH', yt_sapisid),
                        ('SAPISID1PHASH', yt_1papisid),
                        ('SAPISID3PHASH', yt_3papisid)):
        if sid:
            authorizations.append(self._make_sid_authorization(scheme, sid, origin, additional_parts))

    if not authorizations:
        return None

    return ' '.join(authorizations)
|
|
||||||
|
|
||||||
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
|
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
|
||||||
note='Downloading API JSON', errnote='Unable to download API page',
|
note='Downloading API JSON', errnote='Unable to download API page',
|
||||||
@ -688,26 +725,48 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
if session_index is not None:
|
if session_index is not None:
|
||||||
return session_index
|
return session_index
|
||||||
|
|
||||||
@staticmethod
def _parse_data_sync_id(data_sync_id):
    """
    Parse data_sync_id into delegated_session_id and user_session_id.

    data_sync_id is of the form "delegated_session_id||user_session_id" for secondary channel
    and just "user_session_id||" for primary channel.

    @param data_sync_id: data_sync_id string
    @return: Tuple of (delegated_session_id, user_session_id)
    """
    if not data_sync_id:
        return None, None
    first, _, second = data_sync_id.partition('||')
    if second:
        return first, second
    # Nothing after the separator (or no separator at all): the leading part is the user session ID
    return None, first
|
||||||
|
|
||||||
|
def _extract_delegated_session_id(self, *args):
    """
    Extract current delegated session ID required to download private playlists of secondary channels
    @params response and/or ytcfg
    @return: delegated session ID
    """
    # Prefer an explicit DELEGATED_SESSION_ID string found in any of the given objects
    if delegated_sid := traverse_obj(args, (..., 'DELEGATED_SESSION_ID', {str}, any)):
        return delegated_sid

    # Otherwise take the first element parsed out of the data sync ID
    data_sync_id = self._extract_data_sync_id(*args)
    return self._parse_data_sync_id(data_sync_id)[0]
|
||||||
|
|
||||||
|
def _extract_user_session_id(self, *args):
    """
    Extract current user session ID
    @params response and/or ytcfg
    @return: user session ID
    """
    # Prefer an explicit USER_SESSION_ID string found in any of the given objects
    if user_sid := traverse_obj(args, (..., 'USER_SESSION_ID', {str}, any)):
        return user_sid

    # Otherwise take the second element parsed out of the data sync ID
    data_sync_id = self._extract_data_sync_id(*args)
    return self._parse_data_sync_id(data_sync_id)[1]
|
||||||
|
|
||||||
def _extract_data_sync_id(self, *args):
|
def _extract_data_sync_id(self, *args):
|
||||||
"""
|
"""
|
||||||
@ -734,7 +793,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
@functools.cached_property
def is_authenticated(self):
    """Whether _get_sid_authorization_header() yields a non-empty Authorization value"""
    return bool(self._get_sid_authorization_header())
|
||||||
|
|
||||||
def extract_ytcfg(self, video_id, webpage):
|
def extract_ytcfg(self, video_id, webpage):
|
||||||
if not webpage:
|
if not webpage:
|
||||||
@ -744,25 +803,28 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
|
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
|
||||||
default='{}'), video_id, fatal=False) or {}
|
default='{}'), video_id, fatal=False) or {}
|
||||||
|
|
||||||
def _generate_cookie_auth_headers(self, *, ytcfg=None, delegated_session_id=None, user_session_id=None, session_index=None, origin=None, **kwargs):
    """
    Build the cookie-authentication related request headers.

    @param ytcfg: ytcfg used as fallback source for the session IDs, session index and LOGGED_IN flag
    @param delegated_session_id: Optional delegated session ID (sent as X-Goog-PageId)
    @param user_session_id: Optional user session ID (passed to the Authorization generation)
    @param session_index: Optional session index (sent as X-Goog-AuthUser)
    @param origin: Origin URL used for the Authorization value and sent as X-Origin
    @return: dict of headers; empty if nothing applies
    """
    headers = {}

    delegated_session_id = delegated_session_id or self._extract_delegated_session_id(ytcfg)
    if delegated_session_id:
        headers['X-Goog-PageId'] = delegated_session_id

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    # With a delegated session but no known index, the index defaults to 0
    if delegated_session_id or session_index is not None:
        headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0

    auth = self._get_sid_authorization_header(origin, user_session_id=user_session_id or self._extract_user_session_id(ytcfg))
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin

    if traverse_obj(ytcfg, 'LOGGED_IN', expected_type=bool):
        headers['X-Youtube-Bootstrap-Logged-In'] = 'true'

    return headers
|
||||||
|
|
||||||
def generate_api_headers(
|
def generate_api_headers(
|
||||||
self, *, ytcfg=None, account_syncid=None, session_index=None,
|
self, *, ytcfg=None, delegated_session_id=None, user_session_id=None, session_index=None,
|
||||||
visitor_data=None, api_hostname=None, default_client='web', **kwargs):
|
visitor_data=None, api_hostname=None, default_client='web', **kwargs):
|
||||||
|
|
||||||
origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
|
origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
|
||||||
@ -773,7 +835,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
'Origin': origin,
|
'Origin': origin,
|
||||||
'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
|
'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
|
||||||
'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
|
'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
|
||||||
**self._generate_cookie_auth_headers(ytcfg=ytcfg, account_syncid=account_syncid, session_index=session_index, origin=origin),
|
**self._generate_cookie_auth_headers(
|
||||||
|
ytcfg=ytcfg,
|
||||||
|
delegated_session_id=delegated_session_id,
|
||||||
|
user_session_id=user_session_id,
|
||||||
|
session_index=session_index,
|
||||||
|
origin=origin),
|
||||||
}
|
}
|
||||||
return filter_dict(headers)
|
return filter_dict(headers)
|
||||||
|
|
||||||
@ -1356,8 +1423,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
|
'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
|
||||||
}
|
}
|
||||||
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
|
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
|
||||||
_DEFAULT_CLIENTS = ('ios', 'mweb')
|
_DEFAULT_CLIENTS = ('ios', 'tv')
|
||||||
_DEFAULT_AUTHED_CLIENTS = ('web_creator', 'mweb')
|
_DEFAULT_AUTHED_CLIENTS = ('web_creator', 'tv')
|
||||||
|
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
|
|
||||||
@ -3836,9 +3903,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
default_client=client,
|
default_client=client,
|
||||||
visitor_data=visitor_data,
|
visitor_data=visitor_data,
|
||||||
session_index=self._extract_session_index(master_ytcfg, player_ytcfg),
|
session_index=self._extract_session_index(master_ytcfg, player_ytcfg),
|
||||||
account_syncid=(
|
delegated_session_id=(
|
||||||
self._data_sync_id_to_delegated_session_id(data_sync_id)
|
self._parse_data_sync_id(data_sync_id)[0]
|
||||||
or self._extract_account_syncid(master_ytcfg, initial_pr, player_ytcfg)
|
or self._extract_delegated_session_id(master_ytcfg, initial_pr, player_ytcfg)
|
||||||
|
),
|
||||||
|
user_session_id=(
|
||||||
|
self._parse_data_sync_id(data_sync_id)[1]
|
||||||
|
or self._extract_user_session_id(master_ytcfg, initial_pr, player_ytcfg)
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -5350,7 +5421,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
|||||||
if not continuation_list[0]:
|
if not continuation_list[0]:
|
||||||
continuation_list[0] = self._extract_continuation(parent_renderer)
|
continuation_list[0] = self._extract_continuation(parent_renderer)
|
||||||
|
|
||||||
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
|
def _entries(self, tab, item_id, ytcfg, delegated_session_id, visitor_data):
|
||||||
continuation_list = [None]
|
continuation_list = [None]
|
||||||
extract_entries = lambda x: self._extract_entries(x, continuation_list)
|
extract_entries = lambda x: self._extract_entries(x, continuation_list)
|
||||||
tab_content = try_get(tab, lambda x: x['content'], dict)
|
tab_content = try_get(tab, lambda x: x['content'], dict)
|
||||||
@ -5371,7 +5442,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
|||||||
break
|
break
|
||||||
seen_continuations.add(continuation_token)
|
seen_continuations.add(continuation_token)
|
||||||
headers = self.generate_api_headers(
|
headers = self.generate_api_headers(
|
||||||
ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
|
ytcfg=ytcfg, delegated_session_id=delegated_session_id, visitor_data=visitor_data)
|
||||||
response = self._extract_response(
|
response = self._extract_response(
|
||||||
item_id=f'{item_id} page {page_num}',
|
item_id=f'{item_id} page {page_num}',
|
||||||
query=continuation, headers=headers, ytcfg=ytcfg,
|
query=continuation, headers=headers, ytcfg=ytcfg,
|
||||||
@ -5441,7 +5512,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
|||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
self._entries(
|
self._entries(
|
||||||
selected_tab, metadata['id'], ytcfg,
|
selected_tab, metadata['id'], ytcfg,
|
||||||
self._extract_account_syncid(ytcfg, data),
|
self._extract_delegated_session_id(ytcfg, data),
|
||||||
self._extract_visitor_data(data, ytcfg)),
|
self._extract_visitor_data(data, ytcfg)),
|
||||||
**metadata)
|
**metadata)
|
||||||
|
|
||||||
@ -5593,7 +5664,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
|||||||
watch_endpoint = try_get(
|
watch_endpoint = try_get(
|
||||||
playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
|
playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
|
||||||
headers = self.generate_api_headers(
|
headers = self.generate_api_headers(
|
||||||
ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
|
ytcfg=ytcfg, delegated_session_id=self._extract_delegated_session_id(ytcfg, data),
|
||||||
visitor_data=self._extract_visitor_data(response, data, ytcfg))
|
visitor_data=self._extract_visitor_data(response, data, ytcfg))
|
||||||
query = {
|
query = {
|
||||||
'playlistId': playlist_id,
|
'playlistId': playlist_id,
|
||||||
@ -5691,7 +5762,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
|||||||
if not is_playlist:
|
if not is_playlist:
|
||||||
return
|
return
|
||||||
headers = self.generate_api_headers(
|
headers = self.generate_api_headers(
|
||||||
ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
|
ytcfg=ytcfg, delegated_session_id=self._extract_delegated_session_id(ytcfg, data),
|
||||||
visitor_data=self._extract_visitor_data(data, ytcfg))
|
visitor_data=self._extract_visitor_data(data, ytcfg))
|
||||||
query = {
|
query = {
|
||||||
'params': 'wgYCCAA=',
|
'params': 'wgYCCAA=',
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user