[ie/instagram] Add fallback for when impersonation is unavailable (#17113)

Fix f49b551a0c4c25358d2afaeda4ee63989d2d56ab

Authored by: bashonly
This commit is contained in:
bashonly 2026-07-01 16:46:52 -05:00 committed by GitHub
parent 249aa5d6e6
commit 8b8e3e3cb4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -380,6 +380,8 @@ class InstagramIE(InstagramBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
_SJS_RE = re.compile(r'<script\b[^>]+\bdata-sjs>(\{.+?\})</script>')
_lsd_token = None _lsd_token = None
@classmethod @classmethod
@@ -397,11 +399,9 @@ class InstagramIE(InstagramBaseIE):
if self._is_logged_in: if self._is_logged_in:
return return
if not self._lsd_token: if not self._lsd_token:
webpage = self._download_webpage( webpage = self._download_webpage(self._BASE_URL, None, 'Setting up session', impersonate=True)
self._BASE_URL, None, 'Setting up session',
impersonate=True, require_impersonation=True)
eqmc = self._search_json( eqmc = self._search_json(
r'<script\b[^>]* id="__eqmc"[^>]*>', webpage, 'eqmc JSON', None, default={}) r'<script\b[^>]*\bid="__eqmc"[^>]*>', webpage, 'eqmc JSON', None, default={})
self._lsd_token = ( self._lsd_token = (
traverse_obj(eqmc, ('l', {str})) traverse_obj(eqmc, ('l', {str}))
or self._search_regex(r'\["LSD",\[\],\{"token":"([^"]+)"', webpage, 'LSD token')) or self._search_regex(r'\["LSD",\[\],\{"token":"([^"]+)"', webpage, 'LSD token'))
@@ -419,7 +419,7 @@ class InstagramIE(InstagramBaseIE):
api_check = self._download_json( api_check = self._download_json(
f'{self._API_BASE_URL}/web/get_ruling_for_content/', video_id, f'{self._API_BASE_URL}/web/get_ruling_for_content/', video_id,
'Checking post accessibility', errnote=False, fatal=False, 'Checking post accessibility', errnote=False, fatal=False,
impersonate=True, require_impersonation=True, headers=self._api_headers, impersonate=True, headers=self._api_headers,
query={'content_type': 'MEDIA', 'target_id': media_id}) or {} query={'content_type': 'MEDIA', 'target_id': media_id}) or {}
csrf_token = self._get_cookies('https://www.instagram.com').get('csrftoken') csrf_token = self._get_cookies('https://www.instagram.com').get('csrftoken')
@@ -432,7 +432,7 @@ class InstagramIE(InstagramBaseIE):
response = self._download_json( response = self._download_json(
'https://www.instagram.com/api/graphql', video_id, 'https://www.instagram.com/api/graphql', video_id,
impersonate=True, require_impersonation=True, fatal=False, impersonate=True,
headers=filter_dict({ headers=filter_dict({
**self._api_headers, **self._api_headers,
'X-FB-Friendly-Name': 'PolarisLoggedOutDesktopWWWPostRootContentQuery', 'X-FB-Friendly-Name': 'PolarisLoggedOutDesktopWWWPostRootContentQuery',
@@ -441,10 +441,6 @@ class InstagramIE(InstagramBaseIE):
'X-Requested-With': 'XMLHttpRequest', 'X-Requested-With': 'XMLHttpRequest',
'Referer': url, 'Referer': url,
}), data=urlencode_postdata({ }), data=urlencode_postdata({
'av': '0',
'__d': 'www',
'__user': '0',
'dpr': '1',
'lsd': self._lsd_token, 'lsd': self._lsd_token,
'fb_api_caller_class': 'RelayModern', 'fb_api_caller_class': 'RelayModern',
'fb_api_req_friendly_name': 'PolarisLoggedOutDesktopWWWPostRootContentQuery', 'fb_api_req_friendly_name': 'PolarisLoggedOutDesktopWWWPostRootContentQuery',
@@ -455,6 +451,7 @@ class InstagramIE(InstagramBaseIE):
media = traverse_obj(response, ('data', 'xig_polaris_media', {dict})) media = traverse_obj(response, ('data', 'xig_polaris_media', {dict}))
product_info = traverse_obj(media, ('if_not_gated_logged_out', {dict})) product_info = traverse_obj(media, ('if_not_gated_logged_out', {dict}))
if not product_info: if not product_info:
error = join_nonempty('title', 'description', delim=': ', from_dict=api_check) error = join_nonempty('title', 'description', delim=': ', from_dict=api_check)
if 'Restricted Video' in error: if 'Restricted Video' in error:
@@ -466,6 +463,23 @@ class InstagramIE(InstagramBaseIE):
# Only raise after getting empty response; sometimes "long"-shortcode posts are public # Only raise after getting empty response; sometimes "long"-shortcode posts are public
self.raise_login_required( self.raise_login_required(
'This content is only available for registered users who follow this account') 'This content is only available for registered users who follow this account')
webpage, urlh = self._download_webpage_handle(
f'https://www.instagram.com/p/{video_id}', video_id)
if urlh.url.startswith(self._LOGIN_URL):
self.raise_login_required(
'The webpage request was redirected to the login page. '
'You have exceeded the rate-limit for accessing posts anonymously')
media = traverse_obj(webpage, (
{self._SJS_RE.findall}, ..., {json.loads},
'require', ..., ..., ..., '__bbox', 'require',
lambda _, v: v[0] == 'RelayPrefetchedStreamCache', ...,
lambda _, v: v['__bbox']['result']['data']['xig_polaris_media'],
'__bbox', 'result', 'data', 'xig_polaris_media', {dict}, any))
product_info = traverse_obj(media, ('if_not_gated_logged_out', {dict}))
if not product_info:
raise ExtractorError( raise ExtractorError(
'Instagram sent an empty media response. Check if this post is accessible in your ' 'Instagram sent an empty media response. Check if this post is accessible in your '
f'browser without being logged-in. If it is not, then u{self._login_hint()[1:]}. ' f'browser without being logged-in. If it is not, then u{self._login_hint()[1:]}. '