From 8b8e3e3cb4d3ba0dedf7b1fd00ce68f07da7e588 Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Wed, 1 Jul 2026 16:46:52 -0500
Subject: [PATCH] [ie/instagram] Add fallback for when impersonation is
 unavailable (#17113)

Fix f49b551a0c4c25358d2afaeda4ee63989d2d56ab

Authored by: bashonly
---
 yt_dlp/extractor/instagram.py | 34 ++++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)
diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py
index 9e1fc59c52..880b78f5d3 100644
--- a/yt_dlp/extractor/instagram.py
+++ b/yt_dlp/extractor/instagram.py
@@ -380,6 +380,8 @@ class InstagramIE(InstagramBaseIE):
         'only_matching': True,
     }]
 
+    _SJS_RE = re.compile(r'<script\b[^>]+\bdata-sjs>(\{.+?\})</script>')
+
     _lsd_token = None
 
     @classmethod
@@ -397,11 +399,9 @@ class InstagramIE(InstagramBaseIE):
         if self._is_logged_in:
             return
         if not self._lsd_token:
-            webpage = self._download_webpage(
-                self._BASE_URL, None, 'Setting up session',
-                impersonate=True, require_impersonation=True)
+            webpage = self._download_webpage(self._BASE_URL, None, 'Setting up session', impersonate=True)
             eqmc = self._search_json(
-                r'<script\b[^>]* id="__eqmc"[^>]*>', webpage, 'eqmc JSON', None, default={})
+                r'<script\b[^>]*\bid="__eqmc"[^>]*>', webpage, 'eqmc JSON', None, default={})
             self._lsd_token = (
                 traverse_obj(eqmc, ('l', {str}))
                 or self._search_regex(r'\["LSD",\[\],\{"token":"([^"]+)"', webpage, 'LSD token'))
@@ -419,7 +419,7 @@ class InstagramIE(InstagramBaseIE):
         api_check = self._download_json(
             f'{self._API_BASE_URL}/web/get_ruling_for_content/', video_id,
             'Checking post accessibility', errnote=False, fatal=False,
-            impersonate=True, require_impersonation=True, headers=self._api_headers,
+            impersonate=True, headers=self._api_headers,
             query={'content_type': 'MEDIA', 'target_id': media_id}) or {}
 
         csrf_token = self._get_cookies('https://www.instagram.com').get('csrftoken')
@@ -432,7 +432,7 @@ class InstagramIE(InstagramBaseIE):
 
         response = self._download_json(
             'https://www.instagram.com/api/graphql', video_id,
-            impersonate=True, require_impersonation=True,
+            fatal=False, impersonate=True,
             headers=filter_dict({
                 **self._api_headers,
                 'X-FB-Friendly-Name': 'PolarisLoggedOutDesktopWWWPostRootContentQuery',
@@ -441,10 +441,6 @@ class InstagramIE(InstagramBaseIE):
                 'X-Requested-With': 'XMLHttpRequest',
                 'Referer': url,
             }), data=urlencode_postdata({
-                'av': '0',
-                '__d': 'www',
-                '__user': '0',
-                'dpr': '1',
                 'lsd': self._lsd_token,
                 'fb_api_caller_class': 'RelayModern',
                 'fb_api_req_friendly_name': 'PolarisLoggedOutDesktopWWWPostRootContentQuery',
@@ -455,6 +451,7 @@ class InstagramIE(InstagramBaseIE):
 
         media = traverse_obj(response, ('data', 'xig_polaris_media', {dict}))
         product_info = traverse_obj(media, ('if_not_gated_logged_out', {dict}))
+
         if not product_info:
             error = join_nonempty('title', 'description', delim=': ', from_dict=api_check)
             if 'Restricted Video' in error:
@@ -466,6 +463,23 @@ class InstagramIE(InstagramBaseIE):
                 # Only raise after getting empty response; sometimes "long"-shortcode posts are public
                 self.raise_login_required(
                     'This content is only available for registered users who follow this account')
+
+            webpage, urlh = self._download_webpage_handle(
+                f'https://www.instagram.com/p/{video_id}', video_id)
+            if urlh.url.startswith(self._LOGIN_URL):
+                self.raise_login_required(
+                    'The webpage request was redirected to the login page. '
+                    'You have exceeded the rate-limit for accessing posts anonymously')
+
+            media = traverse_obj(webpage, (
+                {self._SJS_RE.findall}, ..., {json.loads},
+                'require', ..., ..., ..., '__bbox', 'require',
+                lambda _, v: v[0] == 'RelayPrefetchedStreamCache', ...,
+                lambda _, v: v['__bbox']['result']['data']['xig_polaris_media'],
+                '__bbox', 'result', 'data', 'xig_polaris_media', {dict}, any))
+            product_info = traverse_obj(media, ('if_not_gated_logged_out', {dict}))
+
+        if not product_info:
             raise ExtractorError(
                 'Instagram sent an empty media response. Check if this post is accessible in your '
                 f'browser without being logged-in. If it is not, then u{self._login_hint()[1:]}. '