From 83564f85db7507486fbe3b0d0e72498f31ab0600 Mon Sep 17 00:00:00 2001
From: 0xvd <199783523+0xvd@users.noreply.github.com>
Date: Tue, 9 Jun 2026 20:14:18 +0530
Subject: [PATCH] [ie/pornhub] Support browser impersonation (#16794)

Closes #16729
Authored by: 0xvd
---
 yt_dlp/extractor/pornhub.py | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)
diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py
index 3fc802e15d..889eb8d259 100644
--- a/yt_dlp/extractor/pornhub.py
+++ b/yt_dlp/extractor/pornhub.py
@@ -17,6 +17,7 @@ from ..utils import (
     int_or_none,
     merge_dicts,
     orderedSet,
+    parse_qs,
     remove_quotes,
     remove_start,
     str_to_int,
@@ -31,6 +32,14 @@ class PornHubBaseIE(InfoExtractor):
     _NETRC_MACHINE = 'pornhub'
     _PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd\.onion)'
 
+    @staticmethod
+    def _get_headers(host):
+        return {
+            # Origin & Referer are needed for manifest requests to avoid HTTP Error 412
+            'Origin': f'https://www.{host}',
+            'Referer': f'https://www.{host}/',
+        }
+
     def _download_webpage_handle(self, *args, **kwargs):
         def dl(*args, **kwargs):
             return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
@@ -61,7 +70,7 @@ class PornHubBaseIE(InfoExtractor):
 
     def _set_age_cookies(self, host):
         self._set_cookie(host, 'age_verified', '1')
-        self._set_cookie(host, 'accessAgeDisclaimerPH', '1')
+        self._set_cookie(host, 'accessAgeDisclaimerPH', '1')  # site sets '2'
         self._set_cookie(host, 'accessAgeDisclaimerUK', '1')
         self._set_cookie(host, 'accessPH', '1')
 
@@ -83,7 +92,7 @@ class PornHubBaseIE(InfoExtractor):
 
         login_url = 'https://www.{}/{}login'.format(host, 'premium/' if 'premium' in host else '')
         login_page = self._download_webpage(
-            login_url, None, f'Downloading {site} login page')
+            login_url, None, f'Downloading {site} login page', impersonate=True)
 
         def is_logged(webpage):
             return any(re.search(p, webpage) for p in (
@@ -109,7 +118,7 @@ class PornHubBaseIE(InfoExtractor):
                 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                 'Referer': login_url,
                 'X-Requested-With': 'XMLHttpRequest',
-            })
+            }, impersonate=True)
 
         if response.get('success') == '1':
             self._logged_in = True
@@ -279,9 +288,14 @@ class PornHubIE(PornHubBaseIE):
 
         def dl_webpage(platform):
             self._set_cookie(host, 'platform', platform)
-            return self._download_webpage(
+            webpage, urlh = self._download_webpage_handle(
                 f'https://www.{host}/view_video.php?viewkey={video_id}',
-                video_id, f'Downloading {platform} webpage')
+                video_id, f'Downloading {platform} webpage',
+                impersonate=True)
+            if parse_qs(urlh.url).get('viewkey', [None])[-1] != video_id:
+                raise ExtractorError(
+                    'Redirection detected; the video may be deleted or require login', expected=True)
+            return webpage
 
         webpage = dl_webpage('pc')
 
@@ -423,15 +437,16 @@ class PornHubIE(PornHubBaseIE):
         formats = []
 
         def add_format(format_url, height=None):
+            headers = self._get_headers(host)
             ext = determine_ext(format_url)
             if ext == 'mpd':
                 formats.extend(self._extract_mpd_formats(
-                    format_url, video_id, mpd_id='dash', fatal=False))
+                    format_url, video_id, mpd_id='dash', fatal=False, headers=headers))
                 return
             if ext == 'm3u8':
                 formats.extend(self._extract_m3u8_formats(
                     format_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id='hls', fatal=False))
+                    m3u8_id='hls', fatal=False, headers=headers))
                 return
             if not height:
                 height = int_or_none(self._search_regex(
@@ -450,7 +465,7 @@ class PornHubIE(PornHubBaseIE):
                 if upload_date:
                     upload_date = upload_date.replace('/', '')
             if '/video/get_media' in video_url:
-                medias = self._download_json(video_url, video_id, fatal=False)
+                medias = self._download_json(video_url, video_id, fatal=False, impersonate=True)
                 if isinstance(medias, list):
                     for media in medias:
                         if not isinstance(media, dict):
@@ -506,7 +521,7 @@ class PornHubIE(PornHubBaseIE):
                 'cast': ({find_elements(attr='data-label', value='pornstar')}, ..., {clean_html}),
             }),
             'subtitles': subtitles,
-            'http_headers': {'Referer': f'https://www.{host}/'},
+            'http_headers': self._get_headers(host),
         }, info)
 
 
@@ -598,7 +613,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
         def download_page(base_url, num, fallback=False):
             note = 'Downloading page {}{}'.format(num, ' (switch to fallback)' if fallback else '')
             return self._download_webpage(
-                base_url, item_id, note, query={'page': num})
+                base_url, item_id, note, query={'page': num}, impersonate=True)
 
         def is_404(e):
             return isinstance(e.cause, HTTPError) and e.cause.status == 404
@@ -799,7 +814,7 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE):
                 'id': playlist_id,
                 'page': page_num,
                 'token': token,
-            })
+            }, impersonate=True)
 
         for page_num in range(1, page_count + 1):
             if page_num > 1: