[ie/pornhub] Support browser impersonation (#16794)

Closes #16729
Authored by: 0xvd
This commit is contained in:
0xvd 2026-06-09 20:14:18 +05:30 committed by GitHub
parent 618b5e446c
commit 83564f85db
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -17,6 +17,7 @@ from ..utils import (
int_or_none, int_or_none,
merge_dicts, merge_dicts,
orderedSet, orderedSet,
parse_qs,
remove_quotes, remove_quotes,
remove_start, remove_start,
str_to_int, str_to_int,
@ -31,6 +32,14 @@ class PornHubBaseIE(InfoExtractor):
_NETRC_MACHINE = 'pornhub' _NETRC_MACHINE = 'pornhub'
_PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd\.onion)' _PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd\.onion)'
@staticmethod
def _get_headers(host):
return {
# Origin & Referer are needed for manifest requests to avoid HTTP Error 412
'Origin': f'https://www.{host}',
'Referer': f'https://www.{host}/',
}
def _download_webpage_handle(self, *args, **kwargs): def _download_webpage_handle(self, *args, **kwargs):
def dl(*args, **kwargs): def dl(*args, **kwargs):
return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs) return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
@ -61,7 +70,7 @@ class PornHubBaseIE(InfoExtractor):
def _set_age_cookies(self, host): def _set_age_cookies(self, host):
self._set_cookie(host, 'age_verified', '1') self._set_cookie(host, 'age_verified', '1')
self._set_cookie(host, 'accessAgeDisclaimerPH', '1') self._set_cookie(host, 'accessAgeDisclaimerPH', '1') # site sets '2'
self._set_cookie(host, 'accessAgeDisclaimerUK', '1') self._set_cookie(host, 'accessAgeDisclaimerUK', '1')
self._set_cookie(host, 'accessPH', '1') self._set_cookie(host, 'accessPH', '1')
@ -83,7 +92,7 @@ class PornHubBaseIE(InfoExtractor):
login_url = 'https://www.{}/{}login'.format(host, 'premium/' if 'premium' in host else '') login_url = 'https://www.{}/{}login'.format(host, 'premium/' if 'premium' in host else '')
login_page = self._download_webpage( login_page = self._download_webpage(
login_url, None, f'Downloading {site} login page') login_url, None, f'Downloading {site} login page', impersonate=True)
def is_logged(webpage): def is_logged(webpage):
return any(re.search(p, webpage) for p in ( return any(re.search(p, webpage) for p in (
@ -109,7 +118,7 @@ class PornHubBaseIE(InfoExtractor):
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
'Referer': login_url, 'Referer': login_url,
'X-Requested-With': 'XMLHttpRequest', 'X-Requested-With': 'XMLHttpRequest',
}) }, impersonate=True)
if response.get('success') == '1': if response.get('success') == '1':
self._logged_in = True self._logged_in = True
@ -279,9 +288,14 @@ class PornHubIE(PornHubBaseIE):
def dl_webpage(platform): def dl_webpage(platform):
self._set_cookie(host, 'platform', platform) self._set_cookie(host, 'platform', platform)
return self._download_webpage( webpage, urlh = self._download_webpage_handle(
f'https://www.{host}/view_video.php?viewkey={video_id}', f'https://www.{host}/view_video.php?viewkey={video_id}',
video_id, f'Downloading {platform} webpage') video_id, f'Downloading {platform} webpage',
impersonate=True)
if parse_qs(urlh.url).get('viewkey', [None])[-1] != video_id:
raise ExtractorError(
'Redirection detected; the video may be deleted or require login', expected=True)
return webpage
webpage = dl_webpage('pc') webpage = dl_webpage('pc')
@ -423,15 +437,16 @@ class PornHubIE(PornHubBaseIE):
formats = [] formats = []
def add_format(format_url, height=None): def add_format(format_url, height=None):
headers = self._get_headers(host)
ext = determine_ext(format_url) ext = determine_ext(format_url)
if ext == 'mpd': if ext == 'mpd':
formats.extend(self._extract_mpd_formats( formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False)) format_url, video_id, mpd_id='dash', fatal=False, headers=headers))
return return
if ext == 'm3u8': if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', entry_protocol='m3u8_native', format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False)) m3u8_id='hls', fatal=False, headers=headers))
return return
if not height: if not height:
height = int_or_none(self._search_regex( height = int_or_none(self._search_regex(
@ -450,7 +465,7 @@ class PornHubIE(PornHubBaseIE):
if upload_date: if upload_date:
upload_date = upload_date.replace('/', '') upload_date = upload_date.replace('/', '')
if '/video/get_media' in video_url: if '/video/get_media' in video_url:
medias = self._download_json(video_url, video_id, fatal=False) medias = self._download_json(video_url, video_id, fatal=False, impersonate=True)
if isinstance(medias, list): if isinstance(medias, list):
for media in medias: for media in medias:
if not isinstance(media, dict): if not isinstance(media, dict):
@ -506,7 +521,7 @@ class PornHubIE(PornHubBaseIE):
'cast': ({find_elements(attr='data-label', value='pornstar')}, ..., {clean_html}), 'cast': ({find_elements(attr='data-label', value='pornstar')}, ..., {clean_html}),
}), }),
'subtitles': subtitles, 'subtitles': subtitles,
'http_headers': {'Referer': f'https://www.{host}/'}, 'http_headers': self._get_headers(host),
}, info) }, info)
@ -598,7 +613,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
def download_page(base_url, num, fallback=False): def download_page(base_url, num, fallback=False):
note = 'Downloading page {}{}'.format(num, ' (switch to fallback)' if fallback else '') note = 'Downloading page {}{}'.format(num, ' (switch to fallback)' if fallback else '')
return self._download_webpage( return self._download_webpage(
base_url, item_id, note, query={'page': num}) base_url, item_id, note, query={'page': num}, impersonate=True)
def is_404(e): def is_404(e):
return isinstance(e.cause, HTTPError) and e.cause.status == 404 return isinstance(e.cause, HTTPError) and e.cause.status == 404
@ -799,7 +814,7 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE):
'id': playlist_id, 'id': playlist_id,
'page': page_num, 'page': page_num,
'token': token, 'token': token,
}) }, impersonate=True)
for page_num in range(1, page_count + 1): for page_num in range(1, page_count + 1):
if page_num > 1: if page_num > 1: