mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-06-12 13:54:28 +00:00
[ie/pornhub] Support browser impersonation (#16794)
Closes #16729. Authored by: 0xvd
This commit is contained in:
parent
618b5e446c
commit
83564f85db
@ -17,6 +17,7 @@ from ..utils import (
|
|||||||
int_or_none,
|
int_or_none,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
|
parse_qs,
|
||||||
remove_quotes,
|
remove_quotes,
|
||||||
remove_start,
|
remove_start,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
@ -31,6 +32,14 @@ class PornHubBaseIE(InfoExtractor):
|
|||||||
_NETRC_MACHINE = 'pornhub'
|
_NETRC_MACHINE = 'pornhub'
|
||||||
_PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd\.onion)'
|
_PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd\.onion)'
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_headers(host):
|
||||||
|
return {
|
||||||
|
# Origin & Referer are needed for manifest requests to avoid HTTP Error 412
|
||||||
|
'Origin': f'https://www.{host}',
|
||||||
|
'Referer': f'https://www.{host}/',
|
||||||
|
}
|
||||||
|
|
||||||
def _download_webpage_handle(self, *args, **kwargs):
|
def _download_webpage_handle(self, *args, **kwargs):
|
||||||
def dl(*args, **kwargs):
|
def dl(*args, **kwargs):
|
||||||
return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
|
return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
|
||||||
@ -61,7 +70,7 @@ class PornHubBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
def _set_age_cookies(self, host):
|
def _set_age_cookies(self, host):
|
||||||
self._set_cookie(host, 'age_verified', '1')
|
self._set_cookie(host, 'age_verified', '1')
|
||||||
self._set_cookie(host, 'accessAgeDisclaimerPH', '1')
|
self._set_cookie(host, 'accessAgeDisclaimerPH', '1') # site sets '2'
|
||||||
self._set_cookie(host, 'accessAgeDisclaimerUK', '1')
|
self._set_cookie(host, 'accessAgeDisclaimerUK', '1')
|
||||||
self._set_cookie(host, 'accessPH', '1')
|
self._set_cookie(host, 'accessPH', '1')
|
||||||
|
|
||||||
@ -83,7 +92,7 @@ class PornHubBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
login_url = 'https://www.{}/{}login'.format(host, 'premium/' if 'premium' in host else '')
|
login_url = 'https://www.{}/{}login'.format(host, 'premium/' if 'premium' in host else '')
|
||||||
login_page = self._download_webpage(
|
login_page = self._download_webpage(
|
||||||
login_url, None, f'Downloading {site} login page')
|
login_url, None, f'Downloading {site} login page', impersonate=True)
|
||||||
|
|
||||||
def is_logged(webpage):
|
def is_logged(webpage):
|
||||||
return any(re.search(p, webpage) for p in (
|
return any(re.search(p, webpage) for p in (
|
||||||
@ -109,7 +118,7 @@ class PornHubBaseIE(InfoExtractor):
|
|||||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||||
'Referer': login_url,
|
'Referer': login_url,
|
||||||
'X-Requested-With': 'XMLHttpRequest',
|
'X-Requested-With': 'XMLHttpRequest',
|
||||||
})
|
}, impersonate=True)
|
||||||
|
|
||||||
if response.get('success') == '1':
|
if response.get('success') == '1':
|
||||||
self._logged_in = True
|
self._logged_in = True
|
||||||
@ -279,9 +288,14 @@ class PornHubIE(PornHubBaseIE):
|
|||||||
|
|
||||||
def dl_webpage(platform):
|
def dl_webpage(platform):
|
||||||
self._set_cookie(host, 'platform', platform)
|
self._set_cookie(host, 'platform', platform)
|
||||||
return self._download_webpage(
|
webpage, urlh = self._download_webpage_handle(
|
||||||
f'https://www.{host}/view_video.php?viewkey={video_id}',
|
f'https://www.{host}/view_video.php?viewkey={video_id}',
|
||||||
video_id, f'Downloading {platform} webpage')
|
video_id, f'Downloading {platform} webpage',
|
||||||
|
impersonate=True)
|
||||||
|
if parse_qs(urlh.url).get('viewkey', [None])[-1] != video_id:
|
||||||
|
raise ExtractorError(
|
||||||
|
'Redirection detected; the video may be deleted or require login', expected=True)
|
||||||
|
return webpage
|
||||||
|
|
||||||
webpage = dl_webpage('pc')
|
webpage = dl_webpage('pc')
|
||||||
|
|
||||||
@ -423,15 +437,16 @@ class PornHubIE(PornHubBaseIE):
|
|||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
def add_format(format_url, height=None):
|
def add_format(format_url, height=None):
|
||||||
|
headers = self._get_headers(host)
|
||||||
ext = determine_ext(format_url)
|
ext = determine_ext(format_url)
|
||||||
if ext == 'mpd':
|
if ext == 'mpd':
|
||||||
formats.extend(self._extract_mpd_formats(
|
formats.extend(self._extract_mpd_formats(
|
||||||
format_url, video_id, mpd_id='dash', fatal=False))
|
format_url, video_id, mpd_id='dash', fatal=False, headers=headers))
|
||||||
return
|
return
|
||||||
if ext == 'm3u8':
|
if ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
m3u8_id='hls', fatal=False))
|
m3u8_id='hls', fatal=False, headers=headers))
|
||||||
return
|
return
|
||||||
if not height:
|
if not height:
|
||||||
height = int_or_none(self._search_regex(
|
height = int_or_none(self._search_regex(
|
||||||
@ -450,7 +465,7 @@ class PornHubIE(PornHubBaseIE):
|
|||||||
if upload_date:
|
if upload_date:
|
||||||
upload_date = upload_date.replace('/', '')
|
upload_date = upload_date.replace('/', '')
|
||||||
if '/video/get_media' in video_url:
|
if '/video/get_media' in video_url:
|
||||||
medias = self._download_json(video_url, video_id, fatal=False)
|
medias = self._download_json(video_url, video_id, fatal=False, impersonate=True)
|
||||||
if isinstance(medias, list):
|
if isinstance(medias, list):
|
||||||
for media in medias:
|
for media in medias:
|
||||||
if not isinstance(media, dict):
|
if not isinstance(media, dict):
|
||||||
@ -506,7 +521,7 @@ class PornHubIE(PornHubBaseIE):
|
|||||||
'cast': ({find_elements(attr='data-label', value='pornstar')}, ..., {clean_html}),
|
'cast': ({find_elements(attr='data-label', value='pornstar')}, ..., {clean_html}),
|
||||||
}),
|
}),
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'http_headers': {'Referer': f'https://www.{host}/'},
|
'http_headers': self._get_headers(host),
|
||||||
}, info)
|
}, info)
|
||||||
|
|
||||||
|
|
||||||
@ -598,7 +613,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
|
|||||||
def download_page(base_url, num, fallback=False):
|
def download_page(base_url, num, fallback=False):
|
||||||
note = 'Downloading page {}{}'.format(num, ' (switch to fallback)' if fallback else '')
|
note = 'Downloading page {}{}'.format(num, ' (switch to fallback)' if fallback else '')
|
||||||
return self._download_webpage(
|
return self._download_webpage(
|
||||||
base_url, item_id, note, query={'page': num})
|
base_url, item_id, note, query={'page': num}, impersonate=True)
|
||||||
|
|
||||||
def is_404(e):
|
def is_404(e):
|
||||||
return isinstance(e.cause, HTTPError) and e.cause.status == 404
|
return isinstance(e.cause, HTTPError) and e.cause.status == 404
|
||||||
@ -799,7 +814,7 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE):
|
|||||||
'id': playlist_id,
|
'id': playlist_id,
|
||||||
'page': page_num,
|
'page': page_num,
|
||||||
'token': token,
|
'token': token,
|
||||||
})
|
}, impersonate=True)
|
||||||
|
|
||||||
for page_num in range(1, page_count + 1):
|
for page_num in range(1, page_count + 1):
|
||||||
if page_num > 1:
|
if page_num > 1:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user