Compare commits

..

No commits in common. "1d1358d09fedcdc6b3e83538a29b0b539cb9be3f" and "81bdea03f3414dd4d086610c970ec14e15bd3d36" have entirely different histories.

8 changed files with 34 additions and 112 deletions

View File

@ -5,12 +5,10 @@ from ..utils import (
ExtractorError, ExtractorError,
GeoRestrictedError, GeoRestrictedError,
int_or_none, int_or_none,
make_archive_id,
remove_start, remove_start,
traverse_obj,
update_url_query, update_url_query,
url_or_none,
) )
from ..utils.traversal import traverse_obj
class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
@ -31,19 +29,6 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
'historyvault.com': (None, 'historyvault', None), 'historyvault.com': (None, 'historyvault', None),
'biography.com': (None, 'biography', None), 'biography.com': (None, 'biography', None),
} }
# GraphQL document for the `getUserVideo` operation: given a `$videoId`, it selects the
# video's title, publicUrl, programId, tvSeasonNumber, tvSeasonEpisodeNumber and series title.
_GRAPHQL_QUERY = '''
query getUserVideo($videoId: ID!) {
video(id: $videoId) {
title
publicUrl
programId
tvSeasonNumber
tvSeasonEpisodeNumber
series {
title
}
}
}'''
def _extract_aen_smil(self, smil_url, video_id, auth=None): def _extract_aen_smil(self, smil_url, video_id, auth=None):
query = { query = {
@ -88,39 +73,19 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
def _extract_aetn_info(self, domain, filter_key, filter_value, url): def _extract_aetn_info(self, domain, filter_key, filter_value, url):
requestor_id, brand, software_statement = self._DOMAIN_MAP[domain] requestor_id, brand, software_statement = self._DOMAIN_MAP[domain]
if filter_key == 'canonical':
webpage = self._download_webpage(url, filter_value)
graphql_video_id = self._search_regex(
r'<meta\b[^>]+\bcontent="[^"]*\btpid/(\d+)"', webpage,
'id') or self._html_search_meta('videoId', webpage, 'GraphQL video ID', fatal=True)
else:
graphql_video_id = filter_value
result = self._download_json( result = self._download_json(
'https://yoga.appsvcs.aetnd.com/', graphql_video_id, f'https://feeds.video.aetnd.com/api/v2/{brand}/videos',
query={ filter_value, query={f'filter[{filter_key}]': filter_value})
'brand': brand, result = traverse_obj(
'mode': 'live', result, ('results',
'platform': 'web', lambda k, v: k == 0 and v[filter_key] == filter_value),
}, get_all=False)
data=json.dumps({ if not result:
'operationName': 'getUserVideo',
'variables': {
'videoId': graphql_video_id,
},
'query': self._GRAPHQL_QUERY,
}).encode(),
headers={
'Content-Type': 'application/json',
})
result = traverse_obj(result, ('data', 'video', {dict}))
media_url = traverse_obj(result, ('publicUrl', {url_or_none}))
if not media_url:
raise ExtractorError('Show not found in A&E feed (too new?)', expected=True, raise ExtractorError('Show not found in A&E feed (too new?)', expected=True,
video_id=remove_start(filter_value, '/')) video_id=remove_start(filter_value, '/'))
title = result['title'] title = result['title']
video_id = result['programId'] video_id = result['id']
media_url = result['publicUrl']
theplatform_metadata = self._download_theplatform_metadata(self._search_regex( theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id) r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
info = self._parse_theplatform_metadata(theplatform_metadata) info = self._parse_theplatform_metadata(theplatform_metadata)
@ -135,13 +100,9 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
info.update(self._extract_aen_smil(media_url, video_id, auth)) info.update(self._extract_aen_smil(media_url, video_id, auth))
info.update({ info.update({
'title': title, 'title': title,
'display_id': graphql_video_id, 'series': result.get('seriesName'),
'_old_archive_ids': [make_archive_id(self, graphql_video_id)], 'season_number': int_or_none(result.get('tvSeasonNumber')),
**traverse_obj(result, { 'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')),
'series': ('series', 'title', {str}),
'season_number': ('tvSeasonNumber', {int_or_none}),
'episode_number': ('tvSeasonEpisodeNumber', {int_or_none}),
}),
}) })
return info return info
@ -155,7 +116,7 @@ class AENetworksIE(AENetworksBaseIE):
(?:shows/[^/?#]+/)?videos/[^/?#]+ (?:shows/[^/?#]+/)?videos/[^/?#]+
)''' )'''
_TESTS = [{ _TESTS = [{
'url': 'https://www.history.com/shows/mountain-men/season-1/episode-1', 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
'info_dict': { 'info_dict': {
'id': '22253814', 'id': '22253814',
'ext': 'mp4', 'ext': 'mp4',
@ -178,11 +139,11 @@ class AENetworksIE(AENetworksBaseIE):
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
'add_ie': ['ThePlatform'], 'add_ie': ['ThePlatform'],
'skip': 'This content requires a valid, unexpired auth token', 'skip': 'Geo-restricted - This content is not available in your location.',
}, { }, {
'url': 'https://www.aetv.com/shows/duck-dynasty/season-9/episode-1', 'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
'info_dict': { 'info_dict': {
'id': '147486', 'id': '600587331957',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Inlawful Entry', 'title': 'Inlawful Entry',
'description': 'md5:57c12115a2b384d883fe64ca50529e08', 'description': 'md5:57c12115a2b384d883fe64ca50529e08',
@ -199,8 +160,6 @@ class AENetworksIE(AENetworksBaseIE):
'season_number': 9, 'season_number': 9,
'series': 'Duck Dynasty', 'series': 'Duck Dynasty',
'age_limit': 0, 'age_limit': 0,
'display_id': '600587331957',
'_old_archive_ids': ['aenetworks 600587331957'],
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
'add_ie': ['ThePlatform'], 'add_ie': ['ThePlatform'],
@ -227,7 +186,6 @@ class AENetworksIE(AENetworksBaseIE):
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
'add_ie': ['ThePlatform'], 'add_ie': ['ThePlatform'],
'skip': '404 Not Found',
}, { }, {
'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story', 'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story',
'info_dict': { 'info_dict': {
@ -251,7 +209,6 @@ class AENetworksIE(AENetworksBaseIE):
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
'add_ie': ['ThePlatform'], 'add_ie': ['ThePlatform'],
'skip': 'This content requires a valid, unexpired auth token',
}, { }, {
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8', 'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
'only_matching': True, 'only_matching': True,
@ -302,7 +259,7 @@ class AENetworksListBaseIE(AENetworksBaseIE):
domain, slug = self._match_valid_url(url).groups() domain, slug = self._match_valid_url(url).groups()
_, brand, _ = self._DOMAIN_MAP[domain] _, brand, _ = self._DOMAIN_MAP[domain]
playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS) playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
base_url = f'https://watch.{domain}' base_url = f'http://watch.{domain}'
entries = [] entries = []
for item in (playlist.get(self._ITEMS_KEY) or []): for item in (playlist.get(self._ITEMS_KEY) or []):

View File

@ -29,7 +29,7 @@ class LearningOnScreenIE(InfoExtractor):
}] }]
def _real_initialize(self): def _real_initialize(self):
if not self._get_cookies('https://learningonscreen.ac.uk/').get('PHPSESSID-LOS-LIVE'): if not self._get_cookies('https://learningonscreen.ac.uk/').get('PHPSESSID-BOB-LIVE'):
self.raise_login_required(method='session_cookies') self.raise_login_required(method='session_cookies')
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -25,7 +25,7 @@ class MixcloudBaseIE(InfoExtractor):
%s %s
} }
}''' % (lookup_key, username, f', slug: "{slug}"' if slug else '', object_fields), # noqa: UP031 }''' % (lookup_key, username, f', slug: "{slug}"' if slug else '', object_fields), # noqa: UP031
}, impersonate=True)['data'][lookup_key] })['data'][lookup_key]
class MixcloudIE(MixcloudBaseIE): class MixcloudIE(MixcloudBaseIE):

View File

@ -33,8 +33,7 @@ class OpencastBaseIE(InfoExtractor):
vid\.igb\.illinois\.edu| vid\.igb\.illinois\.edu|
cursosabertos\.c3sl\.ufpr\.br| cursosabertos\.c3sl\.ufpr\.br|
mcmedia\.missioncollege\.org| mcmedia\.missioncollege\.org|
clases\.odon\.edu\.uy| clases\.odon\.edu\.uy
oc-p\.uni-jena\.de
)''' )'''
_UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}' _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
@ -107,7 +106,7 @@ class OpencastBaseIE(InfoExtractor):
class OpencastIE(OpencastBaseIE): class OpencastIE(OpencastBaseIE):
_VALID_URL = rf'''(?x) _VALID_URL = rf'''(?x)
https?://(?P<host>{OpencastBaseIE._INSTANCES_RE})/paella[0-9]*/ui/watch\.html\? https?://(?P<host>{OpencastBaseIE._INSTANCES_RE})/paella/ui/watch\.html\?
(?:[^#]+&)?id=(?P<id>{OpencastBaseIE._UUID_RE})''' (?:[^#]+&)?id=(?P<id>{OpencastBaseIE._UUID_RE})'''
_API_BASE = 'https://%s/search/episode.json?id=%s' _API_BASE = 'https://%s/search/episode.json?id=%s'
@ -132,12 +131,8 @@ class OpencastIE(OpencastBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
host, video_id = self._match_valid_url(url).group('host', 'id') host, video_id = self._match_valid_url(url).group('host', 'id')
response = self._call_api(host, video_id) return self._parse_mediapackage(
package = traverse_obj(response, ( self._call_api(host, video_id)['search-results']['result']['mediapackage'])
('search-results', 'result'),
('result', ...), # Path needed for oc-p.uni-jena.de
'mediapackage', {dict}, any)) or {}
return self._parse_mediapackage(package)
class OpencastPlaylistIE(OpencastBaseIE): class OpencastPlaylistIE(OpencastBaseIE):

View File

@ -6,7 +6,6 @@ import re
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
from ..networking import HEADRequest from ..networking import HEADRequest
from ..networking.exceptions import HTTPError from ..networking.exceptions import HTTPError
from ..networking.impersonate import ImpersonateTarget
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
float_or_none, float_or_none,
@ -834,30 +833,6 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudBaseIE):
'entries': self._entries(base_url, playlist_id), 'entries': self._entries(base_url, playlist_id),
} }
@functools.cached_property
def _browser_impersonate_target(self):
    """Choose the value to hand to an ``impersonate=`` request argument.

    Returns ``True`` when the downloader reports no impersonation targets at all,
    the newest available Chrome target (Windows or macOS) when its version is not
    below the minimum supported one, and the minimum supported target otherwise.
    The result is computed once per instance via ``functools.cached_property``.
    """
    candidates = self._downloader._get_available_impersonate_targets()
    if not candidates:
        # impersonate=True gives a generic warning when no impersonation targets are available
        return True

    # Any browser target older than chrome-116 is 403'd by Datadome
    MIN_SUPPORTED_TARGET = ImpersonateTarget('chrome', '116', 'windows', '10')

    def version_as_float(target):
        # Targets without a version sort lowest
        return float(target.version) if target.version else 0

    # Always try to use the newest Chrome target available
    chrome_targets = [
        entry[0] for entry in candidates
        if entry[0].client == 'chrome' and entry[0].os in ('windows', 'macos')
    ]
    chrome_targets.sort(key=version_as_float)

    newest_is_too_old = (
        not chrome_targets
        or version_as_float(chrome_targets[-1]) < version_as_float(MIN_SUPPORTED_TARGET))
    if newest_is_too_old:
        # All available targets are inadequate or newest available Chrome target is too old, so
        # warn the user to upgrade their dependency to a version with the minimum supported target
        return MIN_SUPPORTED_TARGET

    return chrome_targets[-1]
def _entries(self, url, playlist_id): def _entries(self, url, playlist_id):
# Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200. # Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200.
# https://developers.soundcloud.com/blog/offset-pagination-deprecated # https://developers.soundcloud.com/blog/offset-pagination-deprecated
@ -872,9 +847,7 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudBaseIE):
try: try:
response = self._call_api( response = self._call_api(
url, playlist_id, query=query, headers=self._HEADERS, url, playlist_id, query=query, headers=self._HEADERS,
note=f'Downloading track page {i + 1}', note=f'Downloading track page {i + 1}')
# See: https://github.com/yt-dlp/yt-dlp/issues/15660
impersonate=self._browser_impersonate_target)
break break
except ExtractorError as e: except ExtractorError as e:
# Downloading page may result in intermittent 502 HTTP error # Downloading page may result in intermittent 502 HTTP error

View File

@ -3,7 +3,6 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
clean_html,
determine_ext, determine_ext,
merge_dicts, merge_dicts,
parse_duration, parse_duration,
@ -13,7 +12,6 @@ from ..utils import (
urlencode_postdata, urlencode_postdata,
urljoin, urljoin,
) )
from ..utils.traversal import find_element, traverse_obj, trim_str
class SpankBangIE(InfoExtractor): class SpankBangIE(InfoExtractor):
@ -124,7 +122,7 @@ class SpankBangIE(InfoExtractor):
}), headers={ }), headers={
'Referer': url, 'Referer': url,
'X-Requested-With': 'XMLHttpRequest', 'X-Requested-With': 'XMLHttpRequest',
}, impersonate=True) })
for format_id, format_url in stream.items(): for format_id, format_url in stream.items():
if format_url and isinstance(format_url, list): if format_url and isinstance(format_url, list):
@ -180,9 +178,9 @@ class SpankBangPlaylistIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = self._match_valid_url(url) mobj = self._match_valid_url(url)
playlist_id = mobj.group('id') playlist_id = mobj.group('id')
country = self.get_param('geo_bypass_country') or 'US'
self._set_cookie('.spankbang.com', 'country', country.upper()) webpage = self._download_webpage(
webpage = self._download_webpage(url, playlist_id, impersonate=True) url, playlist_id, headers={'Cookie': 'country=US; mobile=on'})
entries = [self.url_result( entries = [self.url_result(
urljoin(url, mobj.group('path')), urljoin(url, mobj.group('path')),
@ -191,8 +189,8 @@ class SpankBangPlaylistIE(InfoExtractor):
r'<a[^>]+\bhref=(["\'])(?P<path>/?[\da-z]+-(?P<id>[\da-z]+)/playlist/[^"\'](?:(?!\1).)*)\1', r'<a[^>]+\bhref=(["\'])(?P<path>/?[\da-z]+-(?P<id>[\da-z]+)/playlist/[^"\'](?:(?!\1).)*)\1',
webpage)] webpage)]
title = traverse_obj(webpage, ( title = self._html_search_regex(
{find_element(tag='h1', attr='data-testid', value='playlist-title')}, r'<em>([^<]+)</em>\s+playlist\s*<', webpage, 'playlist title',
{clean_html}, {trim_str(end=' Playlist')})) fatal=False)
return self.playlist_result(entries, playlist_id, title) return self.playlist_result(entries, playlist_id, title)

View File

@ -51,8 +51,7 @@ class TruthIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
status = self._download_json( status = self._download_json(f'https://truthsocial.com/api/v1/statuses/{video_id}', video_id)
f'https://truthsocial.com/api/v1/statuses/{video_id}', video_id, impersonate=True)
uploader_id = strip_or_none(traverse_obj(status, ('account', 'username'))) uploader_id = strip_or_none(traverse_obj(status, ('account', 'username')))
return { return {
'id': video_id, 'id': video_id,

View File

@ -268,7 +268,7 @@ class XHamsterIE(InfoExtractor):
display_id = mobj.group('display_id') or mobj.group('display_id_2') display_id = mobj.group('display_id') or mobj.group('display_id_2')
desktop_url = re.sub(r'^(https?://(?:.+?\.)?)m\.', r'\1', url) desktop_url = re.sub(r'^(https?://(?:.+?\.)?)m\.', r'\1', url)
webpage, urlh = self._download_webpage_handle(desktop_url, video_id, impersonate=True) webpage, urlh = self._download_webpage_handle(desktop_url, video_id)
error = self._html_search_regex( error = self._html_search_regex(
r'<div[^>]+id=["\']videoClosed["\'][^>]*>(.+?)</div>', r'<div[^>]+id=["\']videoClosed["\'][^>]*>(.+?)</div>',