mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-04-02 07:02:36 +00:00
Compare commits
7 Commits
81bdea03f3
...
1d1358d09f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1d1358d09f | ||
|
|
1fe0bf23aa | ||
|
|
f05e1cd1f1 | ||
|
|
46d5b6f2b7 | ||
|
|
166356d1a1 | ||
|
|
2485653859 | ||
|
|
f532a91cef |
@ -5,10 +5,12 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
GeoRestrictedError,
|
||||
int_or_none,
|
||||
make_archive_id,
|
||||
remove_start,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
@ -29,6 +31,19 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
'historyvault.com': (None, 'historyvault', None),
|
||||
'biography.com': (None, 'biography', None),
|
||||
}
|
||||
_GRAPHQL_QUERY = '''
|
||||
query getUserVideo($videoId: ID!) {
|
||||
video(id: $videoId) {
|
||||
title
|
||||
publicUrl
|
||||
programId
|
||||
tvSeasonNumber
|
||||
tvSeasonEpisodeNumber
|
||||
series {
|
||||
title
|
||||
}
|
||||
}
|
||||
}'''
|
||||
|
||||
def _extract_aen_smil(self, smil_url, video_id, auth=None):
|
||||
query = {
|
||||
@ -73,19 +88,39 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
|
||||
def _extract_aetn_info(self, domain, filter_key, filter_value, url):
|
||||
requestor_id, brand, software_statement = self._DOMAIN_MAP[domain]
|
||||
if filter_key == 'canonical':
|
||||
webpage = self._download_webpage(url, filter_value)
|
||||
graphql_video_id = self._search_regex(
|
||||
r'<meta\b[^>]+\bcontent="[^"]*\btpid/(\d+)"', webpage,
|
||||
'id') or self._html_search_meta('videoId', webpage, 'GraphQL video ID', fatal=True)
|
||||
else:
|
||||
graphql_video_id = filter_value
|
||||
|
||||
result = self._download_json(
|
||||
f'https://feeds.video.aetnd.com/api/v2/{brand}/videos',
|
||||
filter_value, query={f'filter[{filter_key}]': filter_value})
|
||||
result = traverse_obj(
|
||||
result, ('results',
|
||||
lambda k, v: k == 0 and v[filter_key] == filter_value),
|
||||
get_all=False)
|
||||
if not result:
|
||||
'https://yoga.appsvcs.aetnd.com/', graphql_video_id,
|
||||
query={
|
||||
'brand': brand,
|
||||
'mode': 'live',
|
||||
'platform': 'web',
|
||||
},
|
||||
data=json.dumps({
|
||||
'operationName': 'getUserVideo',
|
||||
'variables': {
|
||||
'videoId': graphql_video_id,
|
||||
},
|
||||
'query': self._GRAPHQL_QUERY,
|
||||
}).encode(),
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
|
||||
result = traverse_obj(result, ('data', 'video', {dict}))
|
||||
media_url = traverse_obj(result, ('publicUrl', {url_or_none}))
|
||||
if not media_url:
|
||||
raise ExtractorError('Show not found in A&E feed (too new?)', expected=True,
|
||||
video_id=remove_start(filter_value, '/'))
|
||||
title = result['title']
|
||||
video_id = result['id']
|
||||
media_url = result['publicUrl']
|
||||
video_id = result['programId']
|
||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||
r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
@ -100,9 +135,13 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
info.update(self._extract_aen_smil(media_url, video_id, auth))
|
||||
info.update({
|
||||
'title': title,
|
||||
'series': result.get('seriesName'),
|
||||
'season_number': int_or_none(result.get('tvSeasonNumber')),
|
||||
'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')),
|
||||
'display_id': graphql_video_id,
|
||||
'_old_archive_ids': [make_archive_id(self, graphql_video_id)],
|
||||
**traverse_obj(result, {
|
||||
'series': ('series', 'title', {str}),
|
||||
'season_number': ('tvSeasonNumber', {int_or_none}),
|
||||
'episode_number': ('tvSeasonEpisodeNumber', {int_or_none}),
|
||||
}),
|
||||
})
|
||||
return info
|
||||
|
||||
@ -116,7 +155,7 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
(?:shows/[^/?#]+/)?videos/[^/?#]+
|
||||
)'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
||||
'url': 'https://www.history.com/shows/mountain-men/season-1/episode-1',
|
||||
'info_dict': {
|
||||
'id': '22253814',
|
||||
'ext': 'mp4',
|
||||
@ -139,11 +178,11 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'skip': 'Geo-restricted - This content is not available in your location.',
|
||||
'skip': 'This content requires a valid, unexpired auth token',
|
||||
}, {
|
||||
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
|
||||
'url': 'https://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
|
||||
'info_dict': {
|
||||
'id': '600587331957',
|
||||
'id': '147486',
|
||||
'ext': 'mp4',
|
||||
'title': 'Inlawful Entry',
|
||||
'description': 'md5:57c12115a2b384d883fe64ca50529e08',
|
||||
@ -160,6 +199,8 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
'season_number': 9,
|
||||
'series': 'Duck Dynasty',
|
||||
'age_limit': 0,
|
||||
'display_id': '600587331957',
|
||||
'_old_archive_ids': ['aenetworks 600587331957'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'add_ie': ['ThePlatform'],
|
||||
@ -186,6 +227,7 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story',
|
||||
'info_dict': {
|
||||
@ -209,6 +251,7 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'skip': 'This content requires a valid, unexpired auth token',
|
||||
}, {
|
||||
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
|
||||
'only_matching': True,
|
||||
@ -259,7 +302,7 @@ class AENetworksListBaseIE(AENetworksBaseIE):
|
||||
domain, slug = self._match_valid_url(url).groups()
|
||||
_, brand, _ = self._DOMAIN_MAP[domain]
|
||||
playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
|
||||
base_url = f'http://watch.{domain}'
|
||||
base_url = f'https://watch.{domain}'
|
||||
|
||||
entries = []
|
||||
for item in (playlist.get(self._ITEMS_KEY) or []):
|
||||
|
||||
@ -29,7 +29,7 @@ class LearningOnScreenIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._get_cookies('https://learningonscreen.ac.uk/').get('PHPSESSID-BOB-LIVE'):
|
||||
if not self._get_cookies('https://learningonscreen.ac.uk/').get('PHPSESSID-LOS-LIVE'):
|
||||
self.raise_login_required(method='session_cookies')
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@ -25,7 +25,7 @@ class MixcloudBaseIE(InfoExtractor):
|
||||
%s
|
||||
}
|
||||
}''' % (lookup_key, username, f', slug: "{slug}"' if slug else '', object_fields), # noqa: UP031
|
||||
})['data'][lookup_key]
|
||||
}, impersonate=True)['data'][lookup_key]
|
||||
|
||||
|
||||
class MixcloudIE(MixcloudBaseIE):
|
||||
|
||||
@ -33,7 +33,8 @@ class OpencastBaseIE(InfoExtractor):
|
||||
vid\.igb\.illinois\.edu|
|
||||
cursosabertos\.c3sl\.ufpr\.br|
|
||||
mcmedia\.missioncollege\.org|
|
||||
clases\.odon\.edu\.uy
|
||||
clases\.odon\.edu\.uy|
|
||||
oc-p\.uni-jena\.de
|
||||
)'''
|
||||
_UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
|
||||
|
||||
@ -106,7 +107,7 @@ class OpencastBaseIE(InfoExtractor):
|
||||
|
||||
class OpencastIE(OpencastBaseIE):
|
||||
_VALID_URL = rf'''(?x)
|
||||
https?://(?P<host>{OpencastBaseIE._INSTANCES_RE})/paella/ui/watch\.html\?
|
||||
https?://(?P<host>{OpencastBaseIE._INSTANCES_RE})/paella[0-9]*/ui/watch\.html\?
|
||||
(?:[^#]+&)?id=(?P<id>{OpencastBaseIE._UUID_RE})'''
|
||||
|
||||
_API_BASE = 'https://%s/search/episode.json?id=%s'
|
||||
@ -131,8 +132,12 @@ class OpencastIE(OpencastBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
host, video_id = self._match_valid_url(url).group('host', 'id')
|
||||
return self._parse_mediapackage(
|
||||
self._call_api(host, video_id)['search-results']['result']['mediapackage'])
|
||||
response = self._call_api(host, video_id)
|
||||
package = traverse_obj(response, (
|
||||
('search-results', 'result'),
|
||||
('result', ...), # Path needed for oc-p.uni-jena.de
|
||||
'mediapackage', {dict}, any)) or {}
|
||||
return self._parse_mediapackage(package)
|
||||
|
||||
|
||||
class OpencastPlaylistIE(OpencastBaseIE):
|
||||
|
||||
@ -6,6 +6,7 @@ import re
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..networking.impersonate import ImpersonateTarget
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
@ -833,6 +834,30 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudBaseIE):
|
||||
'entries': self._entries(base_url, playlist_id),
|
||||
}
|
||||
|
||||
@functools.cached_property
|
||||
def _browser_impersonate_target(self):
|
||||
available_targets = self._downloader._get_available_impersonate_targets()
|
||||
if not available_targets:
|
||||
# impersonate=True gives a generic warning when no impersonation targets are available
|
||||
return True
|
||||
|
||||
# Any browser target older than chrome-116 is 403'd by Datadome
|
||||
MIN_SUPPORTED_TARGET = ImpersonateTarget('chrome', '116', 'windows', '10')
|
||||
version_as_float = lambda x: float(x.version) if x.version else 0
|
||||
|
||||
# Always try to use the newest Chrome target available
|
||||
filtered = sorted([
|
||||
target[0] for target in available_targets
|
||||
if target[0].client == 'chrome' and target[0].os in ('windows', 'macos')
|
||||
], key=version_as_float)
|
||||
|
||||
if not filtered or version_as_float(filtered[-1]) < version_as_float(MIN_SUPPORTED_TARGET):
|
||||
# All available targets are inadequate or newest available Chrome target is too old, so
|
||||
# warn the user to upgrade their dependency to a version with the minimum supported target
|
||||
return MIN_SUPPORTED_TARGET
|
||||
|
||||
return filtered[-1]
|
||||
|
||||
def _entries(self, url, playlist_id):
|
||||
# Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200.
|
||||
# https://developers.soundcloud.com/blog/offset-pagination-deprecated
|
||||
@ -847,7 +872,9 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudBaseIE):
|
||||
try:
|
||||
response = self._call_api(
|
||||
url, playlist_id, query=query, headers=self._HEADERS,
|
||||
note=f'Downloading track page {i + 1}')
|
||||
note=f'Downloading track page {i + 1}',
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/15660
|
||||
impersonate=self._browser_impersonate_target)
|
||||
break
|
||||
except ExtractorError as e:
|
||||
# Downloading page may result in intermittent 502 HTTP error
|
||||
|
||||
@ -3,6 +3,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
@ -12,6 +13,7 @@ from ..utils import (
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj, trim_str
|
||||
|
||||
|
||||
class SpankBangIE(InfoExtractor):
|
||||
@ -122,7 +124,7 @@ class SpankBangIE(InfoExtractor):
|
||||
}), headers={
|
||||
'Referer': url,
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
})
|
||||
}, impersonate=True)
|
||||
|
||||
for format_id, format_url in stream.items():
|
||||
if format_url and isinstance(format_url, list):
|
||||
@ -178,9 +180,9 @@ class SpankBangPlaylistIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
playlist_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, playlist_id, headers={'Cookie': 'country=US; mobile=on'})
|
||||
country = self.get_param('geo_bypass_country') or 'US'
|
||||
self._set_cookie('.spankbang.com', 'country', country.upper())
|
||||
webpage = self._download_webpage(url, playlist_id, impersonate=True)
|
||||
|
||||
entries = [self.url_result(
|
||||
urljoin(url, mobj.group('path')),
|
||||
@ -189,8 +191,8 @@ class SpankBangPlaylistIE(InfoExtractor):
|
||||
r'<a[^>]+\bhref=(["\'])(?P<path>/?[\da-z]+-(?P<id>[\da-z]+)/playlist/[^"\'](?:(?!\1).)*)\1',
|
||||
webpage)]
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<em>([^<]+)</em>\s+playlist\s*<', webpage, 'playlist title',
|
||||
fatal=False)
|
||||
title = traverse_obj(webpage, (
|
||||
{find_element(tag='h1', attr='data-testid', value='playlist-title')},
|
||||
{clean_html}, {trim_str(end=' Playlist')}))
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title)
|
||||
|
||||
@ -51,7 +51,8 @@ class TruthIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
status = self._download_json(f'https://truthsocial.com/api/v1/statuses/{video_id}', video_id)
|
||||
status = self._download_json(
|
||||
f'https://truthsocial.com/api/v1/statuses/{video_id}', video_id, impersonate=True)
|
||||
uploader_id = strip_or_none(traverse_obj(status, ('account', 'username')))
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@ -268,7 +268,7 @@ class XHamsterIE(InfoExtractor):
|
||||
display_id = mobj.group('display_id') or mobj.group('display_id_2')
|
||||
|
||||
desktop_url = re.sub(r'^(https?://(?:.+?\.)?)m\.', r'\1', url)
|
||||
webpage, urlh = self._download_webpage_handle(desktop_url, video_id)
|
||||
webpage, urlh = self._download_webpage_handle(desktop_url, video_id, impersonate=True)
|
||||
|
||||
error = self._html_search_regex(
|
||||
r'<div[^>]+id=["\']videoClosed["\'][^>]*>(.+?)</div>',
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user