Compare commits

...

16 Commits

Author SHA1 Message Date
bashonly
201812100f
[build] Fix macOS requirements caching (#13328)
Authored by: bashonly
2025-05-28 18:13:48 +00:00
bashonly
cc749a8a3b
[build] Exclude pkg_resources from being collected (#13320)
Closes #13311
Authored by: bashonly
2025-05-27 23:11:58 +00:00
bashonly
f7bbf5a617
[ie/youtube] nsig code improvements and cleanup (#13280)
Authored by: bashonly
2025-05-26 22:54:43 +00:00
Brian
b5be29fa58
[ie/youtube] Fix --mark-watched support (#13222)
Closes #11532
Authored by: iednod55, brian6932

Co-authored-by: iednod55 <210167282+iednod55@users.noreply.github.com>
2025-05-26 22:31:22 +00:00
bashonly
6121559e02 [ie/vice] Mark extractors as broken (#13131)
Authored by: bashonly
2025-05-26 15:57:19 -05:00
Max
2e5bf002da [ie/go] Fix provider-locked content extraction (#13131)
Closes #1770, Closes #8073
Authored by: maxbin123, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2025-05-26 15:57:19 -05:00
Max
6693d66033 [ie/aenetworks] Fix provider-locked content extraction (#13131)
Authored by: maxbin123
2025-05-26 15:57:19 -05:00
Max
b094747e93 [ie/WatchESPN] Fix provider-locked content extraction (#13131)
Closes #4662
Authored by: maxbin123
2025-05-26 15:57:19 -05:00
bashonly
98f8eec956 [ie/brightcove:new] Adapt to new AdobePass requirement (#13131)
Authored by: bashonly
2025-05-26 15:57:19 -05:00
bashonly
0daddc780d [ie/turner] Adapt extractors to new AdobePass flow (#13131)
Authored by: bashonly
2025-05-26 15:57:19 -05:00
bashonly
2d7949d564 [ie/nbc] Rework and adapt extractors to new AdobePass flow (#13131)
Closes #1032, Closes #10874, Closes #11148, Closes #12432
Authored by: bashonly
2025-05-26 15:57:19 -05:00
bashonly
ed108b3ea4 [ie/theplatform] Improve metadata extraction (#13131)
Authored by: bashonly
2025-05-26 15:57:19 -05:00
Max
eee90acc47 [ie/adobepass] Add Fubo MSO (#13131)
Closes #8287
Authored by: maxbin123
2025-05-26 15:57:19 -05:00
Max
711c5d5d09 [ie/adobepass] Rework to require software statement (#13131)
* Also removes broken cookie support

Closes #11811
Authored by: maxbin123, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2025-05-26 15:57:19 -05:00
bashonly
89c1b349ad [ie/adobepass] Validate login URL before sending credentials (#13131)
Authored by: bashonly
2025-05-26 15:57:19 -05:00
bashonly
0ee1102268 [ie/adobepass] Always add newer user-agent when required (#13131)
Fix dcfeea4dd5e5686821350baa6c7767a011944867

Closes #516
Authored by: bashonly
2025-05-26 15:57:19 -05:00
22 changed files with 770 additions and 686 deletions

View File

@ -256,7 +256,7 @@ jobs:
with:
path: |
~/yt-dlp-build-venv
key: cache-reqs-${{ github.job }}
key: cache-reqs-${{ github.job }}-${{ github.ref }}
- name: Install Requirements
run: |
@ -331,19 +331,16 @@ jobs:
if: steps.restore-cache.outputs.cache-hit == 'true'
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
cache_key: cache-reqs-${{ github.job }}
repository: ${{ github.repository }}
branch: ${{ github.ref }}
cache_key: cache-reqs-${{ github.job }}-${{ github.ref }}
run: |
gh extension install actions/gh-actions-cache
gh actions-cache delete "${cache_key}" -R "${repository}" -B "${branch}" --confirm
gh cache delete "${cache_key}"
- name: Cache requirements
uses: actions/cache/save@v4
with:
path: |
~/yt-dlp-build-venv
key: cache-reqs-${{ github.job }}
key: cache-reqs-${{ github.job }}-${{ github.ref }}
macos_legacy:
needs: process

View File

@ -2,6 +2,7 @@
set -e
source ~/.local/share/pipx/venvs/pyinstaller/bin/activate
python -m devscripts.install_deps -o --include build
python -m devscripts.install_deps --include secretstorage --include curl-cffi
python -m devscripts.make_lazy_extractors
python devscripts/update-version.py -c "${channel}" -r "${origin}" "${version}"

View File

@ -36,6 +36,9 @@ def main():
f'--name={name}',
'--icon=devscripts/logo.ico',
'--upx-exclude=vcruntime140.dll',
# Ref: https://github.com/yt-dlp/yt-dlp/issues/13311
# https://github.com/pyinstaller/pyinstaller/issues/9149
'--exclude-module=pkg_resources',
'--noconfirm',
'--additional-hooks-dir=yt_dlp/__pyinstaller',
*opts,

View File

@ -65,7 +65,7 @@ build = [
"build",
"hatchling",
"pip",
"setuptools>=71.0.2", # 71.0.0 broke pyinstaller
"setuptools>=71.0.2,<81", # See https://github.com/pyinstaller/pyinstaller/issues/9149
"wheel",
]
dev = [

View File

@ -300,7 +300,6 @@ from .brainpop import (
BrainPOPIlIE,
BrainPOPJrIE,
)
from .bravotv import BravoTVIE
from .breitbart import BreitBartIE
from .brightcove import (
BrightcoveLegacyIE,
@ -1262,6 +1261,7 @@ from .nba import (
)
from .nbc import (
NBCIE,
BravoTVIE,
NBCNewsIE,
NBCOlympicsIE,
NBCOlympicsStreamIE,
@ -1269,6 +1269,7 @@ from .nbc import (
NBCSportsStreamIE,
NBCSportsVPlayerIE,
NBCStationsIE,
SyfyIE,
)
from .ndr import (
NDRIE,
@ -2022,7 +2023,6 @@ from .svt import (
SVTSeriesIE,
)
from .swearnet import SwearnetEpisodeIE
from .syfy import SyfyIE
from .syvdk import SYVDKIE
from .sztvhu import SztvHuIE
from .tagesschau import TagesschauIE

View File

@ -3,6 +3,7 @@ import json
import re
import time
import urllib.parse
import uuid
import xml.etree.ElementTree as etree
from .common import InfoExtractor
@ -10,6 +11,7 @@ from ..networking.exceptions import HTTPError
from ..utils import (
NO_DEFAULT,
ExtractorError,
parse_qs,
unescapeHTML,
unified_timestamp,
urlencode_postdata,
@ -45,6 +47,8 @@ MSO_INFO = {
'name': 'Comcast XFINITY',
'username_field': 'user',
'password_field': 'passwd',
'login_hostname': 'login.xfinity.com',
'needs_newer_ua': True,
},
'TWC': {
'name': 'Time Warner Cable | Spectrum',
@ -74,6 +78,12 @@ MSO_INFO = {
'name': 'Verizon FiOS',
'username_field': 'IDToken1',
'password_field': 'IDToken2',
'login_hostname': 'ssoauth.verizon.com',
},
'Fubo': {
'name': 'Fubo',
'username_field': 'username',
'password_field': 'password',
},
'Cablevision': {
'name': 'Optimum/Cablevision',
@ -1338,6 +1348,7 @@ MSO_INFO = {
'name': 'Sling TV',
'username_field': 'username',
'password_field': 'password',
'login_hostname': 'identity.sling.com',
},
'Suddenlink': {
'name': 'Suddenlink',
@ -1355,7 +1366,6 @@ MSO_INFO = {
class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
_MODERN_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0'
_MVPD_CACHE = 'ap-mvpd'
_DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
@ -1367,6 +1377,14 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
return super()._download_webpage_handle(
*args, **kwargs)
@staticmethod
def _get_mso_headers(mso_info):
# yt-dlp's default user-agent is usually too old for some MSO's like Comcast_SSO
# See: https://github.com/yt-dlp/yt-dlp/issues/10848
return {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0',
} if mso_info.get('needs_newer_ua') else {}
@staticmethod
def _get_mvpd_resource(provider_id, title, guid, rating):
channel = etree.Element('channel')
@ -1382,7 +1400,13 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
resource_rating.text = rating
return '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">' + etree.tostring(channel).decode() + '</rss>'
def _extract_mvpd_auth(self, url, video_id, requestor_id, resource):
def _extract_mvpd_auth(self, url, video_id, requestor_id, resource, software_statement):
mso_id = self.get_param('ap_mso')
if mso_id:
mso_info = MSO_INFO[mso_id]
else:
mso_info = {}
def xml_text(xml_str, tag):
return self._search_regex(
f'<{tag}>(.+?)</{tag}>', xml_str, tag)
@ -1391,15 +1415,27 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele)))
return token_expires and token_expires <= int(time.time())
def post_form(form_page_res, note, data={}):
def post_form(form_page_res, note, data={}, validate_url=False):
form_page, urlh = form_page_res
post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
if not re.match(r'https?://', post_url):
post_url = urllib.parse.urljoin(urlh.url, post_url)
if validate_url:
# This request is submitting credentials so we should validate it when possible
url_parsed = urllib.parse.urlparse(post_url)
expected_hostname = mso_info.get('login_hostname')
if expected_hostname and expected_hostname != url_parsed.hostname:
raise ExtractorError(
f'Unexpected login URL hostname; expected "{expected_hostname}" but got '
f'"{url_parsed.hostname}". Aborting before submitting credentials')
if url_parsed.scheme != 'https':
self.write_debug('Upgrading login URL scheme to https')
post_url = urllib.parse.urlunparse(url_parsed._replace(scheme='https'))
form_data = self._hidden_inputs(form_page)
form_data.update(data)
return self._download_webpage_handle(
post_url, video_id, note, data=urlencode_postdata(form_data), headers={
**self._get_mso_headers(mso_info),
'Content-Type': 'application/x-www-form-urlencoded',
})
@ -1432,40 +1468,72 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
}
guid = xml_text(resource, 'guid') if '<' in resource else resource
count = 0
while count < 2:
for _ in range(2):
requestor_info = self.cache.load(self._MVPD_CACHE, requestor_id) or {}
authn_token = requestor_info.get('authn_token')
if authn_token and is_expired(authn_token, 'simpleTokenExpires'):
authn_token = None
if not authn_token:
mso_id = self.get_param('ap_mso')
if mso_id:
username, password = self._get_login_info('ap_username', 'ap_password', mso_id)
if not username or not password:
raise_mvpd_required()
mso_info = MSO_INFO[mso_id]
provider_redirect_page_res = self._download_webpage_handle(
self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
'Downloading Provider Redirect Page', query={
'noflash': 'true',
'mso_id': mso_id,
'requestor_id': requestor_id,
'no_iframe': 'false',
'domain_name': 'adobe.com',
'redirect_url': url,
}, headers={
# yt-dlp's default user-agent is usually too old for Comcast_SSO
# See: https://github.com/yt-dlp/yt-dlp/issues/10848
'User-Agent': self._MODERN_USER_AGENT,
} if mso_id == 'Comcast_SSO' else None)
elif not self._cookies_passed:
if not mso_id:
raise_mvpd_required()
username, password = self._get_login_info('ap_username', 'ap_password', mso_id)
if not username or not password:
raise_mvpd_required()
if not mso_id:
pass
elif mso_id == 'Comcast_SSO':
device_info, urlh = self._download_json_handle(
'https://sp.auth.adobe.com/indiv/devices',
video_id, 'Registering device with Adobe',
data=json.dumps({'fingerprint': uuid.uuid4().hex}).encode(),
headers={'Content-Type': 'application/json; charset=UTF-8'})
device_id = device_info['deviceId']
mvpd_headers['pass_sfp'] = urlh.get_header('pass_sfp')
mvpd_headers['Ap_21'] = device_id
registration = self._download_json(
'https://sp.auth.adobe.com/o/client/register',
video_id, 'Registering client with Adobe',
data=json.dumps({'software_statement': software_statement}).encode(),
headers={'Content-Type': 'application/json; charset=UTF-8'})
access_token = self._download_json(
'https://sp.auth.adobe.com/o/client/token', video_id,
'Obtaining access token', data=urlencode_postdata({
'grant_type': 'client_credentials',
'client_id': registration['client_id'],
'client_secret': registration['client_secret'],
}),
headers={
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
})['access_token']
mvpd_headers['Authorization'] = f'Bearer {access_token}'
reg_code = self._download_json(
f'https://sp.auth.adobe.com/reggie/v1/{requestor_id}/regcode',
video_id, 'Obtaining registration code',
data=urlencode_postdata({
'requestor': requestor_id,
'deviceId': device_id,
'format': 'json',
}),
headers={
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
'Authorization': f'Bearer {access_token}',
})['code']
provider_redirect_page_res = self._download_webpage_handle(
self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
'Downloading Provider Redirect Page', query={
'noflash': 'true',
'mso_id': mso_id,
'requestor_id': requestor_id,
'no_iframe': 'false',
'domain_name': 'adobe.com',
'redirect_url': url,
'reg_code': reg_code,
}, headers=self._get_mso_headers(mso_info))
if mso_id == 'Comcast_SSO':
# Comcast page flow varies by video site and whether you
# are on Comcast's network.
provider_redirect_page, urlh = provider_redirect_page_res
@ -1489,8 +1557,8 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
oauth_redirect_url = extract_redirect_url(
provider_redirect_page, fatal=True)
provider_login_page_res = self._download_webpage_handle(
oauth_redirect_url, video_id,
self._DOWNLOADING_LOGIN_PAGE)
oauth_redirect_url, video_id, self._DOWNLOADING_LOGIN_PAGE,
headers=self._get_mso_headers(mso_info))
else:
provider_login_page_res = post_form(
provider_redirect_page_res,
@ -1500,7 +1568,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
provider_login_page_res, 'Logging in', {
mso_info['username_field']: username,
mso_info['password_field']: password,
})
}, validate_url=True)
mvpd_confirm_page, urlh = mvpd_confirm_page_res
if '<button class="submit" value="Resume">Resume</button>' in mvpd_confirm_page:
post_form(mvpd_confirm_page_res, 'Confirming Login')
@ -1539,7 +1607,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
provider_redirect_page_res, 'Logging in', {
mso_info['username_field']: username,
mso_info['password_field']: password,
})
}, validate_url=True)
saml_login_page, urlh = saml_login_page_res
if 'Please try again.' in saml_login_page:
raise ExtractorError(
@ -1560,7 +1628,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
[saml_login_page, saml_redirect_url], 'Logging in', {
mso_info['username_field']: username,
mso_info['password_field']: password,
})
}, validate_url=True)
if 'Please try again.' in saml_login_page:
raise ExtractorError(
'Failed to login, incorrect User ID or Password.')
@ -1631,7 +1699,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
provider_login_page_res, 'Logging in', {
mso_info['username_field']: username,
mso_info['password_field']: password,
})
}, validate_url=True)
provider_refresh_redirect_url = extract_redirect_url(
provider_association_redirect, url=urlh.url)
@ -1682,7 +1750,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
provider_login_page_res, 'Logging in', {
mso_info['username_field']: username,
mso_info['password_field']: password,
})
}, validate_url=True)
provider_refresh_redirect_url = extract_redirect_url(
provider_association_redirect, url=urlh.url)
@ -1699,6 +1767,27 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
query=hidden_data)
post_form(mvpd_confirm_page_res, 'Confirming Login')
elif mso_id == 'Fubo':
_, urlh = provider_redirect_page_res
fubo_response = self._download_json(
'https://api.fubo.tv/partners/tve/connect', video_id,
'Authenticating with Fubo', 'Unable to authenticate with Fubo',
query=parse_qs(urlh.url), data=json.dumps({
'username': username,
'password': password,
}).encode(), headers={
'Accept': 'application/json',
'Content-Type': 'application/json',
})
self._request_webpage(
'https://sp.auth.adobe.com/adobe-services/oauth2', video_id,
'Authenticating with Adobe', 'Failed to authenticate with Adobe',
query={
'code': fubo_response['code'],
'state': fubo_response['state'],
})
else:
# Some providers (e.g. DIRECTV NOW) have another meta refresh
# based redirect that should be followed.
@ -1717,7 +1806,8 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
}
if mso_id in ('Cablevision', 'AlticeOne'):
form_data['_eventId_proceed'] = ''
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', form_data)
mvpd_confirm_page_res = post_form(
provider_login_page_res, 'Logging in', form_data, validate_url=True)
if mso_id != 'Rogers':
post_form(mvpd_confirm_page_res, 'Confirming Login')
@ -1727,6 +1817,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
'Retrieving Session', data=urlencode_postdata({
'_method': 'GET',
'requestor_id': requestor_id,
'reg_code': reg_code,
}), headers=mvpd_headers)
except ExtractorError as e:
if not mso_id and isinstance(e.cause, HTTPError) and e.cause.status == 401:
@ -1734,7 +1825,6 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
raise
if '<pendingLogout' in session:
self.cache.store(self._MVPD_CACHE, requestor_id, {})
count += 1
continue
authn_token = unescapeHTML(xml_text(session, 'authnToken'))
requestor_info['authn_token'] = authn_token
@ -1755,7 +1845,6 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
}), headers=mvpd_headers)
if '<pendingLogout' in authorize:
self.cache.store(self._MVPD_CACHE, requestor_id, {})
count += 1
continue
if '<error' in authorize:
raise ExtractorError(xml_text(authorize, 'details'), expected=True)
@ -1778,6 +1867,5 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
}), headers=mvpd_headers)
if '<pendingLogout' in short_authorize:
self.cache.store(self._MVPD_CACHE, requestor_id, {})
count += 1
continue
return short_authorize

View File

@ -84,6 +84,8 @@ class AdultSwimIE(TurnerBaseIE):
'skip': '404 Not Found',
}]
_SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIwNjg5ZmU2My00OTc5LTQxZmQtYWYxNC1hYjVlNmJjNWVkZWIiLCJuYmYiOjE1MzcxOTA2NzQsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwNjc0fQ.Xl3AEduM0s1TxDQ6-XssdKIiLm261hhsEv1C1yo_nitIajZThSI9rXILqtIzO0aujoHhdzUnu_dUCq9ffiSBzEG632tTa1la-5tegHtce80cMhewBN4n2t8n9O5tiaPx8MPY8ALdm5wS7QzWE6DO_LTJKgE8Bl7Yv-CWJT4q4SywtNiQWLVOuhBRnDyfsRezxRwptw8qTn9dv5ZzUrVJaby5fDZ_nOncMKvegOgaKd5KEuCAGQ-mg-PSuValMjGuf6FwDguGaK7IyI5Y2oOrzXmD4Dj7q4WBg8w9QoZhtLeAU56mcsGILolku2R5FHlVLO9xhjResyt-pfmegOkpSw'
def _real_extract(self, url):
show_path, episode_path = self._match_valid_url(url).groups()
display_id = episode_path or show_path
@ -152,7 +154,7 @@ class AdultSwimIE(TurnerBaseIE):
# CDN_TOKEN_APP_ID from:
# https://d2gg02c3xr550i.cloudfront.net/assets/asvp.e9c8bef24322d060ef87.bundle.js
'appId': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcHBJZCI6ImFzLXR2ZS1kZXNrdG9wLXB0enQ2bSIsInByb2R1Y3QiOiJ0dmUiLCJuZXR3b3JrIjoiYXMiLCJwbGF0Zm9ybSI6ImRlc2t0b3AiLCJpYXQiOjE1MzI3MDIyNzl9.BzSCk-WYOZ2GMCIaeVb8zWnzhlgnXuJTCu0jGp_VaZE',
}, {
}, self._SOFTWARE_STATEMENT, {
'url': url,
'site_name': 'AdultSwim',
'auth_required': auth,

View File

@ -20,13 +20,13 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
_THEPLATFORM_KEY = '43jXaGRQud'
_THEPLATFORM_SECRET = 'S10BPXHMlb'
_DOMAIN_MAP = {
'history.com': ('HISTORY', 'history'),
'aetv.com': ('AETV', 'aetv'),
'mylifetime.com': ('LIFETIME', 'lifetime'),
'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc'),
'fyi.tv': ('FYI', 'fyi'),
'historyvault.com': (None, 'historyvault'),
'biography.com': (None, 'biography'),
'history.com': ('HISTORY', 'history', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI1MzZlMTQ3ZS0zMzFhLTQxY2YtYTMwNC01MDA2NzNlOGYwYjYiLCJuYmYiOjE1Mzg2NjMzMDksImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM4NjYzMzA5fQ.n24-FVHLGXJe2D4atIQZ700aiXKIajKh5PWFoHJ40Az4itjtwwSFHnvufnoal3T8lYkwNLxce7H-IEGxIykRkZEdwq09pMKMT-ft9ASzE4vQ8fAWbf5ZgDME86x4Jq_YaxkRc9Ne0eShGhl8fgTJHvk07sfWcol61HJ7kU7K8FzzcHR0ucFQgA5VNd8RyjoGWY7c6VxnXR214LOpXsywmit04-vGJC102b_WA2EQfqI93UzG6M6l0EeV4n0_ijP3s8_i8WMJZ_uwnTafCIY6G_731i01dKXDLSFzG1vYglAwDa8DTcdrAAuIFFDF6QNGItCCmwbhjufjmoeVb7R1Gg'),
'aetv.com': ('AETV', 'aetv', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI5Y2IwNjg2Yy03ODUxLTRiZDUtODcyMC00MjNlZTg1YTQ1NzMiLCJuYmYiOjE1Mzg2NjMyOTAsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM4NjYzMjkwfQ.T5Elf0X4TndO4NEgqBas1gDxNHGPVk_daO2Ha5FBzVO6xi3zM7eavdAKfYMCN7gpWYJx03iADaVPtczO_t_aGZczDjpwJHgTUzDgvcLZAVsVDqtDIAMy3S846rPgT6UDbVoxurA7B2VTPm9phjrSXhejvd0LBO8MQL4AZ3sy2VmiPJ2noT1ily5PuHCYlkrT1fheO064duR__Cd9DQ5VTMnKjzY3Cx345CEwKDkUk5gwgxhXM-aY0eblehrq8VD81_aRM_O3tvh7nbTydHOnUpV-k_iKVi49gqz7Sf8zb6Zh5z2Uftn3vYCfE5NQuesitoRMnsH17nW7o_D59hkRgg'),
'mylifetime.com': ('LIFETIME', 'lifetime', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJmODg0MDM1ZC1mZGRmLTRmYjgtYmRkMC05MzRhZDdiYTAwYTciLCJuYmYiOjE1NDkzOTI2NDQsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTQ5MzkyNjQ0fQ.vkTIaCpheKdKQd__2-3ec4qkcpbAhyCTvwe5iTl922ItSQfVhpEJG4wseVSNmBTrpBi0hvLedcw6Hj1_UuzBMVuVcCqLprU-pI8recEwL0u7G-eVkylsxe1OTUm1o3V6OykXQ9KlA-QQLL1neUhdhR1n5B1LZ4cmtBmiEpfgf4rFwXD1ScFylIcaWKLBqHoRBNUmxyTmoXXvn_A-GGSj9eCizFzY8W5uBwUcsoiw2Cr1skx7PbB2RSP1I5DsoIJKG-8XV1KS7MWl-fNLjE-hVAsI9znqfEEFcPBiv3LhCP4Nf4OIs7xAselMn0M0c8igRUZhURWX_hdygUAxkbKFtQ'),
'fyi.tv': ('FYI', 'fyi', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIxOGZiOWM3Ny1mYmMzLTQxYTktYmE1Yi1lMzM0ZmUzNzU4NjEiLCJuYmYiOjE1ODc1ODAzNzcsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTg3NTgwMzc3fQ.AYDuipKswmIfLBfOjHRsfc5fMV5NmJUmiJnkpiep4VEw9QiXkygFj4bN06Si5tFc5Mee5TDrGzDpV6iuKbVpLT5kuqXhAn-Wozf5zKPsg_IpdEKO7gsiCq4calt72ct44KTqtKD_hVcoxQU24_HaJsRgXzu3B-6Ff6UrmsXkyvYifYVC9v2DSkdCuA02_IrlllzVT2kRuefUXgL4vQRtTFf77uYa0RKSTG7uVkiQ_AU41eXevKlO2qgtc14Hk5cZ7-ZNrDyMCXYA5ngdIHP7Gs9PWaFXT36PFHI_rC4EfxUABPzjQFxjpP75aX5qn8SH__HbM9q3hoPWgaEaf76qIQ'),
'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc', None),
'historyvault.com': (None, 'historyvault', None),
'biography.com': (None, 'biography', None),
}
def _extract_aen_smil(self, smil_url, video_id, auth=None):
@ -71,7 +71,7 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
}
def _extract_aetn_info(self, domain, filter_key, filter_value, url):
requestor_id, brand = self._DOMAIN_MAP[domain]
requestor_id, brand, software_statement = self._DOMAIN_MAP[domain]
result = self._download_json(
f'https://feeds.video.aetnd.com/api/v2/{brand}/videos',
filter_value, query={f'filter[{filter_key}]': filter_value})
@ -95,7 +95,7 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
traverse_obj(theplatform_metadata, ('ratings', 0, 'rating')))
auth = self._extract_mvpd_auth(
url, video_id, requestor_id, resource)
url, video_id, requestor_id, resource, software_statement)
info.update(self._extract_aen_smil(media_url, video_id, auth))
info.update({
'title': title,
@ -132,10 +132,11 @@ class AENetworksIE(AENetworksBaseIE):
'tags': 'count:14',
'categories': ['Mountain Men'],
'episode_number': 1,
'episode': 'Episode 1',
'episode': 'Winter Is Coming',
'season': 'Season 1',
'season_number': 1,
'series': 'Mountain Men',
'age_limit': 0,
},
'params': {
# m3u8 download
@ -157,18 +158,18 @@ class AENetworksIE(AENetworksBaseIE):
'thumbnail': r're:^https?://.*\.jpe?g$',
'chapters': 'count:4',
'tags': 'count:23',
'episode': 'Episode 1',
'episode': 'Inlawful Entry',
'episode_number': 1,
'season': 'Season 9',
'season_number': 9,
'series': 'Duck Dynasty',
'age_limit': 0,
},
'params': {
# m3u8 download
'skip_download': True,
},
'add_ie': ['ThePlatform'],
'skip': 'This video is only available for users of participating TV providers.',
}, {
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
'only_matching': True,

View File

@ -1,188 +0,0 @@
from .adobepass import AdobePassIE
from ..networking import HEADRequest
from ..utils import (
extract_attributes,
float_or_none,
get_element_html_by_class,
int_or_none,
merge_dicts,
parse_age_limit,
remove_end,
str_or_none,
traverse_obj,
unescapeHTML,
unified_timestamp,
update_url_query,
url_or_none,
)
class BravoTVIE(AdobePassIE):
_VALID_URL = r'https?://(?:www\.)?(?P<site>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
'info_dict': {
'id': '3923059',
'ext': 'mp4',
'title': 'The Top Chef Season 16 Winner Is...',
'description': 'Find out who takes the title of Top Chef!',
'upload_date': '20190314',
'timestamp': 1552591860,
'season_number': 16,
'episode_number': 15,
'series': 'Top Chef',
'episode': 'The Top Chef Season 16 Winner Is...',
'duration': 190.357,
'season': 'Season 16',
'thumbnail': r're:^https://.+\.jpg',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.bravotv.com/top-chef/season-20/episode-1/london-calling',
'info_dict': {
'id': '9000234570',
'ext': 'mp4',
'title': 'London Calling',
'description': 'md5:5af95a8cbac1856bd10e7562f86bb759',
'upload_date': '20230310',
'timestamp': 1678410000,
'season_number': 20,
'episode_number': 1,
'series': 'Top Chef',
'episode': 'London Calling',
'duration': 3266.03,
'season': 'Season 20',
'chapters': 'count:7',
'thumbnail': r're:^https://.+\.jpg',
'age_limit': 14,
},
'params': {'skip_download': 'm3u8'},
'skip': 'This video requires AdobePass MSO credentials',
}, {
'url': 'https://www.oxygen.com/in-ice-cold-blood/season-1/closing-night',
'info_dict': {
'id': '3692045',
'ext': 'mp4',
'title': 'Closing Night',
'description': 'md5:3170065c5c2f19548d72a4cbc254af63',
'upload_date': '20180401',
'timestamp': 1522623600,
'season_number': 1,
'episode_number': 1,
'series': 'In Ice Cold Blood',
'episode': 'Closing Night',
'duration': 2629.051,
'season': 'Season 1',
'chapters': 'count:6',
'thumbnail': r're:^https://.+\.jpg',
'age_limit': 14,
},
'params': {'skip_download': 'm3u8'},
'skip': 'This video requires AdobePass MSO credentials',
}, {
'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
'info_dict': {
'id': '3974019',
'ext': 'mp4',
'title': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
'description': 'md5:f9d638dd6946a1c1c0533a9c6100eae5',
'upload_date': '20190617',
'timestamp': 1560790800,
'season_number': 2,
'episode_number': 16,
'series': 'In Ice Cold Blood',
'episode': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
'duration': 68.235,
'season': 'Season 2',
'thumbnail': r're:^https://.+\.jpg',
'age_limit': 14,
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
'only_matching': True,
}]
def _real_extract(self, url):
site, display_id = self._match_valid_url(url).group('site', 'id')
webpage = self._download_webpage(url, display_id)
settings = self._search_json(
r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>', webpage, 'settings', display_id)
tve = extract_attributes(get_element_html_by_class('tve-video-deck-app', webpage) or '')
query = {
'manifest': 'm3u',
'formats': 'm3u,mpeg4',
}
if tve:
account_pid = tve.get('data-mpx-media-account-pid') or 'HNK2IC'
account_id = tve['data-mpx-media-account-id']
metadata = self._parse_json(
tve.get('data-normalized-video', ''), display_id, fatal=False, transform_source=unescapeHTML)
video_id = tve.get('data-guid') or metadata['guid']
if tve.get('data-entitlement') == 'auth':
auth = traverse_obj(settings, ('tve_adobe_auth', {dict})) or {}
site = remove_end(site, 'tv')
release_pid = tve['data-release-pid']
resource = self._get_mvpd_resource(
tve.get('data-adobe-pass-resource-id') or auth.get('adobePassResourceId') or site,
tve['data-title'], release_pid, tve.get('data-rating'))
query.update({
'switch': 'HLSServiceSecure',
'auth': self._extract_mvpd_auth(
url, release_pid, auth.get('adobePassRequestorId') or site, resource),
})
else:
ls_playlist = traverse_obj(settings, ('ls_playlist', ..., {dict}), get_all=False) or {}
account_pid = ls_playlist.get('mpxMediaAccountPid') or 'PHSl-B'
account_id = ls_playlist['mpxMediaAccountId']
video_id = ls_playlist['defaultGuid']
metadata = traverse_obj(
ls_playlist, ('videos', lambda _, v: v['guid'] == video_id, {dict}), get_all=False)
tp_url = f'https://link.theplatform.com/s/{account_pid}/media/guid/{account_id}/{video_id}'
tp_metadata = self._download_json(
update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False)
chapters = traverse_obj(tp_metadata, ('chapters', ..., {
'start_time': ('startTime', {float_or_none(scale=1000)}),
'end_time': ('endTime', {float_or_none(scale=1000)}),
}))
# prune pointless single chapters that span the entire duration from short videos
if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')):
chapters = None
m3u8_url = self._request_webpage(HEADRequest(
update_url_query(f'{tp_url}/stream.m3u8', query)), video_id, 'Checking m3u8 URL').url
if 'mpeg_cenc' in m3u8_url:
self.report_drm(video_id)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
return {
'id': video_id,
'formats': formats,
'subtitles': subtitles,
'chapters': chapters,
**merge_dicts(traverse_obj(tp_metadata, {
'title': 'title',
'description': 'description',
'duration': ('duration', {float_or_none(scale=1000)}),
'timestamp': ('pubDate', {float_or_none(scale=1000)}),
'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}),
'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}),
'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}),
'episode': (('title', 'pl1$episodeNumber', 'nbcu$episodeNumber'), {str_or_none}),
'age_limit': ('ratings', ..., 'rating', {parse_age_limit}),
}, get_all=False), traverse_obj(metadata, {
'title': 'title',
'description': 'description',
'duration': ('durationInSeconds', {int_or_none}),
'timestamp': ('airDate', {unified_timestamp}),
'thumbnail': ('thumbnailUrl', {url_or_none}),
'season_number': ('seasonNumber', {int_or_none}),
'episode_number': ('episodeNumber', {int_or_none}),
'episode': 'episodeTitle',
'series': 'show',
})),
}

View File

@ -923,10 +923,18 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
errors = json_data.get('errors')
if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
custom_fields = json_data['custom_fields']
missing_fields = ', '.join(
key for key in ('source_url', 'software_statement') if not smuggled_data.get(key))
if missing_fields:
raise ExtractorError(
f'Missing fields in smuggled data: {missing_fields}. '
f'This video can be only extracted from the webpage where it is embedded. '
f'Pass the URL of the embedding webpage instead of the Brightcove URL', expected=True)
tve_token = self._extract_mvpd_auth(
smuggled_data['source_url'], video_id,
custom_fields['bcadobepassrequestorid'],
custom_fields['bcadobepassresourceid'])
custom_fields['bcadobepassresourceid'],
smuggled_data['software_statement'])
json_data = self._download_json(
api_url, video_id, headers={
'Accept': f'application/json;pk={policy_key}',

View File

@ -329,6 +329,7 @@ class WatchESPNIE(AdobePassIE):
}]
_API_KEY = 'ZXNwbiZicm93c2VyJjEuMC4w.ptUt7QxsteaRruuPmGZFaJByOoqKvDP2a5YkInHrc7c'
_SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIyZGJmZWM4My03OWE1LTQyNzEtYTVmZC04NTZjYTMxMjRjNjMiLCJuYmYiOjE1NDAyMTI3NjEsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTQwMjEyNzYxfQ.yaK3r4AI2uLVvsyN1GLzqzgzRlxMPtasSaiYYBV0wIstqih5tvjTmeoLmi8Xy9Kp_U7Md-bOffwiyK3srHkpUkhhwXLH2x6RPjmS1tPmhaG7-3LBcHTf2ySPvXhVf7cN4ngldawK4tdtLtsw6rF_JoZE2yaC6XbS2F51nXSFEDDnOQWIHEQRG3aYAj-38P2CLGf7g-Yfhbp5cKXeksHHQ90u3eOO4WH0EAjc9oO47h33U8KMEXxJbvjV5J8Va2G2fQSgLDZ013NBI3kQnE313qgqQh2feQILkyCENpB7g-TVBreAjOaH1fU471htSoGGYepcAXv-UDtpgitDiLy7CQ'
def _call_bamgrid_api(self, path, video_id, payload=None, headers={}):
if 'Authorization' not in headers:
@ -405,8 +406,8 @@ class WatchESPNIE(AdobePassIE):
# TV Provider required
else:
resource = self._get_mvpd_resource('ESPN', video_data['name'], video_id, None)
auth = self._extract_mvpd_auth(url, video_id, 'ESPN', resource).encode()
resource = self._get_mvpd_resource('espn1', video_data['name'], video_id, None)
auth = self._extract_mvpd_auth(url, video_id, 'ESPN', resource, self._SOFTWARE_STATEMENT).encode()
asset = self._download_json(
f'https://watch.auth.api.espn.com/video/auth/media/{video_id}/asset?apikey=uiqlbgzdwuru14v627vdusswb',

View File

@ -7,161 +7,157 @@ from ..utils import (
int_or_none,
join_nonempty,
parse_age_limit,
remove_end,
remove_start,
traverse_obj,
try_get,
unified_timestamp,
urlencode_postdata,
)
from ..utils.traversal import traverse_obj
class GoIE(AdobePassIE):
_SITE_INFO = {
'abc': {
'brand': '001',
'requestor_id': 'ABC',
'requestor_id': 'dtci',
'provider_id': 'ABC',
'software_statement': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI4OTcwMjlkYS0yYjM1LTQyOWUtYWQ0NS02ZjZiZjVkZTdhOTUiLCJuYmYiOjE2MjAxNzM5NjksImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNjIwMTczOTY5fQ.SC69DVJWSL8sIe-vVUrP6xS_kzHKqwz9PdKYexs_y-f7Vin6mM-7S-W1TE_-K55O0pyf-TL4xYgvm6LIye8CckG-nZfVwNPV4huduov0jmIcxCQFeUwkHULG2IaA44wfBVUBdaHgkhPweZ2amjycO_IXtez-gBXOLbE3B7Gx9j_5ISCFtyVUblThKfoGyQv6KT6t8Vpmc4ZSKCCQp74KWFFypydb9ucego1taW_nQD06Cdf4yByLd6NaTBceMcIKbug9b9gxFm3XBgJ5q3z7KGo1Kr6XalAV5j4m-fQ91wczlTilX8FM4AljMupyRM9mA_aEADILQ4hS79q4SM0w6w',
},
'freeform': {
'brand': '002',
'requestor_id': 'ABCFamily',
},
'watchdisneychannel': {
'brand': '004',
'resource_id': 'Disney',
},
'watchdisneyjunior': {
'brand': '008',
'resource_id': 'DisneyJunior',
},
'watchdisneyxd': {
'brand': '009',
'resource_id': 'DisneyXD',
'provider_id': 'ABCFamily',
'software_statement': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZWM2MGYyNC0xYzRjLTQ1NzQtYjc0Zi03ZmM4N2E5YWMzMzgiLCJuYmYiOjE1ODc2NjU5MjMsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTg3NjY1OTIzfQ.flCn3dhvmvPnWmV0JV8Fm0YFyj07yPez9-n1GFEwVIm_S2wQVWbWyJhqsAyLZVFrhOMZYTqmPS3OHxGwTwXkEYn6PD7o_vIVG3oqi-Xn1m5jRt_Gazw5qEtpat6VE7bvKGSD3ZhcidOrsCk8NcYyq75u61NHDvSl81pcedJjVRVUpsqrEwmo0aVbA0C8PX3ri0mEbGvkMKvHn8E60xp-PSE-VK8SDT0plwPu_TwUszkZ6-_I8_2xcv_WBqcXFkAVg7Q-iNJXgQvmNsrpcrYuLvi6hEH4ZLtoDcXU6MhwTQAJTiHSo8x9aHX1_qFP09CzlNOFQbC2ZEJdP9SvA53SLQ',
},
'disneynow': {
'brand': '011',
'brand': '011', # also: '004', '008', '009'
'requestor_id': 'DisneyChannels',
'provider_id': 'DisneyChannels',
'software_statement': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI1MzAzNTRiOS04NDNiLTRkNjAtYTQ3ZS0yNzk1MzlkOTIyNTciLCJuYmYiOjE1NTg5ODc0NDksImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTU4OTg3NDQ5fQ.Jud6YS6-J2h0h6po0oMheDym0qRTJQGj4kzacrz4DFuEwhcBkkykW6pF5pKuAUJy9HCZ40oDAHe2KcTlDJjCZF5tDaUEfdihakZ9cC_rG7MU-QoRne8qaB_dPDKwGuk-ZyWD8eV3zwTJmbGo8hDxYTEU81YNCxwhyc_BPDr5TYiubbmpP3_pTnXmSpuL58isJ2peSKWlX9BacuXtBY25c_QnPFKk-_EETm7IHkTpDazde1QfHWGu4s4yJpKGk8RVVujVG6h6ELlL-ZeYLilBm7iS7h1TYG1u7fJhyZRL7isaom6NvAzsvN3ngss1fLwt8decP8wzdFHrbYTdTjW8qw',
'resource_id': 'Disney',
},
'fxnow.fxnetworks': {
'brand': '025',
'fxnetworks': {
'brand': '025', # also: '020'
'requestor_id': 'dtci',
'provider_id': 'fx', # also 'fxx', 'fxm'
'software_statement': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIzYWRhYWZiNC02OTAxLTRlYzktOTdmNy1lYWZkZTJkODJkN2EiLCJuYmYiOjE1NjIwMjQwNzYsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTYyMDI0MDc2fQ.dhKMpZK50AObbZYrMiYPSfWtzXHUaeMP3jrIY4Cgfvh0GaEgk0Mns_zp78jypFeZgRtPVleQMQDNq2YEloRLcAGqP1aa6WVDglnK77ZWUm4IKai14Rwf3A6YBhSRoO2_lMmUGkuTf6gZY-kMIPqBYKqzTQiQl4HbniPFodIzFRiuI9QJVrkoyTGrJL4oqiX08PoFI3Z-TOti1Heu3EbFC-GveQHhlinYrzU7rbiAqLEz7FImtfBDsnXX1Y3uJDLYM3Bq4Oh0nrzTv1Fd62wNsCNErHHIbELidh1zZF0ujvt7ReuZUwAitm0UhEJ7OxNOUbEQWtae6pVNscvdvTFMpg',
},
'nationalgeographic': {
'brand': '026', # also '023'
'requestor_id': 'dtci',
'provider_id': 'ngc', # also 'ngw'
'software_statement': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIxMzE4YTM1Ni05Mjc4LTQ4NjEtYTFmNi1jMTIzMzg1ZWMzYzMiLCJuYmYiOjE1NjIwMjM4MjgsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTYyMDIzODI4fQ.Le-2OzF9-jrhJ7ZfWtLWk5iSHGVZoxeU1w0_fO--Heli0OwRZsRq2slSmx-oZTzxuWmAgDEiBkWSDcDK6sM25DrCLsdsJa3MBuZ-slBRtH8aq3HpNoqqLkU-vg6gRUEKMtwBUtwCu_9aKUCayYtndWv4b1DjVQeSrteOW5NNudWVYleAe0kxeNJQHo5If9SCzDudKVJktFUjhNks4QPOC_uONPkRRlL9D0fNvtOY-LRFckfcHhf5z9l1iZjeukV0YhdKnuw1wyiaWrQXBUDiBfbkCRd2DM-KnelqPxfiXCaTjGKDURRBO3pz33ebge3IFXSiU5vl4qHQ8xvunzGpFw',
},
}
_VALID_URL = r'''(?x)
https?://
(?P<sub_domain>
(?:{}\.)?go|fxnow\.fxnetworks|
(?:www\.)?(?:abc|freeform|disneynow)
)\.com/
(?:
(?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
(?:[^/]+/)*(?P<display_id>[^/?\#]+)
)
'''.format(r'\.|'.join(list(_SITE_INFO.keys())))
_URL_PATH_RE = r'(?:video|episode|movies-and-specials)/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
_VALID_URL = [
fr'https?://(?:www\.)?(?P<site>abc)\.com/{_URL_PATH_RE}',
fr'https?://(?:www\.)?(?P<site>freeform)\.com/{_URL_PATH_RE}',
fr'https?://(?:www\.)?(?P<site>disneynow)\.com/{_URL_PATH_RE}',
fr'https?://fxnow\.(?P<site>fxnetworks)\.com/{_URL_PATH_RE}',
fr'https?://(?:www\.)?(?P<site>nationalgeographic)\.com/tv/{_URL_PATH_RE}',
]
_TESTS = [{
'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
'url': 'https://abc.com/episode/4192c0e6-26e5-47a8-817b-ce8272b9e440/playlist/PL551127435',
'info_dict': {
'id': 'VDKA3807643',
'id': 'VDKA10805898',
'ext': 'mp4',
'title': 'The Traitor in the White House',
'description': 'md5:05b009d2d145a1e85d25111bd37222e8',
},
'params': {
# m3u8 download
'skip_download': True,
},
'skip': 'This content is no longer available.',
}, {
'url': 'https://disneynow.com/shows/big-hero-6-the-series',
'info_dict': {
'title': 'Doraemon',
'id': 'SH55574025',
},
'playlist_mincount': 51,
}, {
'url': 'http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood',
'info_dict': {
'id': 'VDKA3609139',
'title': 'This Guilty Blood',
'description': 'md5:f18e79ad1c613798d95fdabfe96cd292',
'title': 'Switch the Flip',
'description': 'To help get Brians life in order, Stewie and Brian swap bodies using a machine that Stewie invents.',
'age_limit': 14,
'duration': 1297,
'thumbnail': r're:https?://.+/.+\.jpg',
'series': 'Family Guy',
'season': 'Season 16',
'season_number': 16,
'episode': 'Episode 17',
'episode_number': 17,
'timestamp': 1746082800.0,
'upload_date': '20250501',
},
'params': {'skip_download': 'm3u8'},
'skip': 'This video requires AdobePass MSO credentials',
}, {
'url': 'https://disneynow.com/episode/21029660-ba06-4406-adb0-a9a78f6e265e/playlist/PL553044961',
'info_dict': {
'id': 'VDKA39546942',
'ext': 'mp4',
'title': 'Zero Friends Again',
'description': 'Relationships fray under the pressures of a difficult journey.',
'age_limit': 0,
'duration': 1721,
'thumbnail': r're:https?://.+/.+\.jpg',
'series': 'Star Wars: Skeleton Crew',
'season': 'Season 1',
'season_number': 1,
'episode': 'Episode 6',
'episode_number': 6,
'timestamp': 1746946800.0,
'upload_date': '20250511',
},
'params': {'skip_download': 'm3u8'},
'skip': 'This video requires AdobePass MSO credentials',
}, {
'url': 'https://fxnow.fxnetworks.com/episode/09f4fa6f-c293-469e-aebe-32c9ca5842a7/playlist/PL554408064',
'info_dict': {
'id': 'VDKA38112033',
'ext': 'mp4',
'title': 'The Return of Jerry',
'description': 'The vampires long-lost fifth roommate returns. Written by Paul Simms; directed by Kyle Newacheck.',
'age_limit': 17,
'duration': 1493,
'thumbnail': r're:https?://.+/.+\.jpg',
'series': 'What We Do in the Shadows',
'season': 'Season 6',
'season_number': 6,
'episode': 'Episode 1',
'upload_date': '20170102',
'season': 'Season 2',
'thumbnail': 'http://cdn1.edgedatg.com/aws/v2/abcf/Shadowhunters/video/201/ae5f75608d86bf88aa4f9f4aa76ab1b7/579x325-Q100_ae5f75608d86bf88aa4f9f4aa76ab1b7.jpg',
'duration': 2544,
'season_number': 2,
'series': 'Shadowhunters',
'episode_number': 1,
'timestamp': 1483387200,
'ext': 'mp4',
},
'params': {
'geo_bypass_ip_block': '3.244.239.0/24',
# m3u8 download
'skip_download': True,
'timestamp': 1729573200.0,
'upload_date': '20241022',
},
'params': {'skip_download': 'm3u8'},
'skip': 'This video requires AdobePass MSO credentials',
}, {
'url': 'https://abc.com/shows/the-rookie/episode-guide/season-04/12-the-knock',
'url': 'https://www.freeform.com/episode/bda0eaf7-761a-4838-aa44-96f794000844/playlist/PL553044961',
'info_dict': {
'id': 'VDKA26050359',
'title': 'The Knock',
'description': 'md5:0c2947e3ada4c31f28296db7db14aa64',
'age_limit': 14,
'id': 'VDKA39007340',
'ext': 'mp4',
'thumbnail': 'http://cdn1.edgedatg.com/aws/v2/abc/TheRookie/video/412/daf830d06e83b11eaf5c0a299d993ae3/1556x876-Q75_daf830d06e83b11eaf5c0a299d993ae3.jpg',
'episode': 'Episode 12',
'season_number': 4,
'season': 'Season 4',
'timestamp': 1642975200,
'episode_number': 12,
'upload_date': '20220123',
'series': 'The Rookie',
'duration': 2572,
},
'params': {
'geo_bypass_ip_block': '3.244.239.0/24',
# m3u8 download
'skip_download': True,
'title': 'Angel\'s Landing',
'description': 'md5:91bf084e785c968fab16734df7313446',
'age_limit': 14,
'duration': 2523,
'thumbnail': r're:https?://.+/.+\.jpg',
'series': 'How I Escaped My Cult',
'season': 'Season 1',
'season_number': 1,
'episode': 'Episode 2',
'episode_number': 2,
'timestamp': 1740038400.0,
'upload_date': '20250220',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://fxnow.fxnetworks.com/shows/better-things/video/vdka12782841',
'url': 'https://www.nationalgeographic.com/tv/episode/ca694661-1186-41ae-8089-82f64d69b16d/playlist/PL554408064',
'info_dict': {
'id': 'VDKA12782841',
'title': 'First Look: Better Things - Season 2',
'description': 'md5:fa73584a95761c605d9d54904e35b407',
'id': 'VDKA39492078',
'ext': 'mp4',
'age_limit': 14,
'upload_date': '20170825',
'duration': 161,
'series': 'Better Things',
'thumbnail': 'http://cdn1.edgedatg.com/aws/v2/fx/BetterThings/video/12782841/b6b05e58264121cc2c98811318e6d507/1556x876-Q75_b6b05e58264121cc2c98811318e6d507.jpg',
'timestamp': 1503661074,
},
'params': {
'geo_bypass_ip_block': '3.244.239.0/24',
# m3u8 download
'skip_download': True,
'title': 'Heart of the Emperors',
'description': 'md5:4fc50a2878f030bb3a7eac9124dca677',
'age_limit': 0,
'duration': 2775,
'thumbnail': r're:https?://.+/.+\.jpg',
'series': 'Secrets of the Penguins',
'season': 'Season 1',
'season_number': 1,
'episode': 'Episode 1',
'episode_number': 1,
'timestamp': 1745204400.0,
'upload_date': '20250421',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
'url': 'https://www.freeform.com/movies-and-specials/c38281fc-9f8f-47c7-8220-22394f9df2e1',
'only_matching': True,
}, {
'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland',
'only_matching': True,
}, {
# brand 004
'url': 'http://disneynow.go.com/shows/big-hero-6-the-series/season-01/episode-10-mr-sparkles-loses-his-sparkle/vdka4637915',
'only_matching': True,
}, {
# brand 008
'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
'only_matching': True,
}, {
'url': 'https://disneynow.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
'only_matching': True,
}, {
'url': 'https://www.freeform.com/shows/cruel-summer/episode-guide/season-01/01-happy-birthday-jeanette-turner',
'url': 'https://abc.com/video/219a454a-172c-41bf-878a-d169e6bc0bdc/playlist/PL5523098420',
'only_matching': True,
}]
@ -171,58 +167,29 @@ class GoIE(AdobePassIE):
f'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/{brand}/001/-1/{show_id}/-1/{video_id}/-1/-1.json',
display_id)['video']
def _extract_global_var(self, name, webpage, video_id):
return self._search_json(
fr'window\[["\']{re.escape(name)}["\']\]\s*=',
webpage, f'{name.strip("_")} JSON', video_id)
def _real_extract(self, url):
mobj = self._match_valid_url(url)
sub_domain = remove_start(remove_end(mobj.group('sub_domain') or '', '.go'), 'www.')
video_id, display_id = mobj.group('id', 'display_id')
site_info = self._SITE_INFO.get(sub_domain, {})
brand = site_info.get('brand')
if not video_id or not site_info:
webpage = self._download_webpage(url, display_id or video_id)
data = self._parse_json(
self._search_regex(
r'["\']__abc_com__["\']\s*\]\s*=\s*({.+?})\s*;', webpage,
'data', default='{}'),
display_id or video_id, fatal=False)
# https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot
layout = try_get(data, lambda x: x['page']['content']['video']['layout'], dict)
video_id = None
if layout:
video_id = try_get(
layout,
(lambda x: x['videoid'], lambda x: x['video']['id']),
str)
if not video_id:
video_id = self._search_regex(
(
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
r'data-video-id=["\']*(VDKA\w+)',
# page.analytics.videoIdCode
r'\bvideoIdCode["\']\s*:\s*["\']((?:vdka|VDKA)\w+)',
# https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)',
), webpage, 'video id', default=video_id)
if not site_info:
brand = self._search_regex(
(r'data-brand=\s*["\']\s*(\d+)',
r'data-page-brand=\s*["\']\s*(\d+)'), webpage, 'brand',
default='004')
site_info = next(
si for _, si in self._SITE_INFO.items()
if si.get('brand') == brand)
if not video_id:
# show extraction works for Disney, DisneyJunior and DisneyXD
# ABC and Freeform has different layout
show_id = self._search_regex(r'data-show-id=["\']*(SH\d+)', webpage, 'show id')
videos = self._extract_videos(brand, show_id=show_id)
show_title = self._search_regex(r'data-show-title="([^"]+)"', webpage, 'show title', fatal=False)
entries = []
for video in videos:
entries.append(self.url_result(
video['url'], 'Go', video.get('id'), video.get('title')))
entries.reverse()
return self.playlist_result(entries, show_id, show_title)
site, display_id = self._match_valid_url(url).group('site', 'id')
webpage = self._download_webpage(url, display_id)
config = self._extract_global_var('__CONFIG__', webpage, display_id)
data = self._extract_global_var(config['globalVar'], webpage, display_id)
video_id = traverse_obj(data, (
'page', 'content', 'video', 'layout', (('video', 'id'), 'videoid'), {str}, any))
if not video_id:
video_id = self._search_regex([
# data-track-video_id="VDKA39492078"
# data-track-video_id_code="vdka39492078"
# data-video-id="'VDKA3609139'"
r'data-(?:track-)?video[_-]id(?:_code)?=["\']*((?:vdka|VDKA)\d+)',
# page.analytics.videoIdCode
r'\bvideoIdCode["\']\s*:\s*["\']((?:vdka|VDKA)\d+)'], webpage, 'video ID')
site_info = self._SITE_INFO[site]
brand = site_info['brand']
video_data = self._extract_videos(brand, video_id)[0]
video_id = video_data['id']
title = video_data['title']
@ -238,26 +205,31 @@ class GoIE(AdobePassIE):
if ext == 'm3u8':
video_type = video_data.get('type')
data = {
'video_id': video_data['id'],
'video_id': video_id,
'video_type': video_type,
'brand': brand,
'device': '001',
'app_name': 'webplayer-abc',
}
if video_data.get('accesslevel') == '1':
requestor_id = site_info.get('requestor_id', 'DisneyChannels')
provider_id = site_info['provider_id']
software_statement = traverse_obj(data, ('app', 'config', (
('features', 'auth', 'softwareStatement'),
('tvAuth', 'SOFTWARE_STATEMENTS', 'PRODUCTION'),
), {str}, any)) or site_info['software_statement']
resource = site_info.get('resource_id') or self._get_mvpd_resource(
requestor_id, title, video_id, None)
provider_id, title, video_id, None)
auth = self._extract_mvpd_auth(
url, video_id, requestor_id, resource)
url, video_id, site_info['requestor_id'], resource, software_statement)
data.update({
'token': auth,
'token_type': 'ap',
'adobe_requestor_id': requestor_id,
'adobe_requestor_id': provider_id,
})
else:
self._initialize_geo_bypass({'countries': ['US']})
entitlement = self._download_json(
'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
'https://prod.gatekeeper.us-abc.symphony.edgedatg.go.com/vp2/ws-secure/entitlement/2020/playmanifest_secure.json',
video_id, data=urlencode_postdata(data))
errors = entitlement.get('errors', {}).get('errors', [])
if errors:
@ -267,7 +239,7 @@ class GoIE(AdobePassIE):
error['message'], countries=['US'])
error_message = ', '.join([error['message'] for error in errors])
raise ExtractorError(f'{self.IE_NAME} said: {error_message}', expected=True)
asset_url += '?' + entitlement['uplynkData']['sessionKey']
asset_url += '?' + entitlement['entitlement']['uplynkData']['sessionKey']
fmts, subs = self._extract_m3u8_formats_and_subtitles(
asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False)
formats.extend(fmts)

View File

@ -19,7 +19,8 @@ from ..utils import (
class NBACVPBaseIE(TurnerBaseIE):
def _extract_nba_cvp_info(self, path, video_id, fatal=False):
return self._extract_cvp_info(
f'http://secure.nba.com/{path}', video_id, {
# XXX: The 3rd argument (None) needs to be the AdobePass software_statement
f'http://secure.nba.com/{path}', video_id, None, {
'default': {
'media_src': 'http://nba.cdn.turner.com/nba/big',
},
@ -94,6 +95,7 @@ class NBAWatchBaseIE(NBACVPBaseIE):
class NBAWatchEmbedIE(NBAWatchBaseIE):
_WORKING = False
IE_NAME = 'nba:watch:embed'
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)'
_TESTS = [{
@ -115,6 +117,7 @@ class NBAWatchEmbedIE(NBAWatchBaseIE):
class NBAWatchIE(NBAWatchBaseIE):
_WORKING = False
IE_NAME = 'nba:watch'
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'(?:nba/)?video/(?P<id>.+?(?=/index\.html)|(?:[^/]+/)*[^/?#&]+)'
_TESTS = [{
@ -167,6 +170,7 @@ class NBAWatchIE(NBAWatchBaseIE):
class NBAWatchCollectionIE(NBAWatchBaseIE):
_WORKING = False
IE_NAME = 'nba:watch:collection'
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'list/collection/(?P<id>[^/?#&]+)'
_TESTS = [{
@ -336,6 +340,7 @@ class NBABaseIE(NBACVPBaseIE):
class NBAEmbedIE(NBABaseIE):
_WORKING = False
IE_NAME = 'nba:embed'
_VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)'
_TESTS = [{
@ -358,6 +363,7 @@ class NBAEmbedIE(NBABaseIE):
class NBAIE(NBABaseIE):
_WORKING = False
IE_NAME = 'nba'
_VALID_URL = NBABaseIE._VALID_URL_BASE + f'(?!{NBABaseIE._CHANNEL_PATH_REGEX})video/(?P<id>(?:[^/]+/)*[^/?#&]+)'
_TESTS = [{
@ -385,6 +391,7 @@ class NBAIE(NBABaseIE):
class NBAChannelIE(NBABaseIE):
_WORKING = False
IE_NAME = 'nba:channel'
_VALID_URL = NBABaseIE._VALID_URL_BASE + f'(?:{NBABaseIE._CHANNEL_PATH_REGEX})/(?P<id>[^/?#&]+)'
_TESTS = [{

View File

@ -6,7 +6,7 @@ import xml.etree.ElementTree
from .adobepass import AdobePassIE
from .common import InfoExtractor
from .theplatform import ThePlatformIE, default_ns
from .theplatform import ThePlatformBaseIE, ThePlatformIE, default_ns
from ..networking import HEADRequest
from ..utils import (
ExtractorError,
@ -14,26 +14,130 @@ from ..utils import (
UserNotLive,
clean_html,
determine_ext,
extract_attributes,
float_or_none,
get_element_html_by_class,
int_or_none,
join_nonempty,
make_archive_id,
mimetype2ext,
parse_age_limit,
parse_duration,
parse_iso8601,
remove_end,
smuggle_url,
traverse_obj,
try_get,
unescapeHTML,
unified_timestamp,
update_url_query,
url_basename,
url_or_none,
)
from ..utils.traversal import require, traverse_obj
class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>(?:NBCE|n)?\d+))'
class NBCUniversalBaseIE(ThePlatformBaseIE):
_GEO_COUNTRIES = ['US']
_GEO_BYPASS = False
_M3U8_RE = r'https?://[^/?#]+/prod/[\w-]+/(?P<folders>[^?#]+/)cmaf/mpeg_(?:cbcs|cenc)\w*/master_cmaf\w*\.m3u8'
def _download_nbcu_smil_and_extract_m3u8_url(self, tp_path, video_id, query):
smil = self._download_xml(
f'https://link.theplatform.com/s/{tp_path}', video_id,
'Downloading SMIL manifest', 'Failed to download SMIL manifest', query={
**query,
'format': 'SMIL', # XXX: Do not confuse "format" with "formats"
'manifest': 'm3u',
'switch': 'HLSServiceSecure', # Or else we get broken mp4 http URLs instead of HLS
}, headers=self.geo_verification_headers())
ns = f'//{{{default_ns}}}'
if url := traverse_obj(smil, (f'{ns}video/@src', lambda _, v: determine_ext(v) == 'm3u8', any)):
return url
exc = traverse_obj(smil, (f'{ns}param', lambda _, v: v.get('name') == 'exception', '@value', any))
if exc == 'GeoLocationBlocked':
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
raise ExtractorError(traverse_obj(smil, (f'{ns}ref/@abstract', ..., any)), expected=exc == 'Expired')
def _extract_nbcu_formats_and_subtitles(self, tp_path, video_id, query):
# formats='mpeg4' will return either a working m3u8 URL or an m3u8 template for non-DRM HLS
# formats='m3u+none,mpeg4' may return DRM HLS but w/the "folders" needed for non-DRM template
query['formats'] = 'm3u+none,mpeg4'
m3u8_url = self._download_nbcu_smil_and_extract_m3u8_url(tp_path, video_id, query)
if mobj := re.fullmatch(self._M3U8_RE, m3u8_url):
query['formats'] = 'mpeg4'
m3u8_tmpl = self._download_nbcu_smil_and_extract_m3u8_url(tp_path, video_id, query)
# Example: https://vod-lf-oneapp-prd.akamaized.net/prod/video/{folders}master_hls.m3u8
if '{folders}' in m3u8_tmpl:
self.write_debug('Found m3u8 URL template, formatting URL path')
m3u8_url = m3u8_tmpl.format(folders=mobj.group('folders'))
if '/mpeg_cenc' in m3u8_url or '/mpeg_cbcs' in m3u8_url:
self.report_drm(video_id)
return self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
def _extract_nbcu_video(self, url, display_id, old_ie_key=None):
webpage = self._download_webpage(url, display_id)
settings = self._search_json(
r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>',
webpage, 'settings', display_id)
query = {}
tve = extract_attributes(get_element_html_by_class('tve-video-deck-app', webpage) or '')
if tve:
account_pid = tve.get('data-mpx-media-account-pid') or tve['data-mpx-account-pid']
account_id = tve['data-mpx-media-account-id']
metadata = self._parse_json(
tve.get('data-normalized-video') or '', display_id, fatal=False, transform_source=unescapeHTML)
video_id = tve.get('data-guid') or metadata['guid']
if tve.get('data-entitlement') == 'auth':
auth = settings['tve_adobe_auth']
release_pid = tve['data-release-pid']
resource = self._get_mvpd_resource(
tve.get('data-adobe-pass-resource-id') or auth['adobePassResourceId'],
tve['data-title'], release_pid, tve.get('data-rating'))
query['auth'] = self._extract_mvpd_auth(
url, release_pid, auth['adobePassRequestorId'],
resource, auth['adobePassSoftwareStatement'])
else:
ls_playlist = traverse_obj(settings, (
'ls_playlist', lambda _, v: v['defaultGuid'], any, {require('LS playlist')}))
video_id = ls_playlist['defaultGuid']
account_pid = ls_playlist.get('mpxMediaAccountPid') or ls_playlist['mpxAccountPid']
account_id = ls_playlist['mpxMediaAccountId']
metadata = traverse_obj(ls_playlist, ('videos', lambda _, v: v['guid'] == video_id, any)) or {}
tp_path = f'{account_pid}/media/guid/{account_id}/{video_id}'
formats, subtitles = self._extract_nbcu_formats_and_subtitles(tp_path, video_id, query)
tp_metadata = self._download_theplatform_metadata(tp_path, video_id, fatal=False)
parsed_info = self._parse_theplatform_metadata(tp_metadata)
self._merge_subtitles(parsed_info['subtitles'], target=subtitles)
return {
**parsed_info,
**traverse_obj(metadata, {
'title': ('title', {str}),
'description': ('description', {str}),
'duration': ('durationInSeconds', {int_or_none}),
'timestamp': ('airDate', {parse_iso8601}),
'thumbnail': ('thumbnailUrl', {url_or_none}),
'season_number': ('seasonNumber', {int_or_none}),
'episode_number': ('episodeNumber', {int_or_none}),
'episode': ('episodeTitle', {str}),
'series': ('show', {str}),
}),
'id': video_id,
'display_id': display_id,
'formats': formats,
'subtitles': subtitles,
'_old_archive_ids': [make_archive_id(old_ie_key, video_id)] if old_ie_key else None,
}
class NBCIE(NBCUniversalBaseIE):
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/?#]+/video/[^/?#]+/(?P<id>\w+))'
_TESTS = [
{
'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237',
@ -49,47 +153,20 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
'episode_number': 86,
'season': 'Season 2',
'season_number': 2,
'series': 'Tonight Show: Jimmy Fallon',
'duration': 237.0,
'chapters': 'count:1',
'tags': 'count:4',
'series': 'Tonight',
'duration': 236.504,
'tags': 'count:2',
'thumbnail': r're:https?://.+\.jpg',
'categories': ['Series/The Tonight Show Starring Jimmy Fallon'],
'media_type': 'Full Episode',
'age_limit': 14,
'_old_archive_ids': ['theplatform 2848237'],
},
'params': {
'skip_download': 'm3u8',
},
},
{
'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
'info_dict': {
'id': '2832821',
'ext': 'mp4',
'title': 'Star Wars Teaser',
'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
'timestamp': 1417852800,
'upload_date': '20141206',
'uploader': 'NBCU-COM',
},
'skip': 'page not found',
},
{
# HLS streams requires the 'hdnea3' cookie
'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
'info_dict': {
'id': '101528f5a9e8127b107e98c5e6ce4638',
'ext': 'mp4',
'title': 'Goliath',
'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
'timestamp': 1237100400,
'upload_date': '20090315',
'uploader': 'NBCU-COM',
},
'skip': 'page not found',
},
{
# manifest url does not have extension
'url': 'https://www.nbc.com/the-golden-globe-awards/video/oprah-winfrey-receives-cecil-b-de-mille-award-at-the-2018-golden-globes/3646439',
'info_dict': {
'id': '3646439',
@ -99,48 +176,47 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
'episode_number': 1,
'season': 'Season 75',
'season_number': 75,
'series': 'The Golden Globe Awards',
'series': 'Golden Globes',
'description': 'Oprah Winfrey receives the Cecil B. de Mille Award at the 75th Annual Golden Globe Awards.',
'uploader': 'NBCU-COM',
'upload_date': '20180107',
'timestamp': 1515312000,
'duration': 570.0,
'duration': 569.703,
'tags': 'count:8',
'thumbnail': r're:https?://.+\.jpg',
'chapters': 'count:1',
'media_type': 'Highlight',
'age_limit': 0,
'categories': ['Series/The Golden Globe Awards'],
'_old_archive_ids': ['theplatform 3646439'],
},
'params': {
'skip_download': 'm3u8',
},
},
{
# new video_id format
'url': 'https://www.nbc.com/quantum-leap/video/bens-first-leap-nbcs-quantum-leap/NBCE125189978',
# Needs to be extracted from webpage instead of GraphQL
'url': 'https://www.nbc.com/paris2024/video/ali-truwit-found-purpose-pool-after-her-life-changed/para24_sww_alitruwittodayshow_240823',
'info_dict': {
'id': 'NBCE125189978',
'id': 'para24_sww_alitruwittodayshow_240823',
'ext': 'mp4',
'title': 'Ben\'s First Leap | NBC\'s Quantum Leap',
'description': 'md5:a82762449b7ec4bb83291a7b355ebf8e',
'uploader': 'NBCU-COM',
'series': 'Quantum Leap',
'season': 'Season 1',
'season_number': 1,
'episode': 'Ben\'s First Leap | NBC\'s Quantum Leap',
'episode_number': 1,
'duration': 170.171,
'chapters': [],
'timestamp': 1663956155,
'upload_date': '20220923',
'tags': 'count:10',
'age_limit': 0,
'title': 'Ali Truwit found purpose in the pool after her life changed',
'description': 'md5:c16d7489e1516593de1cc5d3f39b9bdb',
'uploader': 'NBCU-SPORTS',
'duration': 311.077,
'thumbnail': r're:https?://.+\.jpg',
'categories': ['Series/Quantum Leap 2022'],
'media_type': 'Highlight',
'episode': 'Ali Truwit found purpose in the pool after her life changed',
'timestamp': 1724435902.0,
'upload_date': '20240823',
'_old_archive_ids': ['theplatform para24_sww_alitruwittodayshow_240823'],
},
'params': {
'skip_download': 'm3u8',
},
},
{
'url': 'https://www.nbc.com/quantum-leap/video/bens-first-leap-nbcs-quantum-leap/NBCE125189978',
'only_matching': True,
},
{
'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
'only_matching': True,
@ -151,6 +227,7 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
'only_matching': True,
},
]
_SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI1Yzg2YjdkYy04NDI3LTRjNDUtOGQwZi1iNDkzYmE3MmQwYjQiLCJuYmYiOjE1Nzg3MDM2MzEsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTc4NzAzNjMxfQ.QQKIsBhAjGQTMdAqRTqhcz2Cddr4Y2hEjnSiOeKKki4nLrkDOsjQMmqeTR0hSRarraxH54wBgLvsxI7LHwKMvr7G8QpynNAxylHlQD3yhN9tFhxt4KR5wW3as02B-W2TznK9bhNWPKIyHND95Uo2Mi6rEQoq8tM9O09WPWaanE5BX_-r6Llr6dPq5F0Lpx2QOn2xYRb1T4nFxdFTNoss8GBds8OvChTiKpXMLHegLTc1OS4H_1a8tO_37jDwSdJuZ8iTyRLV4kZ2cpL6OL5JPMObD4-HQiec_dfcYgMKPiIfP9ZqdXpec2SVaCLsWEk86ZYvD97hLIQrK5rrKd1y-A'
def _real_extract(self, url):
permalink, video_id = self._match_valid_url(url).groups()
@ -196,62 +273,50 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
'userId': '0',
}),
})['data']['bonanzaPage']['metadata']
query = {
'mbr': 'true',
'manifest': 'm3u',
'switch': 'HLSServiceSecure',
}
if not video_data:
# Some videos are not available via GraphQL API
webpage = self._download_webpage(url, video_id)
video_data = self._search_json(
r'<script>\s*PRELOAD\s*=', webpage, 'video data',
video_id)['pages'][urllib.parse.urlparse(url).path]['base']['metadata']
video_id = video_data['mpxGuid']
tp_path = 'NnzsPC/media/guid/{}/{}'.format(video_data.get('mpxAccountId') or '2410887629', video_id)
tpm = self._download_theplatform_metadata(tp_path, video_id)
title = tpm.get('title') or video_data.get('secondaryTitle')
tp_path = f'NnzsPC/media/guid/{video_data["mpxAccountId"]}/{video_id}'
tpm = self._download_theplatform_metadata(tp_path, video_id, fatal=False)
title = traverse_obj(tpm, ('title', {str})) or video_data.get('secondaryTitle')
query = {}
if video_data.get('locked'):
resource = self._get_mvpd_resource(
video_data.get('resourceId') or 'nbcentertainment',
title, video_id, video_data.get('rating'))
video_data['resourceId'], title, video_id, video_data.get('rating'))
query['auth'] = self._extract_mvpd_auth(
url, video_id, 'nbcentertainment', resource)
theplatform_url = smuggle_url(update_url_query(
'http://link.theplatform.com/s/NnzsPC/media/guid/{}/{}'.format(video_data.get('mpxAccountId') or '2410887629', video_id),
query), {'force_smil_url': True})
url, video_id, 'nbcentertainment', resource, self._SOFTWARE_STATEMENT)
# Empty string or 0 can be valid values for these. So the check must be `is None`
description = video_data.get('description')
if description is None:
description = tpm.get('description')
episode_number = int_or_none(video_data.get('episodeNumber'))
if episode_number is None:
episode_number = int_or_none(tpm.get('nbcu$airOrder'))
rating = video_data.get('rating')
if rating is None:
try_get(tpm, lambda x: x['ratings'][0]['rating'])
season_number = int_or_none(video_data.get('seasonNumber'))
if season_number is None:
season_number = int_or_none(tpm.get('nbcu$seasonNumber'))
series = video_data.get('seriesShortTitle')
if series is None:
series = tpm.get('nbcu$seriesShortTitle')
tags = video_data.get('keywords')
if tags is None or len(tags) == 0:
tags = tpm.get('keywords')
formats, subtitles = self._extract_nbcu_formats_and_subtitles(tp_path, video_id, query)
parsed_info = self._parse_theplatform_metadata(tpm)
self._merge_subtitles(parsed_info['subtitles'], target=subtitles)
return {
'_type': 'url_transparent',
'age_limit': parse_age_limit(rating),
'description': description,
'episode': title,
'episode_number': episode_number,
**traverse_obj(video_data, {
'description': ('description', {str}, filter),
'episode': ('secondaryTitle', {str}, filter),
'episode_number': ('episodeNumber', {int_or_none}),
'season_number': ('seasonNumber', {int_or_none}),
'age_limit': ('rating', {parse_age_limit}),
'tags': ('keywords', ..., {str}, filter, all, filter),
'series': ('seriesShortTitle', {str}),
}),
**parsed_info,
'id': video_id,
'ie_key': 'ThePlatform',
'season_number': season_number,
'series': series,
'tags': tags,
'title': title,
'url': theplatform_url,
'formats': formats,
'subtitles': subtitles,
'_old_archive_ids': [make_archive_id('ThePlatform', video_id)],
}
class NBCSportsVPlayerIE(InfoExtractor):
_WORKING = False
_VALID_URL_BASE = r'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/'
_VALID_URL = _VALID_URL_BASE + r'(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
_EMBED_REGEX = [rf'(?:iframe[^>]+|var video|div[^>]+data-(?:mpx-)?)[sS]rc\s?=\s?"(?P<url>{_VALID_URL_BASE}[^\"]+)']
@ -286,6 +351,7 @@ class NBCSportsVPlayerIE(InfoExtractor):
class NBCSportsIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?!vplayer/)(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
_TESTS = [{
@ -321,6 +387,7 @@ class NBCSportsIE(InfoExtractor):
class NBCSportsStreamIE(AdobePassIE):
_WORKING = False
_VALID_URL = r'https?://stream\.nbcsports\.com/.+?\bpid=(?P<id>\d+)'
_TEST = {
'url': 'http://stream.nbcsports.com/nbcsn/generic?pid=206559',
@ -354,7 +421,7 @@ class NBCSportsStreamIE(AdobePassIE):
source_url = video_source['ottStreamUrl']
is_live = video_source.get('type') == 'live' or video_source.get('status') == 'Live'
resource = self._get_mvpd_resource('nbcsports', title, video_id, '')
token = self._extract_mvpd_auth(url, video_id, 'nbcsports', resource)
token = self._extract_mvpd_auth(url, video_id, 'nbcsports', resource, None) # XXX: None arg needs to be software_statement
tokenized_url = self._download_json(
'https://token.playmakerservices.com/cdn',
video_id, data=json.dumps({
@ -534,22 +601,26 @@ class NBCOlympicsIE(InfoExtractor):
IE_NAME = 'nbcolympics'
_VALID_URL = r'https?://www\.nbcolympics\.com/videos?/(?P<id>[0-9a-z-]+)'
_TEST = {
_TESTS = [{
# Geo-restricted to US
'url': 'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold',
'md5': '54fecf846d05429fbaa18af557ee523a',
'url': 'https://www.nbcolympics.com/videos/watch-final-minutes-team-usas-mens-basketball-gold',
'info_dict': {
'id': 'WjTBzDXx5AUq',
'display_id': 'justin-roses-son-leo-was-tears-after-his-dad-won-gold',
'id': 'SAwGfPlQ1q01',
'ext': 'mp4',
'title': 'Rose\'s son Leo was in tears after his dad won gold',
'description': 'Olympic gold medalist Justin Rose gets emotional talking to the impact his win in men\'s golf has already had on his children.',
'timestamp': 1471274964,
'upload_date': '20160815',
'display_id': 'watch-final-minutes-team-usas-mens-basketball-gold',
'title': 'Watch the final minutes of Team USA\'s men\'s basketball gold',
'description': 'md5:f704f591217305c9559b23b877aa8d31',
'uploader': 'NBCU-SPORTS',
'duration': 387.053,
'thumbnail': r're:https://.+/.+\.jpg',
'chapters': [],
'timestamp': 1723346984,
'upload_date': '20240811',
},
'skip': '404 Not Found',
}
}, {
'url': 'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
@ -578,6 +649,7 @@ class NBCOlympicsIE(InfoExtractor):
class NBCOlympicsStreamIE(AdobePassIE):
_WORKING = False
IE_NAME = 'nbcolympics:stream'
_VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
_TESTS = [
@ -630,7 +702,8 @@ class NBCOlympicsStreamIE(AdobePassIE):
event_config.get('resourceId', 'NBCOlympics'),
re.sub(r'[^\w\d ]+', '', event_config['eventTitle']), pid,
event_config.get('ratingId', 'NO VALUE'))
media_token = self._extract_mvpd_auth(url, pid, event_config.get('requestorId', 'NBCOlympics'), ap_resource)
# XXX: The None arg below needs to be the software_statement for this requestor
media_token = self._extract_mvpd_auth(url, pid, event_config.get('requestorId', 'NBCOlympics'), ap_resource, None)
source_url = self._download_json(
'https://tokens.playmakerservices.com/', pid, 'Retrieving tokenized URL',
@ -848,3 +921,178 @@ class NBCStationsIE(InfoExtractor):
'is_live': is_live,
**info,
}
class BravoTVIE(NBCUniversalBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?:bravotv|oxygen)\.com/(?:[^/?#]+/)+(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
'info_dict': {
'id': '3923059',
'ext': 'mp4',
'title': 'The Top Chef Season 16 Winner Is...',
'display_id': 'the-top-chef-season-16-winner-is',
'description': 'Find out who takes the title of Top Chef!',
'upload_date': '20190315',
'timestamp': 1552618860,
'season_number': 16,
'episode_number': 15,
'series': 'Top Chef',
'episode': 'Finale',
'duration': 190,
'season': 'Season 16',
'thumbnail': r're:^https://.+\.jpg',
'uploader': 'NBCU-BRAV',
'categories': ['Series', 'Series/Top Chef'],
'tags': 'count:10',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.bravotv.com/top-chef/season-20/episode-1/london-calling',
'info_dict': {
'id': '9000234570',
'ext': 'mp4',
'title': 'London Calling',
'display_id': 'london-calling',
'description': 'md5:5af95a8cbac1856bd10e7562f86bb759',
'upload_date': '20230310',
'timestamp': 1678418100,
'season_number': 20,
'episode_number': 1,
'series': 'Top Chef',
'episode': 'London Calling',
'duration': 3266,
'season': 'Season 20',
'chapters': 'count:7',
'thumbnail': r're:^https://.+\.jpg',
'age_limit': 14,
'media_type': 'Full Episode',
'uploader': 'NBCU-MPAT',
'categories': ['Series/Top Chef'],
'tags': 'count:10',
},
'params': {'skip_download': 'm3u8'},
'skip': 'This video requires AdobePass MSO credentials',
}, {
'url': 'https://www.oxygen.com/in-ice-cold-blood/season-1/closing-night',
'info_dict': {
'id': '3692045',
'ext': 'mp4',
'title': 'Closing Night',
'display_id': 'closing-night',
'description': 'md5:c8a5bb523c8ef381f3328c6d9f1e4632',
'upload_date': '20230126',
'timestamp': 1674709200,
'season_number': 1,
'episode_number': 1,
'series': 'In Ice Cold Blood',
'episode': 'Closing Night',
'duration': 2629,
'season': 'Season 1',
'chapters': 'count:6',
'thumbnail': r're:^https://.+\.jpg',
'age_limit': 14,
'media_type': 'Full Episode',
'uploader': 'NBCU-MPAT',
'categories': ['Series/In Ice Cold Blood'],
'tags': ['ice-t', 'in ice cold blood', 'law and order', 'oxygen', 'true crime'],
},
'params': {'skip_download': 'm3u8'},
'skip': 'This video requires AdobePass MSO credentials',
}, {
'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
'info_dict': {
'id': '3974019',
'ext': 'mp4',
'title': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
'display_id': 'handling-the-horwitz-house-after-the-murder-season-2',
'description': 'md5:f9d638dd6946a1c1c0533a9c6100eae5',
'upload_date': '20190618',
'timestamp': 1560819600,
'season_number': 2,
'episode_number': 16,
'series': 'In Ice Cold Blood',
'episode': 'Mother Vs Son',
'duration': 68,
'season': 'Season 2',
'thumbnail': r're:^https://.+\.jpg',
'age_limit': 14,
'uploader': 'NBCU-OXY',
'categories': ['Series/In Ice Cold Blood'],
'tags': ['in ice cold blood', 'ice-t', 'law and order', 'true crime', 'oxygen'],
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
return self._extract_nbcu_video(url, display_id)
class SyfyIE(NBCUniversalBaseIE):
_VALID_URL = r'https?://(?:www\.)?syfy\.com/[^/?#]+/(?:season-\d+/episode-\d+/(?:videos/)?|videos/)(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.syfy.com/face-off/season-13/episode-10/videos/keyed-up',
'info_dict': {
'id': '3774403',
'ext': 'mp4',
'display_id': 'keyed-up',
'title': 'Keyed Up',
'description': 'md5:feafd15bee449f212dcd3065bbe9a755',
'age_limit': 14,
'duration': 169,
'thumbnail': r're:https://www\.syfy\.com/.+/.+\.jpg',
'series': 'Face Off',
'season': 'Season 13',
'season_number': 13,
'episode': 'Through the Looking Glass Part 2',
'episode_number': 10,
'timestamp': 1533711618,
'upload_date': '20180808',
'media_type': 'Excerpt',
'uploader': 'NBCU-MPAT',
'categories': ['Series/Face Off'],
'tags': 'count:15',
'_old_archive_ids': ['theplatform 3774403'],
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.syfy.com/face-off/season-13/episode-10/through-the-looking-glass-part-2',
'info_dict': {
'id': '3772391',
'ext': 'mp4',
'display_id': 'through-the-looking-glass-part-2',
'title': 'Through the Looking Glass Pt.2',
'description': 'md5:90bd5dcbf1059fe3296c263599af41d2',
'age_limit': 0,
'duration': 2599,
'thumbnail': r're:https://www\.syfy\.com/.+/.+\.jpg',
'chapters': [{'start_time': 0.0, 'end_time': 679.0, 'title': '<Untitled Chapter 1>'},
{'start_time': 679.0, 'end_time': 1040.967, 'title': '<Untitled Chapter 2>'},
{'start_time': 1040.967, 'end_time': 1403.0, 'title': '<Untitled Chapter 3>'},
{'start_time': 1403.0, 'end_time': 1870.0, 'title': '<Untitled Chapter 4>'},
{'start_time': 1870.0, 'end_time': 2496.967, 'title': '<Untitled Chapter 5>'},
{'start_time': 2496.967, 'end_time': 2599, 'title': '<Untitled Chapter 6>'}],
'series': 'Face Off',
'season': 'Season 13',
'season_number': 13,
'episode': 'Through the Looking Glass Part 2',
'episode_number': 10,
'timestamp': 1672570800,
'upload_date': '20230101',
'media_type': 'Full Episode',
'uploader': 'NBCU-MPAT',
'categories': ['Series/Face Off'],
'tags': 'count:15',
'_old_archive_ids': ['theplatform 3772391'],
},
'params': {'skip_download': 'm3u8'},
'skip': 'This video requires AdobePass MSO credentials',
}]
def _real_extract(self, url):
display_id = self._match_id(url)
return self._extract_nbcu_video(url, display_id, old_ie_key='ThePlatform')

View File

@ -1,58 +0,0 @@
from .adobepass import AdobePassIE
from ..utils import (
smuggle_url,
update_url_query,
)
class SyfyIE(AdobePassIE):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?syfy\.com/(?:[^/]+/)?videos/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'http://www.syfy.com/theinternetruinedmylife/videos/the-internet-ruined-my-life-season-1-trailer',
'info_dict': {
'id': '2968097',
'ext': 'mp4',
'title': 'The Internet Ruined My Life: Season 1 Trailer',
'description': 'One tweet, one post, one click, can destroy everything.',
'uploader': 'NBCU-MPAT',
'upload_date': '20170113',
'timestamp': 1484345640,
},
'params': {
# m3u8 download
'skip_download': True,
},
'add_ie': ['ThePlatform'],
'skip': 'Redirects to main page',
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
syfy_mpx = next(iter(self._parse_json(self._search_regex(
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'),
display_id)['syfy']['syfy_mpx'].values()))
video_id = syfy_mpx['mpxGUID']
title = syfy_mpx['episodeTitle']
query = {
'mbr': 'true',
'manifest': 'm3u',
}
if syfy_mpx.get('entitlement') == 'auth':
resource = self._get_mvpd_resource(
'syfy', title, video_id,
syfy_mpx.get('mpxRating', 'TV-14'))
query['auth'] = self._extract_mvpd_auth(
url, video_id, 'syfy', resource)
return {
'_type': 'url_transparent',
'ie_key': 'ThePlatform',
'url': smuggle_url(update_url_query(
self._proto_relative_url(syfy_mpx['releaseURL']), query),
{'force_smil_url': True}),
'title': title,
'id': video_id,
'display_id': display_id,
}

View File

@ -32,6 +32,10 @@ class TBSIE(TurnerBaseIE):
'url': 'http://www.tntdrama.com/movies/star-wars-a-new-hope',
'only_matching': True,
}]
_SOFTWARE_STATEMENT_MAP = {
'tbs': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJkZTA0NTYxZS1iMTFhLTRlYTgtYTg5NC01NjI3MGM1NmM2MWIiLCJuYmYiOjE1MzcxODkzOTAsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTg5MzkwfQ.Z7ny66kaqNDdCHf9Y9KsV12LrBxrLkGGxlYe2XGm6qsw2T-k1OCKC1TMzeqiZP735292MMRAQkcJDKrMIzNbAuf9nCdIcv4kE1E2nqUnjPMBduC1bHffZp8zlllyrN2ElDwM8Vhwv_5nElLRwWGEt0Kaq6KJAMZA__WDxKWC18T-wVtsOZWXQpDqO7nByhfj2t-Z8c3TUNVsA_wHgNXlkzJCZ16F2b7yGLT5ZhLPupOScd3MXC5iPh19HSVIok22h8_F_noTmGzmMnIRQi6bWYWK2zC7TQ_MsYHfv7V6EaG5m1RKZTV6JAwwoJQF_9ByzarLV1DGwZxD9-eQdqswvg',
'tntdrama': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIwOTMxYTU4OS1jZjEzLTRmNjMtYTJmYy03MzhjMjE1NWU5NjEiLCJuYmYiOjE1MzcxOTA4MjcsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODI3fQ.AucKvtws7oekTXi80_zX4-BlgJD9GLvlOI9FlBCjdlx7Pa3eJ0AqbogynKMiatMbnLOTMHGjd7tTiq422unmZjBz70dhePAe9BbW0dIo7oQ57vZ-VBYw_tWYRPmON61MwAbLVlqROD3n_zURs85S8TlkQx9aNx9x_riGGELjd8l05CVa_pOluNhYvuIFn6wmrASOKI1hNEblBDWh468UWP571-fe4zzi0rlYeeHd-cjvtWvOB3bQsWrUVbK4pRmqvzEH59j0vNF-ihJF9HncmUicYONe47Mib3elfMok23v4dB1_UAlQY_oawfNcynmEnJQCcqFmbHdEwTW6gMiYsA',
}
def _real_extract(self, url):
site, path, display_id = self._match_valid_url(url).groups()
@ -48,7 +52,7 @@ class TBSIE(TurnerBaseIE):
drupal_settings['ngtv_token_url']).query)
info = self._extract_ngtv_info(
media_id, tokenizer_query, {
media_id, tokenizer_query, self._SOFTWARE_STATEMENT_MAP[site], {
'url': url,
'site_name': site[:3].upper(),
'auth_required': video_data.get('authRequired') == '1' or is_live,

View File

@ -156,6 +156,7 @@ class TeamcocoIE(TeamcocoBaseIE):
class ConanClassicIE(TeamcocoBaseIE):
_WORKING = False
_VALID_URL = r'https?://(?:(?:www\.)?conanclassic|conan25\.teamcoco)\.com/(?P<id>([^/]+/)*[^/?#]+)'
_TESTS = [{
'url': 'https://conanclassic.com/video/ice-cube-kevin-hart-conan-share-lyft',
@ -263,7 +264,7 @@ class ConanClassicIE(TeamcocoBaseIE):
info.update(self._extract_ngtv_info(media_id, {
'accessToken': token,
'accessTokenType': 'jws',
}))
}, None)) # TODO: the None arg needs to be the AdobePass software_statement
else:
formats, subtitles = self._get_formats_and_subtitles(
traverse_obj(response, ('data', 'findRecordVideoMetadata')), video_id)

View File

@ -12,11 +12,13 @@ from ..utils import (
float_or_none,
int_or_none,
mimetype2ext,
parse_age_limit,
parse_qs,
traverse_obj,
unsmuggle_url,
update_url,
update_url_query,
url_or_none,
urlhandle_detect_ext,
xpath_with_ns,
)
@ -63,62 +65,53 @@ class ThePlatformBaseIE(AdobePassIE):
return formats, subtitles
def _download_theplatform_metadata(self, path, video_id):
info_url = f'http://link.theplatform.{self._TP_TLD}/s/{path}?format=preview'
return self._download_json(info_url, video_id)
def _download_theplatform_metadata(self, path, video_id, fatal=True):
return self._download_json(
f'https://link.theplatform.{self._TP_TLD}/s/{path}', video_id,
fatal=fatal, query={'format': 'preview'}) or {}
def _parse_theplatform_metadata(self, info):
subtitles = {}
captions = info.get('captions')
if isinstance(captions, list):
for caption in captions:
lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
subtitles.setdefault(lang, []).append({
'ext': mimetype2ext(mime),
'url': src,
})
@staticmethod
def _parse_theplatform_metadata(tp_metadata):
def site_specific_filter(*fields):
return lambda k, v: v and k.endswith(tuple(f'${f}' for f in fields))
duration = info.get('duration')
tp_chapters = info.get('chapters', [])
chapters = []
if tp_chapters:
def _add_chapter(start_time, end_time):
start_time = float_or_none(start_time, 1000)
end_time = float_or_none(end_time, 1000)
if start_time is None or end_time is None:
return
chapters.append({
'start_time': start_time,
'end_time': end_time,
})
info = traverse_obj(tp_metadata, {
'title': ('title', {str}),
'episode': ('title', {str}),
'description': ('description', {str}),
'thumbnail': ('defaultThumbnailUrl', {url_or_none}),
'duration': ('duration', {float_or_none(scale=1000)}),
'timestamp': ('pubDate', {float_or_none(scale=1000)}),
'uploader': ('billingCode', {str}),
'creators': ('author', {str}, filter, all, filter),
'categories': (
'categories', lambda _, v: v.get('label') in ['category', None],
'name', {str}, filter, all, filter),
'tags': ('keywords', {str}, filter, {lambda x: re.split(r'[;,]\s?', x)}, filter),
'age_limit': ('ratings', ..., 'rating', {parse_age_limit}, any),
'season_number': (site_specific_filter('seasonNumber'), {int_or_none}, any),
'episode_number': (site_specific_filter('episodeNumber', 'airOrder'), {int_or_none}, any),
'series': (site_specific_filter('show', 'seriesTitle', 'seriesShortTitle'), (None, ...), {str}, any),
'location': (site_specific_filter('region'), {str}, any),
'media_type': (site_specific_filter('programmingType', 'type'), {str}, any),
})
for chapter in tp_chapters[:-1]:
_add_chapter(chapter.get('startTime'), chapter.get('endTime'))
_add_chapter(tp_chapters[-1].get('startTime'), tp_chapters[-1].get('endTime') or duration)
chapters = traverse_obj(tp_metadata, ('chapters', ..., {
'start_time': ('startTime', {float_or_none(scale=1000)}),
'end_time': ('endTime', {float_or_none(scale=1000)}),
}))
# Ignore pointless single chapters from short videos that span the entire video's duration
if len(chapters) > 1 or traverse_obj(chapters, (0, 'end_time')):
info['chapters'] = chapters
def extract_site_specific_field(field):
# A number of sites have custom-prefixed keys, e.g. 'cbc$seasonNumber'
return traverse_obj(info, lambda k, v: v and k.endswith(f'${field}'), get_all=False)
info['subtitles'] = {}
for caption in traverse_obj(tp_metadata, ('captions', lambda _, v: url_or_none(v['src']))):
info['subtitles'].setdefault(caption.get('lang') or 'en', []).append({
'url': caption['src'],
'ext': mimetype2ext(caption.get('type')),
})
return {
'title': info['title'],
'subtitles': subtitles,
'description': info['description'],
'thumbnail': info['defaultThumbnailUrl'],
'duration': float_or_none(duration, 1000),
'timestamp': int_or_none(info.get('pubDate'), 1000) or None,
'uploader': info.get('billingCode'),
'chapters': chapters,
'creator': traverse_obj(info, ('author', {str})) or None,
'categories': traverse_obj(info, (
'categories', lambda _, v: v.get('label') in ('category', None), 'name', {str})) or None,
'tags': traverse_obj(info, ('keywords', {lambda x: re.split(r'[;,]\s?', x) if x else None})),
'location': extract_site_specific_field('region'),
'series': extract_site_specific_field('show') or extract_site_specific_field('seriesTitle'),
'season_number': int_or_none(extract_site_specific_field('seasonNumber')),
'episode_number': int_or_none(extract_site_specific_field('episodeNumber')),
'media_type': extract_site_specific_field('programmingType') or extract_site_specific_field('type'),
}
return info
def _extract_theplatform_metadata(self, path, video_id):
info = self._download_theplatform_metadata(path, video_id)

View File

@ -20,6 +20,7 @@ class TruTVIE(TurnerBaseIE):
'skip_download': True,
},
}
_SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhYzQyOTkwMi0xMDYzLTQyNTQtYWJlYS1iZTY2ODM4MTVmZGIiLCJuYmYiOjE1MzcxOTA4NjgsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODY4fQ.ewXl5LDMDvvx3nDXV4jCdSwUq_sOluKoOVsIjznAo6Zo4zrGe9rjlZ9DOmQKW66g6VRMexJsJ5vM1EkY8TC5-YcQw_BclK1FPGO1rH3Wf7tX_l0b1BVbSJQKIj9UgqDp_QbGcBXz24kN4So3U22mhs6di9PYyyfG68ccKL2iRprcVKWCslIHwUF-T7FaEqb0K57auilxeW1PONG2m-lIAcZ62DUwqXDWvw0CRoWI08aVVqkkhnXaSsQfLs5Ph1Pfh9Oq3g_epUm9Ss45mq6XM7gbOb5omTcKLADRKK-PJVB_JXnZnlsXbG0ttKE1cTKJ738qu7j4aipYTf-W0nKF5Q'
def _real_extract(self, url):
series_slug, clip_slug, video_id = self._match_valid_url(url).groups()
@ -39,7 +40,7 @@ class TruTVIE(TurnerBaseIE):
title = video_data['title'].strip()
info = self._extract_ngtv_info(
media_id, {}, {
media_id, {}, self._SOFTWARE_STATEMENT, {
'url': url,
'site_name': 'truTV',
'auth_required': video_data.get('isAuthRequired'),

View File

@ -22,7 +22,7 @@ class TurnerBaseIE(AdobePassIE):
def _extract_timestamp(self, video_data):
return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts'))
def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data, custom_tokenizer_query=None):
def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data, software_statement, custom_tokenizer_query=None):
secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*'
token = self._AKAMAI_SPE_TOKEN_CACHE.get(secure_path)
if not token:
@ -34,7 +34,8 @@ class TurnerBaseIE(AdobePassIE):
else:
query['videoId'] = content_id
if ap_data.get('auth_required'):
query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], content_id, ap_data['site_name'], ap_data['site_name'])
query['accessToken'] = self._extract_mvpd_auth(
ap_data['url'], content_id, ap_data['site_name'], ap_data['site_name'], software_statement)
auth = self._download_xml(
tokenizer_src, content_id, query=query)
error_msg = xpath_text(auth, 'error/msg')
@ -46,7 +47,7 @@ class TurnerBaseIE(AdobePassIE):
self._AKAMAI_SPE_TOKEN_CACHE[secure_path] = token
return video_url + '?hdnea=' + token
def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}, fatal=False):
def _extract_cvp_info(self, data_src, video_id, software_statement, path_data={}, ap_data={}, fatal=False):
video_data = self._download_xml(
data_src, video_id,
transform_source=lambda s: fix_xml_ampersands(s).strip(),
@ -101,7 +102,7 @@ class TurnerBaseIE(AdobePassIE):
video_url = self._add_akamai_spe_token(
secure_path_data['tokenizer_src'],
secure_path_data['media_src'] + video_url,
content_id, ap_data)
content_id, ap_data, software_statement)
elif not re.match('https?://', video_url):
base_path_data = path_data.get(ext, path_data.get('default', {}))
media_src = base_path_data.get('media_src')
@ -215,10 +216,12 @@ class TurnerBaseIE(AdobePassIE):
'is_live': is_live,
}
def _extract_ngtv_info(self, media_id, tokenizer_query, ap_data=None):
def _extract_ngtv_info(self, media_id, tokenizer_query, software_statement, ap_data=None):
if not isinstance(ap_data, dict):
ap_data = {}
is_live = ap_data.get('is_live')
streams_data = self._download_json(
f'http://medium.ngtv.io/media/{media_id}/tv',
f'https://medium.ngtv.io/media/{media_id}/tv',
media_id)['media']['tv']
duration = None
chapters = []
@ -230,8 +233,8 @@ class TurnerBaseIE(AdobePassIE):
continue
if stream_data.get('playlistProtection') == 'spe':
m3u8_url = self._add_akamai_spe_token(
'http://token.ngtv.io/token/token_spe',
m3u8_url, media_id, ap_data or {}, tokenizer_query)
'https://token.ngtv.io/token/token_spe',
m3u8_url, media_id, ap_data, software_statement, tokenizer_query)
formats.extend(self._extract_m3u8_formats(
m3u8_url, media_id, 'mp4', m3u8_id='hls', live=is_live, fatal=False))

View File

@ -32,6 +32,7 @@ class ViceBaseIE(InfoExtractor):
class ViceIE(ViceBaseIE, AdobePassIE):
_WORKING = False
IE_NAME = 'vice'
_VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]{24})'
_EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]{24})']
@ -99,6 +100,7 @@ class ViceIE(ViceBaseIE, AdobePassIE):
'url': 'https://www.viceland.com/en_us/video/thursday-march-1-2018/5a8f2d7ff1cdb332dd446ec1',
'only_matching': True,
}]
_SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIwMTVjODBlZC04ZDcxLTQ4ZGEtOTZkZi00NzU5NjIwNzJlYTQiLCJuYmYiOjE2NjgwMTM0ODQsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNjY4MDEzNDg0fQ.CjhUnTrlh-bmYnEFHyC2Y4it5Y_Zfza1x66O4-ki5gBR7JT6aUunYI_YflXomQPACriMpObkITFz4grVaDwdd8Xp9hrQ2R0SwRBdaklkdy1_j68RqSP5PnexJIa0q_ThtOwfRBd5uGcb33nMJ9Qs92W4kVXuca0Ta-i7SJyWgXUaPDlRDdgyCL3hKj5wuM7qUIwrd9A5CMm-j3dMIBCDgw7X6TwRK65eUQe6gTWqcvL2yONHHTpmIfeOTUxGwwKFr29COOTBowm0VJ6HE08xjXCShP08Neusu-JsgkjzhkEbiDE2531EKgfAki_7WCd2JUZVsAsCusv4a1maokk6NA'
def _real_extract(self, url):
locale, video_id = self._match_valid_url(url).groups()
@ -116,7 +118,7 @@ class ViceIE(ViceBaseIE, AdobePassIE):
resource = self._get_mvpd_resource(
'VICELAND', title, video_id, rating)
query['tvetoken'] = self._extract_mvpd_auth(
url, video_id, 'VICELAND', resource)
url, video_id, 'VICELAND', resource, self._SOFTWARE_STATEMENT)
# signature generation algorithm is reverse engineered from signatureGenerator in
# webpack:///../shared/~/vice-player/dist/js/vice-player.js in
@ -181,6 +183,7 @@ class ViceIE(ViceBaseIE, AdobePassIE):
class ViceShowIE(ViceBaseIE):
_WORKING = False
IE_NAME = 'vice:show'
_VALID_URL = r'https?://(?:video\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/show/(?P<id>[^/?#&]+)'
_PAGE_SIZE = 25
@ -221,6 +224,7 @@ class ViceShowIE(ViceBaseIE):
class ViceArticleIE(ViceBaseIE):
_WORKING = False
IE_NAME = 'vice:article'
_VALID_URL = r'https?://(?:www\.)?vice\.com/(?P<locale>[^/]+)/article/(?:[0-9a-z]{6}/)?(?P<id>[^?#]+)'

View File

@ -2228,7 +2228,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_name(self, jscode, player_url=None):
varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
if debug_str := traverse_obj(global_list, (lambda _, v: v.endswith('_w8_'), any)):
if debug_str := traverse_obj(global_list, (lambda _, v: v.endswith('-_w8_'), any)):
funcname = self._search_regex(
r'''(?xs)
[;\n](?:
@ -2289,8 +2289,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
def _extract_player_js_global_var(self, jscode, player_url):
"""Returns tuple of strings: variable assignment code, variable name, variable value code"""
def _interpret_player_js_global_var(self, jscode, player_url):
"""Returns tuple of: variable name string, variable value list"""
extract_global_var = self._cached(self._search_regex, 'js global array', player_url)
varcode, varname, varvalue = extract_global_var(
r'''(?x)
@ -2308,27 +2308,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self.write_debug(join_nonempty(
'No global array variable found in player JS',
player_url and f' player = {player_url}', delim='\n'), only_once=True)
return varcode, varname, varvalue
return None, None
def _interpret_player_js_global_var(self, jscode, player_url):
"""Returns tuple of: variable name string, variable value list"""
_, varname, array_code = self._extract_player_js_global_var(jscode, player_url)
jsi = JSInterpreter(array_code)
jsi = JSInterpreter(varcode)
interpret_global_var = self._cached(jsi.interpret_expression, 'js global list', player_url)
return varname, interpret_global_var(array_code, {}, allow_recursion=10)
return varname, interpret_global_var(varvalue, {}, allow_recursion=10)
def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url):
varcode, varname, _ = self._extract_player_js_global_var(jscode, player_url)
if varcode and varname:
nsig_code = varcode + '; ' + nsig_code
_, global_list = self._interpret_player_js_global_var(jscode, player_url)
varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
if varname and global_list:
nsig_code = f'var {varname}={json.dumps(global_list)}; {nsig_code}'
else:
varname = 'dlp_wins'
global_list = []
undefined_idx = global_list.index('undefined') if 'undefined' in global_list else r'\d+'
fixed_code = re.sub(
rf'''(?x)
fr'''(?x)
;\s*if\s*\(\s*typeof\s+[a-zA-Z0-9_$]+\s*===?\s*(?:
(["\'])undefined\1|
{re.escape(varname)}\[{undefined_idx}\]
@ -2402,6 +2398,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return sts
def _mark_watched(self, video_id, player_responses):
# cpn generation algorithm is reverse engineered from base.js.
# In fact it works even with dummy cpn.
CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))
for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
label = 'fully ' if is_full else ''
url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
@ -2412,11 +2413,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
parsed_url = urllib.parse.urlparse(url)
qs = urllib.parse.parse_qs(parsed_url.query)
# cpn generation algorithm is reverse engineered from base.js.
# In fact it works even with dummy cpn.
CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))
# # more consistent results setting it to right before the end
video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]