mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-04-30 12:36:03 +00:00
Compare commits
2 Commits
c7bda2192a
...
fe5ae54a7b
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fe5ae54a7b | ||
|
|
78748b506f |
@ -1,47 +1,125 @@
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, str_to_int
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
jwt_decode_hs256,
|
||||
jwt_encode,
|
||||
parse_resolution,
|
||||
qualities,
|
||||
unified_strdate,
|
||||
update_url,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import (
|
||||
find_element,
|
||||
require,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class AppleConnectIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/(?:id)?sa\.(?P<id>[\w-]+)'
|
||||
IE_NAME = 'apple:music:connect'
|
||||
IE_DESC = 'Apple Music Connect'
|
||||
|
||||
_BASE_URL = 'https://music.apple.com'
|
||||
_QUALITIES = {
|
||||
'provisionalUploadVideo': None,
|
||||
'sdVideo': 480,
|
||||
'sdVideoWithPlusAudio': 480,
|
||||
'sd480pVideo': 480,
|
||||
'720pHdVideo': 720,
|
||||
'1080pHdVideo': 1080,
|
||||
}
|
||||
_VALID_URL = r'https?://music\.apple\.com/[\w-]+/post/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
||||
'md5': 'c1d41f72c8bcaf222e089434619316e4',
|
||||
'url': 'https://music.apple.com/us/post/1018290019',
|
||||
'info_dict': {
|
||||
'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
||||
'id': '1018290019',
|
||||
'ext': 'm4v',
|
||||
'title': 'Energy',
|
||||
'uploader': 'Drake',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 177.911,
|
||||
'thumbnail': r're:https?://.+\.png',
|
||||
'upload_date': '20150710',
|
||||
'timestamp': 1436545535,
|
||||
'uploader': 'Drake',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://itunes.apple.com/us/post/sa.0fe0229f-2457-11e5-9f40-1bb645f2d5d9',
|
||||
'only_matching': True,
|
||||
'url': 'https://music.apple.com/us/post/1016746627',
|
||||
'info_dict': {
|
||||
'id': '1016746627',
|
||||
'ext': 'm4v',
|
||||
'title': 'Body Shop (Madonna) - Chellous Lima (Acoustic Cover)',
|
||||
'duration': 210.278,
|
||||
'thumbnail': r're:https?://.+\.png',
|
||||
'upload_date': '20150706',
|
||||
'uploader': 'Chellous Lima',
|
||||
},
|
||||
}]
|
||||
|
||||
_jwt = None
|
||||
|
||||
@staticmethod
|
||||
def _jwt_is_expired(token):
|
||||
return jwt_decode_hs256(token)['exp'] - time.time() < 120
|
||||
|
||||
def _get_token(self, webpage, video_id):
|
||||
if self._jwt and not self._jwt_is_expired(self._jwt):
|
||||
return self._jwt
|
||||
|
||||
js_url = traverse_obj(webpage, (
|
||||
{find_element(tag='script', attr='crossorigin', value='', html=True)},
|
||||
{extract_attributes}, 'src', {urljoin(self._BASE_URL)}, {require('JS URL')}))
|
||||
js = self._download_webpage(
|
||||
js_url, video_id, 'Downloading token JS', 'Unable to download token JS')
|
||||
|
||||
header = jwt_encode({}, '', headers={'alg': 'ES256', 'kid': 'WebPlayKid'}).split('.')[0]
|
||||
self._jwt = self._search_regex(
|
||||
fr'(["\'])(?P<jwt>{header}(?:\.[\w-]+){{2}})\1', js, 'JSON Web Token', group='jwt')
|
||||
if self._jwt_is_expired(self._jwt):
|
||||
raise ExtractorError('The fetched token is already expired')
|
||||
|
||||
return self._jwt
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
try:
|
||||
video_json = self._html_search_regex(
|
||||
r'class="auc-video-data">(\{.*?\})', webpage, 'json')
|
||||
except ExtractorError:
|
||||
raise ExtractorError('This post doesn\'t contain a video', expected=True)
|
||||
videos = self._download_json(
|
||||
'https://amp-api.music.apple.com/v1/catalog/us/uploaded-videos',
|
||||
video_id, headers={
|
||||
'Authorization': f'Bearer {self._get_token(webpage, video_id)}',
|
||||
'Origin': self._BASE_URL,
|
||||
}, query={'ids': video_id, 'l': 'en-US'})
|
||||
attributes = traverse_obj(videos, (
|
||||
'data', ..., 'attributes', any, {require('video information')}))
|
||||
|
||||
video_data = self._parse_json(video_json, video_id)
|
||||
timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
|
||||
like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count', default=None))
|
||||
formats = []
|
||||
quality = qualities(list(self._QUALITIES.keys()))
|
||||
for format_id, src_url in traverse_obj(attributes, (
|
||||
'assetTokens', {dict.items}, lambda _, v: url_or_none(v[1]),
|
||||
)):
|
||||
formats.append({
|
||||
'ext': 'm4v',
|
||||
'format_id': format_id,
|
||||
'height': self._QUALITIES.get(format_id),
|
||||
'quality': quality(format_id),
|
||||
'url': src_url,
|
||||
**parse_resolution(update_url(src_url, query=None), lenient=True),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_data['sslSrc'],
|
||||
'title': video_data['title'],
|
||||
'description': video_data['description'],
|
||||
'uploader': video_data['artistName'],
|
||||
'thumbnail': video_data['artworkUrl'],
|
||||
'timestamp': timestamp,
|
||||
'like_count': like_count,
|
||||
'formats': formats,
|
||||
'thumbnail': self._html_search_meta(
|
||||
['og:image', 'og:image:secure_url', 'twitter:image'], webpage),
|
||||
**traverse_obj(attributes, {
|
||||
'title': ('name', {str}),
|
||||
'duration': ('durationInMilliseconds', {float_or_none(scale=1000)}),
|
||||
'upload_date': ('uploadDate', {unified_strdate}),
|
||||
'uploader': (('artistName', 'uploadingArtistName'), {str}, any),
|
||||
'webpage_url': ('postUrl', {url_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
@ -1,46 +1,82 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
get_element_by_class,
|
||||
extract_attributes,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class TVNoeIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/video/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.tvnoe.cz/video/10362',
|
||||
'md5': 'aee983f279aab96ec45ab6e2abb3c2ca',
|
||||
IE_NAME = 'tvnoe'
|
||||
IE_DESC = 'Televize Noe'
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/porad/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tvnoe.cz/porad/43216-outdoor-films-s-mudr-tomasem-kempnym-pomahat-potrebnym-nejen-u-nas',
|
||||
'info_dict': {
|
||||
'id': '10362',
|
||||
'id': '43216-outdoor-films-s-mudr-tomasem-kempnym-pomahat-potrebnym-nejen-u-nas',
|
||||
'ext': 'mp4',
|
||||
'series': 'Noční univerzita',
|
||||
'title': 'prof. Tomáš Halík, Th.D. - Návrat náboženství a střet civilizací',
|
||||
'description': 'md5:f337bae384e1a531a52c55ebc50fff41',
|
||||
'title': 'Pomáhat potřebným nejen u nás',
|
||||
'description': 'md5:78b538ee32f7e881ec23b9c278a0ff3a',
|
||||
'release_date': '20250531',
|
||||
'series': 'Outdoor Films s MUDr. Tomášem Kempným',
|
||||
'thumbnail': r're:https?://www\.tvnoe\.cz/.+\.jpg',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.tvnoe.cz/porad/43205-zamysleni-tomase-halika-7-nedele-velikonocni',
|
||||
'info_dict': {
|
||||
'id': '43205-zamysleni-tomase-halika-7-nedele-velikonocni',
|
||||
'ext': 'mp4',
|
||||
'title': '7. neděle velikonoční',
|
||||
'description': 'md5:6bb9908efc59abe60e1c8c7c0e9bb6cd',
|
||||
'release_date': '20250531',
|
||||
'series': 'Zamyšlení Tomáše Halíka',
|
||||
'thumbnail': r're:https?://www\.tvnoe\.cz/.+\.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player = self._search_json(
|
||||
r'var\s+INIT_PLAYER\s*=', webpage, 'init player',
|
||||
video_id, transform_source=js_to_json)
|
||||
|
||||
iframe_url = self._search_regex(
|
||||
r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe URL')
|
||||
formats = []
|
||||
for source in traverse_obj(player, ('tracks', ..., lambda _, v: url_or_none(v['src']))):
|
||||
src_url = source['src']
|
||||
ext = mimetype2ext(source.get('type'))
|
||||
if ext == 'm3u8':
|
||||
fmts = self._extract_m3u8_formats(
|
||||
src_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
elif ext == 'mpd':
|
||||
fmts = self._extract_mpd_formats(
|
||||
src_url, video_id, mpd_id='dash', fatal=False)
|
||||
else:
|
||||
self.report_warning(f'Unsupported stream type: {ext}')
|
||||
continue
|
||||
formats.extend(fmts)
|
||||
|
||||
ifs_page = self._download_webpage(iframe_url, video_id)
|
||||
jwplayer_data = self._find_jwplayer_data(
|
||||
ifs_page, video_id, transform_source=js_to_json)
|
||||
info_dict = self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, require_title=False, base_url=iframe_url)
|
||||
|
||||
info_dict.update({
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': clean_html(get_element_by_class(
|
||||
'field-name-field-podnazev', webpage)),
|
||||
'description': clean_html(get_element_by_class(
|
||||
'field-name-body', webpage)),
|
||||
'series': clean_html(get_element_by_class('title', webpage)),
|
||||
})
|
||||
|
||||
return info_dict
|
||||
'description': clean_html(self._search_regex(
|
||||
r'<p\s+class="">(.+?)</p>', webpage, 'description', default=None)),
|
||||
'formats': formats,
|
||||
**traverse_obj(webpage, {
|
||||
'title': ({find_element(tag='h2')}, {clean_html}),
|
||||
'release_date': (
|
||||
{clean_html}, {re.compile(r'Premiéra:\s*(\d{1,2}\.\d{1,2}\.\d{4})').findall},
|
||||
..., {str}, {unified_strdate}, any),
|
||||
'series': ({find_element(tag='h1')}, {clean_html}),
|
||||
'thumbnail': (
|
||||
{find_element(id='player-live', html=True)}, {extract_attributes},
|
||||
'poster', {urljoin('https://www.tvnoe.cz/')}),
|
||||
}),
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user