mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-04-29 03:55:53 +00:00
Compare commits
5 Commits
e0d6c08229
...
7794374de8
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7794374de8 | ||
|
|
538eb30567 | ||
|
|
f8051e3a61 | ||
|
|
52f9729c9a | ||
|
|
1a8a03ea8d |
@ -2147,6 +2147,7 @@ from .toggle import (
|
||||
from .toggo import ToggoIE
|
||||
from .tonline import TOnlineIE
|
||||
from .toongoggles import ToonGogglesIE
|
||||
from .toutiao import ToutiaoIE
|
||||
from .toutv import TouTvIE
|
||||
from .toypics import (
|
||||
ToypicsIE,
|
||||
|
||||
@ -340,8 +340,9 @@ class PatreonIE(PatreonBaseIE):
|
||||
'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
|
||||
}))
|
||||
|
||||
# all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, Vimeo
|
||||
headers = {'referer': url}
|
||||
# Must be all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, and Vimeo.
|
||||
# patreon.com URLs redirect to www.patreon.com; this matters when requesting mux.com m3u8s
|
||||
headers = {'referer': 'https://www.patreon.com/'}
|
||||
|
||||
# handle Vimeo embeds
|
||||
if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
|
||||
@ -352,7 +353,7 @@ class PatreonIE(PatreonBaseIE):
|
||||
v_url, video_id, 'Checking Vimeo embed URL', headers=headers,
|
||||
fatal=False, errnote=False, expected_status=429): # 429 is TLS fingerprint rejection
|
||||
entries.append(self.url_result(
|
||||
VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
|
||||
VimeoIE._smuggle_referrer(v_url, headers['referer']),
|
||||
VimeoIE, url_transparent=True))
|
||||
|
||||
embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
|
||||
|
||||
@ -5,11 +5,13 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
OnDemandPagedList,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class PodchaserIE(InfoExtractor):
|
||||
@ -21,24 +23,25 @@ class PodchaserIE(InfoExtractor):
|
||||
'id': '104365585',
|
||||
'title': 'Ep. 285 – freeze me off',
|
||||
'description': 'cam ahn',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'ext': 'mp3',
|
||||
'categories': ['Comedy'],
|
||||
'categories': ['Comedy', 'News', 'Politics', 'Arts'],
|
||||
'tags': ['comedy', 'dark humor'],
|
||||
'series': 'Cum Town',
|
||||
'series': 'The Adam Friedland Show Podcast',
|
||||
'duration': 3708,
|
||||
'timestamp': 1636531259,
|
||||
'upload_date': '20211110',
|
||||
'average_rating': 4.0,
|
||||
'series_id': '36924',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.podchaser.com/podcasts/the-bone-zone-28853',
|
||||
'info_dict': {
|
||||
'id': '28853',
|
||||
'title': 'The Bone Zone',
|
||||
'description': 'Podcast by The Bone Zone',
|
||||
'description': r're:The official home of the Bone Zone podcast.+',
|
||||
},
|
||||
'playlist_count': 275,
|
||||
'playlist_mincount': 275,
|
||||
}, {
|
||||
'url': 'https://www.podchaser.com/podcasts/sean-carrolls-mindscape-scienc-699349/episodes',
|
||||
'info_dict': {
|
||||
@ -51,19 +54,33 @@ class PodchaserIE(InfoExtractor):
|
||||
|
||||
@staticmethod
|
||||
def _parse_episode(episode, podcast):
|
||||
return {
|
||||
'id': str(episode.get('id')),
|
||||
'title': episode.get('title'),
|
||||
'description': episode.get('description'),
|
||||
'url': episode.get('audio_url'),
|
||||
'thumbnail': episode.get('image_url'),
|
||||
'duration': str_to_int(episode.get('length')),
|
||||
'timestamp': unified_timestamp(episode.get('air_date')),
|
||||
'average_rating': float_or_none(episode.get('rating')),
|
||||
'categories': list(set(traverse_obj(podcast, (('summary', None), 'categories', ..., 'text')))),
|
||||
'tags': traverse_obj(podcast, ('tags', ..., 'text')),
|
||||
'series': podcast.get('title'),
|
||||
}
|
||||
info = traverse_obj(episode, {
|
||||
'id': ('id', {int}, {str_or_none}, {require('episode ID')}),
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'url': ('audio_url', {url_or_none}),
|
||||
'thumbnail': ('image_url', {url_or_none}),
|
||||
'duration': ('length', {int_or_none}),
|
||||
'timestamp': ('air_date', {unified_timestamp}),
|
||||
'average_rating': ('rating', {float_or_none}),
|
||||
})
|
||||
info.update(traverse_obj(podcast, {
|
||||
'series': ('title', {str}),
|
||||
'series_id': ('id', {int}, {str_or_none}),
|
||||
'categories': (('summary', None), 'categories', ..., 'text', {str}, filter, all, {orderedSet}),
|
||||
'tags': ('tags', ..., 'text', {str}),
|
||||
}))
|
||||
info['vcodec'] = 'none'
|
||||
|
||||
if info.get('series_id'):
|
||||
podcast_slug = traverse_obj(podcast, ('slug', {str})) or 'podcast'
|
||||
episode_slug = traverse_obj(episode, ('slug', {str})) or 'episode'
|
||||
info['webpage_url'] = '/'.join((
|
||||
'https://www.podchaser.com/podcasts',
|
||||
'-'.join((podcast_slug[:30].rstrip('-'), info['series_id'])),
|
||||
'-'.join((episode_slug[:30].rstrip('-'), info['id']))))
|
||||
|
||||
return info
|
||||
|
||||
def _call_api(self, path, *args, **kwargs):
|
||||
return self._download_json(f'https://api.podchaser.com/{path}', *args, **kwargs)
|
||||
@ -93,5 +110,5 @@ class PodchaserIE(InfoExtractor):
|
||||
OnDemandPagedList(functools.partial(self._fetch_page, podcast_id, podcast), self._PAGE_SIZE),
|
||||
str_or_none(podcast.get('id')), podcast.get('title'), podcast.get('description'))
|
||||
|
||||
episode = self._call_api(f'episodes/{episode_id}', episode_id)
|
||||
episode = self._call_api(f'podcasts/{podcast_id}/episodes/{episode_id}/player_ids', episode_id)
|
||||
return self._parse_episode(episode, podcast)
|
||||
|
||||
121
yt_dlp/extractor/toutiao.py
Normal file
121
yt_dlp/extractor/toutiao.py
Normal file
@ -0,0 +1,121 @@
|
||||
import json
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_call,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class ToutiaoIE(InfoExtractor):
|
||||
IE_NAME = 'toutiao'
|
||||
IE_DESC = '今日头条'
|
||||
|
||||
_VALID_URL = r'https?://www\.toutiao\.com/video/(?P<id>\d+)/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.toutiao.com/video/7505382061495176511/',
|
||||
'info_dict': {
|
||||
'id': '7505382061495176511',
|
||||
'ext': 'mp4',
|
||||
'title': '新疆多地现不明飞行物,目击者称和月亮一样亮,几秒内突然加速消失,气象部门回应',
|
||||
'comment_count': int,
|
||||
'duration': 9.753,
|
||||
'like_count': int,
|
||||
'release_date': '20250517',
|
||||
'release_timestamp': 1747483344,
|
||||
'thumbnail': r're:https?://p\d+-sign\.toutiaoimg\.com/.+$',
|
||||
'uploader': '极目新闻',
|
||||
'uploader_id': 'MS4wLjABAAAAeateBb9Su8I3MJOZozmvyzWktmba5LMlliRDz1KffnM',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.toutiao.com/video/7479446610359878153/',
|
||||
'info_dict': {
|
||||
'id': '7479446610359878153',
|
||||
'ext': 'mp4',
|
||||
'title': '小伙竟然利用两块磁铁制作成磁力减震器,简直太有创意了!',
|
||||
'comment_count': int,
|
||||
'duration': 118.374,
|
||||
'like_count': int,
|
||||
'release_date': '20250308',
|
||||
'release_timestamp': 1741444368,
|
||||
'thumbnail': r're:https?://p\d+-sign\.toutiaoimg\.com/.+$',
|
||||
'uploader': '小莉创意发明',
|
||||
'uploader_id': 'MS4wLjABAAAA4f7d4mwtApALtHIiq-QM20dwXqe32NUz0DeWF7wbHKw',
|
||||
'view_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
if self._get_cookies('https://www.toutiao.com').get('ttwid'):
|
||||
return
|
||||
|
||||
urlh = self._request_webpage(
|
||||
'https://ttwid.bytedance.com/ttwid/union/register/', None,
|
||||
'Fetching ttwid', 'Unable to fetch ttwid', headers={
|
||||
'Content-Type': 'application/json',
|
||||
}, data=json.dumps({
|
||||
'aid': 24,
|
||||
'needFid': False,
|
||||
'region': 'cn',
|
||||
'service': 'www.toutiao.com',
|
||||
'union': True,
|
||||
}).encode(),
|
||||
)
|
||||
|
||||
if ttwid := try_call(lambda: self._get_cookies(urlh.url)['ttwid'].value):
|
||||
self._set_cookie('.toutiao.com', 'ttwid', ttwid)
|
||||
return
|
||||
|
||||
self.raise_login_required()
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_data = traverse_obj(webpage, (
|
||||
{find_element(tag='script', id='RENDER_DATA')},
|
||||
{urllib.parse.unquote}, {json.loads}, 'data', 'initialVideo',
|
||||
))
|
||||
|
||||
formats = []
|
||||
for video in traverse_obj(video_data, (
|
||||
'videoPlayInfo', 'video_list', lambda _, v: v['main_url'],
|
||||
)):
|
||||
formats.append({
|
||||
'url': video['main_url'],
|
||||
**traverse_obj(video, ('video_meta', {
|
||||
'acodec': ('audio_profile', {str}),
|
||||
'asr': ('audio_sample_rate', {int_or_none}),
|
||||
'audio_channels': ('audio_channels', {float_or_none}, {int_or_none}),
|
||||
'ext': ('vtype', {str}),
|
||||
'filesize': ('size', {int_or_none}),
|
||||
'format_id': ('definition', {str}),
|
||||
'fps': ('fps', {int_or_none}),
|
||||
'height': ('vheight', {int_or_none}),
|
||||
'tbr': ('real_bitrate', {float_or_none(scale=1000)}),
|
||||
'vcodec': ('codec_type', {str}),
|
||||
'width': ('vwidth', {int_or_none}),
|
||||
})),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(video_data, {
|
||||
'comment_count': ('commentCount', {int_or_none}),
|
||||
'duration': ('videoPlayInfo', 'video_duration', {float_or_none}),
|
||||
'like_count': ('repinCount', {int_or_none}),
|
||||
'release_timestamp': ('publishTime', {int_or_none}),
|
||||
'thumbnail': (('poster', 'coverUrl'), {url_or_none}, any),
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('userInfo', 'name', {str}),
|
||||
'uploader_id': ('userInfo', 'userId', {str_or_none}),
|
||||
'view_count': ('playCount', {int_or_none}),
|
||||
'webpage_url': ('detailUrl', {url_or_none}),
|
||||
}),
|
||||
}
|
||||
@ -1,4 +1,5 @@
|
||||
import base64
|
||||
import hashlib
|
||||
import itertools
|
||||
import re
|
||||
|
||||
@ -16,6 +17,7 @@ from ..utils import (
|
||||
str_to_int,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
@ -171,6 +173,10 @@ class TwitCastingIE(InfoExtractor):
|
||||
'player': 'pc_web',
|
||||
})
|
||||
|
||||
password_params = {
|
||||
'word': hashlib.md5(video_password.encode()).hexdigest(),
|
||||
} if video_password else None
|
||||
|
||||
formats = []
|
||||
# low: 640x360, medium: 1280x720, high: 1920x1080
|
||||
qq = qualities(['low', 'medium', 'high'])
|
||||
@ -178,7 +184,7 @@ class TwitCastingIE(InfoExtractor):
|
||||
'tc-hls', 'streams', {dict.items}, lambda _, v: url_or_none(v[1]),
|
||||
)):
|
||||
formats.append({
|
||||
'url': m3u8_url,
|
||||
'url': update_url_query(m3u8_url, password_params),
|
||||
'format_id': f'hls-{quality}',
|
||||
'ext': 'mp4',
|
||||
'quality': qq(quality),
|
||||
@ -192,7 +198,7 @@ class TwitCastingIE(InfoExtractor):
|
||||
'llfmp4', 'streams', {dict.items}, lambda _, v: url_or_none(v[1]),
|
||||
)):
|
||||
formats.append({
|
||||
'url': ws_url,
|
||||
'url': update_url_query(ws_url, password_params),
|
||||
'format_id': f'ws-{mode}',
|
||||
'ext': 'mp4',
|
||||
'quality': qq(mode),
|
||||
|
||||
@ -20,7 +20,6 @@ from ..utils import (
|
||||
remove_end,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
truncate_string,
|
||||
try_call,
|
||||
try_get,
|
||||
@ -29,6 +28,7 @@ from ..utils import (
|
||||
url_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class TwitterBaseIE(InfoExtractor):
|
||||
@ -1596,8 +1596,8 @@ class TwitterAmplifyIE(TwitterBaseIE):
|
||||
|
||||
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||
IE_NAME = 'twitter:broadcast'
|
||||
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'
|
||||
|
||||
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?P<type>broadcasts|events)/(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
# untitled Periscope video
|
||||
'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
|
||||
@ -1605,6 +1605,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||
'id': '1yNGaQLWpejGj',
|
||||
'ext': 'mp4',
|
||||
'title': 'Andrea May Sahouri - Periscope Broadcast',
|
||||
'display_id': '1yNGaQLWpejGj',
|
||||
'uploader': 'Andrea May Sahouri',
|
||||
'uploader_id': 'andreamsahouri',
|
||||
'uploader_url': 'https://twitter.com/andreamsahouri',
|
||||
@ -1612,6 +1613,8 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||
'upload_date': '20200601',
|
||||
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
|
||||
'view_count': int,
|
||||
'concurrent_view_count': int,
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
|
||||
@ -1619,6 +1622,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||
'id': '1ZkKzeyrPbaxv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Starship | SN10 | High-Altitude Flight Test',
|
||||
'display_id': '1ZkKzeyrPbaxv',
|
||||
'uploader': 'SpaceX',
|
||||
'uploader_id': 'SpaceX',
|
||||
'uploader_url': 'https://twitter.com/SpaceX',
|
||||
@ -1626,6 +1630,8 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||
'upload_date': '20210303',
|
||||
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
|
||||
'view_count': int,
|
||||
'concurrent_view_count': int,
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
|
||||
@ -1633,6 +1639,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||
'id': '1OyKAVQrgzwGb',
|
||||
'ext': 'mp4',
|
||||
'title': 'Starship Flight Test',
|
||||
'display_id': '1OyKAVQrgzwGb',
|
||||
'uploader': 'SpaceX',
|
||||
'uploader_id': 'SpaceX',
|
||||
'uploader_url': 'https://twitter.com/SpaceX',
|
||||
@ -1640,21 +1647,58 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||
'upload_date': '20230420',
|
||||
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
|
||||
'view_count': int,
|
||||
'concurrent_view_count': int,
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://x.com/i/events/1910629646300762112',
|
||||
'info_dict': {
|
||||
'id': '1LyxBWDRNqyKN',
|
||||
'ext': 'mp4',
|
||||
'title': '#ガンニバル ウォッチパーティー',
|
||||
'concurrent_view_count': int,
|
||||
'display_id': '1910629646300762112',
|
||||
'live_status': 'was_live',
|
||||
'release_date': '20250423',
|
||||
'release_timestamp': 1745409000,
|
||||
'tags': ['ガンニバル'],
|
||||
'thumbnail': r're:https?://[^?#]+\.jpg\?token=',
|
||||
'timestamp': 1745403328,
|
||||
'upload_date': '20250423',
|
||||
'uploader': 'ディズニープラス公式',
|
||||
'uploader_id': 'DisneyPlusJP',
|
||||
'uploader_url': 'https://twitter.com/DisneyPlusJP',
|
||||
'view_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
broadcast_id = self._match_id(url)
|
||||
broadcast_type, display_id = self._match_valid_url(url).group('type', 'id')
|
||||
|
||||
if broadcast_type == 'events':
|
||||
timeline = self._call_api(
|
||||
f'live_event/1/{display_id}/timeline.json', display_id)
|
||||
broadcast_id = traverse_obj(timeline, (
|
||||
'twitter_objects', 'broadcasts', ..., ('id', 'broadcast_id'),
|
||||
{str}, any, {require('broadcast ID')}))
|
||||
else:
|
||||
broadcast_id = display_id
|
||||
|
||||
broadcast = self._call_api(
|
||||
'broadcasts/show.json', broadcast_id,
|
||||
{'ids': broadcast_id})['broadcasts'][broadcast_id]
|
||||
if not broadcast:
|
||||
raise ExtractorError('Broadcast no longer exists', expected=True)
|
||||
info = self._parse_broadcast_data(broadcast, broadcast_id)
|
||||
info['title'] = broadcast.get('status') or info.get('title')
|
||||
info['uploader_id'] = broadcast.get('twitter_username') or info.get('uploader_id')
|
||||
info['uploader_url'] = format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None)
|
||||
info.update({
|
||||
'display_id': display_id,
|
||||
'title': broadcast.get('status') or info.get('title'),
|
||||
'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
|
||||
'uploader_url': format_field(
|
||||
broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
|
||||
})
|
||||
if info['live_status'] == 'is_upcoming':
|
||||
self.raise_no_formats('This live broadcast has not yet started', expected=True)
|
||||
return info
|
||||
|
||||
media_key = broadcast['media_key']
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user