Compare commits

..

No commits in common. "7794374de8afb20499b023107e2abfd4e6b93ee4" and "e0d6c0822930f6e63f574d46d946a58b73ecd10c" have entirely different histories.

6 changed files with 32 additions and 222 deletions

View File

@ -2147,7 +2147,6 @@ from .toggle import (
from .toggo import ToggoIE from .toggo import ToggoIE
from .tonline import TOnlineIE from .tonline import TOnlineIE
from .toongoggles import ToonGogglesIE from .toongoggles import ToonGogglesIE
from .toutiao import ToutiaoIE
from .toutv import TouTvIE from .toutv import TouTvIE
from .toypics import ( from .toypics import (
ToypicsIE, ToypicsIE,

View File

@ -340,9 +340,8 @@ class PatreonIE(PatreonBaseIE):
'channel_follower_count': ('attributes', 'patron_count', {int_or_none}), 'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
})) }))
# Must be all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, and Vimeo. # all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, Vimeo
# patreon.com URLs redirect to www.patreon.com; this matters when requesting mux.com m3u8s headers = {'referer': url}
headers = {'referer': 'https://www.patreon.com/'}
# handle Vimeo embeds # handle Vimeo embeds
if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo': if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
@ -353,7 +352,7 @@ class PatreonIE(PatreonBaseIE):
v_url, video_id, 'Checking Vimeo embed URL', headers=headers, v_url, video_id, 'Checking Vimeo embed URL', headers=headers,
fatal=False, errnote=False, expected_status=429): # 429 is TLS fingerprint rejection fatal=False, errnote=False, expected_status=429): # 429 is TLS fingerprint rejection
entries.append(self.url_result( entries.append(self.url_result(
VimeoIE._smuggle_referrer(v_url, headers['referer']), VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
VimeoIE, url_transparent=True)) VimeoIE, url_transparent=True))
embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none})) embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))

View File

@ -5,13 +5,11 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
OnDemandPagedList, OnDemandPagedList,
float_or_none, float_or_none,
int_or_none,
orderedSet,
str_or_none, str_or_none,
str_to_int,
traverse_obj,
unified_timestamp, unified_timestamp,
url_or_none,
) )
from ..utils.traversal import require, traverse_obj
class PodchaserIE(InfoExtractor): class PodchaserIE(InfoExtractor):
@ -23,25 +21,24 @@ class PodchaserIE(InfoExtractor):
'id': '104365585', 'id': '104365585',
'title': 'Ep. 285 freeze me off', 'title': 'Ep. 285 freeze me off',
'description': 'cam ahn', 'description': 'cam ahn',
'thumbnail': r're:https?://.+/.+\.jpg', 'thumbnail': r're:^https?://.*\.jpg$',
'ext': 'mp3', 'ext': 'mp3',
'categories': ['Comedy', 'News', 'Politics', 'Arts'], 'categories': ['Comedy'],
'tags': ['comedy', 'dark humor'], 'tags': ['comedy', 'dark humor'],
'series': 'The Adam Friedland Show Podcast', 'series': 'Cum Town',
'duration': 3708, 'duration': 3708,
'timestamp': 1636531259, 'timestamp': 1636531259,
'upload_date': '20211110', 'upload_date': '20211110',
'average_rating': 4.0, 'average_rating': 4.0,
'series_id': '36924',
}, },
}, { }, {
'url': 'https://www.podchaser.com/podcasts/the-bone-zone-28853', 'url': 'https://www.podchaser.com/podcasts/the-bone-zone-28853',
'info_dict': { 'info_dict': {
'id': '28853', 'id': '28853',
'title': 'The Bone Zone', 'title': 'The Bone Zone',
'description': r're:The official home of the Bone Zone podcast.+', 'description': 'Podcast by The Bone Zone',
}, },
'playlist_mincount': 275, 'playlist_count': 275,
}, { }, {
'url': 'https://www.podchaser.com/podcasts/sean-carrolls-mindscape-scienc-699349/episodes', 'url': 'https://www.podchaser.com/podcasts/sean-carrolls-mindscape-scienc-699349/episodes',
'info_dict': { 'info_dict': {
@ -54,33 +51,19 @@ class PodchaserIE(InfoExtractor):
@staticmethod @staticmethod
def _parse_episode(episode, podcast): def _parse_episode(episode, podcast):
info = traverse_obj(episode, { return {
'id': ('id', {int}, {str_or_none}, {require('episode ID')}), 'id': str(episode.get('id')),
'title': ('title', {str}), 'title': episode.get('title'),
'description': ('description', {str}), 'description': episode.get('description'),
'url': ('audio_url', {url_or_none}), 'url': episode.get('audio_url'),
'thumbnail': ('image_url', {url_or_none}), 'thumbnail': episode.get('image_url'),
'duration': ('length', {int_or_none}), 'duration': str_to_int(episode.get('length')),
'timestamp': ('air_date', {unified_timestamp}), 'timestamp': unified_timestamp(episode.get('air_date')),
'average_rating': ('rating', {float_or_none}), 'average_rating': float_or_none(episode.get('rating')),
}) 'categories': list(set(traverse_obj(podcast, (('summary', None), 'categories', ..., 'text')))),
info.update(traverse_obj(podcast, { 'tags': traverse_obj(podcast, ('tags', ..., 'text')),
'series': ('title', {str}), 'series': podcast.get('title'),
'series_id': ('id', {int}, {str_or_none}), }
'categories': (('summary', None), 'categories', ..., 'text', {str}, filter, all, {orderedSet}),
'tags': ('tags', ..., 'text', {str}),
}))
info['vcodec'] = 'none'
if info.get('series_id'):
podcast_slug = traverse_obj(podcast, ('slug', {str})) or 'podcast'
episode_slug = traverse_obj(episode, ('slug', {str})) or 'episode'
info['webpage_url'] = '/'.join((
'https://www.podchaser.com/podcasts',
'-'.join((podcast_slug[:30].rstrip('-'), info['series_id'])),
'-'.join((episode_slug[:30].rstrip('-'), info['id']))))
return info
def _call_api(self, path, *args, **kwargs): def _call_api(self, path, *args, **kwargs):
return self._download_json(f'https://api.podchaser.com/{path}', *args, **kwargs) return self._download_json(f'https://api.podchaser.com/{path}', *args, **kwargs)
@ -110,5 +93,5 @@ class PodchaserIE(InfoExtractor):
OnDemandPagedList(functools.partial(self._fetch_page, podcast_id, podcast), self._PAGE_SIZE), OnDemandPagedList(functools.partial(self._fetch_page, podcast_id, podcast), self._PAGE_SIZE),
str_or_none(podcast.get('id')), podcast.get('title'), podcast.get('description')) str_or_none(podcast.get('id')), podcast.get('title'), podcast.get('description'))
episode = self._call_api(f'podcasts/{podcast_id}/episodes/{episode_id}/player_ids', episode_id) episode = self._call_api(f'episodes/{episode_id}', episode_id)
return self._parse_episode(episode, podcast) return self._parse_episode(episode, podcast)

View File

@ -1,121 +0,0 @@
import json
import urllib.parse
from .common import InfoExtractor
from ..utils import (
float_or_none,
int_or_none,
str_or_none,
try_call,
url_or_none,
)
from ..utils.traversal import find_element, traverse_obj
class ToutiaoIE(InfoExtractor):
    # Extractor for single-video pages matched by _VALID_URL on www.toutiao.com
    IE_NAME = 'toutiao'
    IE_DESC = '今日头条'
    _VALID_URL = r'https?://www\.toutiao\.com/video/(?P<id>\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.toutiao.com/video/7505382061495176511/',
        'info_dict': {
            'id': '7505382061495176511',
            'ext': 'mp4',
            'title': '新疆多地现不明飞行物,目击者称和月亮一样亮,几秒内突然加速消失,气象部门回应',
            'comment_count': int,
            'duration': 9.753,
            'like_count': int,
            'release_date': '20250517',
            'release_timestamp': 1747483344,
            'thumbnail': r're:https?://p\d+-sign\.toutiaoimg\.com/.+$',
            'uploader': '极目新闻',
            'uploader_id': 'MS4wLjABAAAAeateBb9Su8I3MJOZozmvyzWktmba5LMlliRDz1KffnM',
            'view_count': int,
        },
    }, {
        'url': 'https://www.toutiao.com/video/7479446610359878153/',
        'info_dict': {
            'id': '7479446610359878153',
            'ext': 'mp4',
            'title': '小伙竟然利用两块磁铁制作成磁力减震器,简直太有创意了!',
            'comment_count': int,
            'duration': 118.374,
            'like_count': int,
            'release_date': '20250308',
            'release_timestamp': 1741444368,
            'thumbnail': r're:https?://p\d+-sign\.toutiaoimg\.com/.+$',
            'uploader': '小莉创意发明',
            'uploader_id': 'MS4wLjABAAAA4f7d4mwtApALtHIiq-QM20dwXqe32NUz0DeWF7wbHKw',
            'view_count': int,
        },
    }]

    def _real_initialize(self):
        """Make sure a ``ttwid`` cookie is set for toutiao.com before extraction."""
        # Nothing to do when a ttwid cookie is already stored for the site
        stored_ttwid = self._get_cookies('https://www.toutiao.com').get('ttwid')
        if stored_ttwid:
            return

        # JSON body posted to the ttwid registration endpoint
        register_payload = json.dumps({
            'aid': 24,
            'needFid': False,
            'region': 'cn',
            'service': 'www.toutiao.com',
            'union': True,
        }).encode()
        urlh = self._request_webpage(
            'https://ttwid.bytedance.com/ttwid/union/register/', None,
            'Fetching ttwid', 'Unable to fetch ttwid',
            headers={'Content-Type': 'application/json'},
            data=register_payload)

        # Read the cookie stored for the response URL and copy it to .toutiao.com
        ttwid = try_call(lambda: self._get_cookies(urlh.url)['ttwid'].value)
        if ttwid:
            self._set_cookie('.toutiao.com', 'ttwid', ttwid)
        else:
            self.raise_login_required()

    def _real_extract(self, url):
        """Build the info dict from the page's embedded ``RENDER_DATA`` script."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The script content is URL-unquoted and then parsed as JSON;
        # the video object is read from data -> initialVideo
        render_data_path = (
            {find_element(tag='script', id='RENDER_DATA')},
            {urllib.parse.unquote}, {json.loads}, 'data', 'initialVideo',
        )
        video_data = traverse_obj(webpage, render_data_path)

        # Mapping from each entry's 'video_meta' fields to format fields
        format_fields = {
            'acodec': ('audio_profile', {str}),
            'asr': ('audio_sample_rate', {int_or_none}),
            'audio_channels': ('audio_channels', {float_or_none}, {int_or_none}),
            'ext': ('vtype', {str}),
            'filesize': ('size', {int_or_none}),
            'format_id': ('definition', {str}),
            'fps': ('fps', {int_or_none}),
            'height': ('vheight', {int_or_none}),
            'tbr': ('real_bitrate', {float_or_none(scale=1000)}),
            'vcodec': ('codec_type', {str}),
            'width': ('vwidth', {int_or_none}),
        }
        # Only entries of video_list with a truthy 'main_url' are kept
        playable_videos = traverse_obj(video_data, (
            'videoPlayInfo', 'video_list', lambda _, v: v['main_url'],
        ))
        formats = [{
            'url': entry['main_url'],
            **traverse_obj(entry, ('video_meta', format_fields)),
        } for entry in playable_videos]

        metadata = traverse_obj(video_data, {
            'comment_count': ('commentCount', {int_or_none}),
            'duration': ('videoPlayInfo', 'video_duration', {float_or_none}),
            'like_count': ('repinCount', {int_or_none}),
            'release_timestamp': ('publishTime', {int_or_none}),
            'thumbnail': (('poster', 'coverUrl'), {url_or_none}, any),
            'title': ('title', {str}),
            'uploader': ('userInfo', 'name', {str}),
            'uploader_id': ('userInfo', 'userId', {str_or_none}),
            'view_count': ('playCount', {int_or_none}),
            'webpage_url': ('detailUrl', {url_or_none}),
        })

        return {
            'id': video_id,
            'formats': formats,
            **metadata,
        }

View File

@ -1,5 +1,4 @@
import base64 import base64
import hashlib
import itertools import itertools
import re import re
@ -17,7 +16,6 @@ from ..utils import (
str_to_int, str_to_int,
try_get, try_get,
unified_timestamp, unified_timestamp,
update_url_query,
url_or_none, url_or_none,
urlencode_postdata, urlencode_postdata,
urljoin, urljoin,
@ -173,10 +171,6 @@ class TwitCastingIE(InfoExtractor):
'player': 'pc_web', 'player': 'pc_web',
}) })
password_params = {
'word': hashlib.md5(video_password.encode()).hexdigest(),
} if video_password else None
formats = [] formats = []
# low: 640x360, medium: 1280x720, high: 1920x1080 # low: 640x360, medium: 1280x720, high: 1920x1080
qq = qualities(['low', 'medium', 'high']) qq = qualities(['low', 'medium', 'high'])
@ -184,7 +178,7 @@ class TwitCastingIE(InfoExtractor):
'tc-hls', 'streams', {dict.items}, lambda _, v: url_or_none(v[1]), 'tc-hls', 'streams', {dict.items}, lambda _, v: url_or_none(v[1]),
)): )):
formats.append({ formats.append({
'url': update_url_query(m3u8_url, password_params), 'url': m3u8_url,
'format_id': f'hls-{quality}', 'format_id': f'hls-{quality}',
'ext': 'mp4', 'ext': 'mp4',
'quality': qq(quality), 'quality': qq(quality),
@ -198,7 +192,7 @@ class TwitCastingIE(InfoExtractor):
'llfmp4', 'streams', {dict.items}, lambda _, v: url_or_none(v[1]), 'llfmp4', 'streams', {dict.items}, lambda _, v: url_or_none(v[1]),
)): )):
formats.append({ formats.append({
'url': update_url_query(ws_url, password_params), 'url': ws_url,
'format_id': f'ws-{mode}', 'format_id': f'ws-{mode}',
'ext': 'mp4', 'ext': 'mp4',
'quality': qq(mode), 'quality': qq(mode),

View File

@ -20,6 +20,7 @@ from ..utils import (
remove_end, remove_end,
str_or_none, str_or_none,
strip_or_none, strip_or_none,
traverse_obj,
truncate_string, truncate_string,
try_call, try_call,
try_get, try_get,
@ -28,7 +29,6 @@ from ..utils import (
url_or_none, url_or_none,
xpath_text, xpath_text,
) )
from ..utils.traversal import require, traverse_obj
class TwitterBaseIE(InfoExtractor): class TwitterBaseIE(InfoExtractor):
@ -1596,8 +1596,8 @@ class TwitterAmplifyIE(TwitterBaseIE):
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE): class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
IE_NAME = 'twitter:broadcast' IE_NAME = 'twitter:broadcast'
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?P<type>broadcasts|events)/(?P<id>\w+)'
_TESTS = [{ _TESTS = [{
# untitled Periscope video # untitled Periscope video
'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj', 'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
@ -1605,7 +1605,6 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
'id': '1yNGaQLWpejGj', 'id': '1yNGaQLWpejGj',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Andrea May Sahouri - Periscope Broadcast', 'title': 'Andrea May Sahouri - Periscope Broadcast',
'display_id': '1yNGaQLWpejGj',
'uploader': 'Andrea May Sahouri', 'uploader': 'Andrea May Sahouri',
'uploader_id': 'andreamsahouri', 'uploader_id': 'andreamsahouri',
'uploader_url': 'https://twitter.com/andreamsahouri', 'uploader_url': 'https://twitter.com/andreamsahouri',
@ -1613,8 +1612,6 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
'upload_date': '20200601', 'upload_date': '20200601',
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=', 'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
'view_count': int, 'view_count': int,
'concurrent_view_count': int,
'live_status': 'was_live',
}, },
}, { }, {
'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv', 'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
@ -1622,7 +1619,6 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
'id': '1ZkKzeyrPbaxv', 'id': '1ZkKzeyrPbaxv',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Starship | SN10 | High-Altitude Flight Test', 'title': 'Starship | SN10 | High-Altitude Flight Test',
'display_id': '1ZkKzeyrPbaxv',
'uploader': 'SpaceX', 'uploader': 'SpaceX',
'uploader_id': 'SpaceX', 'uploader_id': 'SpaceX',
'uploader_url': 'https://twitter.com/SpaceX', 'uploader_url': 'https://twitter.com/SpaceX',
@ -1630,8 +1626,6 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
'upload_date': '20210303', 'upload_date': '20210303',
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=', 'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
'view_count': int, 'view_count': int,
'concurrent_view_count': int,
'live_status': 'was_live',
}, },
}, { }, {
'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb', 'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
@ -1639,7 +1633,6 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
'id': '1OyKAVQrgzwGb', 'id': '1OyKAVQrgzwGb',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Starship Flight Test', 'title': 'Starship Flight Test',
'display_id': '1OyKAVQrgzwGb',
'uploader': 'SpaceX', 'uploader': 'SpaceX',
'uploader_id': 'SpaceX', 'uploader_id': 'SpaceX',
'uploader_url': 'https://twitter.com/SpaceX', 'uploader_url': 'https://twitter.com/SpaceX',
@ -1647,58 +1640,21 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
'upload_date': '20230420', 'upload_date': '20230420',
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=', 'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
'view_count': int, 'view_count': int,
'concurrent_view_count': int,
'live_status': 'was_live',
},
}, {
'url': 'https://x.com/i/events/1910629646300762112',
'info_dict': {
'id': '1LyxBWDRNqyKN',
'ext': 'mp4',
'title': '#ガンニバル ウォッチパーティー',
'concurrent_view_count': int,
'display_id': '1910629646300762112',
'live_status': 'was_live',
'release_date': '20250423',
'release_timestamp': 1745409000,
'tags': ['ガンニバル'],
'thumbnail': r're:https?://[^?#]+\.jpg\?token=',
'timestamp': 1745403328,
'upload_date': '20250423',
'uploader': 'ディズニープラス公式',
'uploader_id': 'DisneyPlusJP',
'uploader_url': 'https://twitter.com/DisneyPlusJP',
'view_count': int,
}, },
}] }]
def _real_extract(self, url): def _real_extract(self, url):
broadcast_type, display_id = self._match_valid_url(url).group('type', 'id') broadcast_id = self._match_id(url)
if broadcast_type == 'events':
timeline = self._call_api(
f'live_event/1/{display_id}/timeline.json', display_id)
broadcast_id = traverse_obj(timeline, (
'twitter_objects', 'broadcasts', ..., ('id', 'broadcast_id'),
{str}, any, {require('broadcast ID')}))
else:
broadcast_id = display_id
broadcast = self._call_api( broadcast = self._call_api(
'broadcasts/show.json', broadcast_id, 'broadcasts/show.json', broadcast_id,
{'ids': broadcast_id})['broadcasts'][broadcast_id] {'ids': broadcast_id})['broadcasts'][broadcast_id]
if not broadcast: if not broadcast:
raise ExtractorError('Broadcast no longer exists', expected=True) raise ExtractorError('Broadcast no longer exists', expected=True)
info = self._parse_broadcast_data(broadcast, broadcast_id) info = self._parse_broadcast_data(broadcast, broadcast_id)
info.update({ info['title'] = broadcast.get('status') or info.get('title')
'display_id': display_id, info['uploader_id'] = broadcast.get('twitter_username') or info.get('uploader_id')
'title': broadcast.get('status') or info.get('title'), info['uploader_url'] = format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None)
'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
'uploader_url': format_field(
broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
})
if info['live_status'] == 'is_upcoming': if info['live_status'] == 'is_upcoming':
self.raise_no_formats('This live broadcast has not yet started', expected=True)
return info return info
media_key = broadcast['media_key'] media_key = broadcast['media_key']