Compare commits

..

No commits in common. "7794374de8afb20499b023107e2abfd4e6b93ee4" and "e0d6c0822930f6e63f574d46d946a58b73ecd10c" have entirely different histories.

6 changed files with 32 additions and 222 deletions

View File

@ -2147,7 +2147,6 @@ from .toggle import (
from .toggo import ToggoIE
from .tonline import TOnlineIE
from .toongoggles import ToonGogglesIE
from .toutiao import ToutiaoIE
from .toutv import TouTvIE
from .toypics import (
ToypicsIE,

View File

@ -340,9 +340,8 @@ class PatreonIE(PatreonBaseIE):
'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
}))
# Must be all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, and Vimeo.
# patreon.com URLs redirect to www.patreon.com; this matters when requesting mux.com m3u8s
headers = {'referer': 'https://www.patreon.com/'}
# all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, Vimeo
headers = {'referer': url}
# handle Vimeo embeds
if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
@ -353,7 +352,7 @@ class PatreonIE(PatreonBaseIE):
v_url, video_id, 'Checking Vimeo embed URL', headers=headers,
fatal=False, errnote=False, expected_status=429): # 429 is TLS fingerprint rejection
entries.append(self.url_result(
VimeoIE._smuggle_referrer(v_url, headers['referer']),
VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
VimeoIE, url_transparent=True))
embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))

View File

@ -5,13 +5,11 @@ from .common import InfoExtractor
from ..utils import (
OnDemandPagedList,
float_or_none,
int_or_none,
orderedSet,
str_or_none,
str_to_int,
traverse_obj,
unified_timestamp,
url_or_none,
)
from ..utils.traversal import require, traverse_obj
class PodchaserIE(InfoExtractor):
@ -23,25 +21,24 @@ class PodchaserIE(InfoExtractor):
'id': '104365585',
'title': 'Ep. 285 freeze me off',
'description': 'cam ahn',
'thumbnail': r're:https?://.+/.+\.jpg',
'thumbnail': r're:^https?://.*\.jpg$',
'ext': 'mp3',
'categories': ['Comedy', 'News', 'Politics', 'Arts'],
'categories': ['Comedy'],
'tags': ['comedy', 'dark humor'],
'series': 'The Adam Friedland Show Podcast',
'series': 'Cum Town',
'duration': 3708,
'timestamp': 1636531259,
'upload_date': '20211110',
'average_rating': 4.0,
'series_id': '36924',
},
}, {
'url': 'https://www.podchaser.com/podcasts/the-bone-zone-28853',
'info_dict': {
'id': '28853',
'title': 'The Bone Zone',
'description': r're:The official home of the Bone Zone podcast.+',
'description': 'Podcast by The Bone Zone',
},
'playlist_mincount': 275,
'playlist_count': 275,
}, {
'url': 'https://www.podchaser.com/podcasts/sean-carrolls-mindscape-scienc-699349/episodes',
'info_dict': {
@ -54,33 +51,19 @@ class PodchaserIE(InfoExtractor):
@staticmethod
def _parse_episode(episode, podcast):
info = traverse_obj(episode, {
'id': ('id', {int}, {str_or_none}, {require('episode ID')}),
'title': ('title', {str}),
'description': ('description', {str}),
'url': ('audio_url', {url_or_none}),
'thumbnail': ('image_url', {url_or_none}),
'duration': ('length', {int_or_none}),
'timestamp': ('air_date', {unified_timestamp}),
'average_rating': ('rating', {float_or_none}),
})
info.update(traverse_obj(podcast, {
'series': ('title', {str}),
'series_id': ('id', {int}, {str_or_none}),
'categories': (('summary', None), 'categories', ..., 'text', {str}, filter, all, {orderedSet}),
'tags': ('tags', ..., 'text', {str}),
}))
info['vcodec'] = 'none'
if info.get('series_id'):
podcast_slug = traverse_obj(podcast, ('slug', {str})) or 'podcast'
episode_slug = traverse_obj(episode, ('slug', {str})) or 'episode'
info['webpage_url'] = '/'.join((
'https://www.podchaser.com/podcasts',
'-'.join((podcast_slug[:30].rstrip('-'), info['series_id'])),
'-'.join((episode_slug[:30].rstrip('-'), info['id']))))
return info
return {
'id': str(episode.get('id')),
'title': episode.get('title'),
'description': episode.get('description'),
'url': episode.get('audio_url'),
'thumbnail': episode.get('image_url'),
'duration': str_to_int(episode.get('length')),
'timestamp': unified_timestamp(episode.get('air_date')),
'average_rating': float_or_none(episode.get('rating')),
'categories': list(set(traverse_obj(podcast, (('summary', None), 'categories', ..., 'text')))),
'tags': traverse_obj(podcast, ('tags', ..., 'text')),
'series': podcast.get('title'),
}
def _call_api(self, path, *args, **kwargs):
return self._download_json(f'https://api.podchaser.com/{path}', *args, **kwargs)
@ -110,5 +93,5 @@ class PodchaserIE(InfoExtractor):
OnDemandPagedList(functools.partial(self._fetch_page, podcast_id, podcast), self._PAGE_SIZE),
str_or_none(podcast.get('id')), podcast.get('title'), podcast.get('description'))
episode = self._call_api(f'podcasts/{podcast_id}/episodes/{episode_id}/player_ids', episode_id)
episode = self._call_api(f'episodes/{episode_id}', episode_id)
return self._parse_episode(episode, podcast)

View File

@ -1,121 +0,0 @@
import json
import urllib.parse
from .common import InfoExtractor
from ..utils import (
float_or_none,
int_or_none,
str_or_none,
try_call,
url_or_none,
)
from ..utils.traversal import find_element, traverse_obj
class ToutiaoIE(InfoExtractor):
IE_NAME = 'toutiao'
IE_DESC = '今日头条'
_VALID_URL = r'https?://www\.toutiao\.com/video/(?P<id>\d+)/?(?:[?#]|$)'
_TESTS = [{
'url': 'https://www.toutiao.com/video/7505382061495176511/',
'info_dict': {
'id': '7505382061495176511',
'ext': 'mp4',
'title': '新疆多地现不明飞行物,目击者称和月亮一样亮,几秒内突然加速消失,气象部门回应',
'comment_count': int,
'duration': 9.753,
'like_count': int,
'release_date': '20250517',
'release_timestamp': 1747483344,
'thumbnail': r're:https?://p\d+-sign\.toutiaoimg\.com/.+$',
'uploader': '极目新闻',
'uploader_id': 'MS4wLjABAAAAeateBb9Su8I3MJOZozmvyzWktmba5LMlliRDz1KffnM',
'view_count': int,
},
}, {
'url': 'https://www.toutiao.com/video/7479446610359878153/',
'info_dict': {
'id': '7479446610359878153',
'ext': 'mp4',
'title': '小伙竟然利用两块磁铁制作成磁力减震器,简直太有创意了!',
'comment_count': int,
'duration': 118.374,
'like_count': int,
'release_date': '20250308',
'release_timestamp': 1741444368,
'thumbnail': r're:https?://p\d+-sign\.toutiaoimg\.com/.+$',
'uploader': '小莉创意发明',
'uploader_id': 'MS4wLjABAAAA4f7d4mwtApALtHIiq-QM20dwXqe32NUz0DeWF7wbHKw',
'view_count': int,
},
}]
def _real_initialize(self):
if self._get_cookies('https://www.toutiao.com').get('ttwid'):
return
urlh = self._request_webpage(
'https://ttwid.bytedance.com/ttwid/union/register/', None,
'Fetching ttwid', 'Unable to fetch ttwid', headers={
'Content-Type': 'application/json',
}, data=json.dumps({
'aid': 24,
'needFid': False,
'region': 'cn',
'service': 'www.toutiao.com',
'union': True,
}).encode(),
)
if ttwid := try_call(lambda: self._get_cookies(urlh.url)['ttwid'].value):
self._set_cookie('.toutiao.com', 'ttwid', ttwid)
return
self.raise_login_required()
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_data = traverse_obj(webpage, (
{find_element(tag='script', id='RENDER_DATA')},
{urllib.parse.unquote}, {json.loads}, 'data', 'initialVideo',
))
formats = []
for video in traverse_obj(video_data, (
'videoPlayInfo', 'video_list', lambda _, v: v['main_url'],
)):
formats.append({
'url': video['main_url'],
**traverse_obj(video, ('video_meta', {
'acodec': ('audio_profile', {str}),
'asr': ('audio_sample_rate', {int_or_none}),
'audio_channels': ('audio_channels', {float_or_none}, {int_or_none}),
'ext': ('vtype', {str}),
'filesize': ('size', {int_or_none}),
'format_id': ('definition', {str}),
'fps': ('fps', {int_or_none}),
'height': ('vheight', {int_or_none}),
'tbr': ('real_bitrate', {float_or_none(scale=1000)}),
'vcodec': ('codec_type', {str}),
'width': ('vwidth', {int_or_none}),
})),
})
return {
'id': video_id,
'formats': formats,
**traverse_obj(video_data, {
'comment_count': ('commentCount', {int_or_none}),
'duration': ('videoPlayInfo', 'video_duration', {float_or_none}),
'like_count': ('repinCount', {int_or_none}),
'release_timestamp': ('publishTime', {int_or_none}),
'thumbnail': (('poster', 'coverUrl'), {url_or_none}, any),
'title': ('title', {str}),
'uploader': ('userInfo', 'name', {str}),
'uploader_id': ('userInfo', 'userId', {str_or_none}),
'view_count': ('playCount', {int_or_none}),
'webpage_url': ('detailUrl', {url_or_none}),
}),
}

View File

@ -1,5 +1,4 @@
import base64
import hashlib
import itertools
import re
@ -17,7 +16,6 @@ from ..utils import (
str_to_int,
try_get,
unified_timestamp,
update_url_query,
url_or_none,
urlencode_postdata,
urljoin,
@ -173,10 +171,6 @@ class TwitCastingIE(InfoExtractor):
'player': 'pc_web',
})
password_params = {
'word': hashlib.md5(video_password.encode()).hexdigest(),
} if video_password else None
formats = []
# low: 640x360, medium: 1280x720, high: 1920x1080
qq = qualities(['low', 'medium', 'high'])
@ -184,7 +178,7 @@ class TwitCastingIE(InfoExtractor):
'tc-hls', 'streams', {dict.items}, lambda _, v: url_or_none(v[1]),
)):
formats.append({
'url': update_url_query(m3u8_url, password_params),
'url': m3u8_url,
'format_id': f'hls-{quality}',
'ext': 'mp4',
'quality': qq(quality),
@ -198,7 +192,7 @@ class TwitCastingIE(InfoExtractor):
'llfmp4', 'streams', {dict.items}, lambda _, v: url_or_none(v[1]),
)):
formats.append({
'url': update_url_query(ws_url, password_params),
'url': ws_url,
'format_id': f'ws-{mode}',
'ext': 'mp4',
'quality': qq(mode),

View File

@ -20,6 +20,7 @@ from ..utils import (
remove_end,
str_or_none,
strip_or_none,
traverse_obj,
truncate_string,
try_call,
try_get,
@ -28,7 +29,6 @@ from ..utils import (
url_or_none,
xpath_text,
)
from ..utils.traversal import require, traverse_obj
class TwitterBaseIE(InfoExtractor):
@ -1596,8 +1596,8 @@ class TwitterAmplifyIE(TwitterBaseIE):
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
IE_NAME = 'twitter:broadcast'
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?P<type>broadcasts|events)/(?P<id>\w+)'
_TESTS = [{
# untitled Periscope video
'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
@ -1605,7 +1605,6 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
'id': '1yNGaQLWpejGj',
'ext': 'mp4',
'title': 'Andrea May Sahouri - Periscope Broadcast',
'display_id': '1yNGaQLWpejGj',
'uploader': 'Andrea May Sahouri',
'uploader_id': 'andreamsahouri',
'uploader_url': 'https://twitter.com/andreamsahouri',
@ -1613,8 +1612,6 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
'upload_date': '20200601',
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
'view_count': int,
'concurrent_view_count': int,
'live_status': 'was_live',
},
}, {
'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
@ -1622,7 +1619,6 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
'id': '1ZkKzeyrPbaxv',
'ext': 'mp4',
'title': 'Starship | SN10 | High-Altitude Flight Test',
'display_id': '1ZkKzeyrPbaxv',
'uploader': 'SpaceX',
'uploader_id': 'SpaceX',
'uploader_url': 'https://twitter.com/SpaceX',
@ -1630,8 +1626,6 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
'upload_date': '20210303',
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
'view_count': int,
'concurrent_view_count': int,
'live_status': 'was_live',
},
}, {
'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
@ -1639,7 +1633,6 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
'id': '1OyKAVQrgzwGb',
'ext': 'mp4',
'title': 'Starship Flight Test',
'display_id': '1OyKAVQrgzwGb',
'uploader': 'SpaceX',
'uploader_id': 'SpaceX',
'uploader_url': 'https://twitter.com/SpaceX',
@ -1647,58 +1640,21 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
'upload_date': '20230420',
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
'view_count': int,
'concurrent_view_count': int,
'live_status': 'was_live',
},
}, {
'url': 'https://x.com/i/events/1910629646300762112',
'info_dict': {
'id': '1LyxBWDRNqyKN',
'ext': 'mp4',
'title': '#ガンニバル ウォッチパーティー',
'concurrent_view_count': int,
'display_id': '1910629646300762112',
'live_status': 'was_live',
'release_date': '20250423',
'release_timestamp': 1745409000,
'tags': ['ガンニバル'],
'thumbnail': r're:https?://[^?#]+\.jpg\?token=',
'timestamp': 1745403328,
'upload_date': '20250423',
'uploader': 'ディズニープラス公式',
'uploader_id': 'DisneyPlusJP',
'uploader_url': 'https://twitter.com/DisneyPlusJP',
'view_count': int,
},
}]
def _real_extract(self, url):
broadcast_type, display_id = self._match_valid_url(url).group('type', 'id')
if broadcast_type == 'events':
timeline = self._call_api(
f'live_event/1/{display_id}/timeline.json', display_id)
broadcast_id = traverse_obj(timeline, (
'twitter_objects', 'broadcasts', ..., ('id', 'broadcast_id'),
{str}, any, {require('broadcast ID')}))
else:
broadcast_id = display_id
broadcast_id = self._match_id(url)
broadcast = self._call_api(
'broadcasts/show.json', broadcast_id,
{'ids': broadcast_id})['broadcasts'][broadcast_id]
if not broadcast:
raise ExtractorError('Broadcast no longer exists', expected=True)
info = self._parse_broadcast_data(broadcast, broadcast_id)
info.update({
'display_id': display_id,
'title': broadcast.get('status') or info.get('title'),
'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
'uploader_url': format_field(
broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
})
info['title'] = broadcast.get('status') or info.get('title')
info['uploader_id'] = broadcast.get('twitter_username') or info.get('uploader_id')
info['uploader_url'] = format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None)
if info['live_status'] == 'is_upcoming':
self.raise_no_formats('This live broadcast has not yet started', expected=True)
return info
media_key = broadcast['media_key']