Compare commits

..

4 Commits

Author SHA1 Message Date
Parker Wahle
97f03660f5
[ie/SaucePlusChannel] Add extractor (#15830)
Closes #14985
Authored by: regulad
2026-02-20 00:07:48 +00:00
bashonly
772559e3db
[ie/tele5] Fix extractor (#16005)
Closes #16003
Authored by: bashonly
2026-02-19 23:53:53 +00:00
Achraf
c7945800e4
[ie/youtube:search:date] Remove broken ytsearchdate support (#15959)
Closes #15898
Authored by: stastix
2026-02-19 23:18:02 +00:00
bashonly
e2444584a3
[ie/facebook:ads] Fix extractor (#16002)
Closes #16000
Authored by: bashonly
2026-02-19 23:08:08 +00:00
8 changed files with 121 additions and 92 deletions

View File

@ -2261,7 +2261,7 @@ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details. * **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.
* **YouTube improvements**: * **YouTube improvements**:
* Supports Clips, Stories (`ytstories:<channel UCID>`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefixes (`ytsearch:`, `ytsearchdate:`)**\***, Mixes, and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`) * Supports Clips, Stories (`ytstories:<channel UCID>`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefix (`ytsearch:`)**\***, Mixes, and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`)
* Fix for [n-sig based throttling](https://github.com/ytdl-org/youtube-dl/issues/29326) **\*** * Fix for [n-sig based throttling](https://github.com/ytdl-org/youtube-dl/issues/29326) **\***
* Download livestreams from the start using `--live-from-start` (*experimental*) * Download livestreams from the start using `--live-from-start` (*experimental*)
* Channel URLs download all uploads of the channel, including shorts and live * Channel URLs download all uploads of the channel, including shorts and live

View File

@ -1792,7 +1792,10 @@ from .safari import (
from .saitosan import SaitosanIE from .saitosan import SaitosanIE
from .samplefocus import SampleFocusIE from .samplefocus import SampleFocusIE
from .sapo import SapoIE from .sapo import SapoIE
from .sauceplus import SaucePlusIE from .sauceplus import (
SaucePlusChannelIE,
SaucePlusIE,
)
from .sbs import SBSIE from .sbs import SBSIE
from .sbscokr import ( from .sbscokr import (
SBSCoKrAllvodProgramIE, SBSCoKrAllvodProgramIE,
@ -2553,7 +2556,6 @@ from .youtube import (
YoutubeNotificationsIE, YoutubeNotificationsIE,
YoutubePlaylistIE, YoutubePlaylistIE,
YoutubeRecommendedIE, YoutubeRecommendedIE,
YoutubeSearchDateIE,
YoutubeSearchIE, YoutubeSearchIE,
YoutubeSearchURLIE, YoutubeSearchURLIE,
YoutubeShortsAudioPivotIE, YoutubeShortsAudioPivotIE,

View File

@ -1041,8 +1041,6 @@ class FacebookAdsIE(InfoExtractor):
'uploader': 'Casper', 'uploader': 'Casper',
'uploader_id': '224110981099062', 'uploader_id': '224110981099062',
'uploader_url': 'https://www.facebook.com/Casper/', 'uploader_url': 'https://www.facebook.com/Casper/',
'timestamp': 1766299837,
'upload_date': '20251221',
'like_count': int, 'like_count': int,
}, },
'playlist_count': 2, 'playlist_count': 2,
@ -1054,12 +1052,23 @@ class FacebookAdsIE(InfoExtractor):
'uploader': 'Case \u00e0 Chocs', 'uploader': 'Case \u00e0 Chocs',
'uploader_id': '112960472096793', 'uploader_id': '112960472096793',
'uploader_url': 'https://www.facebook.com/Caseachocs/', 'uploader_url': 'https://www.facebook.com/Caseachocs/',
'timestamp': 1768498293,
'upload_date': '20260115',
'like_count': int, 'like_count': int,
'description': 'md5:f02a255fcf7dce6ed40e9494cf4bc49a', 'description': 'md5:f02a255fcf7dce6ed40e9494cf4bc49a',
}, },
'playlist_count': 3, 'playlist_count': 3,
}, {
'url': 'https://www.facebook.com/ads/library/?id=1704834754236452',
'info_dict': {
'id': '1704834754236452',
'ext': 'mp4',
'title': 'Get answers now!',
'description': 'Ask the best psychics and get accurate answers on questions that bother you!',
'uploader': 'Your Relationship Advisor',
'uploader_id': '108939234726306',
'uploader_url': 'https://www.facebook.com/100068970634636/',
'like_count': int,
'thumbnail': r're:https://.+/.+\.jpg',
},
}, { }, {
'url': 'https://es-la.facebook.com/ads/library/?id=901230958115569', 'url': 'https://es-la.facebook.com/ads/library/?id=901230958115569',
'only_matching': True, 'only_matching': True,
@ -1123,8 +1132,11 @@ class FacebookAdsIE(InfoExtractor):
post_data = traverse_obj( post_data = traverse_obj(
re.findall(r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage), (..., {json.loads})) re.findall(r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage), (..., {json.loads}))
data = get_first(post_data, ( data = get_first(post_data, (
'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., (
'entryPointRoot', 'otherProps', 'deeplinkAdCard', 'snapshot', {dict})) ('__bbox', 'result', 'data', 'ad_library_main', 'deeplink_ad_archive_result', 'deeplink_ad_archive'),
# old path
('entryPointRoot', 'otherProps', 'deeplinkAdCard'),
), 'snapshot', {dict}))
if not data: if not data:
raise ExtractorError('Unable to extract ad data') raise ExtractorError('Unable to extract ad data')
@ -1140,11 +1152,12 @@ class FacebookAdsIE(InfoExtractor):
'title': title, 'title': title,
'description': markup or None, 'description': markup or None,
}, traverse_obj(data, { }, traverse_obj(data, {
'description': ('link_description', {lambda x: x if not x.startswith('{{product.') else None}), 'description': (
(('body', 'text'), 'link_description'),
{lambda x: x if not x.startswith('{{product.') else None}, any),
'uploader': ('page_name', {str}), 'uploader': ('page_name', {str}),
'uploader_id': ('page_id', {str_or_none}), 'uploader_id': ('page_id', {str_or_none}),
'uploader_url': ('page_profile_uri', {url_or_none}), 'uploader_url': ('page_profile_uri', {url_or_none}),
'timestamp': ('creation_time', {int_or_none}),
'like_count': ('page_like_count', {int_or_none}), 'like_count': ('page_like_count', {int_or_none}),
})) }))
@ -1155,7 +1168,8 @@ class FacebookAdsIE(InfoExtractor):
entries.append({ entries.append({
'id': f'{video_id}_{idx}', 'id': f'{video_id}_{idx}',
'title': entry.get('title') or title, 'title': entry.get('title') or title,
'description': traverse_obj(entry, 'body', 'link_description') or info_dict.get('description'), 'description': traverse_obj(
entry, 'body', 'link_description', expected_type=str) or info_dict.get('description'),
'thumbnail': url_or_none(entry.get('video_preview_image_url')), 'thumbnail': url_or_none(entry.get('video_preview_image_url')),
'formats': self._extract_formats(entry), 'formats': self._extract_formats(entry),
}) })

View File

@ -318,9 +318,48 @@ class FloatplaneIE(FloatplaneBaseIE):
self.raise_login_required() self.raise_login_required()
class FloatplaneChannelIE(InfoExtractor): class FloatplaneChannelBaseIE(InfoExtractor):
"""Subclasses must set _RESULT_IE, _BASE_URL and _PAGE_SIZE"""
def _fetch_page(self, display_id, creator_id, channel_id, page):
    """Yield a url_result for every post on one page of a creator's feed.

    Requests `{_BASE_URL}/api/v3/content/creator` with `limit` set to
    `_PAGE_SIZE` and `fetchAfter` set to `page * _PAGE_SIZE`; the `channel`
    filter is only sent when `channel_id` is truthy. Each returned post is
    delegated to `_RESULT_IE` through its `{_BASE_URL}/post/<id>` URL.

    @param display_id   identifier passed to `_download_json` for this request
    @param creator_id   value sent as the `id` query parameter
    @param channel_id   optional value sent as the `channel` query parameter
    @param page         page index; the progress note displays `page + 1`
    """
    page_size = self._PAGE_SIZE
    params = {
        'id': creator_id,
        'limit': page_size,
        'fetchAfter': page * page_size,
        **({'channel': channel_id} if channel_id else {}),
    }

    posts = self._download_json(
        f'{self._BASE_URL}/api/v3/content/creator', display_id,
        query=params, note=f'Downloading page {page + 1}')
    if not posts:
        # A falsy response is an empty page: nothing to yield
        return

    for post in posts:
        post_id = post['id']
        yield self.url_result(
            f'{self._BASE_URL}/post/{post_id}', self._RESULT_IE,
            id=post_id, title=post.get('title'),
            release_timestamp=parse_iso8601(post.get('releaseDate')))
def _real_extract(self, url):
    """Build a lazily paged playlist for a creator page or one of its channels.

    The creator is looked up by URL name at `{_BASE_URL}/api/v3/creator/named`
    and the first entry of the response is used. When the URL also names a
    channel, the creator's `channels` entry with a matching `urlname` supplies
    the channel id, title and description; otherwise the creator's own
    `title` and `about` are used.
    """
    match = self._match_valid_url(url)
    creator, channel = match.group('id', 'channel')
    display_id = join_nonempty(creator, channel, delim='/')

    creators = self._download_json(
        f'{self._BASE_URL}/api/v3/creator/named',
        display_id, query={'creatorURL[0]': creator})
    creator_data = creators[0]

    # Falls back to an empty dict when no channel entry matches
    channel_data = traverse_obj(
        creator_data, ('channels', lambda _, v: v['urlname'] == channel), get_all=False) or {}

    # Pre-bind everything except the page index, which OnDemandPagedList supplies
    fetch_page = functools.partial(
        self._fetch_page, display_id, creator_data['id'], channel_data.get('id'))
    entries = OnDemandPagedList(fetch_page, self._PAGE_SIZE)

    return self.playlist_result(
        entries, display_id,
        title=channel_data.get('title') or creator_data.get('title'),
        description=channel_data.get('about') or creator_data.get('about'))
class FloatplaneChannelIE(FloatplaneChannelBaseIE):
_VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/channel/(?P<id>[\w-]+)/home(?:/(?P<channel>[\w-]+))?' _VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/channel/(?P<id>[\w-]+)/home(?:/(?P<channel>[\w-]+))?'
_BASE_URL = 'https://www.floatplane.com'
_PAGE_SIZE = 20 _PAGE_SIZE = 20
_RESULT_IE = FloatplaneIE
_TESTS = [{ _TESTS = [{
'url': 'https://www.floatplane.com/channel/linustechtips/home/ltxexpo', 'url': 'https://www.floatplane.com/channel/linustechtips/home/ltxexpo',
'info_dict': { 'info_dict': {
@ -346,36 +385,3 @@ class FloatplaneChannelIE(InfoExtractor):
}, },
'playlist_mincount': 200, 'playlist_mincount': 200,
}] }]
# Generator for one page of a creator's posts; this variant hard-codes the
# https://www.floatplane.com host and the FloatplaneIE result extractor.
def _fetch_page(self, display_id, creator_id, channel_id, page):
# `fetchAfter` is sent as page * _PAGE_SIZE (presumably the number of posts
# to skip -- confirm against the API)
query = {
'id': creator_id,
'limit': self._PAGE_SIZE,
'fetchAfter': page * self._PAGE_SIZE,
}
# The channel filter is only sent when a channel id is available
if channel_id:
query['channel'] = channel_id
page_data = self._download_json(
'https://www.floatplane.com/api/v3/content/creator', display_id,
query=query, note=f'Downloading page {page + 1}')
# A falsy response is treated as an empty page
for post in page_data or []:
# Each post is delegated to FloatplaneIE through its post URL
yield self.url_result(
f'https://www.floatplane.com/post/{post["id"]}',
FloatplaneIE, id=post['id'], title=post.get('title'),
release_timestamp=parse_iso8601(post.get('releaseDate')))
# Resolves a creator (and optional channel) from the URL and returns a lazily
# paged playlist; this variant hard-codes the https://www.floatplane.com host.
def _real_extract(self, url):
# `channel` is None when the URL has no trailing channel segment
creator, channel = self._match_valid_url(url).group('id', 'channel')
display_id = join_nonempty(creator, channel, delim='/')
# The first entry of the response list is used as the creator record
creator_data = self._download_json(
'https://www.floatplane.com/api/v3/creator/named',
display_id, query={'creatorURL[0]': creator})[0]
# Pick the creator's channel whose urlname matches; empty dict when none does
channel_data = traverse_obj(
creator_data, ('channels', lambda _, v: v['urlname'] == channel), get_all=False) or {}
# Channel title/description take precedence over the creator's own
return self.playlist_result(OnDemandPagedList(functools.partial(
self._fetch_page, display_id, creator_data['id'], channel_data.get('id')), self._PAGE_SIZE),
display_id, title=channel_data.get('title') or creator_data.get('title'),
description=channel_data.get('about') or creator_data.get('about'))

View File

@ -1,4 +1,4 @@
from .floatplane import FloatplaneBaseIE from .floatplane import FloatplaneBaseIE, FloatplaneChannelBaseIE
class SaucePlusIE(FloatplaneBaseIE): class SaucePlusIE(FloatplaneBaseIE):
@ -39,3 +39,19 @@ class SaucePlusIE(FloatplaneBaseIE):
def _real_initialize(self): def _real_initialize(self):
if not self._get_cookies(self._BASE_URL).get('__Host-sp-sess'): if not self._get_cookies(self._BASE_URL).get('__Host-sp-sess'):
self.raise_login_required() self.raise_login_required()
class SaucePlusChannelIE(FloatplaneChannelBaseIE):
    # Creator/channel feed extractor for sauceplus.com. All request and
    # playlist logic comes from FloatplaneChannelBaseIE, which requires
    # _BASE_URL, _PAGE_SIZE and _RESULT_IE to be set here.
    _BASE_URL = 'https://www.sauceplus.com'
    _PAGE_SIZE = 20
    _RESULT_IE = SaucePlusIE

    # `id` captures the creator slug; the optional `channel` slug follows `/home/`
    _VALID_URL = r'https?://(?:(?:www|beta)\.)?sauceplus\.com/channel/(?P<id>[\w-]+)/home(?:/(?P<channel>[\w-]+))?'

    _TESTS = [
        {
            'url': 'https://www.sauceplus.com/channel/williamosman/home',
            'info_dict': {
                'id': 'williamosman',
                'title': 'William Osman',
                'description': 'md5:a67bc961d23c293b2c5308d84f34f26c',
            },
            'playlist_mincount': 158,
        },
    ]

View File

@ -9,39 +9,39 @@ class Tele5IE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:www\.)?tele5\.de/(?P<parent_slug>[\w-]+)/(?P<slug_a>[\w-]+)(?:/(?P<slug_b>[\w-]+))?' _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?P<parent_slug>[\w-]+)/(?P<slug_a>[\w-]+)(?:/(?P<slug_b>[\w-]+))?'
_TESTS = [{ _TESTS = [{
# slug_a and slug_b # slug_a and slug_b
'url': 'https://tele5.de/mediathek/stargate-atlantis/quarantane', 'url': 'https://tele5.de/mediathek/star-trek-enterprise/vox-sola',
'info_dict': { 'info_dict': {
'id': '6852024', 'id': '4140114',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Quarantäne', 'title': 'Vox Sola',
'description': 'md5:6af0373bd0fcc4f13e5d47701903d675', 'description': 'md5:329d115f74324d4364efc1a11c4ea7c9',
'episode': 'Episode 73', 'duration': 2542.76,
'episode_number': 73, 'thumbnail': r're:https://[^/.]+\.disco-api\.com/.+\.jpe?g',
'season': 'Season 4',
'season_number': 4,
'series': 'Stargate Atlantis',
'upload_date': '20240525',
'timestamp': 1716643200,
'duration': 2503.2,
'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/05/21/c81fcb45-8902-309b-badb-4e6d546b575d.jpeg',
'creators': ['Tele5'],
'tags': [], 'tags': [],
'creators': ['Tele5'],
'series': 'Star Trek - Enterprise',
'season': 'Season 1',
'season_number': 1,
'episode': 'Episode 22',
'episode_number': 22,
'timestamp': 1770491100,
'upload_date': '20260207',
}, },
}, { }, {
# only slug_a # only slug_a
'url': 'https://tele5.de/mediathek/inside-out', 'url': 'https://tele5.de/mediathek/30-miles-from-nowhere-im-wald-hoert-dich-niemand-schreien',
'info_dict': { 'info_dict': {
'id': '6819502', 'id': '4102641',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Inside out', 'title': '30 Miles from Nowhere - Im Wald hört dich niemand schreien',
'description': 'md5:7e5f32ed0be5ddbd27713a34b9293bfd', 'description': 'md5:0b731539f39ee186ebcd9dd444a86fc2',
'series': 'Inside out', 'duration': 4849.96,
'upload_date': '20240523', 'thumbnail': r're:https://[^/.]+\.disco-api\.com/.+\.jpe?g',
'timestamp': 1716494400,
'duration': 5343.4,
'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/05/15/181eba3c-f9f0-3faf-b14d-0097050a3aa4.jpeg',
'creators': ['Tele5'],
'tags': [], 'tags': [],
'creators': ['Tele5'],
'series': '30 Miles from Nowhere - Im Wald hört dich niemand schreien',
'timestamp': 1770417300,
'upload_date': '20260206',
}, },
}, { }, {
# playlist # playlist
@ -50,20 +50,27 @@ class Tele5IE(DiscoveryPlusBaseIE):
'id': 'mediathek-schlefaz', 'id': 'mediathek-schlefaz',
}, },
'playlist_mincount': 3, 'playlist_mincount': 3,
'skip': 'Dead link',
}] }]
def _real_extract(self, url): def _real_extract(self, url):
parent_slug, slug_a, slug_b = self._match_valid_url(url).group('parent_slug', 'slug_a', 'slug_b') parent_slug, slug_a, slug_b = self._match_valid_url(url).group('parent_slug', 'slug_a', 'slug_b')
playlist_id = join_nonempty(parent_slug, slug_a, slug_b, delim='-') playlist_id = join_nonempty(parent_slug, slug_a, slug_b, delim='-')
query = {'environment': 'tele5', 'v': '2'} query = {
'include': 'default',
'filter[environment]': 'tele5',
'v': '2',
}
if not slug_b: if not slug_b:
endpoint = f'page/{slug_a}' endpoint = f'page/{slug_a}'
query['parent_slug'] = parent_slug query['parent_slug'] = parent_slug
else: else:
endpoint = f'videos/{slug_b}' endpoint = f'shows/{slug_a}'
query['filter[show.slug]'] = slug_a query['filter[video.slug]'] = slug_b
cms_data = self._download_json(f'https://de-api.loma-cms.com/feloma/{endpoint}/', playlist_id, query=query)
cms_data = self._download_json(f'https://public.aurora.enhanced.live/site/{endpoint}/', playlist_id, query=query)
return self.playlist_result(map( return self.playlist_result(map(
functools.partial(self._get_disco_api_info, url, disco_host='eu1-prod.disco-api.com', realm='dmaxde', country='DE'), functools.partial(self._get_disco_api_info, url, disco_host='eu1-prod.disco-api.com', realm='dmaxde', country='DE'),

View File

@ -16,7 +16,7 @@ from ._redirect import (
YoutubeYtBeIE, YoutubeYtBeIE,
YoutubeYtUserIE, YoutubeYtUserIE,
) )
from ._search import YoutubeMusicSearchURLIE, YoutubeSearchDateIE, YoutubeSearchIE, YoutubeSearchURLIE from ._search import YoutubeMusicSearchURLIE, YoutubeSearchIE, YoutubeSearchURLIE
from ._tab import YoutubePlaylistIE, YoutubeTabBaseInfoExtractor, YoutubeTabIE from ._tab import YoutubePlaylistIE, YoutubeTabBaseInfoExtractor, YoutubeTabIE
from ._video import YoutubeIE from ._video import YoutubeIE
@ -39,7 +39,6 @@ for _cls in [
YoutubeYtBeIE, YoutubeYtBeIE,
YoutubeYtUserIE, YoutubeYtUserIE,
YoutubeMusicSearchURLIE, YoutubeMusicSearchURLIE,
YoutubeSearchDateIE,
YoutubeSearchIE, YoutubeSearchIE,
YoutubeSearchURLIE, YoutubeSearchURLIE,
YoutubePlaylistIE, YoutubePlaylistIE,

View File

@ -28,21 +28,6 @@ class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
}] }]
# Search extractor registered under the `ytsearchdate` key; IE_DESC describes
# it as a YouTube search that lists the newest videos first.
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
# Name is derived from YoutubeSearchIE's name with a ':date' suffix
IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
_SEARCH_KEY = 'ytsearchdate'
IE_DESC = 'YouTube search, newest videos first'
_SEARCH_PARAMS = 'CAISAhAB8AEB' # Videos only, sorted by date
# The test expects exactly five entries for a `ytsearchdate5:` query
_TESTS = [{
'url': 'ytsearchdate5:youtube-dl test video',
'playlist_count': 5,
'info_dict': {
'id': 'youtube-dl test video',
'title': 'youtube-dl test video',
},
}]
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor): class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
IE_DESC = 'YouTube search URLs with sorting and filter support' IE_DESC = 'YouTube search URLs with sorting and filter support'
IE_NAME = YoutubeSearchIE.IE_NAME + '_url' IE_NAME = YoutubeSearchIE.IE_NAME + '_url'