Compare commits

...

4 Commits

Author SHA1 Message Date
Parker Wahle
97f03660f5
[ie/SaucePlusChannel] Add extractor (#15830)
Closes #14985
Authored by: regulad
2026-02-20 00:07:48 +00:00
bashonly
772559e3db
[ie/tele5] Fix extractor (#16005)
Closes #16003
Authored by: bashonly
2026-02-19 23:53:53 +00:00
Achraf
c7945800e4
[ie/youtube:search:date] Remove broken ytsearchdate support (#15959)
Closes #15898
Authored by: stastix
2026-02-19 23:18:02 +00:00
bashonly
e2444584a3
[ie/facebook:ads] Fix extractor (#16002)
Closes #16000
Authored by: bashonly
2026-02-19 23:08:08 +00:00
8 changed files with 121 additions and 92 deletions

View File

@ -2261,7 +2261,7 @@ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.
* **YouTube improvements**:
* Supports Clips, Stories (`ytstories:<channel UCID>`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefixes (`ytsearch:`, `ytsearchdate:`)**\***, Mixes, and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`)
* Supports Clips, Stories (`ytstories:<channel UCID>`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefix (`ytsearch:`)**\***, Mixes, and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`)
* Fix for [n-sig based throttling](https://github.com/ytdl-org/youtube-dl/issues/29326) **\***
* Download livestreams from the start using `--live-from-start` (*experimental*)
* Channel URLs download all uploads of the channel, including shorts and live

View File

@ -1792,7 +1792,10 @@ from .safari import (
from .saitosan import SaitosanIE
from .samplefocus import SampleFocusIE
from .sapo import SapoIE
from .sauceplus import SaucePlusIE
from .sauceplus import (
SaucePlusChannelIE,
SaucePlusIE,
)
from .sbs import SBSIE
from .sbscokr import (
SBSCoKrAllvodProgramIE,
@ -2553,7 +2556,6 @@ from .youtube import (
YoutubeNotificationsIE,
YoutubePlaylistIE,
YoutubeRecommendedIE,
YoutubeSearchDateIE,
YoutubeSearchIE,
YoutubeSearchURLIE,
YoutubeShortsAudioPivotIE,

View File

@ -1041,8 +1041,6 @@ class FacebookAdsIE(InfoExtractor):
'uploader': 'Casper',
'uploader_id': '224110981099062',
'uploader_url': 'https://www.facebook.com/Casper/',
'timestamp': 1766299837,
'upload_date': '20251221',
'like_count': int,
},
'playlist_count': 2,
@ -1054,12 +1052,23 @@ class FacebookAdsIE(InfoExtractor):
'uploader': 'Case \u00e0 Chocs',
'uploader_id': '112960472096793',
'uploader_url': 'https://www.facebook.com/Caseachocs/',
'timestamp': 1768498293,
'upload_date': '20260115',
'like_count': int,
'description': 'md5:f02a255fcf7dce6ed40e9494cf4bc49a',
},
'playlist_count': 3,
}, {
'url': 'https://www.facebook.com/ads/library/?id=1704834754236452',
'info_dict': {
'id': '1704834754236452',
'ext': 'mp4',
'title': 'Get answers now!',
'description': 'Ask the best psychics and get accurate answers on questions that bother you!',
'uploader': 'Your Relationship Advisor',
'uploader_id': '108939234726306',
'uploader_url': 'https://www.facebook.com/100068970634636/',
'like_count': int,
'thumbnail': r're:https://.+/.+\.jpg',
},
}, {
'url': 'https://es-la.facebook.com/ads/library/?id=901230958115569',
'only_matching': True,
@ -1123,8 +1132,11 @@ class FacebookAdsIE(InfoExtractor):
post_data = traverse_obj(
re.findall(r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage), (..., {json.loads}))
data = get_first(post_data, (
'require', ..., ..., ..., '__bbox', 'require', ..., ..., ...,
'entryPointRoot', 'otherProps', 'deeplinkAdCard', 'snapshot', {dict}))
'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., (
('__bbox', 'result', 'data', 'ad_library_main', 'deeplink_ad_archive_result', 'deeplink_ad_archive'),
# old path
('entryPointRoot', 'otherProps', 'deeplinkAdCard'),
), 'snapshot', {dict}))
if not data:
raise ExtractorError('Unable to extract ad data')
@ -1140,11 +1152,12 @@ class FacebookAdsIE(InfoExtractor):
'title': title,
'description': markup or None,
}, traverse_obj(data, {
'description': ('link_description', {lambda x: x if not x.startswith('{{product.') else None}),
'description': (
(('body', 'text'), 'link_description'),
{lambda x: x if not x.startswith('{{product.') else None}, any),
'uploader': ('page_name', {str}),
'uploader_id': ('page_id', {str_or_none}),
'uploader_url': ('page_profile_uri', {url_or_none}),
'timestamp': ('creation_time', {int_or_none}),
'like_count': ('page_like_count', {int_or_none}),
}))
@ -1155,7 +1168,8 @@ class FacebookAdsIE(InfoExtractor):
entries.append({
'id': f'{video_id}_{idx}',
'title': entry.get('title') or title,
'description': traverse_obj(entry, 'body', 'link_description') or info_dict.get('description'),
'description': traverse_obj(
entry, 'body', 'link_description', expected_type=str) or info_dict.get('description'),
'thumbnail': url_or_none(entry.get('video_preview_image_url')),
'formats': self._extract_formats(entry),
})

View File

@ -318,9 +318,48 @@ class FloatplaneIE(FloatplaneBaseIE):
self.raise_login_required()
class FloatplaneChannelIE(InfoExtractor):
class FloatplaneChannelBaseIE(InfoExtractor):
"""Subclasses must set _RESULT_IE, _BASE_URL and _PAGE_SIZE"""
def _fetch_page(self, display_id, creator_id, channel_id, page):
query = {
'id': creator_id,
'limit': self._PAGE_SIZE,
'fetchAfter': page * self._PAGE_SIZE,
}
if channel_id:
query['channel'] = channel_id
page_data = self._download_json(
f'{self._BASE_URL}/api/v3/content/creator', display_id,
query=query, note=f'Downloading page {page + 1}')
for post in page_data or []:
yield self.url_result(
f'{self._BASE_URL}/post/{post["id"]}',
self._RESULT_IE, id=post['id'], title=post.get('title'),
release_timestamp=parse_iso8601(post.get('releaseDate')))
def _real_extract(self, url):
creator, channel = self._match_valid_url(url).group('id', 'channel')
display_id = join_nonempty(creator, channel, delim='/')
creator_data = self._download_json(
f'{self._BASE_URL}/api/v3/creator/named',
display_id, query={'creatorURL[0]': creator})[0]
channel_data = traverse_obj(
creator_data, ('channels', lambda _, v: v['urlname'] == channel), get_all=False) or {}
return self.playlist_result(OnDemandPagedList(functools.partial(
self._fetch_page, display_id, creator_data['id'], channel_data.get('id')), self._PAGE_SIZE),
display_id, title=channel_data.get('title') or creator_data.get('title'),
description=channel_data.get('about') or creator_data.get('about'))
class FloatplaneChannelIE(FloatplaneChannelBaseIE):
_VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/channel/(?P<id>[\w-]+)/home(?:/(?P<channel>[\w-]+))?'
_BASE_URL = 'https://www.floatplane.com'
_PAGE_SIZE = 20
_RESULT_IE = FloatplaneIE
_TESTS = [{
'url': 'https://www.floatplane.com/channel/linustechtips/home/ltxexpo',
'info_dict': {
@ -346,36 +385,3 @@ class FloatplaneChannelIE(InfoExtractor):
},
'playlist_mincount': 200,
}]
def _fetch_page(self, display_id, creator_id, channel_id, page):
query = {
'id': creator_id,
'limit': self._PAGE_SIZE,
'fetchAfter': page * self._PAGE_SIZE,
}
if channel_id:
query['channel'] = channel_id
page_data = self._download_json(
'https://www.floatplane.com/api/v3/content/creator', display_id,
query=query, note=f'Downloading page {page + 1}')
for post in page_data or []:
yield self.url_result(
f'https://www.floatplane.com/post/{post["id"]}',
FloatplaneIE, id=post['id'], title=post.get('title'),
release_timestamp=parse_iso8601(post.get('releaseDate')))
def _real_extract(self, url):
creator, channel = self._match_valid_url(url).group('id', 'channel')
display_id = join_nonempty(creator, channel, delim='/')
creator_data = self._download_json(
'https://www.floatplane.com/api/v3/creator/named',
display_id, query={'creatorURL[0]': creator})[0]
channel_data = traverse_obj(
creator_data, ('channels', lambda _, v: v['urlname'] == channel), get_all=False) or {}
return self.playlist_result(OnDemandPagedList(functools.partial(
self._fetch_page, display_id, creator_data['id'], channel_data.get('id')), self._PAGE_SIZE),
display_id, title=channel_data.get('title') or creator_data.get('title'),
description=channel_data.get('about') or creator_data.get('about'))

View File

@ -1,4 +1,4 @@
from .floatplane import FloatplaneBaseIE
from .floatplane import FloatplaneBaseIE, FloatplaneChannelBaseIE
class SaucePlusIE(FloatplaneBaseIE):
@ -39,3 +39,19 @@ class SaucePlusIE(FloatplaneBaseIE):
def _real_initialize(self):
if not self._get_cookies(self._BASE_URL).get('__Host-sp-sess'):
self.raise_login_required()
class SaucePlusChannelIE(FloatplaneChannelBaseIE):
_VALID_URL = r'https?://(?:(?:www|beta)\.)?sauceplus\.com/channel/(?P<id>[\w-]+)/home(?:/(?P<channel>[\w-]+))?'
_BASE_URL = 'https://www.sauceplus.com'
_RESULT_IE = SaucePlusIE
_PAGE_SIZE = 20
_TESTS = [{
'url': 'https://www.sauceplus.com/channel/williamosman/home',
'info_dict': {
'id': 'williamosman',
'title': 'William Osman',
'description': 'md5:a67bc961d23c293b2c5308d84f34f26c',
},
'playlist_mincount': 158,
}]

View File

@ -9,39 +9,39 @@ class Tele5IE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:www\.)?tele5\.de/(?P<parent_slug>[\w-]+)/(?P<slug_a>[\w-]+)(?:/(?P<slug_b>[\w-]+))?'
_TESTS = [{
# slug_a and slug_b
'url': 'https://tele5.de/mediathek/stargate-atlantis/quarantane',
'url': 'https://tele5.de/mediathek/star-trek-enterprise/vox-sola',
'info_dict': {
'id': '6852024',
'id': '4140114',
'ext': 'mp4',
'title': 'Quarantäne',
'description': 'md5:6af0373bd0fcc4f13e5d47701903d675',
'episode': 'Episode 73',
'episode_number': 73,
'season': 'Season 4',
'season_number': 4,
'series': 'Stargate Atlantis',
'upload_date': '20240525',
'timestamp': 1716643200,
'duration': 2503.2,
'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/05/21/c81fcb45-8902-309b-badb-4e6d546b575d.jpeg',
'creators': ['Tele5'],
'title': 'Vox Sola',
'description': 'md5:329d115f74324d4364efc1a11c4ea7c9',
'duration': 2542.76,
'thumbnail': r're:https://[^/.]+\.disco-api\.com/.+\.jpe?g',
'tags': [],
'creators': ['Tele5'],
'series': 'Star Trek - Enterprise',
'season': 'Season 1',
'season_number': 1,
'episode': 'Episode 22',
'episode_number': 22,
'timestamp': 1770491100,
'upload_date': '20260207',
},
}, {
# only slug_a
'url': 'https://tele5.de/mediathek/inside-out',
'url': 'https://tele5.de/mediathek/30-miles-from-nowhere-im-wald-hoert-dich-niemand-schreien',
'info_dict': {
'id': '6819502',
'id': '4102641',
'ext': 'mp4',
'title': 'Inside out',
'description': 'md5:7e5f32ed0be5ddbd27713a34b9293bfd',
'series': 'Inside out',
'upload_date': '20240523',
'timestamp': 1716494400,
'duration': 5343.4,
'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/05/15/181eba3c-f9f0-3faf-b14d-0097050a3aa4.jpeg',
'creators': ['Tele5'],
'title': '30 Miles from Nowhere - Im Wald hört dich niemand schreien',
'description': 'md5:0b731539f39ee186ebcd9dd444a86fc2',
'duration': 4849.96,
'thumbnail': r're:https://[^/.]+\.disco-api\.com/.+\.jpe?g',
'tags': [],
'creators': ['Tele5'],
'series': '30 Miles from Nowhere - Im Wald hört dich niemand schreien',
'timestamp': 1770417300,
'upload_date': '20260206',
},
}, {
# playlist
@ -50,20 +50,27 @@ class Tele5IE(DiscoveryPlusBaseIE):
'id': 'mediathek-schlefaz',
},
'playlist_mincount': 3,
'skip': 'Dead link',
}]
def _real_extract(self, url):
parent_slug, slug_a, slug_b = self._match_valid_url(url).group('parent_slug', 'slug_a', 'slug_b')
playlist_id = join_nonempty(parent_slug, slug_a, slug_b, delim='-')
query = {'environment': 'tele5', 'v': '2'}
query = {
'include': 'default',
'filter[environment]': 'tele5',
'v': '2',
}
if not slug_b:
endpoint = f'page/{slug_a}'
query['parent_slug'] = parent_slug
else:
endpoint = f'videos/{slug_b}'
query['filter[show.slug]'] = slug_a
cms_data = self._download_json(f'https://de-api.loma-cms.com/feloma/{endpoint}/', playlist_id, query=query)
endpoint = f'shows/{slug_a}'
query['filter[video.slug]'] = slug_b
cms_data = self._download_json(f'https://public.aurora.enhanced.live/site/{endpoint}/', playlist_id, query=query)
return self.playlist_result(map(
functools.partial(self._get_disco_api_info, url, disco_host='eu1-prod.disco-api.com', realm='dmaxde', country='DE'),

View File

@ -16,7 +16,7 @@ from ._redirect import (
YoutubeYtBeIE,
YoutubeYtUserIE,
)
from ._search import YoutubeMusicSearchURLIE, YoutubeSearchDateIE, YoutubeSearchIE, YoutubeSearchURLIE
from ._search import YoutubeMusicSearchURLIE, YoutubeSearchIE, YoutubeSearchURLIE
from ._tab import YoutubePlaylistIE, YoutubeTabBaseInfoExtractor, YoutubeTabIE
from ._video import YoutubeIE
@ -39,7 +39,6 @@ for _cls in [
YoutubeYtBeIE,
YoutubeYtUserIE,
YoutubeMusicSearchURLIE,
YoutubeSearchDateIE,
YoutubeSearchIE,
YoutubeSearchURLIE,
YoutubePlaylistIE,

View File

@ -28,21 +28,6 @@ class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
}]
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
_SEARCH_KEY = 'ytsearchdate'
IE_DESC = 'YouTube search, newest videos first'
_SEARCH_PARAMS = 'CAISAhAB8AEB' # Videos only, sorted by date
_TESTS = [{
'url': 'ytsearchdate5:youtube-dl test video',
'playlist_count': 5,
'info_dict': {
'id': 'youtube-dl test video',
'title': 'youtube-dl test video',
},
}]
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
IE_DESC = 'YouTube search URLs with sorting and filter support'
IE_NAME = YoutubeSearchIE.IE_NAME + '_url'