From 98036ccd4fd9a6b15e025bbc164e27286e342be7 Mon Sep 17 00:00:00 2001 From: sepro Date: Mon, 22 Jun 2026 00:48:47 +0200 Subject: [PATCH] [cleanup] Fix invalid info dict fields (#17007) Authored by: seproDev --- yt_dlp/extractor/adobetv.py | 2 +- yt_dlp/extractor/afreecatv.py | 3 ++- yt_dlp/extractor/alura.py | 2 +- yt_dlp/extractor/audius.py | 4 +++- yt_dlp/extractor/bilibili.py | 4 +++- yt_dlp/extractor/err.py | 4 ++-- yt_dlp/extractor/ichinanalive.py | 6 +++--- yt_dlp/extractor/idagio.py | 4 ++-- yt_dlp/extractor/itv.py | 3 ++- yt_dlp/extractor/lecturio.py | 3 +-- yt_dlp/extractor/neteasemusic.py | 2 +- yt_dlp/extractor/peertube.py | 13 +++++++------ yt_dlp/extractor/qqmusic.py | 5 +---- yt_dlp/extractor/rcti.py | 14 ++++++++++---- yt_dlp/extractor/redgifs.py | 4 ++-- yt_dlp/extractor/rule34video.py | 2 +- yt_dlp/extractor/sovietscloset.py | 4 ++-- yt_dlp/extractor/spreaker.py | 2 +- yt_dlp/extractor/teamtreehouse.py | 2 +- yt_dlp/extractor/weibo.py | 2 +- yt_dlp/extractor/ximalaya.py | 10 +++++----- 21 files changed, 52 insertions(+), 43 deletions(-) diff --git a/yt_dlp/extractor/adobetv.py b/yt_dlp/extractor/adobetv.py index 7a29a349c1..3d88d4f1ac 100644 --- a/yt_dlp/extractor/adobetv.py +++ b/yt_dlp/extractor/adobetv.py @@ -70,7 +70,7 @@ class AdobeTVVideoIE(InfoExtractor): for fmt in fmts: fmt.update(traverse_obj(source, { 'duration': ('duration', {float_or_none(scale=1000)}), - 'filesize': ('kilobytes', {float_or_none(invscale=1000)}), + 'filesize': ('kilobytes', {int_or_none(invscale=1000)}), 'format_id': (('format', 'label'), {str}, all, {lambda x: join_nonempty(*x)}), 'height': ('height', {int_or_none}), 'tbr': ('bitrate', {int_or_none}), diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py index d8a025976e..56bfa36529 100644 --- a/yt_dlp/extractor/afreecatv.py +++ b/yt_dlp/extractor/afreecatv.py @@ -13,6 +13,7 @@ from ..utils import ( int_or_none, orderedSet, parse_iso8601, + str_or_none, url_or_none, urlencode_postdata, urljoin, @@ -465,7 +466,7 @@ class AfreecaTVUserIE(AfreecaTVBaseIE): for item in info['data']: yield self.url_result( f'https://vod.sooplive.com/player/{item["title_no"]}/', - AfreecaTVIE, item['title_no'], item.get('title_name')) + AfreecaTVIE, str_or_none(item['title_no']), item.get('title_name')) def _real_extract(self, url): user_id, user_type = self._match_valid_url(url).group('id', 'slug_type') diff --git a/yt_dlp/extractor/alura.py b/yt_dlp/extractor/alura.py index ce03a4265b..957a5dc0ea 100644 --- a/yt_dlp/extractor/alura.py +++ b/yt_dlp/extractor/alura.py @@ -155,7 +155,7 @@ class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE '_type': 'url_transparent', 'id': self._match_id(video_url), 'url': video_url, - 'id_key': self.ie_key(), + 'ie_key': self.ie_key(), 'chapter': chapter, 'chapter_number': chapter_number, } diff --git a/yt_dlp/extractor/audius.py b/yt_dlp/extractor/audius.py index c611c6e081..830975a27d 100644 --- a/yt_dlp/extractor/audius.py +++ b/yt_dlp/extractor/audius.py @@ -2,7 +2,7 @@ import random import urllib.parse from .common import InfoExtractor -from ..utils import ExtractorError, str_or_none, try_get +from ..utils import ExtractorError, str_or_none, try_get, url_or_none class AudiusBaseIE(InfoExtractor): @@ -143,6 +143,8 @@ class AudiusIE(AudiusBaseIE): thumbnails = [] if isinstance(artworks_data, dict): for quality_key, thumbnail_url in artworks_data.items(): + if not url_or_none(thumbnail_url): + continue thumbnail = { 'url': thumbnail_url, } diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 207d48241f..4e11fc63d1 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -1398,7 +1398,9 @@ class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE): def get_entries(page_data): # data is None when the playlist is empty for entry in page_data.get('data') or []: - yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id']) + yield self.url_result( + f'https://www.bilibili.com/audio/au{entry["id"]}', + BilibiliAudioIE, str_or_none(entry['id'])) _, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries) return self.playlist_result(paged_list, playlist_id) diff --git a/yt_dlp/extractor/err.py b/yt_dlp/extractor/err.py index c931fa82fd..64a7e28f04 100644 --- a/yt_dlp/extractor/err.py +++ b/yt_dlp/extractor/err.py @@ -235,7 +235,7 @@ class ERRArhiivIE(InfoExtractor): 'title': 'Kontsertpalad: 255 | L. Beethoveni sonaat c-moll, "Pateetiline"', 'description': 'md5:a70f4ff23c3618f3be63f704bccef063', 'series': 'Kontsertpalad', - 'episode_id': 255, + 'episode_id': '255', 'timestamp': 1666152162, 'upload_date': '20221019', 'release_year': 1970, @@ -282,7 +282,7 @@ class ERRArhiivIE(InfoExtractor): 'title': ('title', {str}), 'series': ('seriesTitle', {str}, filter), 'series_id': ('seriesId', {str}, filter), - 'episode_id': ('episode', {int_or_none}), + 'episode_id': ('episode', {str}, filter), 'description': ('synopsis', {str}, filter), 'timestamp': ('uploadDate', {parse_iso8601}), 'modified_timestamp': ('dateModified', {parse_iso8601}), diff --git a/yt_dlp/extractor/ichinanalive.py b/yt_dlp/extractor/ichinanalive.py index 475d33593d..af1a002931 100644 --- a/yt_dlp/extractor/ichinanalive.py +++ b/yt_dlp/extractor/ichinanalive.py @@ -125,20 +125,20 @@ class IchinanaLiveClipIE(InfoExtractor): formats = [] if view_data.get('videoURL'): formats.append({ - 'id': 'video', + 'format_id': 'video', 'url': view_data['videoURL'], 'quality': -1, }) if view_data.get('transcodeURL'): formats.append({ - 'id': 'transcode', + 'format_id': 'transcode', 'url': view_data['transcodeURL'], 'quality': -1, }) if view_data.get('srcVideoURL'): # highest quality formats.append({ - 'id': 'srcVideo', + 'format_id': 'srcVideo', 'url': view_data['srcVideoURL'], 'quality': 1, }) diff --git a/yt_dlp/extractor/idagio.py b/yt_dlp/extractor/idagio.py index a99c559065..16c445edb8 100644 --- a/yt_dlp/extractor/idagio.py +++ b/yt_dlp/extractor/idagio.py @@ -1,5 +1,5 @@ from .common import InfoExtractor -from ..utils import int_or_none, unified_timestamp, url_or_none +from ..utils import int_or_none, str_or_none, unified_timestamp, url_or_none from ..utils.traversal import traverse_obj @@ -83,7 +83,7 @@ class IdagioPlaylistBaseIE(InfoExtractor): recording_id = track_data['recording']['id'] yield self.url_result( f'https://app.idagio.com/recordings/{recording_id}?trackId={track_id}', - ie=IdagioTrackIE, video_id=track_id) + ie=IdagioTrackIE, video_id=str_or_none(track_id)) def _real_extract(self, url): playlist_id = self._match_id(url) diff --git a/yt_dlp/extractor/itv.py b/yt_dlp/extractor/itv.py index 1f4020847c..65e6443729 100644 --- a/yt_dlp/extractor/itv.py +++ b/yt_dlp/extractor/itv.py @@ -13,6 +13,7 @@ from ..utils import ( merge_dicts, parse_duration, smuggle_url, + str_or_none, try_get, url_basename, url_or_none, @@ -247,7 +248,7 @@ class ITVBTCCIE(InfoExtractor): for video in json_map: if not any(traverse_obj(video, ('data', attr)) == 'Brightcove' for attr in ('name', 'type')): continue - video_id = video['data']['id'] + video_id = str_or_none(video['data']['id']) account_id = video['data']['accountId'] player_id = video['data']['playerId'] entries.append(self.url_result( diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py index 4cfb872241..313bb5f702 100644 --- a/yt_dlp/extractor/lecturio.py +++ b/yt_dlp/extractor/lecturio.py @@ -5,7 +5,6 @@ from ..utils import ( ExtractorError, clean_html, determine_ext, - float_or_none, int_or_none, str_or_none, url_or_none, @@ -130,7 +129,7 @@ class LecturioIE(LecturioBaseIE): f = { 'url': file_url, 'format_id': label, - 'filesize': float_or_none(filesize, invscale=1000), + 'filesize': int_or_none(filesize, invscale=1000), } if label: mobj = re.match(r'(\d+)p\s*\(([^)]+)\)', label) diff --git a/yt_dlp/extractor/neteasemusic.py b/yt_dlp/extractor/neteasemusic.py index e3b5f47936..0617187800 100644 --- a/yt_dlp/extractor/neteasemusic.py +++ b/yt_dlp/extractor/neteasemusic.py @@ -658,7 +658,7 @@ class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE): entries.extend(self.url_result( f'http://music.163.com/#/program?id={program["id"]}', NetEaseMusicProgramIE, - program['id'], program.get('name')) for program in info['programs']) + str_or_none(program['id']), program.get('name')) for program in info['programs']) if not metainfo: metainfo = traverse_obj(info, ('programs', 0, 'radio', { 'title': ('name', {str}), diff --git a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py index 40312cf7dc..34b9bc55a7 100644 --- a/yt_dlp/extractor/peertube.py +++ b/yt_dlp/extractor/peertube.py @@ -1686,7 +1686,7 @@ class PeerTubePlaylistIE(InfoExtractor): 'description': 'Diversas palestras do Richard Stallman no Brasil.', 'title': 'Richard Stallman no Brasil', 'channel': 'debianbrazilteam', - 'channel_id': 1522, + 'channel_id': '1522', 'thumbnail': r're:https?://peertube\.debian\.social/lazy-static/thumbnails/.+\.jpg', 'timestamp': 1599676222, 'upload_date': '20200909', @@ -1698,7 +1698,7 @@ class PeerTubePlaylistIE(InfoExtractor): 'id': 'chocobozzz', 'title': 'chocobozzz', 'channel': 'chocobozzz', - 'channel_id': 37855, + 'channel_id': '37855', 'thumbnail': '', 'timestamp': 1553874564, 'upload_date': '20190329', @@ -1710,7 +1710,7 @@ class PeerTubePlaylistIE(InfoExtractor): 'id': 'bf54d359-cfad-4935-9d45-9d6be93f63e8', 'title': 'Les vidéos de Framasoft', 'channel': 'framasoft', - 'channel_id': 3, + 'channel_id': '3', 'thumbnail': '', 'timestamp': 1519917377, 'upload_date': '20180301', @@ -1720,9 +1720,10 @@ class PeerTubePlaylistIE(InfoExtractor): 'url': 'https://peertube2.cpy.re/c/blender_open_movies@video.blender.org/videos', 'info_dict': { 'id': 'blender_open_movies@video.blender.org', - 'title': 'Official Blender Open Movies', + 'title': 'Blender Open Movies', + 'description': 'md5:3dfabff00857fe3896fb222eb2cb1b80', 'channel': 'blender', - 'channel_id': 1926, + 'channel_id': '1926', 'thumbnail': '', 'timestamp': 1540472902, 'upload_date': '20181025', @@ -1756,7 +1757,7 @@ class PeerTubePlaylistIE(InfoExtractor): playlist_description = info.get('description') playlist_timestamp = unified_timestamp(info.get('createdAt')) channel = try_get(info, lambda x: x['ownerAccount']['name']) or info.get('displayName') - channel_id = try_get(info, lambda x: x['ownerAccount']['id']) or info.get('id') + channel_id = str_or_none(try_get(info, lambda x: x['ownerAccount']['id']) or info.get('id')) thumbnail = format_field(info, 'thumbnailPath', f'https://{host}%s') entries = OnDemandPagedList(functools.partial( diff --git a/yt_dlp/extractor/qqmusic.py b/yt_dlp/extractor/qqmusic.py index 56a8e7300a..f631dc983c 100644 --- a/yt_dlp/extractor/qqmusic.py +++ b/yt_dlp/extractor/qqmusic.py @@ -79,7 +79,6 @@ class QQMusicIE(QQMusicBaseIE): 'creators': ['ケーキ姫', 'JUMA'], 'genres': ['Pop'], 'description': 'md5:b5261f3d595657ae561e9e6aee7eb7d9', - 'size': 4501244, 'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])', 'subtitles': 'count:1', }, @@ -113,7 +112,6 @@ class QQMusicIE(QQMusicBaseIE): 'creators': ['李季美'], 'genres': [], 'description': 'md5:fc711212aa623b28534954dc4bd67385', - 'size': 3535730, 'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])', }, }, { @@ -191,7 +189,7 @@ class QQMusicIE(QQMusicBaseIE): 'url': urljoin('https://dl.stream.qqmusic.qq.com', media_info['purl']), 'format': format_id, 'format_id': format_id, - 'size': traverse_obj(info_data, ('file', f'size_{format_id}', {int_or_none})), + 'filesize': traverse_obj(info_data, ('file', f'size_{format_id}', {int_or_none})), 'quality': format_info.get('preference'), 'abr': format_info.get('abr'), 'ext': format_info.get('ext'), @@ -261,7 +259,6 @@ class QQMusicSingerIE(QQMusicBaseIE): 'creators': ['桃几OvO'], 'genres': ['Pop'], 'description': 'md5:4296005a04edcb5cdbe0889d5055a7ae', - 'size': 3970822, 'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])', }, }], diff --git a/yt_dlp/extractor/rcti.py b/yt_dlp/extractor/rcti.py index c8e57e2aba..cc693fec13 100644 --- a/yt_dlp/extractor/rcti.py +++ b/yt_dlp/extractor/rcti.py @@ -7,6 +7,7 @@ from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, dict_get, + str_or_none, strip_or_none, traverse_obj, try_get, @@ -80,7 +81,11 @@ class RCTIPlusIE(RCTIPlusBaseIE): 'timestamp': 1587561540, 'upload_date': '20200422', 'series': 'iNews Malam', - 'channel': 'INews', + 'channel': 'INEWS', + 'channel_id': '4', + 'thumbnail': 'https://static.rctiplus.id/media/2000/files/fta_rcti/Portrait/iNews_Malam/inews_malam_768x1152.jpg', + 'categories': ['Hard News'], + 'live_status': 'not_live', }, }, { # Missed event/replay 'url': 'https://www.rctiplus.com/missed-event/2507/mou-signing-ceremony-27-juli-2021-1400-wib', @@ -117,12 +122,13 @@ class RCTIPlusIE(RCTIPlusBaseIE): 'url': 'https://www.rctiplus.com/live-event/1/rcti', 'info_dict': { 'id': 'v_lt1', - 'title': 'RCTI', + 'title': r're:RCTI \d{4}-\d{2}-\d{2} \d{2}:\d{2}', 'display_id': 'rcti', 'ext': 'mp4', 'timestamp': 1546344000, 'upload_date': '20190101', - 'is_live': True, + 'thumbnail': 'https://static.rctiplus.id/media/2000/files/fta_rcti/Channel_Logo/por-RCTI.png', + 'live_status': 'is_live', }, 'params': { 'skip_download': True, @@ -207,7 +213,7 @@ class RCTIPlusIE(RCTIPlusBaseIE): 'season_number': video_meta.get('season'), 'episode_number': video_meta.get('episode'), 'channel': video_json.get('tv_name'), - 'channel_id': video_json.get('tv_id'), + 'channel_id': str_or_none(video_json.get('tv_id')), 'formats': formats, 'thumbnails': thumbnails, 'is_live': video_type == 'live-event' and not is_upcoming, diff --git a/yt_dlp/extractor/redgifs.py b/yt_dlp/extractor/redgifs.py index cd3cd323e5..c7787c949a 100644 --- a/yt_dlp/extractor/redgifs.py +++ b/yt_dlp/extractor/redgifs.py @@ -30,7 +30,7 @@ class RedGifsBaseIE(InfoExtractor): quality = qualities(tuple(self._FORMATS.keys())) orig_height = int_or_none(gif_data.get('height')) - aspect_ratio = try_get(gif_data, lambda x: orig_height / x['width']) + aspect_ratio = try_get(gif_data, lambda x: x['width'] / orig_height) formats = [] for format_id, height in self._FORMATS.items(): @@ -41,7 +41,7 @@ class RedGifsBaseIE(InfoExtractor): formats.append({ 'url': video_url, 'format_id': format_id, - 'width': height * aspect_ratio if aspect_ratio else None, + 'width': int_or_none(height * aspect_ratio) if aspect_ratio else None, 'height': height, 'quality': quality(format_id), }) diff --git a/yt_dlp/extractor/rule34video.py b/yt_dlp/extractor/rule34video.py index 3630f5e40c..22c94ff1fe 100644 --- a/yt_dlp/extractor/rule34video.py +++ b/yt_dlp/extractor/rule34video.py @@ -77,7 +77,7 @@ class Rule34VideoIE(InfoExtractor): formats.append({ 'url': url, 'ext': ext.lower(), - 'quality': quality, + 'quality': int_or_none(quality), }) categories, creators, uploader, uploader_url = [None] * 4 diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py index d35214aa84..499402f16b 100644 --- a/yt_dlp/extractor/sovietscloset.py +++ b/yt_dlp/extractor/sovietscloset.py @@ -1,6 +1,6 @@ from .bunnycdn import BunnyCdnIE from .common import InfoExtractor -from ..utils import make_archive_id, try_get, unified_timestamp +from ..utils import make_archive_id, str_or_none, try_get, unified_timestamp class SovietsClosetBaseIE(InfoExtractor): @@ -182,7 +182,7 @@ class SovietsClosetPlaylistIE(SovietsClosetBaseIE): entries = [{ **self.url_result(f'https://sovietscloset.com/video/{stream["id"]}', ie=SovietsClosetIE.ie_key()), **self.video_meta( - video_id=stream['id'], game_name=game['name'], category_name=category.get('name'), + video_id=str_or_none(stream['id']), game_name=game['name'], category_name=category.get('name'), episode_number=i + 1, stream_date=stream.get('date')), } for i, stream in enumerate(category['streams'])] diff --git a/yt_dlp/extractor/spreaker.py b/yt_dlp/extractor/spreaker.py index c64c2fcd2e..ef8a5af88c 100644 --- a/yt_dlp/extractor/spreaker.py +++ b/yt_dlp/extractor/spreaker.py @@ -39,7 +39,7 @@ def _extract_episode(data, episode_id=None): return { 'id': str(episode_id or data['episode_id']), 'url': download_url, - 'display_id': data.get('permalink'), + 'display_id': str_or_none(data.get('permalink')), 'title': title, 'description': data.get('description'), 'timestamp': unified_timestamp(data.get('published_at')), diff --git a/yt_dlp/extractor/teamtreehouse.py b/yt_dlp/extractor/teamtreehouse.py index ba25cdcf65..f4e230f522 100644 --- a/yt_dlp/extractor/teamtreehouse.py +++ b/yt_dlp/extractor/teamtreehouse.py @@ -105,7 +105,7 @@ class TeamTreeHouseIE(InfoExtractor): '_type': 'url_transparent', 'id': self._match_id(page_url), 'url': page_url, - 'id_key': self.ie_key(), + 'ie_key': self.ie_key(), } if extract_info: entry.update(extract_info) diff --git a/yt_dlp/extractor/weibo.py b/yt_dlp/extractor/weibo.py index dc8a2cd753..c02ce8e028 100644 --- a/yt_dlp/extractor/weibo.py +++ b/yt_dlp/extractor/weibo.py @@ -94,7 +94,7 @@ class WeiboBaseIE(InfoExtractor): **parse_resolution(resolution), **traverse_obj(media_info, ( 'video_details', lambda _, v: v['label'].startswith(format_id), { - 'size': ('size', {int_or_none}), + 'filesize': ('size', {int_or_none}), 'tbr': ('bitrate', {int_or_none}), }, ), get_all=False), diff --git a/yt_dlp/extractor/ximalaya.py b/yt_dlp/extractor/ximalaya.py index 02bf6a7beb..5e1b4c7804 100644 --- a/yt_dlp/extractor/ximalaya.py +++ b/yt_dlp/extractor/ximalaya.py @@ -36,11 +36,11 @@ class XimalayaIE(XimalayaBaseIE): 'thumbnail': r're:^https?://.*\.jpg', 'thumbnails': [ { - 'name': 'cover_url', + 'id': 'cover_url', 'url': r're:^https?://.*\.jpg', }, { - 'name': 'cover_url_142', + 'id': 'cover_url_142', 'url': r're:^https?://.*\.jpg', 'width': 180, 'height': 180, @@ -65,11 +65,11 @@ class XimalayaIE(XimalayaBaseIE): 'thumbnail': r're:^https?://.*\.jpg', 'thumbnails': [ { - 'name': 'cover_url', + 'id': 'cover_url', 'url': r're:^https?://.*\.jpg', }, { - 'name': 'cover_url_142', + 'id': 'cover_url_142', 'url': r're:^https?://.*\.jpg', 'width': 180, 'height': 180, @@ -172,7 +172,7 @@ class XimalayaIE(XimalayaBaseIE): for k in audio_info: # cover pics kyes like: cover_url', 'cover_url_142' if k.startswith('cover_url'): - thumbnail = {'name': k, 'url': audio_info[k]} + thumbnail = {'id': k, 'url': audio_info[k]} if k == 'cover_url_142': thumbnail['width'] = 180 thumbnail['height'] = 180