diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 53e0b4eaf8..32e80df952 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -16,7 +16,6 @@ from yt_dlp.extractor import ( CeskaTelevizeIE, DailymotionIE, DemocracynowIE, - LyndaIE, RaiPlayIE, RTVEALaCartaIE, TedTalkIE, @@ -250,20 +249,6 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles): self.assertFalse(subtitles) -@is_download_test -@unittest.skip('IE broken') -class TestLyndaSubtitles(BaseTestSubtitles): - url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html' - IE = LyndaIE - - def test_allsubtitles(self): - self.DL.params['writesubtitles'] = True - self.DL.params['allsubtitles'] = True - subtitles = self.getSubtitles() - self.assertEqual(set(subtitles.keys()), {'en'}) - self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7') - - @is_download_test @unittest.skip('IE broken') class TestNPOSubtitles(BaseTestSubtitles): diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 1a29a93eda..f47a36099a 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -54,7 +54,6 @@ from .agora import ( WyborczaPodcastIE, WyborczaVideoIE, ) -from .airtv import AirTVIE from .aitube import AitubeKZVideoIE from .alibaba import AlibabaIE from .aliexpress import AliExpressLiveIE @@ -65,10 +64,6 @@ from .allstar import ( AllstarProfileIE, ) from .alphaporno import AlphaPornoIE -from .alsace20tv import ( - Alsace20TVEmbedIE, - Alsace20TVIE, -) from .altcensored import ( AltCensoredChannelIE, AltCensoredIE, @@ -93,7 +88,6 @@ from .americastestkitchen import ( AmericasTestKitchenIE, AmericasTestKitchenSeasonIE, ) -from .anchorfm import AnchorFMEpisodeIE from .angel import AngelIE from .antenna import ( Ant1NewsGrArticleIE, @@ -106,10 +100,6 @@ from .apa import APAIE from .aparat import AparatIE from .appleconnect import AppleConnectIE from .applepodcasts import ApplePodcastsIE -from .appletrailers import ( - AppleTrailersIE, - AppleTrailersSectionIE, -) from .archiveorg import ( ArchiveOrgIE, YoutubeWebArchiveIE, @@ -140,7 +130,6 @@ from .asobichannel import ( from .asobistage import AsobiStageIE from .atresplayer import AtresPlayerIE from .atscaleconf import AtScaleConfEventIE -from .atvat import ATVAtIE from .audimedia import AudiMediaIE from .audioboom import AudioBoomIE from .audiodraft import ( @@ -157,13 +146,6 @@ from .audius import ( AudiusProfileIE, AudiusTrackIE, ) -from .awaan import ( - AWAANIE, - AWAANLiveIE, - AWAANSeasonIE, - AWAANVideoIE, -) -from .axs import AxsIE from .azmedien import AZMedienIE from .baidu import BaiduVideoIE from .banbye import ( @@ -190,10 +172,6 @@ from .bbc import ( BBCCoUkPlaylistIE, ) from .beacon import BeaconTvIE -from .beatbump import ( - BeatBumpPlaylistIE, - BeatBumpVideoIE, -) from .beatport import BeatportIE from .beeg import BeegIE from .behindkink import BehindKinkIE @@ -210,7 +188,6 @@ from .bibeltv import ( BibelTVSeriesIE, BibelTVVideoIE, ) -from .bigflix import BigflixIE from .bigo import BigoIE from .bild import BildIE from .bilibili import ( @@ -255,7 +232,6 @@ from .blerp import BlerpIE from .blogger import BloggerIE from .bloomberg import BloombergIE from .bluesky import BlueskyIE -from .bokecc import BokeCCIE from .bongacams import BongaCamsIE from .boosty import BoostyIE from .bostonglobe import BostonGlobeIE @@ -288,14 +264,8 @@ from .businessinsider import BusinessInsiderIE from .buzzfeed import BuzzFeedIE from .byutv import BYUtvIE from .c56 import C56IE -from .caffeinetv import CaffeineTVIE -from .callin import CallinIE from .caltrans import CaltransIE from .cam4 import CAM4IE -from .camdemy import ( - CamdemyFolderIE, - CamdemyIE, -) from .camfm import ( CamFMEpisodeIE, CamFMShowIE, @@ -371,7 +341,6 @@ from .ciscolive import ( from .ciscowebex import CiscoWebexIE from .cjsw import CJSWIE from .clipchamp import ClipchampIE -from .clippit import ClippitIE from .cliprs import ClipRsIE from .closertotruth import CloserToTruthIE from .cloudflarestream import CloudflareStreamIE @@ -395,7 +364,6 @@ from .commonprotocols import ( ViewSourceIE, ) from .condenast import CondeNastIE -from .contv import CONtvIE from .corus import CorusIE from .coub import CoubIE from .cozytv import CozyTVIE @@ -510,7 +478,6 @@ from .dplay import ( ) from .drbonanza import DRBonanzaIE from .dreisat import DreiSatIE -from .drooble import DroobleIE from .dropbox import DropboxIE from .dropout import ( DropoutIE, @@ -525,10 +492,6 @@ from .drtv import ( DRTVSeriesIE, ) from .dtube import DTubeIE -from .duboku import ( - DubokuIE, - DubokuPlaylistIE, -) from .dumpert import DumpertIE from .duoplay import DuoplayIE from .dvtv import DVTVIE @@ -546,8 +509,6 @@ from .eggs import ( EggsArtistIE, EggsIE, ) -from .eighttracks import EightTracksIE -from .eitb import EitbIE from .elementorembed import ElementorEmbedIE from .elonet import ElonetIE from .elpais import ElPaisIE @@ -591,7 +552,6 @@ from .europeantour import EuropeanTourIE from .eurosport import EurosportIE from .euscreen import EUScreenIE from .expressen import ExpressenIE -from .eyedotv import EyedoTVIE from .facebook import ( FacebookAdsIE, FacebookIE, @@ -655,7 +615,6 @@ from .foxnews import ( from .foxsports import FoxSportsIE from .fptplay import FptplayIE from .francaisfacile import FrancaisFacileIE -from .franceinter import FranceInterIE from .francetv import ( FranceTVIE, FranceTVInfoIE, @@ -672,14 +631,10 @@ from .frontendmasters import ( FrontendMastersIE, FrontendMastersLessonIE, ) -from .fujitv import FujiTVFODPlus7IE from .funk import FunkIE from .funker530 import Funker530IE from .fuyintv import FuyinTVIE -from .gab import ( - GabIE, - GabTVIE, -) +from .gab import GabIE from .gaia import GaiaIE from .gamedevtv import GameDevTVDashboardIE from .gamejolt import ( @@ -743,16 +698,10 @@ from .googledrive import ( GoogleDriveFolderIE, GoogleDriveIE, ) -from .googlepodcasts import ( - GooglePodcastsFeedIE, - GooglePodcastsIE, -) from .googlesearch import GoogleSearchIE from .goplay import GoPlayIE from .gopro import GoProIE -from .goshgay import GoshgayIE from .gotostage import GoToStageIE -from .gputechconf import GPUTechConfIE from .graspop import GraspopIE from .gronkh import ( GronkhFeedIE, @@ -769,7 +718,6 @@ from .hgtv import HGTVComShowIE from .hidive import HiDiveIE from .historicfilms import HistoricFilmsIE from .hitrecord import HitRecordIE -from .hketv import HKETVIE from .hollywoodreporter import ( HollywoodReporterIE, HollywoodReporterPlaylistIE, @@ -818,7 +766,6 @@ from .idagio import ( IdagioRecordingIE, IdagioTrackIE, ) -from .idolplus import IdolPlusIE from .ign import ( IGNIE, IGNArticleIE, @@ -851,7 +798,6 @@ from .instagram import ( InstagramUserIE, ) from .internazionale import InternazionaleIE -from .internetvideoarchive import InternetVideoArchiveIE from .iprima import ( IPrimaCNNIE, IPrimaIE, @@ -886,7 +832,6 @@ from .iwara import ( IwaraUserIE, ) from .ixigua import IxiguaIE -from .izlesene import IzleseneIE from .jamendo import ( JamendoAlbumIE, JamendoIE, @@ -939,11 +884,9 @@ from .kika import ( KikaIE, KikaPlaylistIE, ) -from .kinja import KinjaEmbedIE from .kinopoisk import KinoPoiskIE from .kommunetv import KommunetvIE from .kompas import KompasVideoIE -from .koo import KooIE from .krasview import KrasViewIE from .kth import KTHIE from .ku6 import Ku6IE @@ -991,7 +934,6 @@ from .lecturio import ( from .leeco import ( LeIE, LePlaylistIE, - LetvCloudIE, ) from .lefigaro import ( LeFigaroVideoEmbedIE, @@ -1020,11 +962,6 @@ from .liputan6 import Liputan6IE from .listennotes import ListenNotesIE from .litv import LiTVIE from .livejournal import LiveJournalIE -from .livestream import ( - LivestreamIE, - LivestreamOriginalIE, - LivestreamShortenerIE, -) from .livestreamfails import LivestreamfailsIE from .lnk import LnkIE from .locipo import ( @@ -1048,10 +985,6 @@ from .lsm import ( LSMReplayIE, ) from .lumni import LumniIE -from .lynda import ( - LyndaCourseIE, - LyndaIE, -) from .maariv import MaarivIE from .magellantv import MagellanTVIE from .magentamusik import MagentaMusikIE @@ -1117,13 +1050,11 @@ from .microsoftembed import ( MicrosoftLearnSessionIE, MicrosoftMediusIE, ) -from .microsoftstream import MicrosoftStreamIE from .minds import ( MindsChannelIE, MindsGroupIE, MindsIE, ) -from .minoto import MinotoIE from .mir24tv import Mir24TvIE from .mirrativ import ( MirrativIE, @@ -1157,18 +1088,9 @@ from .mlb import ( from .mlssoccer import MLSSoccerIE from .mocha import MochaVideoIE from .mojevideo import MojevideoIE -from .mojvideo import MojvideoIE from .monstercat import MonstercatIE -from .motherless import ( - MotherlessGalleryIE, - MotherlessGroupIE, - MotherlessIE, - MotherlessUploaderIE, -) from .motorsport import MotorsportIE from .moviepilot import MoviepilotIE -from .moview import MoviewPlayIE -from .moviezine import MoviezineIE from .movingimage import MovingImageIE from .msn import MSNIE from .mtv import MTVIE @@ -1179,12 +1101,6 @@ from .murrtube import ( ) from .museai import MuseAIIE from .musescore import MuseScoreIE -from .musicdex import ( - MusicdexAlbumIE, - MusicdexArtistIE, - MusicdexPlaylistIE, - MusicdexSongIE, -) from .mux import MuxIE from .mx3 import ( Mx3IE, @@ -1212,14 +1128,10 @@ from .nate import ( NateIE, NateProgramIE, ) -from .nationalgeographic import ( - NationalGeographicTVIE, - NationalGeographicVideoIE, -) +from .nationalgeographic import NationalGeographicTVIE from .naver import ( NaverIE, NaverLiveIE, - NaverNowIE, ) from .nba import ( NBAIE, @@ -1257,7 +1169,6 @@ from .nebula import ( NebulaSubscriptionsIE, ) from .nekohacker import NekoHackerIE -from .nerdcubed import NerdCubedFeedIE from .nest import ( NestClipIE, NestIE, @@ -1275,11 +1186,6 @@ from .neteasemusic import ( NetEaseMusicProgramIE, NetEaseMusicSingerIE, ) -from .netverse import ( - NetverseIE, - NetversePlaylistIE, - NetverseSearchIE, -) from .netzkino import NetzkinoIE from .newgrounds import ( NewgroundsIE, @@ -1389,11 +1295,6 @@ from .ntvcojp import NTVCoJpCUIE from .ntvde import NTVDeIE from .ntvru import NTVRuIE from .nubilesporn import NubilesPornIE -from .nuum import ( - NuumLiveIE, - NuumMediaIE, - NuumTabIE, -) from .nuvid import NuvidIE from .nytimes import ( NYTimesArticleIE, @@ -1426,7 +1327,6 @@ from .onet import ( OnetMVPIE, OnetPlIE, ) -from .onionstudios import OnionStudiosIE from .onsen import OnsenIE from .opencast import ( OpencastIE, @@ -1437,7 +1337,6 @@ from .openrec import ( OpenRecIE, OpenRecMovieIE, ) -from .ora import OraTVIE from .orf import ( ORFIPTVIE, ORFONIE, @@ -1511,26 +1410,18 @@ from .pinterest import ( PinterestCollectionIE, PinterestIE, ) -from .piramidetv import ( - PiramideTVChannelIE, - PiramideTVIE, -) -from .planetmarathi import PlanetMarathiIE from .platzi import ( PlatziCourseIE, PlatziIE, ) from .playerfm import PlayerFmIE -from .playplustv import PlayPlusTVIE from .playsuisse import PlaySuisseIE from .playtvak import PlaytvakIE -from .playwire import PlaywireIE from .pluralsight import ( PluralsightCourseIE, PluralsightIE, ) from .plutotv import PlutoTVIE -from .plvideo import PlVideoIE from .plyr import PlyrEmbedIE from .podbayfm import ( PodbayFMChannelIE, @@ -1574,7 +1465,6 @@ from .prankcast import ( from .premiershiprugby import PremiershipRugbyIE from .presstv import PressTVIE from .projectveritas import ProjectVeritasIE -from .prosiebensat1 import ProSiebenSat1IE from .prx import ( PRXAccountIE, PRXSeriesIE, @@ -1586,7 +1476,6 @@ from .puhutv import ( PuhuTVIE, PuhuTVSerieIE, ) -from .puls4 import Puls4IE from .pyvideo import PyvideoIE from .qdance import QDanceIE from .qingting import QingTingIE @@ -1610,10 +1499,6 @@ from .radiocanada import ( RadioCanadaAudioVideoIE, RadioCanadaIE, ) -from .radiocomercial import ( - RadioComercialIE, - RadioComercialPlaylistIE, -) from .radiode import RadioDeIE from .radiofrance import ( FranceCultureIE, @@ -1678,7 +1563,6 @@ from .redbulltv import ( RedBullTVRrnContentIE, ) from .reddit import RedditIE -from .redge import RedCDNLivxIE from .redgifs import ( RedGifsIE, RedGifsSearchIE, @@ -1692,13 +1576,11 @@ from .rentv import ( from .restudy import RestudyIE from .reuters import ReutersIE from .reverbnation import ReverbNationIE -from .rheinmaintv import RheinMainTVIE from .ridehome import RideHomeIE from .rinsefm import ( RinseFMArtistPlaylistIE, RinseFMIE, ) -from .rmcdecouverte import RMCDecouverteIE from .rockstargames import RockstarGamesIE from .rokfin import ( RokfinChannelIE, @@ -1815,7 +1697,6 @@ from .senategov import ( SenateGovIE, SenateISVPIE, ) -from .sendtonews import SendtoNewsIE from .servus import ServusIE from .sevenplus import SevenPlusIE from .sexu import SexuIE @@ -1828,7 +1709,6 @@ from .shahid import ( ShahidShowIE, ) from .sharepoint import SharePointIE -from .sharevideos import ShareVideosEmbedIE from .shemaroome import ShemarooMeIE from .shiey import ShieyIE from .showroomlive import ShowRoomLiveIE @@ -1873,7 +1753,6 @@ from .smotrim import ( SmotrimPlaylistIE, ) from .snapchat import SnapchatSpotlightIE -from .snotr import SnotrIE from .softwhiteunderbelly import SoftWhiteUnderbellyIE from .sohu import ( SohuIE, @@ -1923,7 +1802,6 @@ from .spreaker import ( SpreakerIE, SpreakerShowIE, ) -from .springboardplatform import SpringboardPlatformIE from .sproutvideo import ( SproutVideoIE, VidsIoIE, @@ -1940,7 +1818,6 @@ from .stacommu import ( TheaterComplexTownVODIE, ) from .stageplus import StagePlusVODConcertIE -from .stanfordoc import StanfordOpenClassroomIE from .startrek import StarTrekIE from .startv import StarTVIE from .steam import ( @@ -1948,10 +1825,6 @@ from .steam import ( SteamCommunityIE, SteamIE, ) -from .stitcher import ( - StitcherIE, - StitcherShowIE, -) from .storyfire import ( StoryFireIE, StoryFireSeriesIE, @@ -1961,7 +1834,6 @@ from .streaks import StreaksIE from .streamable import StreamableIE from .streamcz import StreamCZIE from .streetvoice import StreetVoiceIE -from .stretchinternet import StretchInternetIE from .stripchat import StripchatIE from .stv import STVPlayerIE from .subsplash import ( @@ -1979,8 +1851,6 @@ from .svt import ( SVTPlayIE, SVTSeriesIE, ) -from .swearnet import SwearnetEpisodeIE -from .syvdk import SYVDKIE from .sztvhu import SztvHuIE from .tagesschau import TagesschauIE from .taptap import ( @@ -2039,10 +1909,6 @@ from .telequebec import ( ) from .teletask import TeleTaskIE from .telewebion import TelewebionIE -from .tempo import ( - IVXPlayerIE, - TempoIE, -) from .tencent import ( IflixEpisodeIE, IflixSeriesIE, @@ -2068,7 +1934,6 @@ from .theguardian import ( TheGuardianPodcastPlaylistIE, ) from .thehighwire import TheHighWireIE -from .theholetv import TheHoleTvIE from .theintercept import TheInterceptIE from .theplatform import ( ThePlatformFeedIE, @@ -2120,12 +1985,6 @@ from .toypics import ( ToypicsIE, ToypicsUserIE, ) -from .traileraddict import TrailerAddictIE -from .triller import ( - TrillerIE, - TrillerShortIE, - TrillerUserIE, -) from .trovo import ( TrovoChannelClipIE, TrovoChannelVodIE, @@ -2208,7 +2067,6 @@ from .tvplay import ( TVPlayHomeIE, TVPlayIE, ) -from .tvplayer import TVPlayerIE from .tvw import ( TvwIE, TvwNewsIE, @@ -2248,12 +2106,8 @@ from .udemy import ( UdemyIE, ) from .udn import UDNEmbedIE -from .ufctv import ( - UFCTVIE, - UFCArabiaIE, -) +from .ufctv import UFCTVIE from .ukcolumn import UkColumnIE -from .uktvplay import UKTVPlayIE from .uliza import ( UlizaPlayerIE, UlizaPortalIE, @@ -2283,7 +2137,6 @@ from .ustudio import ( UstudioEmbedIE, UstudioIE, ) -from .utreon import UtreonIE from .varzesh3 import Varzesh3IE from .vbox7 import Vbox7IE from .veo import VeoIE @@ -2308,20 +2161,7 @@ from .videocampus_sachsen import ( VideocampusSachsenIE, ViMPPlaylistIE, ) -from .videodetective import VideoDetectiveIE -from .videofyme import VideofyMeIE -from .videoken import ( - VideoKenCategoryIE, - VideoKenIE, - VideoKenPlayerIE, - VideoKenPlaylistIE, - VideoKenTopicIE, -) -from .videomore import ( - VideomoreIE, - VideomoreSeasonIE, - VideomoreVideoIE, -) +from .videoken import VideoKenPlayerIE from .videopress import VideoPressIE from .vidflex import VidflexIE from .vidio import ( @@ -2351,10 +2191,6 @@ from .vimeo import ( VimeoUserIE, VimeoWatchLaterIE, ) -from .vimm import ( - VimmIE, - VimmRecordingIE, -) from .viously import ViouslyIE from .viqeo import ViqeoIE from .visir import VisirIE @@ -2372,7 +2208,6 @@ from .vk import ( VKWallPostIE, ) from .vocaroo import VocarooIE -from .vodpl import VODPlIE from .vodplatform import VODPlatformIE from .voicy import ( VoicyChannelIE, @@ -2404,11 +2239,6 @@ from .vtv import ( VTVIE, VTVGoIE, ) -from .vuclip import VuClipIE -from .vvvvid import ( - VVVVIDIE, - VVVVIDShowIE, -) from .walla import WallaIE from .washingtonpost import ( WashingtonPostArticleIE, @@ -2418,7 +2248,6 @@ from .wat import WatIE from .wdr import ( WDRIE, WDRElefantIE, - WDRMobileIE, WDRPageIE, ) from .webcamerapl import WebcameraplIE @@ -2445,7 +2274,6 @@ from .weverse import ( WeverseMomentIE, ) from .wevidi import WeVidiIE -from .weyyak import WeyyakIE from .whowatch import WhoWatchIE from .whyp import WhypIE from .wikimedia import WikimediaIE @@ -2494,7 +2322,6 @@ from .ximalaya import ( from .xinpianchang import XinpianchangIE from .xminus import XMinusIE from .xnxx import XNXXIE -from .xstream import XstreamIE from .xvideos import ( XVideosIE, XVideosQuickiesIE, @@ -2618,10 +2445,6 @@ from .zdf import ( ZDFIE, ZDFChannelIE, ) -from .zee5 import ( - Zee5IE, - Zee5SeriesIE, -) from .zeenews import ZeeNewsIE from .zenporn import ZenPornIE from .zetland import ZetlandDKArticleIE diff --git a/yt_dlp/extractor/airtv.py b/yt_dlp/extractor/airtv.py deleted file mode 100644 index cee660dfcf..0000000000 --- a/yt_dlp/extractor/airtv.py +++ /dev/null @@ -1,96 +0,0 @@ -from .common import InfoExtractor -from .youtube import YoutubeIE -from ..utils import ( - determine_ext, - int_or_none, - mimetype2ext, - parse_iso8601, - traverse_obj, -) - - -class AirTVIE(InfoExtractor): - _VALID_URL = r'https?://www\.air\.tv/watch\?v=(?P\w+)' - _TESTS = [{ - # without youtube_id - 'url': 'https://www.air.tv/watch?v=W87jcWleSn2hXZN47zJZsQ', - 'info_dict': { - 'id': 'W87jcWleSn2hXZN47zJZsQ', - 'ext': 'mp4', - 'release_date': '20221003', - 'release_timestamp': 1664792603, - 'channel_id': 'vgfManQlRQKgoFQ8i8peFQ', - 'title': 'md5:c12d49ed367c3dadaa67659aff43494c', - 'upload_date': '20221003', - 'duration': 151, - 'view_count': int, - 'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg', - 'timestamp': 1664792603, - }, - }, { - # with youtube_id - 'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q', - 'info_dict': { - 'id': '2ZTqmpee-bQ', - 'ext': 'mp4', - 'comment_count': int, - 'tags': 'count:11', - 'channel_follower_count': int, - 'like_count': int, - 'uploader': 'Newsflare', - 'thumbnail': 'https://i.ytimg.com/vi_webp/2ZTqmpee-bQ/maxresdefault.webp', - 'availability': 'public', - 'title': 'Geese Chase Alligator Across Golf Course', - 'uploader_id': 'NewsflareBreaking', - 'channel_url': 'https://www.youtube.com/channel/UCzSSoloGEz10HALUAbYhngQ', - 'description': 'md5:99b21d9cea59330149efbd9706e208f5', - 'age_limit': 0, - 'channel_id': 'UCzSSoloGEz10HALUAbYhngQ', - 'uploader_url': 'http://www.youtube.com/user/NewsflareBreaking', - 'view_count': int, - 'categories': ['News & Politics'], - 'live_status': 'not_live', - 'playable_in_embed': True, - 'channel': 'Newsflare', - 'duration': 37, - 'upload_date': '20180511', - }, - }] - - def _get_formats_and_subtitle(self, json_data, video_id): - formats, subtitles = [], {} - for source in traverse_obj(json_data, 'sources', 'sources_desktop', ...): - ext = determine_ext(source.get('src'), mimetype2ext(source.get('type'))) - if ext == 'm3u8': - fmts, subs = self._extract_m3u8_formats_and_subtitles(source.get('src'), video_id) - formats.extend(fmts) - self._merge_subtitles(subs, target=subtitles) - else: - formats.append({'url': source.get('src'), 'ext': ext}) - return formats, subtitles - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - nextjs_json = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['initialState']['videos'][display_id] - if nextjs_json.get('youtube_id'): - return self.url_result( - f'https://www.youtube.com/watch?v={nextjs_json.get("youtube_id")}', YoutubeIE) - - formats, subtitles = self._get_formats_and_subtitle(nextjs_json, display_id) - return { - 'id': display_id, - 'title': nextjs_json.get('title') or self._html_search_meta('og:title', webpage), - 'formats': formats, - 'subtitles': subtitles, - 'description': nextjs_json.get('description') or None, - 'duration': int_or_none(nextjs_json.get('duration')), - 'thumbnails': [ - {'url': thumbnail} - for thumbnail in traverse_obj(nextjs_json, ('default_thumbnails', ...))], - 'channel_id': traverse_obj(nextjs_json, 'channel', 'channel_slug'), - 'timestamp': parse_iso8601(nextjs_json.get('created')), - 'release_timestamp': parse_iso8601(nextjs_json.get('published')), - 'view_count': int_or_none(nextjs_json.get('views')), - } diff --git a/yt_dlp/extractor/alsace20tv.py b/yt_dlp/extractor/alsace20tv.py deleted file mode 100644 index c315e4f217..0000000000 --- a/yt_dlp/extractor/alsace20tv.py +++ /dev/null @@ -1,83 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - clean_html, - dict_get, - get_element_by_class, - int_or_none, - unified_strdate, - url_or_none, -) - - -class Alsace20TVBaseIE(InfoExtractor): - def _extract_video(self, video_id, url=None): - info = self._download_json( - f'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key={video_id}&habillage=0&mode=html', - video_id) or {} - title = info.get('titre') - - formats = [] - for res, fmt_url in (info.get('files') or {}).items(): - formats.extend( - self._extract_smil_formats(fmt_url, video_id, fatal=False) - if '/smil:_' in fmt_url - else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False)) - - webpage = (url and self._download_webpage(url, video_id, fatal=False)) or '' - thumbnail = url_or_none(dict_get(info, ('image', 'preview')) or self._og_search_thumbnail(webpage)) - upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None) - upload_date = unified_strdate(f'20{upload_date[:2]}-{upload_date[2:4]}-{upload_date[4:]}') if upload_date else None - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'description': clean_html(get_element_by_class('wysiwyg', webpage)), - 'upload_date': upload_date, - 'thumbnail': thumbnail, - 'duration': int_or_none(self._og_search_property('video:duration', webpage) if webpage else None), - 'view_count': int_or_none(info.get('nb_vues')), - } - - -class Alsace20TVIE(Alsace20TVBaseIE): - _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/(?:[\w-]+/)+[\w-]+-(?P[\w]+)' - _TESTS = [{ - 'url': 'https://www.alsace20.tv/VOD/Actu/JT/Votre-JT-jeudi-3-fevrier-lyNHCXpYJh.html', - 'info_dict': { - 'id': 'lyNHCXpYJh', - 'ext': 'mp4', - 'description': 'md5:fc0bc4a0692d3d2dba4524053de4c7b7', - 'title': 'Votre JT du jeudi 3 février', - 'upload_date': '20220203', - 'thumbnail': r're:https?://.+\.jpg', - 'duration': 1073, - 'view_count': int, - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - return self._extract_video(video_id, url) - - -class Alsace20TVEmbedIE(Alsace20TVBaseIE): - _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/emb/(?P[\w]+)' - _TESTS = [{ - 'url': 'https://www.alsace20.tv/emb/lyNHCXpYJh', - # 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb', - 'info_dict': { - 'id': 'lyNHCXpYJh', - 'ext': 'mp4', - 'title': 'Votre JT du jeudi 3 février', - 'upload_date': '20220203', - 'thumbnail': r're:https?://.+\.jpg', - 'view_count': int, - }, - 'params': { - 'format': 'bestvideo', - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - return self._extract_video(video_id) diff --git a/yt_dlp/extractor/anchorfm.py b/yt_dlp/extractor/anchorfm.py deleted file mode 100644 index 652154a4a8..0000000000 --- a/yt_dlp/extractor/anchorfm.py +++ /dev/null @@ -1,98 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - clean_html, - float_or_none, - int_or_none, - str_or_none, - traverse_obj, - unified_timestamp, -) - - -class AnchorFMEpisodeIE(InfoExtractor): - _VALID_URL = r'https?://anchor\.fm/(?P\w+)/(?:embed/)?episodes/[\w-]+-(?P\w+)' - _EMBED_REGEX = [rf']+\bsrc=[\'"](?P{_VALID_URL})'] - _TESTS = [{ - 'url': 'https://anchor.fm/lovelyti/episodes/Chrisean-Rock-takes-to-twitter-to-announce-shes-pregnant--Blueface-denies-he-is-the-father-e1tpt3d', - 'info_dict': { - 'id': 'e1tpt3d', - 'ext': 'mp3', - 'title': ' Chrisean Rock takes to twitter to announce she\'s pregnant, Blueface denies he is the father!', - 'description': 'md5:207d167de3e28ceb4ddc1ebf5a30044c', - 'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_nologo/1034827/1034827-1658438968460-5f3bfdf3601e8.jpg', - 'duration': 624.718, - 'uploader': 'Lovelyti ', - 'uploader_id': '991541', - 'channel': 'lovelyti', - 'modified_date': '20230121', - 'modified_timestamp': 1674285178, - 'release_date': '20230121', - 'release_timestamp': 1674285179, - 'episode_id': 'e1tpt3d', - }, - }, { - # embed url - 'url': 'https://anchor.fm/apakatatempo/embed/episodes/S2E75-Perang-Bintang-di-Balik-Kasus-Ferdy-Sambo-dan-Ismail-Bolong-e1shjqd', - 'info_dict': { - 'id': 'e1shjqd', - 'ext': 'mp3', - 'title': 'S2E75 Perang Bintang di Balik Kasus Ferdy Sambo dan Ismail Bolong', - 'description': 'md5:9e95ad9293bf00178bf8d33e9cb92c41', - 'duration': 1042.008, - 'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg', - 'release_date': '20221221', - 'release_timestamp': 1671595916, - 'modified_date': '20221221', - 'modified_timestamp': 1671590834, - 'channel': 'apakatatempo', - 'uploader': 'Podcast Tempo', - 'uploader_id': '2585461', - 'season': 'Season 2', - 'season_number': 2, - 'episode_id': 'e1shjqd', - }, - }] - - _WEBPAGE_TESTS = [{ - 'url': 'https://podcast.tempo.co/podcast/192/perang-bintang-di-balik-kasus-ferdy-sambo-dan-ismail-bolong', - 'info_dict': { - 'id': 'e1shjqd', - 'ext': 'mp3', - 'release_date': '20221221', - 'duration': 1042.008, - 'season': 'Season 2', - 'modified_timestamp': 1671590834, - 'uploader_id': '2585461', - 'modified_date': '20221221', - 'description': 'md5:9e95ad9293bf00178bf8d33e9cb92c41', - 'season_number': 2, - 'title': 'S2E75 Perang Bintang di Balik Kasus Ferdy Sambo dan Ismail Bolong', - 'release_timestamp': 1671595916, - 'episode_id': 'e1shjqd', - 'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg', - 'uploader': 'Podcast Tempo', - 'channel': 'apakatatempo', - }, - }] - - def _real_extract(self, url): - channel_name, episode_id = self._match_valid_url(url).group('channel_name', 'episode_id') - api_data = self._download_json(f'https://anchor.fm/api/v3/episodes/{episode_id}', episode_id) - - return { - 'id': episode_id, - 'title': traverse_obj(api_data, ('episode', 'title')), - 'url': traverse_obj(api_data, ('episode', 'episodeEnclosureUrl'), ('episodeAudios', 0, 'url')), - 'ext': 'mp3', - 'vcodec': 'none', - 'thumbnail': traverse_obj(api_data, ('episode', 'episodeImage')), - 'description': clean_html(traverse_obj(api_data, ('episode', ('description', 'descriptionPreview')), get_all=False)), - 'duration': float_or_none(traverse_obj(api_data, ('episode', 'duration')), 1000), - 'modified_timestamp': unified_timestamp(traverse_obj(api_data, ('episode', 'modified'))), - 'release_timestamp': int_or_none(traverse_obj(api_data, ('episode', 'publishOnUnixTimestamp'))), - 'episode_id': episode_id, - 'uploader': traverse_obj(api_data, ('creator', 'name')), - 'uploader_id': str_or_none(traverse_obj(api_data, ('creator', 'userId'))), - 'season_number': int_or_none(traverse_obj(api_data, ('episode', 'podcastSeasonNumber'))), - 'channel': channel_name or traverse_obj(api_data, ('creator', 'vanitySlug')), - } diff --git a/yt_dlp/extractor/appletrailers.py b/yt_dlp/extractor/appletrailers.py deleted file mode 100644 index 0a600f6df9..0000000000 --- a/yt_dlp/extractor/appletrailers.py +++ /dev/null @@ -1,277 +0,0 @@ -import json -import re -import urllib.parse - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_duration, - unified_strdate, -) - - -class AppleTrailersIE(InfoExtractor): - IE_NAME = 'appletrailers' - _VALID_URL = r'https?://(?:www\.|movie)?trailers\.apple\.com/(?:trailers|ca)/(?P[^/]+)/(?P[^/]+)' - _TESTS = [{ - 'url': 'http://trailers.apple.com/trailers/wb/manofsteel/', - 'info_dict': { - 'id': '5111', - 'title': 'Man of Steel', - }, - 'playlist': [ - { - 'md5': 'd97a8e575432dbcb81b7c3acb741f8a8', - 'info_dict': { - 'id': 'manofsteel-trailer4', - 'ext': 'mov', - 'duration': 111, - 'title': 'Trailer 4', - 'upload_date': '20130523', - 'uploader_id': 'wb', - }, - }, - { - 'md5': 'b8017b7131b721fb4e8d6f49e1df908c', - 'info_dict': { - 'id': 'manofsteel-trailer3', - 'ext': 'mov', - 'duration': 182, - 'title': 'Trailer 3', - 'upload_date': '20130417', - 'uploader_id': 'wb', - }, - }, - { - 'md5': 'd0f1e1150989b9924679b441f3404d48', - 'info_dict': { - 'id': 'manofsteel-trailer', - 'ext': 'mov', - 'duration': 148, - 'title': 'Trailer', - 'upload_date': '20121212', - 'uploader_id': 'wb', - }, - }, - { - 'md5': '5fe08795b943eb2e757fa95cb6def1cb', - 'info_dict': { - 'id': 'manofsteel-teaser', - 'ext': 'mov', - 'duration': 93, - 'title': 'Teaser', - 'upload_date': '20120721', - 'uploader_id': 'wb', - }, - }, - ], - }, { - 'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/', - 'info_dict': { - 'id': '4489', - 'title': 'Blackthorn', - }, - 'playlist_mincount': 2, - 'expected_warnings': ['Unable to download JSON metadata'], - }, { - # json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json - 'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/', - 'info_dict': { - 'id': '15881', - 'title': 'Kung Fu Panda 3', - }, - 'playlist_mincount': 4, - }, { - 'url': 'http://trailers.apple.com/ca/metropole/autrui/', - 'only_matching': True, - }, { - 'url': 'http://movietrailers.apple.com/trailers/focus_features/kuboandthetwostrings/', - 'only_matching': True, - }] - - _JSON_RE = r'iTunes.playURL\((.*?)\);' - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - movie = mobj.group('movie') - uploader_id = mobj.group('company') - - webpage = self._download_webpage(url, movie) - film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id') - film_data = self._download_json( - f'http://trailers.apple.com/trailers/feeds/data/{film_id}.json', - film_id, fatal=False) - - if film_data: - entries = [] - for clip in film_data.get('clips', []): - clip_title = clip['title'] - - formats = [] - for version, version_data in clip.get('versions', {}).items(): - for size, size_data in version_data.get('sizes', {}).items(): - src = size_data.get('src') - if not src: - continue - formats.append({ - 'format_id': f'{version}-{size}', - 'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src), - 'width': int_or_none(size_data.get('width')), - 'height': int_or_none(size_data.get('height')), - 'language': version[:2], - }) - - entries.append({ - 'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(), - 'formats': formats, - 'title': clip_title, - 'thumbnail': clip.get('screen') or clip.get('thumb'), - 'duration': parse_duration(clip.get('runtime') or clip.get('faded')), - 'upload_date': unified_strdate(clip.get('posted')), - 'uploader_id': uploader_id, - }) - - page_data = film_data.get('page', {}) - return self.playlist_result(entries, film_id, page_data.get('movie_title')) - - playlist_url = urllib.parse.urljoin(url, 'includes/playlists/itunes.inc') - - def fix_html(s): - s = re.sub(r'(?s).*?', '', s) - s = re.sub(r'', r'', s) - # The ' in the onClick attributes are not escaped, it couldn't be parsed - # like: http://trailers.apple.com/trailers/wb/gravity/ - - def _clean_json(m): - return 'iTunes.playURL({});'.format(m.group(1).replace('\'', ''')) - s = re.sub(self._JSON_RE, _clean_json, s) - return f'{s}' - doc = self._download_xml(playlist_url, movie, transform_source=fix_html) - - playlist = [] - for li in doc.findall('./div/ul/li'): - on_click = li.find('.//a').attrib['onClick'] - trailer_info_json = self._search_regex(self._JSON_RE, - on_click, 'trailer info') - trailer_info = json.loads(trailer_info_json) - first_url = trailer_info.get('url') - if not first_url: - continue - title = trailer_info['title'] - video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower() - thumbnail = li.find('.//img').attrib['src'] - upload_date = trailer_info['posted'].replace('-', '') - - runtime = trailer_info['runtime'] - m = re.search(r'(?P[0-9]+):(?P[0-9]{1,2})', runtime) - duration = None - if m: - duration = 60 * int(m.group('minutes')) + int(m.group('seconds')) - - trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower() - settings_json_url = urllib.parse.urljoin(url, f'includes/settings/{trailer_id}.json') - settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json') - - formats = [] - for fmt in settings['metadata']['sizes']: - # The src is a file pointing to the real video file - format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', fmt['src']) - formats.append({ - 'url': format_url, - 'format': fmt['type'], - 'width': int_or_none(fmt['width']), - 'height': int_or_none(fmt['height']), - }) - - playlist.append({ - '_type': 'video', - 'id': video_id, - 'formats': formats, - 'title': title, - 'duration': duration, - 'thumbnail': thumbnail, - 'upload_date': upload_date, - 'uploader_id': uploader_id, - 'http_headers': { - 'User-Agent': 'QuickTime compatible (yt-dlp)', - }, - }) - - return { - '_type': 'playlist', - 'id': movie, - 'entries': playlist, - } - - -class AppleTrailersSectionIE(InfoExtractor): - IE_NAME = 'appletrailers:section' - _SECTIONS = { - 'justadded': { - 'feed_path': 'just_added', - 'title': 'Just Added', - }, - 'exclusive': { - 'feed_path': 'exclusive', - 'title': 'Exclusive', - }, - 'justhd': { - 'feed_path': 'just_hd', - 'title': 'Just HD', - }, - 'mostpopular': { - 'feed_path': 'most_pop', - 'title': 'Most Popular', - }, - 'moviestudios': { - 'feed_path': 'studios', - 'title': 'Movie Studios', - }, - } - _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P{})'.format('|'.join(_SECTIONS)) - _TESTS = [{ - 'url': 'http://trailers.apple.com/#section=justadded', - 'info_dict': { - 'title': 'Just Added', - 'id': 'justadded', - }, - 'playlist_mincount': 80, - }, { - 'url': 'http://trailers.apple.com/#section=exclusive', - 'info_dict': { - 'title': 'Exclusive', - 'id': 'exclusive', - }, - 'playlist_mincount': 80, - }, { - 'url': 'http://trailers.apple.com/#section=justhd', - 'info_dict': { - 'title': 'Just HD', - 'id': 'justhd', - }, - 'playlist_mincount': 80, - }, { - 'url': 'http://trailers.apple.com/#section=mostpopular', - 'info_dict': { - 'title': 'Most Popular', - 'id': 'mostpopular', - }, - 'playlist_mincount': 30, - }, { - 'url': 'http://trailers.apple.com/#section=moviestudios', - 'info_dict': { - 'title': 'Movie Studios', - 'id': 'moviestudios', - }, - 'playlist_mincount': 80, - }] - - def _real_extract(self, url): - section = self._match_id(url) - section_data = self._download_json( - 'http://trailers.apple.com/trailers/home/feeds/{}.json'.format(self._SECTIONS[section]['feed_path']), - section) - entries = [ - self.url_result('http://trailers.apple.com' + e['location']) - for e in section_data] - return self.playlist_result(entries, section, self._SECTIONS[section]['title']) diff --git a/yt_dlp/extractor/atvat.py b/yt_dlp/extractor/atvat.py deleted file mode 100644 index b05eccf182..0000000000 --- a/yt_dlp/extractor/atvat.py +++ /dev/null @@ -1,107 +0,0 @@ -import datetime as dt - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - float_or_none, - jwt_encode, - try_get, -) - - -class ATVAtIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?atv\.at/tv/(?:[^/]+/){2,3}(?P.*)' - - _TESTS = [{ - 'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/bauer-sucht-frau/bauer-sucht-frau-staffel-18-folge-3-die-hofwochen', - 'md5': '3c3b4aaca9f63e32b35e04a9c2515903', - 'info_dict': { - 'id': 'v-ce9cgn1e70n5-1', - 'ext': 'mp4', - 'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen', - }, - }, { - 'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1', - 'only_matching': True, - }] - - # extracted from bootstrap.js function (search for e.encryption_key and use your browser's debugger) - _ACCESS_ID = 'x_atv' - _ENCRYPTION_KEY = 'Hohnaekeishoogh2omaeghooquooshia' - - def _extract_video_info(self, url, content, video): - clip_id = content.get('splitId', content['id']) - formats = [] - clip_urls = video['urls'] - for protocol, variant in clip_urls.items(): - source_url = try_get(variant, lambda x: x['clear']['url']) - if not source_url: - continue - if protocol == 'dash': - formats.extend(self._extract_mpd_formats( - source_url, clip_id, mpd_id=protocol, fatal=False)) - elif protocol == 'hls': - formats.extend(self._extract_m3u8_formats( - source_url, clip_id, 'mp4', 'm3u8_native', - m3u8_id=protocol, fatal=False)) - else: - formats.append({ - 'url': source_url, - 'format_id': protocol, - }) - - return { - 'id': clip_id, - 'title': content.get('title'), - 'duration': float_or_none(content.get('duration')), - 'series': content.get('tvShowTitle'), - 'formats': formats, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - json_data = self._parse_json( - self._search_regex(r'', webpage, 'json_data'), - video_id=video_id) - - video_title = json_data['views']['default']['page']['title'] - content_resource = json_data['views']['default']['page']['contentResource'] - content_id = content_resource[0]['id'] - content_ids = [{'id': id_, 'subclip_start': content['start'], 'subclip_end': content['end']} - for id_, content in enumerate(content_resource)] - - time_of_request = dt.datetime.now() - not_before = time_of_request - dt.timedelta(minutes=5) - expire = time_of_request + dt.timedelta(minutes=5) - payload = { - 'content_ids': { - content_id: content_ids, - }, - 'secure_delivery': True, - 'iat': int(time_of_request.timestamp()), - 'nbf': int(not_before.timestamp()), - 'exp': int(expire.timestamp()), - } - videos = self._download_json( - 'https://vas-v4.p7s1video.net/4.0/getsources', - content_id, 'Downloading videos JSON', query={ - 'token': jwt_encode(payload, self._ENCRYPTION_KEY, headers={'kid': self._ACCESS_ID}), - }) - - video_id, videos_data = next(iter(videos['data'].items())) - error_msg = try_get(videos_data, lambda x: x['error']['title']) - if error_msg == 'Geo check failed': - self.raise_geo_restricted(error_msg) - elif error_msg: - raise ExtractorError(error_msg) - entries = [ - self._extract_video_info(url, content_resource[video['id']], video) - for video in videos_data] - - return { - '_type': 'multi_video', - 'id': video_id, - 'title': video_title, - 'entries': entries, - } diff --git a/yt_dlp/extractor/awaan.py b/yt_dlp/extractor/awaan.py deleted file mode 100644 index 4066a5a83f..0000000000 --- a/yt_dlp/extractor/awaan.py +++ /dev/null @@ -1,181 +0,0 @@ -import base64 -import urllib.parse - -from .common import InfoExtractor -from ..utils import ( - format_field, - int_or_none, - parse_iso8601, - smuggle_url, - unsmuggle_url, - urlencode_postdata, -) - - -class AWAANIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P\d+)/[^/]+(?:/(?P\d+)/(?P\d+))?' - - def _real_extract(self, url): - show_id, video_id, season_id = self._match_valid_url(url).groups() - if video_id and int(video_id) > 0: - return self.url_result( - f'http://awaan.ae/media/{video_id}', 'AWAANVideo') - elif season_id and int(season_id) > 0: - return self.url_result(smuggle_url( - f'http://awaan.ae/program/season/{season_id}', - {'show_id': show_id}), 'AWAANSeason') - else: - return self.url_result( - f'http://awaan.ae/program/{show_id}', 'AWAANSeason') - - -class AWAANBaseIE(InfoExtractor): - def _parse_video_data(self, video_data, video_id, is_live): - title = video_data.get('title_en') or video_data['title_ar'] - img = video_data.get('img') - - return { - 'id': video_id, - 'title': title, - 'description': video_data.get('description_en') or video_data.get('description_ar'), - 'thumbnail': format_field(img, None, 'http://admin.mangomolo.com/analytics/%s'), - 'duration': int_or_none(video_data.get('duration')), - 'timestamp': parse_iso8601(video_data.get('create_time'), ' '), - 'is_live': is_live, - 'uploader_id': video_data.get('user_id'), - } - - -class AWAANVideoIE(AWAANBaseIE): - IE_NAME = 'awaan:video' - _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P\d+)' - _TESTS = [{ - 'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375', - 'md5': '5f61c33bfc7794315c671a62d43116aa', - 'info_dict': - { - 'id': '17375', - 'ext': 'mp4', - 'title': 'رحلة العمر : الحلقة 1', - 'description': 'md5:0156e935d870acb8ef0a66d24070c6d6', - 'duration': 2041, - 'timestamp': 1227504126, - 'upload_date': '20081124', - 'uploader_id': '71', - }, - }, { - 'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - video_data = self._download_json( - f'http://admin.mangomolo.com/analytics/index.php/plus/video?id={video_id}', - video_id, headers={'Origin': 'http://awaan.ae'}) - info = self._parse_video_data(video_data, video_id, False) - - embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + urllib.parse.urlencode({ - 'id': video_data['id'], - 'user_id': video_data['user_id'], - 'signature': video_data['signature'], - 'countries': 'Q0M=', - 'filter': 'DENY', - }) - info.update({ - '_type': 'url_transparent', - 'url': embed_url, - 'ie_key': 'MangomoloVideo', - }) - return info - - -class AWAANLiveIE(AWAANBaseIE): - IE_NAME = 'awaan:live' - _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P\d+)' - _TEST = { - 'url': 'http://awaan.ae/live/6/dubai-tv', - 'info_dict': { - 'id': '6', - 'ext': 'mp4', - 'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'upload_date': '20150107', - 'timestamp': 1420588800, - 'uploader_id': '71', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - - def _real_extract(self, url): - channel_id = self._match_id(url) - - channel_data = self._download_json( - f'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id={channel_id}', - channel_id, headers={'Origin': 'http://awaan.ae'}) - info = self._parse_video_data(channel_data, channel_id, True) - - embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + urllib.parse.urlencode({ - 'id': base64.b64encode(channel_data['user_id'].encode()).decode(), - 'channelid': base64.b64encode(channel_data['id'].encode()).decode(), - 'signature': channel_data['signature'], - 'countries': 'Q0M=', - 'filter': 'DENY', - }) - info.update({ - '_type': 'url_transparent', - 'url': embed_url, - 'ie_key': 'MangomoloLive', - }) - return info - - -class AWAANSeasonIE(InfoExtractor): - IE_NAME = 'awaan:season' - _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P\d+)|season/(?P\d+))' - _TEST = { - 'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A', - 'info_dict': - { - 'id': '7910', - 'title': 'محاضرات الشيخ الشعراوي', - }, - 'playlist_mincount': 27, - } - - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - show_id, season_id = self._match_valid_url(url).groups() - - data = {} - if season_id: - data['season'] = season_id - show_id = smuggled_data.get('show_id') - if show_id is None: - season = self._download_json( - f'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id={season_id}', - season_id, headers={'Origin': 'http://awaan.ae'}) - show_id = season['id'] - data['show_id'] = show_id - show = self._download_json( - 'http://admin.mangomolo.com/analytics/index.php/plus/show', - show_id, data=urlencode_postdata(data), headers={ - 'Origin': 'http://awaan.ae', - 'Content-Type': 'application/x-www-form-urlencoded', - }) - if not season_id: - season_id = show['default_season'] - for season in show['seasons']: - if season['id'] == season_id: - title = season.get('title_en') or season['title_ar'] - - entries = [] - for video in show['videos']: - video_id = str(video['id']) - entries.append(self.url_result( - f'http://awaan.ae/media/{video_id}', 'AWAANVideo', video_id)) - - return self.playlist_result(entries, season_id, title) diff --git a/yt_dlp/extractor/axs.py b/yt_dlp/extractor/axs.py deleted file mode 100644 index 7e91667712..0000000000 --- a/yt_dlp/extractor/axs.py +++ /dev/null @@ -1,89 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - float_or_none, - js_to_json, - parse_iso8601, - traverse_obj, - url_or_none, -) - - -class AxsIE(InfoExtractor): - IE_NAME = 'axs.tv' - _VALID_URL = r'https?://(?:www\.)?axs\.tv/(?:channel/(?:[^/?#]+/)+)?video/(?P[^/?#]+)' - - _TESTS = [{ - 'url': 'https://www.axs.tv/video/5f4dc776b70e4f1c194f22ef/', - 'md5': '8d97736ae8e50c64df528e5e676778cf', - 'info_dict': { - 'id': '5f4dc776b70e4f1c194f22ef', - 'title': 'Small Town', - 'ext': 'mp4', - 'description': 'md5:e314d28bfaa227a4d7ec965fae19997f', - 'upload_date': '20230602', - 'timestamp': 1685729564, - 'duration': 1284.216, - 'series': 'Rock & Roll Road Trip with Sammy Hagar', - 'season': 'Season 2', - 'season_number': 2, - 'episode': '3', - 'thumbnail': 'https://images.dotstudiopro.com/5f4e9d330a0c3b295a7e8394', - }, - }, { - 'url': 'https://www.axs.tv/channel/rock-star-interview/video/daryl-hall', - 'md5': '300ae795cd8f9984652c0949734ffbdc', - 'info_dict': { - 'id': '5f488148b70e4f392572977c', - 'display_id': 'daryl-hall', - 'title': 'Daryl Hall', - 'ext': 'mp4', - 'description': 'md5:e54ecaa0f4b5683fc9259e9e4b196628', - 'upload_date': '20230214', - 'timestamp': 1676403615, - 'duration': 2570.668, - 'series': 'The Big Interview with Dan Rather', - 'season': 'Season 3', - 'season_number': 3, - 'episode': '5', - 'thumbnail': 'https://images.dotstudiopro.com/5f4d1901f340b50d937cec32', - }, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - webpage_json_data = self._search_json( - r'mountObj\s*=', webpage, 'video ID data', display_id, - transform_source=js_to_json) - video_id = webpage_json_data['video_id'] - company_id = webpage_json_data['company_id'] - - meta = self._download_json( - f'https://api.myspotlight.tv/dotplayer/video/{company_id}/{video_id}', - video_id, query={'device_type': 'desktop_web'})['video'] - - formats = self._extract_m3u8_formats( - meta['video_m3u8'], video_id, 'mp4', m3u8_id='hls') - - subtitles = {} - for cc in traverse_obj(meta, ('closeCaption', lambda _, v: url_or_none(v['srtPath']))): - subtitles.setdefault(cc.get('srtShortLang') or 'en', []).append( - {'ext': cc.get('srtExt'), 'url': cc['srtPath']}) - - return { - 'id': video_id, - 'display_id': display_id, - 'formats': formats, - **traverse_obj(meta, { - 'title': ('title', {str}), - 'description': ('description', {str}), - 'series': ('seriestitle', {str}), - 'season_number': ('season', {int}), - 'episode': ('episode', {str}), - 'duration': ('duration', {float_or_none}), - 'timestamp': ('updated_at', {parse_iso8601}), - 'thumbnail': ('thumb', {url_or_none}), - }), - 'subtitles': subtitles, - } diff --git a/yt_dlp/extractor/beatbump.py b/yt_dlp/extractor/beatbump.py deleted file mode 100644 index 777a1b3268..0000000000 --- a/yt_dlp/extractor/beatbump.py +++ /dev/null @@ -1,111 +0,0 @@ -from .common import InfoExtractor -from .youtube import YoutubeIE, YoutubeTabIE - - -class BeatBumpVideoIE(InfoExtractor): - _VALID_URL = r'https?://beatbump\.(?:ml|io)/listen\?id=(?P[\w-]+)' - _TESTS = [{ - 'url': 'https://beatbump.ml/listen?id=MgNrAu2pzNs', - 'md5': '5ff3fff41d3935b9810a9731e485fe66', - 'info_dict': { - 'id': 'MgNrAu2pzNs', - 'ext': 'mp4', - 'artist': 'Stephen', - 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp', - 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA', - 'upload_date': '20190312', - 'categories': ['Music'], - 'playable_in_embed': True, - 'duration': 169, - 'like_count': int, - 'alt_title': 'Voyeur Girl', - 'view_count': int, - 'track': 'Voyeur Girl', - 'uploader': 'Stephen', - 'title': 'Voyeur Girl', - 'channel_follower_count': int, - 'age_limit': 0, - 'availability': 'public', - 'live_status': 'not_live', - 'album': 'it\'s too much love to know my dear', - 'channel': 'Stephen', - 'comment_count': int, - 'description': 'md5:7ae382a65843d6df2685993e90a8628f', - 'tags': 'count:11', - 'creator': 'Stephen', - 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA', - 'channel_is_verified': True, - 'heatmap': 'count:100', - }, - }, { - 'url': 'https://beatbump.io/listen?id=LDGZAprNGWo', - 'only_matching': True, - }] - - def _real_extract(self, url): - id_ = self._match_id(url) - return self.url_result(f'https://music.youtube.com/watch?v={id_}', YoutubeIE, id_) - - -class BeatBumpPlaylistIE(InfoExtractor): - _VALID_URL = r'https?://beatbump\.(?:ml|io)/(?:release\?id=|artist/|playlist/)(?P[\w-]+)' - _TESTS = [{ - 'url': 'https://beatbump.ml/release?id=MPREb_gTAcphH99wE', - 'playlist_count': 50, - 'info_dict': { - 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0', - 'availability': 'unlisted', - 'view_count': int, - 'title': 'Album - Royalty Free Music Library V2 (50 Songs)', - 'description': '', - 'tags': [], - 'modified_date': '20231110', - }, - 'expected_warnings': ['YouTube Music is not directly supported'], - }, { - 'url': 'https://beatbump.ml/artist/UC_aEa8K-EOJ3D6gOs7HcyNg', - 'playlist_mincount': 1, - 'params': {'flatplaylist': True}, - 'info_dict': { - 'id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', - 'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds', - 'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg', - 'uploader_id': '@NoCopyrightSounds', - 'channel_follower_count': int, - 'title': 'NoCopyrightSounds', - 'uploader': 'NoCopyrightSounds', - 'description': 'md5:cd4fd53d81d363d05eee6c1b478b491a', - 'channel': 'NoCopyrightSounds', - 'tags': 'count:65', - 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', - 'channel_is_verified': True, - }, - 'expected_warnings': ['YouTube Music is not directly supported'], - }, { - 'url': 'https://beatbump.ml/playlist/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', - 'playlist_mincount': 1, - 'params': {'flatplaylist': True}, - 'info_dict': { - 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', - 'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds', - 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!', - 'view_count': int, - 'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg', - 'uploader_id': '@NoCopyrightSounds', - 'title': 'NCS : All Releases 💿', - 'uploader': 'NoCopyrightSounds', - 'availability': 'public', - 'channel': 'NoCopyrightSounds', - 'tags': [], - 'modified_date': '20231112', - 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', - }, - 'expected_warnings': ['YouTube Music is not directly supported'], - }, { - 'url': 'https://beatbump.io/playlist/VLPLFCHGavqRG-q_2ZhmgU2XB2--ZY6irT1c', - 'only_matching': True, - }] - - def _real_extract(self, url): - id_ = self._match_id(url) - return self.url_result(f'https://music.youtube.com/browse/{id_}', YoutubeTabIE, id_) diff --git a/yt_dlp/extractor/bigflix.py b/yt_dlp/extractor/bigflix.py deleted file mode 100644 index 9c55bb9682..0000000000 --- a/yt_dlp/extractor/bigflix.py +++ /dev/null @@ -1,71 +0,0 @@ -import base64 -import re -import urllib.parse - -from .common import InfoExtractor - - -class BigflixIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P[0-9]+)' - _TESTS = [{ - # 2 formats - 'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070', - 'info_dict': { - 'id': '16070', - 'ext': 'mp4', - 'title': 'Madarasapatinam', - 'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b', - 'formats': 'mincount:2', - }, - 'params': { - 'skip_download': True, - }, - }, { - # multiple formats - 'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - title = self._html_search_regex( - r']+class=["\']pagetitle["\'][^>]*>(.+?)', - webpage, 'title') - - def decode_url(quoted_b64_url): - return base64.b64decode(urllib.parse.unquote( - quoted_b64_url)).decode('utf-8') - - formats = [] - for height, encoded_url in re.findall( - r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage): - video_url = decode_url(encoded_url) - f = { - 'url': video_url, - 'format_id': f'{height}p', - 'height': int(height), - } - if video_url.startswith('rtmp'): - f['ext'] = 'flv' - formats.append(f) - - file_url = self._search_regex( - r'file=([^&]+)', webpage, 'video url', default=None) - if file_url: - video_url = decode_url(file_url) - if all(f['url'] != video_url for f in formats): - formats.append({ - 'url': decode_url(file_url), - }) - - description = self._html_search_meta('description', webpage) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'formats': formats, - } diff --git a/yt_dlp/extractor/bokecc.py b/yt_dlp/extractor/bokecc.py deleted file mode 100644 index 42047aced1..0000000000 --- a/yt_dlp/extractor/bokecc.py +++ /dev/null @@ -1,52 +0,0 @@ -import urllib.parse - -from .common import InfoExtractor -from ..utils import ExtractorError - - -class BokeCCBaseIE(InfoExtractor): - def _extract_bokecc_formats(self, webpage, video_id, format_id=None): - player_params_str = self._html_search_regex( - r'<(?:script|embed)[^>]+src=(?P["\'])(?:https?:)?//p\.bokecc\.com/(?:player|flash/player\.swf)\?(?P.+?)(?P=q)', - webpage, 'player params', group='query') - - player_params = urllib.parse.parse_qs(player_params_str) - - info_xml = self._download_xml( - 'http://p.bokecc.com/servlet/playinfo?uid={}&vid={}&m=1'.format( - player_params['siteid'][0], player_params['vid'][0]), video_id) - - return [{ - 'format_id': format_id, - 'url': quality.find('./copy').attrib['playurl'], - 'quality': int(quality.attrib['value']), - } for quality in info_xml.findall('./video/quality')] - - -class BokeCCIE(BokeCCBaseIE): - IE_DESC = 'CC视频' - _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P.*)' - - _TESTS = [{ - 'url': 'http://union.bokecc.com/playvideo.bo?vid=E0ABAE9D4F509B189C33DC5901307461&uid=FE644790DE9D154A', - 'info_dict': { - 'id': 'FE644790DE9D154A_E0ABAE9D4F509B189C33DC5901307461', - 'ext': 'flv', - 'title': 'BokeCC Video', - }, - }] - - def _real_extract(self, url): - qs = urllib.parse.parse_qs(self._match_valid_url(url).group('query')) - if not qs.get('vid') or not qs.get('uid'): - raise ExtractorError('Invalid URL', expected=True) - - video_id = '{}_{}'.format(qs['uid'][0], qs['vid'][0]) - - webpage = self._download_webpage(url, video_id) - - return { - 'id': video_id, - 'title': 'BokeCC Video', # no title provided in the webpage - 'formats': self._extract_bokecc_formats(webpage, video_id), - } diff --git a/yt_dlp/extractor/caffeinetv.py b/yt_dlp/extractor/caffeinetv.py deleted file mode 100644 index ea5134d2f3..0000000000 --- a/yt_dlp/extractor/caffeinetv.py +++ /dev/null @@ -1,74 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - determine_ext, - int_or_none, - parse_iso8601, - traverse_obj, - urljoin, -) - - -class CaffeineTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?caffeine\.tv/[^/?#]+/video/(?P[\da-f-]+)' - _TESTS = [{ - 'url': 'https://www.caffeine.tv/TsuSurf/video/cffc0a00-e73f-11ec-8080-80017d29f26e', - 'info_dict': { - 'id': 'cffc0a00-e73f-11ec-8080-80017d29f26e', - 'ext': 'mp4', - 'title': 'GOOOOD MORNINNNNN #highlights', - 'timestamp': 1654702180, - 'upload_date': '20220608', - 'uploader': 'RahJON Wicc', - 'uploader_id': 'TsuSurf', - 'duration': 3145, - 'age_limit': 17, - 'thumbnail': 'https://www.caffeine.tv/broadcasts/776b6f84-9cd5-42e3-af1d-4a776eeed697/replay/lobby.jpg', - 'comment_count': int, - 'view_count': int, - 'like_count': int, - 'tags': ['highlights', 'battlerap'], - }, - 'params': { - 'skip_download': 'm3u8', - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - json_data = self._download_json( - f'https://api.caffeine.tv/social/public/activity/{video_id}', video_id) - broadcast_info = traverse_obj(json_data, ('broadcast_info', {dict})) or {} - - video_url = broadcast_info['video_url'] - ext = determine_ext(video_url) - if ext == 'm3u8': - formats = self._extract_m3u8_formats(video_url, video_id, 'mp4') - else: - formats = [{'url': video_url}] - - return { - 'id': video_id, - 'formats': formats, - **traverse_obj(json_data, { - 'like_count': ('like_count', {int_or_none}), - 'view_count': ('view_count', {int_or_none}), - 'comment_count': ('comment_count', {int_or_none}), - 'tags': ('tags', ..., {str}, filter), - 'uploader': ('user', 'name', {str}), - 'uploader_id': (((None, 'user'), 'username'), {str}, any), - 'is_live': ('is_live', {bool}), - }), - **traverse_obj(broadcast_info, { - 'title': ('broadcast_title', {str}), - 'duration': ('content_duration', {int_or_none}), - 'timestamp': ('broadcast_start_time', {parse_iso8601}), - 'thumbnail': ('preview_image_path', {urljoin(url)}), - }), - 'age_limit': { - # assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system - 'FOUR_PLUS': 0, - 'NINE_PLUS': 9, - 'TWELVE_PLUS': 12, - 'SEVENTEEN_PLUS': 17, - }.get(broadcast_info.get('content_rating'), 17), - } diff --git a/yt_dlp/extractor/callin.py b/yt_dlp/extractor/callin.py deleted file mode 100644 index ee2e56f8e0..0000000000 --- a/yt_dlp/extractor/callin.py +++ /dev/null @@ -1,155 +0,0 @@ -from .common import InfoExtractor -from ..utils import float_or_none, int_or_none, make_archive_id, traverse_obj - - -class CallinIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?P[-a-zA-Z]+)' - _TESTS = [{ - 'url': 'https://www.callin.com/episode/the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc', - 'info_dict': { - 'id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd', - 'title': 'The Title IX Regime and the Long March Through and Beyond the Institutions', - 'ext': 'ts', - 'display_id': 'the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc', - 'thumbnail': 're:https://.+\\.png', - 'description': 'First episode', - 'uploader': 'Wesley Yang', - 'timestamp': 1639404128.65, - 'upload_date': '20211213', - 'uploader_id': 'wesyang', - 'uploader_url': 'http://wesleyyang.substack.com', - 'channel': 'Conversations in Year Zero', - 'channel_id': '436d1f82ddeb30cd2306ea9156044d8d2cfdc3f1f1552d245117a42173e78553', - 'channel_url': 'https://callin.com/show/conversations-in-year-zero-oJNllRFSfx', - 'duration': 9951.936, - 'view_count': int, - 'categories': ['News & Politics', 'History', 'Technology'], - 'cast': ['Wesley Yang', 'KC Johnson', 'Gabi Abramovich'], - 'series': 'Conversations in Year Zero', - 'series_id': '436d1f82ddeb30cd2306ea9156044d8d2cfdc3f1f1552d245117a42173e78553', - 'episode': 'The Title IX Regime and the Long March Through and Beyond the Institutions', - 'episode_number': 1, - 'episode_id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd', - }, - }, { - 'url': 'https://www.callin.com/episode/fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW', - 'md5': '14ede27ee2c957b7e4db93140fc0745c', - 'info_dict': { - 'id': 'c3dab47f237bf953d180d3f243477a84302798be0e0b29bc9ade6d60a69f04f5', - 'ext': 'ts', - 'title': 'FCC Commissioner Brendan Carr on Elon’s Starlink', - 'description': 'Or, why the government doesn’t like SpaceX', - 'channel': 'The Pull Request', - 'channel_url': 'https://callin.com/show/the-pull-request-ucnDJmEKAa', - 'duration': 3182.472, - 'series_id': '7e9c23156e4aecfdcaef46bfb2ed7ca268509622ec006c0f0f25d90e34496638', - 'uploader_url': 'http://thepullrequest.com', - 'upload_date': '20220902', - 'episode': 'FCC Commissioner Brendan Carr on Elon’s Starlink', - 'display_id': 'fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW', - 'series': 'The Pull Request', - 'channel_id': '7e9c23156e4aecfdcaef46bfb2ed7ca268509622ec006c0f0f25d90e34496638', - 'view_count': int, - 'uploader': 'Antonio García Martínez', - 'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/1ade9142625344045dc17cf523469ced1d93610762f4c886d06aa190a2f979e8.png', - 'episode_id': 'c3dab47f237bf953d180d3f243477a84302798be0e0b29bc9ade6d60a69f04f5', - 'timestamp': 1662100688.005, - }, - }, { - 'url': 'https://www.callin.com/episode/episode-81-elites-melt-down-over-student-debt-lzxMidUnjA', - 'md5': '16f704ddbf82a27e3930533b12062f07', - 'info_dict': { - 'id': '8d06f869798f93a7814e380bceabea72d501417e620180416ff6bd510596e83c', - 'ext': 'ts', - 'title': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?', - 'description': 'Let’s talk todays episode about the primary election shake up in NYC and the elites melting down over student debt cancelation.', - 'channel': 'The DEBRIEF With Briahna Joy Gray', - 'channel_url': 'https://callin.com/show/the-debrief-with-briahna-joy-gray-siiFDzGegm', - 'duration': 10043.16, - 'series_id': '61cea58444465fd26674069703bd8322993bc9e5b4f1a6d0872690554a046ff7', - 'uploader_url': 'http://patreon.com/badfaithpodcast', - 'upload_date': '20220826', - 'episode': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?', - 'display_id': 'episode-', - 'series': 'The DEBRIEF With Briahna Joy Gray', - 'channel_id': '61cea58444465fd26674069703bd8322993bc9e5b4f1a6d0872690554a046ff7', - 'view_count': int, - 'uploader': 'Briahna Gray', - 'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/461ea0d86172cb6aff7d6c80fd49259cf5e64bdf737a4650f8bc24cf392ca218.png', - 'episode_id': '8d06f869798f93a7814e380bceabea72d501417e620180416ff6bd510596e83c', - 'timestamp': 1661476708.282, - }, - }] - - def try_get_user_name(self, d): - names = [d.get(n) for n in ('first', 'last')] - if None in names: - return next((n for n in names if n), default=None) - return ' '.join(names) - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - next_data = self._search_nextjs_data(webpage, display_id) - episode = next_data['props']['pageProps']['episode'] - - video_id = episode['id'] - title = episode.get('title') or self._generic_title('', webpage) - url = episode['m3u8'] - formats = self._extract_m3u8_formats(url, display_id, ext='ts') - - show = traverse_obj(episode, ('show', 'title')) - show_id = traverse_obj(episode, ('show', 'id')) - - show_json = None - app_slug = (self._html_search_regex( - '\d+)' - _TESTS = [{ - # single file - 'url': 'http://www.camdemy.com/media/5181/', - 'md5': '5a5562b6a98b37873119102e052e311b', - 'info_dict': { - 'id': '5181', - 'ext': 'mp4', - 'title': 'Ch1-1 Introduction, Signals (02-23-2012)', - 'thumbnail': r're:^https?://.*\.jpg$', - 'creator': 'ss11spring', - 'duration': 1591, - 'upload_date': '20130114', - 'view_count': int, - }, - }, { - # With non-empty description - # webpage returns "No permission or not login" - 'url': 'http://www.camdemy.com/media/13885', - 'md5': '4576a3bb2581f86c61044822adbd1249', - 'info_dict': { - 'id': '13885', - 'ext': 'mp4', - 'title': 'EverCam + Camdemy QuickStart', - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': 'md5:2a9f989c2b153a2342acee579c6e7db6', - 'creator': 'evercam', - 'duration': 318, - }, - }, { - # External source (YouTube) - 'url': 'http://www.camdemy.com/media/14842', - 'info_dict': { - 'id': '2vsYQzNIsJo', - 'ext': 'mp4', - 'title': 'Excel 2013 Tutorial - How to add Password Protection', - 'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection', - 'upload_date': '20130211', - 'uploader': 'Hun Kim', - 'uploader_id': 'hunkimtutorials', - }, - 'params': { - 'skip_download': True, - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - src_from = self._html_search_regex( - r"class=['\"]srcFrom['\"][^>]*>Sources?(?:\s+from)?\s*:\s*]+(?:href|title)=(['\"])(?P(?:(?!\1).)+)\1", - webpage, 'external source', default=None, group='url') - if src_from: - return self.url_result(src_from) - - oembed_obj = self._download_json( - 'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id) - - title = oembed_obj['title'] - thumb_url = oembed_obj['thumbnail_url'] - video_folder = urllib.parse.urljoin(thumb_url, 'video/') - file_list_doc = self._download_xml( - urllib.parse.urljoin(video_folder, 'fileList.xml'), - video_id, 'Downloading filelist XML') - file_name = file_list_doc.find('./video/item/fileName').text - video_url = urllib.parse.urljoin(video_folder, file_name) - - # Some URLs return "No permission or not login" in a webpage despite being - # freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885) - upload_date = unified_strdate(self._search_regex( - r'>published on ([^<]+)<', webpage, - 'upload date', default=None)) - view_count = str_to_int(self._search_regex( - r'role=["\']viewCnt["\'][^>]*>([\d,.]+) views', - webpage, 'view count', default=None)) - description = self._html_search_meta( - 'description', webpage, default=None) or clean_html( - oembed_obj.get('description')) - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'thumbnail': thumb_url, - 'description': description, - 'creator': oembed_obj.get('author_name'), - 'duration': parse_duration(oembed_obj.get('duration')), - 'upload_date': upload_date, - 'view_count': view_count, - } - - -class CamdemyFolderIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?camdemy\.com/folder/(?P\d+)' - _TESTS = [{ - # links with trailing slash - 'url': 'http://www.camdemy.com/folder/450', - 'info_dict': { - 'id': '450', - 'title': '信號與系統 2012 & 2011 (Signals and Systems)', - }, - 'playlist_mincount': 145, - }, { - # links without trailing slash - # and multi-page - 'url': 'http://www.camdemy.com/folder/853', - 'info_dict': { - 'id': '853', - 'title': '科學計算 - 使用 Matlab', - }, - 'playlist_mincount': 20, - }, { - # with displayMode parameter. For testing the codes to add parameters - 'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg', - 'info_dict': { - 'id': '853', - 'title': '科學計算 - 使用 Matlab', - }, - 'playlist_mincount': 20, - }] - - def _real_extract(self, url): - folder_id = self._match_id(url) - - # Add displayMode=list so that all links are displayed in a single page - parsed_url = list(urllib.parse.urlparse(url)) - query = dict(urllib.parse.parse_qsl(parsed_url[4])) - query.update({'displayMode': 'list'}) - parsed_url[4] = urllib.parse.urlencode(query) - final_url = urllib.parse.urlunparse(parsed_url) - - page = self._download_webpage(final_url, folder_id) - matches = re.findall(r"href='(/media/\d+/?)'", page) - - entries = [self.url_result('http://www.camdemy.com' + media_path) - for media_path in matches] - - folder_title = self._html_search_meta('keywords', page) - - return self.playlist_result(entries, folder_id, folder_title) diff --git a/yt_dlp/extractor/clippit.py b/yt_dlp/extractor/clippit.py deleted file mode 100644 index 393f217308..0000000000 --- a/yt_dlp/extractor/clippit.py +++ /dev/null @@ -1,70 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ( - parse_iso8601, - qualities, -) - - -class ClippitIE(InfoExtractor): - - _VALID_URL = r'https?://(?:www\.)?clippituser\.tv/c/(?P[a-z]+)' - _TEST = { - 'url': 'https://www.clippituser.tv/c/evmgm', - 'md5': '963ae7a59a2ec4572ab8bf2f2d2c5f09', - 'info_dict': { - 'id': 'evmgm', - 'ext': 'mp4', - 'title': 'Bye bye Brutus. #BattleBots - Clippit', - 'uploader': 'lizllove', - 'uploader_url': 'https://www.clippituser.tv/p/lizllove', - 'timestamp': 1472183818, - 'upload_date': '20160826', - 'description': 'BattleBots | ABC', - 'thumbnail': r're:^https?://.*\.jpg$', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - title = self._html_search_regex(r'(.+?)', webpage, 'title') - - FORMATS = ('sd', 'hd') - quality = qualities(FORMATS) - formats = [] - for format_id in FORMATS: - url = self._html_search_regex(rf'data-{format_id}-file="(.+?)"', - webpage, 'url', fatal=False) - if not url: - continue - match = re.search(r'/(?P\d+)\.mp4', url) - formats.append({ - 'url': url, - 'format_id': format_id, - 'quality': quality(format_id), - 'height': int(match.group('height')) if match else None, - }) - - uploader = self._html_search_regex(r'class="username".*>\s+(.+?)\n', - webpage, 'uploader', fatal=False) - uploader_url = ('https://www.clippituser.tv/p/' + uploader - if uploader else None) - - timestamp = self._html_search_regex(r'datetime="(.+?)"', - webpage, 'date', fatal=False) - thumbnail = self._html_search_regex(r'data-image="(.+?)"', - webpage, 'thumbnail', fatal=False) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'uploader': uploader, - 'uploader_url': uploader_url, - 'timestamp': parse_iso8601(timestamp), - 'description': self._og_search_description(webpage), - 'thumbnail': thumbnail, - } diff --git a/yt_dlp/extractor/contv.py b/yt_dlp/extractor/contv.py deleted file mode 100644 index 63d760a39b..0000000000 --- a/yt_dlp/extractor/contv.py +++ /dev/null @@ -1,113 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - float_or_none, - int_or_none, -) - - -class CONtvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?contv\.com/details-movie/(?P[^/]+)' - _TESTS = [{ - 'url': 'https://www.contv.com/details-movie/CEG10022949/days-of-thrills-&-laughter', - 'info_dict': { - 'id': 'CEG10022949', - 'ext': 'mp4', - 'title': 'Days Of Thrills & Laughter', - 'description': 'md5:5d6b3d0b1829bb93eb72898c734802eb', - 'upload_date': '20180703', - 'timestamp': 1530634789.61, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'https://www.contv.com/details-movie/CLIP-show_fotld_bts/fight-of-the-living-dead:-behind-the-scenes-bites', - 'info_dict': { - 'id': 'CLIP-show_fotld_bts', - 'title': 'Fight of the Living Dead: Behind the Scenes Bites', - }, - 'playlist_mincount': 7, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - details = self._download_json( - 'http://metax.contv.live.junctiontv.net/metax/2.5/details/' + video_id, - video_id, query={'device': 'web'}) - - if details.get('type') == 'episodic': - seasons = self._download_json( - 'http://metax.contv.live.junctiontv.net/metax/2.5/seriesfeed/json/' + video_id, - video_id) - entries = [] - for season in seasons: - for episode in season.get('episodes', []): - episode_id = episode.get('id') - if not episode_id: - continue - entries.append(self.url_result( - 'https://www.contv.com/details-movie/' + episode_id, - CONtvIE.ie_key(), episode_id)) - return self.playlist_result(entries, video_id, details.get('title')) - - m_details = details['details'] - title = details['title'] - - formats = [] - - media_hls_url = m_details.get('media_hls_url') - if media_hls_url: - formats.extend(self._extract_m3u8_formats( - media_hls_url, video_id, 'mp4', - m3u8_id='hls', fatal=False)) - - media_mp4_url = m_details.get('media_mp4_url') - if media_mp4_url: - formats.append({ - 'format_id': 'http', - 'url': media_mp4_url, - }) - - subtitles = {} - captions = m_details.get('captions') or {} - for caption_url in captions.values(): - subtitles.setdefault('en', []).append({ - 'url': caption_url, - }) - - thumbnails = [] - for image in m_details.get('images', []): - image_url = image.get('url') - if not image_url: - continue - thumbnails.append({ - 'url': image_url, - 'width': int_or_none(image.get('width')), - 'height': int_or_none(image.get('height')), - }) - - description = None - for p in ('large_', 'medium_', 'small_', ''): - d = m_details.get(p + 'description') - if d: - description = d - break - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'thumbnails': thumbnails, - 'description': description, - 'timestamp': float_or_none(details.get('metax_added_on'), 1000), - 'subtitles': subtitles, - 'duration': float_or_none(m_details.get('duration'), 1000), - 'view_count': int_or_none(details.get('num_watched')), - 'like_count': int_or_none(details.get('num_fav')), - 'categories': details.get('category'), - 'tags': details.get('tags'), - 'season_number': int_or_none(details.get('season')), - 'episode_number': int_or_none(details.get('episode')), - 'release_year': int_or_none(details.get('pub_year')), - } diff --git a/yt_dlp/extractor/drooble.py b/yt_dlp/extractor/drooble.py deleted file mode 100644 index 16e9a61941..0000000000 --- a/yt_dlp/extractor/drooble.py +++ /dev/null @@ -1,113 +0,0 @@ -import json - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - int_or_none, - try_get, -) - - -class DroobleIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://drooble\.com/(?: - (?:(?P[^/]+)/)?(?Psong|videos|music/albums)/(?P\d+)| - (?P[^/]+)/(?Pvideos|music)) - ''' - _TESTS = [{ - 'url': 'https://drooble.com/song/2858030', - 'md5': '5ffda90f61c7c318dc0c3df4179eb064', - 'info_dict': { - 'id': '2858030', - 'ext': 'mp3', - 'title': 'Skankocillin', - 'upload_date': '20200801', - 'timestamp': 1596241390, - 'uploader_id': '95894', - 'uploader': 'Bluebeat Shelter', - }, - }, { - 'url': 'https://drooble.com/karl340758/videos/2859183', - 'info_dict': { - 'id': 'J6QCQY_I5Tk', - 'ext': 'mp4', - 'title': 'Skankocillin', - 'uploader_id': 'UCrSRoI5vVyeYihtWEYua7rg', - 'description': 'md5:ffc0bd8ba383db5341a86a6cd7d9bcca', - 'upload_date': '20200731', - 'uploader': 'Bluebeat Shelter', - }, - }, { - 'url': 'https://drooble.com/karl340758/music/albums/2858031', - 'info_dict': { - 'id': '2858031', - }, - 'playlist_mincount': 8, - }, { - 'url': 'https://drooble.com/karl340758/music', - 'info_dict': { - 'id': 'karl340758', - }, - 'playlist_mincount': 8, - }, { - 'url': 'https://drooble.com/karl340758/videos', - 'info_dict': { - 'id': 'karl340758', - }, - 'playlist_mincount': 8, - }] - - def _call_api(self, method, video_id, data=None): - response = self._download_json( - f'https://drooble.com/api/dt/{method}', video_id, data=json.dumps(data).encode()) - if not response[0]: - raise ExtractorError('Unable to download JSON metadata') - return response[1] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - user = mobj.group('user') or mobj.group('user_2') - kind = mobj.group('kind') or mobj.group('kind_2') - display_id = mobj.group('id') or user - - if mobj.group('kind_2') == 'videos': - data = {'from_user': display_id, 'album': -1, 'limit': 18, 'offset': 0, 'order': 'new2old', 'type': 'video'} - elif kind in ('music/albums', 'music'): - data = {'user': user, 'public_only': True, 'individual_limit': {'singles': 1, 'albums': 1, 'playlists': 1}} - else: - data = {'url_slug': display_id, 'children': 10, 'order': 'old2new'} - - method = 'getMusicOverview' if kind in ('music/albums', 'music') else 'getElements' - json_data = self._call_api(method, display_id, data=data) - if kind in ('music/albums', 'music'): - json_data = json_data['singles']['list'] - - entites = [] - for media in json_data: - url = media.get('external_media_url') or media.get('link') - if url.startswith('https://www.youtube.com'): - entites.append({ - '_type': 'url', - 'url': url, - 'ie_key': 'Youtube', - }) - continue - is_audio = (media.get('type') or '').lower() == 'audio' - entites.append({ - 'url': url, - 'id': media['id'], - 'title': media['title'], - 'duration': int_or_none(media.get('duration')), - 'timestamp': int_or_none(media.get('timestamp')), - 'album': try_get(media, lambda x: x['album']['title']), - 'uploader': try_get(media, lambda x: x['creator']['display_name']), - 'uploader_id': try_get(media, lambda x: x['creator']['id']), - 'thumbnail': media.get('image_comment'), - 'like_count': int_or_none(media.get('likes')), - 'vcodec': 'none' if is_audio else None, - 'ext': 'mp3' if is_audio else None, - }) - - if len(entites) > 1: - return self.playlist_result(entites, display_id) - - return entites[0] diff --git a/yt_dlp/extractor/duboku.py b/yt_dlp/extractor/duboku.py deleted file mode 100644 index 68c3f05731..0000000000 --- a/yt_dlp/extractor/duboku.py +++ /dev/null @@ -1,246 +0,0 @@ -import base64 -import re -import urllib.parse - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - clean_html, - extract_attributes, - get_elements_by_class, - int_or_none, - js_to_json, - smuggle_url, - unescapeHTML, -) - - -def _get_elements_by_tag_and_attrib(html, tag=None, attribute=None, value=None, escape_value=True): - """Return the content of the tag with the specified attribute in the passed HTML document""" - - if tag is None: - tag = '[a-zA-Z0-9:._-]+' - if attribute is None: - attribute = '' - else: - attribute = rf'\s+(?P{re.escape(attribute)})' - if value is None: - value = '' - else: - value = re.escape(value) if escape_value else value - value = f'=[\'"]?(?P{value})[\'"]?' - - retlist = [] - for m in re.finditer(rf'''(?xs) - <(?P{tag}) - (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*? - {attribute}{value} - (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*? - \s*> - (?P.*?) - - ''', html): - retlist.append(m) - - return retlist - - -def _get_element_by_tag_and_attrib(html, tag=None, attribute=None, value=None, escape_value=True): - retval = _get_elements_by_tag_and_attrib(html, tag, attribute, value, escape_value) - return retval[0] if retval else None - - -class DubokuIE(InfoExtractor): - IE_NAME = 'duboku' - IE_DESC = 'www.duboku.io' - - _VALID_URL = r'(?:https?://[^/]+\.duboku\.io/vodplay/)(?P[0-9]+-[0-9-]+)\.html.*' - _TESTS = [{ - 'url': 'https://w.duboku.io/vodplay/1575-1-1.html', - 'info_dict': { - 'id': '1575-1-1', - 'ext': 'mp4', - 'series': '白色月光', - 'title': 'contains:白色月光', - 'season_number': 1, - 'episode_number': 1, - 'season': 'Season 1', - 'episode_id': '1', - 'season_id': '1', - 'episode': 'Episode 1', - }, - 'params': { - 'skip_download': 'm3u8 download', - }, - }, { - 'url': 'https://w.duboku.io/vodplay/1588-1-1.html', - 'info_dict': { - 'id': '1588-1-1', - 'ext': 'mp4', - 'series': '亲爱的自己', - 'title': 'contains:第1集', - 'season_number': 1, - 'episode_number': 1, - 'episode': 'Episode 1', - 'season': 'Season 1', - 'episode_id': '1', - 'season_id': '1', - }, - 'params': { - 'skip_download': 'm3u8 download', - }, - }] - - _PLAYER_DATA_PATTERN = r'player_data\s*=\s*(\{\s*(.*)})\s*;?\s*(.*)', html) - if mobj: - href = extract_attributes(mobj.group(0)).get('href') - if href: - mobj1 = re.search(r'/(\d+)\.html', href) - if mobj1 and mobj1.group(1) == series_id: - series_title = clean_html(mobj.group(0)) - series_title = re.sub(r'[\s\r\n\t]+', ' ', series_title) - title = clean_html(html) - title = re.sub(r'[\s\r\n\t]+', ' ', title) - break - - data_url = player_data.get('url') - if not data_url: - raise ExtractorError('Cannot find url in player_data') - player_encrypt = player_data.get('encrypt') - if player_encrypt == 1: - data_url = urllib.parse.unquote(data_url) - elif player_encrypt == 2: - data_url = urllib.parse.unquote(base64.b64decode(data_url).decode('ascii')) - - # if it is an embedded iframe, maybe it's an external source - headers = {'Referer': webpage_url} - if player_data.get('from') == 'iframe': - # use _type url_transparent to retain the meaningful details - # of the video. - return { - '_type': 'url_transparent', - 'url': smuggle_url(data_url, {'referer': webpage_url}), - 'id': video_id, - 'title': title, - 'series': series_title, - 'season_number': int_or_none(season_id), - 'season_id': season_id, - 'episode_number': int_or_none(episode_id), - 'episode_id': episode_id, - } - - formats = self._extract_m3u8_formats(data_url, video_id, 'mp4', headers=headers) - - return { - 'id': video_id, - 'title': title, - 'series': series_title, - 'season_number': int_or_none(season_id), - 'season_id': season_id, - 'episode_number': int_or_none(episode_id), - 'episode_id': episode_id, - 'formats': formats, - 'http_headers': headers, - } - - -class DubokuPlaylistIE(InfoExtractor): - IE_NAME = 'duboku:list' - IE_DESC = 'www.duboku.io entire series' - - _VALID_URL = r'(?:https?://[^/]+\.duboku\.io/voddetail/)(?P[0-9]+)\.html.*' - _TESTS = [{ - 'url': 'https://w.duboku.io/voddetail/1575.html', - 'info_dict': { - 'id': 'startswith:1575', - 'title': '白色月光', - }, - 'playlist_count': 12, - }, { - 'url': 'https://w.duboku.io/voddetail/1554.html', - 'info_dict': { - 'id': 'startswith:1554', - 'title': '以家人之名', - }, - 'playlist_mincount': 30, - }] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - if mobj is None: - raise ExtractorError(f'Invalid URL: {url}') - series_id = mobj.group('id') - fragment = urllib.parse.urlparse(url).fragment - - webpage_url = f'https://w.duboku.io/voddetail/{series_id}.html' - webpage_html = self._download_webpage(webpage_url, series_id) - - # extract title - - title = _get_element_by_tag_and_attrib(webpage_html, 'h1', 'class', 'title') - title = unescapeHTML(title.group('content')) if title else None - if not title: - title = self._html_search_meta('keywords', webpage_html) - if not title: - title = _get_element_by_tag_and_attrib(webpage_html, 'title') - title = unescapeHTML(title.group('content')) if title else None - - # extract playlists - - playlists = {} - for div in _get_elements_by_tag_and_attrib( - webpage_html, attribute='id', value='playlist\\d+', escape_value=False): - playlist_id = div.group('value') - playlist = [] - for a in _get_elements_by_tag_and_attrib( - div.group('content'), 'a', 'href', value='[^\'"]+?', escape_value=False): - playlist.append({ - 'href': unescapeHTML(a.group('value')), - 'title': unescapeHTML(a.group('content')), - }) - playlists[playlist_id] = playlist - - # select the specified playlist if url fragment exists - playlist = None - playlist_id = None - if fragment: - playlist = playlists.get(fragment) - playlist_id = fragment - else: - first = next(iter(playlists.items()), None) - if first: - (playlist_id, playlist) = first - if not playlist: - raise ExtractorError( - f'Cannot find {fragment}' if fragment else 'Cannot extract playlist') - - # return url results - return self.playlist_result([ - self.url_result( - urllib.parse.urljoin('https://w.duboku.io', x['href']), - ie=DubokuIE.ie_key(), video_title=x.get('title')) - for x in playlist], series_id + '#' + playlist_id, title) diff --git a/yt_dlp/extractor/eighttracks.py b/yt_dlp/extractor/eighttracks.py deleted file mode 100644 index 3ac4c56ae0..0000000000 --- a/yt_dlp/extractor/eighttracks.py +++ /dev/null @@ -1,158 +0,0 @@ -import json -import random - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, -) - - -class EightTracksIE(InfoExtractor): - IE_NAME = '8tracks' - _VALID_URL = r'https?://8tracks\.com/(?P[^/]+)/(?P[^/#]+)(?:#.*)?$' - _TEST = { - 'name': 'EightTracks', - 'url': 'http://8tracks.com/ytdl/youtube-dl-test-tracks-a', - 'info_dict': { - 'id': '1336550', - 'display_id': 'youtube-dl-test-tracks-a', - 'description': "test chars: \"'/\\ä↭", - 'title': "youtube-dl test tracks \"'/\\ä↭<>", - }, - 'playlist': [ - { - 'md5': '96ce57f24389fc8734ce47f4c1abcc55', - 'info_dict': { - 'id': '11885610', - 'ext': 'm4a', - 'title': "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad", - 'uploader_id': 'ytdl', - }, - }, - { - 'md5': '4ab26f05c1f7291ea460a3920be8021f', - 'info_dict': { - 'id': '11885608', - 'ext': 'm4a', - 'title': "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad", - 'uploader_id': 'ytdl', - }, - }, - { - 'md5': 'd30b5b5f74217410f4689605c35d1fd7', - 'info_dict': { - 'id': '11885679', - 'ext': 'm4a', - 'title': "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad", - 'uploader_id': 'ytdl', - }, - }, - { - 'md5': '4eb0a669317cd725f6bbd336a29f923a', - 'info_dict': { - 'id': '11885680', - 'ext': 'm4a', - 'title': "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad", - 'uploader_id': 'ytdl', - }, - }, - { - 'md5': '1893e872e263a2705558d1d319ad19e8', - 'info_dict': { - 'id': '11885682', - 'ext': 'm4a', - 'title': "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad", - 'uploader_id': 'ytdl', - }, - }, - { - 'md5': 'b673c46f47a216ab1741ae8836af5899', - 'info_dict': { - 'id': '11885683', - 'ext': 'm4a', - 'title': "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad", - 'uploader_id': 'ytdl', - }, - }, - { - 'md5': '1d74534e95df54986da7f5abf7d842b7', - 'info_dict': { - 'id': '11885684', - 'ext': 'm4a', - 'title': "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad", - 'uploader_id': 'ytdl', - }, - }, - { - 'md5': 'f081f47af8f6ae782ed131d38b9cd1c0', - 'info_dict': { - 'id': '11885685', - 'ext': 'm4a', - 'title': "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad", - 'uploader_id': 'ytdl', - }, - }, - ], - } - - def _real_extract(self, url): - playlist_id = self._match_id(url) - - webpage = self._download_webpage(url, playlist_id) - - data = self._parse_json( - self._search_regex( - r'(?s)PAGE\.mix\s*=\s*({.+?});\n', webpage, 'trax information'), - playlist_id) - - session = str(random.randint(0, 1000000000)) - mix_id = data['id'] - track_count = data['tracks_count'] - duration = data['duration'] - avg_song_duration = float(duration) / track_count - # duration is sometimes negative, use predefined avg duration - if avg_song_duration <= 0: - avg_song_duration = 300 - first_url = f'http://8tracks.com/sets/{session}/play?player=sm&mix_id={mix_id}&format=jsonh' - next_url = first_url - entries = [] - - for i in range(track_count): - api_json = None - download_tries = 0 - - while api_json is None: - try: - api_json = self._download_webpage( - next_url, playlist_id, - note='Downloading song information %d/%d' % (i + 1, track_count), - errnote='Failed to download song information') - except ExtractorError: - if download_tries > 3: - raise - else: - download_tries += 1 - self._sleep(avg_song_duration, playlist_id) - - api_data = json.loads(api_json) - track_data = api_data['set']['track'] - info = { - 'id': str(track_data['id']), - 'url': track_data['track_file_stream_url'], - 'title': track_data['performer'] + ' - ' + track_data['name'], - 'raw_title': track_data['name'], - 'uploader_id': data['user']['login'], - 'ext': 'm4a', - } - entries.append(info) - - next_url = 'http://8tracks.com/sets/{}/next?player=sm&mix_id={}&format=jsonh&track_id={}'.format( - session, mix_id, track_data['id']) - return { - '_type': 'playlist', - 'entries': entries, - 'id': str(mix_id), - 'display_id': playlist_id, - 'title': data.get('name'), - 'description': data.get('description'), - } diff --git a/yt_dlp/extractor/eitb.py b/yt_dlp/extractor/eitb.py deleted file mode 100644 index 18b802eb15..0000000000 --- a/yt_dlp/extractor/eitb.py +++ /dev/null @@ -1,81 +0,0 @@ -from .common import InfoExtractor -from ..networking import Request -from ..utils import ( - float_or_none, - int_or_none, - join_nonempty, - parse_iso8601, -) - - -class EitbIE(InfoExtractor): - IE_NAME = 'eitb.tv' - _VALID_URL = r'https?://(?:www\.)?eitb\.tv/(?:eu/bideoa|es/video)/[^/]+/\d+/(?P\d+)' - - _TEST = { - 'url': 'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/4104995148001/4090227752001/lasa-y-zabala-30-anos/', - 'md5': 'edf4436247185adee3ea18ce64c47998', - 'info_dict': { - 'id': '4090227752001', - 'ext': 'mp4', - 'title': '60 minutos (Lasa y Zabala, 30 años)', - 'description': 'Programa de reportajes de actualidad.', - 'duration': 3996.76, - 'timestamp': 1381789200, - 'upload_date': '20131014', - 'tags': list, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - video = self._download_json( - f'http://mam.eitb.eus/mam/REST/ServiceMultiweb/Video/MULTIWEBTV/{video_id}/', - video_id, 'Downloading video JSON') - - media = video['web_media'][0] - - formats = [] - for rendition in media['RENDITIONS']: - video_url = rendition.get('PMD_URL') - if not video_url: - continue - tbr = float_or_none(rendition.get('ENCODING_RATE'), 1000) - formats.append({ - 'url': rendition['PMD_URL'], - 'format_id': join_nonempty('http', int_or_none(tbr)), - 'width': int_or_none(rendition.get('FRAME_WIDTH')), - 'height': int_or_none(rendition.get('FRAME_HEIGHT')), - 'tbr': tbr, - }) - - hls_url = media.get('HLS_SURL') - if hls_url: - request = Request( - 'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/', - headers={'Referer': url}) - token_data = self._download_json( - request, video_id, 'Downloading auth token', fatal=False) - if token_data: - token = token_data.get('token') - if token: - formats.extend(self._extract_m3u8_formats( - f'{hls_url}?hdnts={token}', video_id, m3u8_id='hls', fatal=False)) - - hds_url = media.get('HDS_SURL') - if hds_url: - formats.extend(self._extract_f4m_formats( - '{}?hdcore=3.7.0'.format(hds_url.replace('euskalsvod', 'euskalvod')), - video_id, f4m_id='hds', fatal=False)) - - return { - 'id': video_id, - 'title': media.get('NAME_ES') or media.get('name') or media['NAME_EU'], - 'description': media.get('SHORT_DESC_ES') or video.get('desc_group') or media.get('SHORT_DESC_EU'), - 'thumbnail': media.get('STILL_URL') or media.get('THUMBNAIL_URL'), - 'duration': float_or_none(media.get('LENGTH'), 1000), - 'timestamp': parse_iso8601(media.get('BROADCST_DATE'), ' '), - 'tags': media.get('TAGS'), - 'formats': formats, - } diff --git a/yt_dlp/extractor/eyedotv.py b/yt_dlp/extractor/eyedotv.py deleted file mode 100644 index b8c67ce4cc..0000000000 --- a/yt_dlp/extractor/eyedotv.py +++ /dev/null @@ -1,61 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - parse_duration, - xpath_text, -) - - -class EyedoTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?eyedo\.tv/[^/]+/(?:#!/)?Live/Detail/(?P[0-9]+)' - _TEST = { - 'url': 'https://www.eyedo.tv/en-US/#!/Live/Detail/16301', - 'md5': 'ba14f17995cdfc20c36ba40e21bf73f7', - 'info_dict': { - 'id': '16301', - 'ext': 'mp4', - 'title': 'Journée du conseil scientifique de l\'Afnic 2015', - 'description': 'md5:4abe07293b2f73efc6e1c37028d58c98', - 'uploader': 'Afnic Live', - 'uploader_id': '8023', - }, - } - _ROOT_URL = 'http://live.eyedo.net:1935/' - - def _real_extract(self, url): - video_id = self._match_id(url) - video_data = self._download_xml(f'http://eyedo.tv/api/live/GetLive/{video_id}', video_id) - - def _add_ns(path): - return self._xpath_ns(path, 'http://schemas.datacontract.org/2004/07/EyeDo.Core.Implementation.Web.ViewModels.Api') - - title = xpath_text(video_data, _add_ns('Titre'), 'title', True) - state_live_code = xpath_text(video_data, _add_ns('StateLiveCode'), 'title', True) - if state_live_code == 'avenir': - raise ExtractorError( - f'{self.IE_NAME} said: We\'re sorry, but this video is not yet available.', - expected=True) - - is_live = state_live_code == 'live' - m3u8_url = None - # http://eyedo.tv/Content/Html5/Scripts/html5view.js - if is_live: - if xpath_text(video_data, 'Cdn') == 'true': - m3u8_url = f'http://rrr.sz.xlcdn.com/?account=eyedo&file=A{video_id}&type=live&service=wowza&protocol=http&output=playlist.m3u8' - else: - m3u8_url = self._ROOT_URL + f'w/{video_id}/eyedo_720p/playlist.m3u8' - else: - m3u8_url = self._ROOT_URL + f'replay-w/{video_id}/mp4:{video_id}.mp4/playlist.m3u8' - - return { - 'id': video_id, - 'title': title, - 'formats': self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native'), - 'description': xpath_text(video_data, _add_ns('Description')), - 'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))), - 'uploader': xpath_text(video_data, _add_ns('Createur')), - 'uploader_id': xpath_text(video_data, _add_ns('CreateurId')), - 'chapter': xpath_text(video_data, _add_ns('ChapitreTitre')), - 'chapter_id': xpath_text(video_data, _add_ns('ChapitreId')), - } diff --git a/yt_dlp/extractor/footyroom.py b/yt_dlp/extractor/footyroom.py index 797e894d67..c814af24c8 100644 --- a/yt_dlp/extractor/footyroom.py +++ b/yt_dlp/extractor/footyroom.py @@ -12,14 +12,6 @@ class FootyRoomIE(InfoExtractor): }, 'playlist_count': 2, 'add_ie': [StreamableIE.ie_key()], - }, { - 'url': 'http://footyroom.com/matches/75817984/georgia-vs-germany/review', - 'info_dict': { - 'id': '75817984', - 'title': 'VIDEO Georgia 0 - 2 Germany', - }, - 'playlist_count': 1, - 'add_ie': ['Playwire'], }] def _real_extract(self, url): @@ -38,13 +30,6 @@ class FootyRoomIE(InfoExtractor): payload = video.get('payload') if not payload: continue - playwire_url = self._html_search_regex( - r'data-config="([^"]+)"', payload, - 'playwire url', default=None) - if playwire_url: - entries.append(self.url_result(self._proto_relative_url( - playwire_url, 'http:'), 'Playwire')) - streamable_url = StreamableIE._extract_url(payload) if streamable_url: entries.append(self.url_result( diff --git a/yt_dlp/extractor/franceinter.py b/yt_dlp/extractor/franceinter.py deleted file mode 100644 index 779249b843..0000000000 --- a/yt_dlp/extractor/franceinter.py +++ /dev/null @@ -1,56 +0,0 @@ -from .common import InfoExtractor -from ..utils import month_by_name - - -class FranceInterIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P[^?#]+)' - - _TEST = { - 'url': 'https://www.franceinter.fr/emissions/affaires-sensibles/affaires-sensibles-07-septembre-2016', - 'md5': '9e54d7bdb6fdc02a841007f8a975c094', - 'info_dict': { - 'id': 'affaires-sensibles/affaires-sensibles-07-septembre-2016', - 'ext': 'mp3', - 'title': 'Affaire Cahuzac : le contentieux du compte en Suisse', - 'description': 'md5:401969c5d318c061f86bda1fa359292b', - 'thumbnail': r're:^https?://.*\.jpg', - 'upload_date': '20160907', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - video_url = self._search_regex( - r'(?s)]+class=["\']page-diffusion["\'][^>]*>.*?]+data-url=(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'video url', group='url') - - title = self._og_search_title(webpage) - description = self._og_search_description(webpage) - thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage) - - upload_date_str = self._search_regex( - r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<', - webpage, 'upload date', fatal=False) - if upload_date_str: - upload_date_list = upload_date_str.split() - upload_date_list.reverse() - upload_date_list[1] = '%02d' % (month_by_name(upload_date_list[1], lang='fr') or 0) - upload_date_list[2] = '%02d' % int(upload_date_list[2]) - upload_date = ''.join(upload_date_list) - else: - upload_date = None - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'upload_date': upload_date, - 'formats': [{ - 'url': video_url, - 'vcodec': 'none', - }], - } diff --git a/yt_dlp/extractor/fujitv.py b/yt_dlp/extractor/fujitv.py deleted file mode 100644 index 3319b12681..0000000000 --- a/yt_dlp/extractor/fujitv.py +++ /dev/null @@ -1,73 +0,0 @@ -from .common import InfoExtractor -from ..networking import HEADRequest - - -class FujiTVFODPlus7IE(InfoExtractor): - _VALID_URL = r'https?://fod\.fujitv\.co\.jp/title/(?P[0-9a-z]{4})/(?P[0-9a-z]+)' - _BASE_URL = 'https://i.fod.fujitv.co.jp/' - _BITRATE_MAP = { - 300: (320, 180), - 800: (640, 360), - 1200: (1280, 720), - 2000: (1280, 720), - 4000: (1920, 1080), - } - - _TESTS = [{ - 'url': 'https://fod.fujitv.co.jp/title/5d40/5d40110076', - 'info_dict': { - 'id': '5d40110076', - 'ext': 'ts', - 'title': '#1318 『まる子、まぼろしの洋館を見る』の巻', - 'series': 'ちびまる子ちゃん', - 'series_id': '5d40', - 'description': 'md5:b3f51dbfdda162ac4f789e0ff4d65750', - 'thumbnail': 'https://i.fod.fujitv.co.jp/img/program/5d40/episode/5d40110076_a.jpg', - }, - }, { - 'url': 'https://fod.fujitv.co.jp/title/5d40/5d40810083', - 'info_dict': { - 'id': '5d40810083', - 'ext': 'ts', - 'title': '#1324 『まる子とオニの子』の巻/『結成!2月をムダにしない会』の巻', - 'description': 'md5:3972d900b896adc8ab1849e310507efa', - 'series': 'ちびまる子ちゃん', - 'series_id': '5d40', - 'thumbnail': 'https://i.fod.fujitv.co.jp/img/program/5d40/episode/5d40810083_a.jpg'}, - 'skip': 'Video available only in one week', - }] - - def _real_extract(self, url): - series_id, video_id = self._match_valid_url(url).groups() - self._request_webpage(HEADRequest(url), video_id) - json_info = {} - token = self._get_cookies(url).get('CT') - if token: - json_info = self._download_json( - f'https://fod-sp.fujitv.co.jp/apps/api/episode/detail/?ep_id={video_id}&is_premium=false', - video_id, headers={'x-authorization': f'Bearer {token.value}'}, fatal=False) - else: - self.report_warning(f'The token cookie is needed to extract video metadata. {self._login_hint("cookies")}') - formats, subtitles = [], {} - src_json = self._download_json(f'{self._BASE_URL}abrjson_v2/tv_android/{video_id}', video_id) - for src in src_json['video_selector']: - if not src.get('url'): - continue - fmt, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, 'ts') - for f in fmt: - f.update(dict(zip(('height', 'width'), - self._BITRATE_MAP.get(f.get('tbr'), ()), strict=False))) - formats.extend(fmt) - subtitles = self._merge_subtitles(subtitles, subs) - - return { - 'id': video_id, - 'title': json_info.get('ep_title'), - 'series': json_info.get('lu_title'), - 'series_id': series_id, - 'description': json_info.get('ep_description'), - 'formats': formats, - 'subtitles': subtitles, - 'thumbnail': f'{self._BASE_URL}img/program/{series_id}/episode/{video_id}_a.jpg', - '_format_sort_fields': ('tbr', ), - } diff --git a/yt_dlp/extractor/gab.py b/yt_dlp/extractor/gab.py index 024628e1ca..2684073301 100644 --- a/yt_dlp/extractor/gab.py +++ b/yt_dlp/extractor/gab.py @@ -1,70 +1,13 @@ -import re - from .common import InfoExtractor from ..utils import ( clean_html, int_or_none, parse_codecs, parse_duration, - str_to_int, unified_timestamp, ) -class GabTVIE(InfoExtractor): - _VALID_URL = r'https?://tv\.gab\.com/channel/[^/]+/view/(?P[a-z0-9-]+)' - _TESTS = [{ - 'url': 'https://tv.gab.com/channel/wurzelroot/view/why-was-america-in-afghanistan-61217eacea5665de450d0488', - 'info_dict': { - 'id': '61217eacea5665de450d0488', - 'ext': 'mp4', - 'title': 'WHY WAS AMERICA IN AFGHANISTAN - AMERICA FIRST AGAINST AMERICAN OLIGARCHY', - 'uploader': 'Wurzelroot', - 'uploader_id': '608fb0a85738fd1974984f7d', - 'thumbnail': 'https://tv.gab.com/image/61217eacea5665de450d0488', - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url).split('-')[-1] - webpage = self._download_webpage(url, video_id) - channel_id = self._search_regex(r'data-channel-id=\"(?P[^\"]+)', webpage, 'channel_id') - channel_name = self._search_regex(r'data-channel-name=\"(?P[^\"]+)', webpage, 'channel_name') - title = self._search_regex(r'data-episode-title=\"(?P[^\"]+)', webpage, 'title') - view_key = self._search_regex(r'data-view-key=\"(?P[^\"]+)', webpage, 'view_key') - description = clean_html( - self._html_search_regex(self._meta_regex('description'), webpage, 'description', group='content')) or None - available_resolutions = re.findall( - rf'[^\"]+)', webpage) - - formats = [] - for resolution in available_resolutions: - frmt = { - 'url': f'https://tv.gab.com/media/{video_id}?viewKey={view_key}&r={resolution}', - 'format_id': resolution, - 'vcodec': 'h264', - 'acodec': 'aac', - 'ext': 'mp4', - } - if 'audio-' in resolution: - frmt['abr'] = str_to_int(resolution.replace('audio-', '')) - frmt['height'] = 144 - frmt['quality'] = -10 - else: - frmt['height'] = str_to_int(resolution.replace('p', '')) - formats.append(frmt) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'description': description, - 'uploader': channel_name, - 'uploader_id': channel_id, - 'thumbnail': f'https://tv.gab.com/image/{video_id}', - } - - class GabIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?gab\.com/[^/]+/posts/(?P\d+)' _TESTS = [{ diff --git a/yt_dlp/extractor/googlepodcasts.py b/yt_dlp/extractor/googlepodcasts.py deleted file mode 100644 index 8d1cc4fa11..0000000000 --- a/yt_dlp/extractor/googlepodcasts.py +++ /dev/null @@ -1,84 +0,0 @@ -import json - -from .common import InfoExtractor -from ..utils import ( - clean_podcast_url, - int_or_none, - try_get, - urlencode_postdata, -) - - -class GooglePodcastsBaseIE(InfoExtractor): - _VALID_URL_BASE = r'https?://podcasts\.google\.com/feed/' - - def _batch_execute(self, func_id, video_id, params): - return json.loads(self._download_json( - 'https://podcasts.google.com/_/PodcastsUi/data/batchexecute', - video_id, data=urlencode_postdata({ - 'f.req': json.dumps([[[func_id, json.dumps(params), None, '1']]]), - }), transform_source=lambda x: self._search_regex(r'(?s)(\[.+\])', x, 'data'))[0][2]) - - def _extract_episode(self, episode): - return { - 'id': episode[4][3], - 'title': episode[8], - 'url': clean_podcast_url(episode[13]), - 'thumbnail': episode[2], - 'description': episode[9], - 'creator': try_get(episode, lambda x: x[14]), - 'timestamp': int_or_none(episode[11]), - 'duration': int_or_none(episode[12]), - 'series': episode[1], - } - - -class GooglePodcastsIE(GooglePodcastsBaseIE): - IE_NAME = 'google:podcasts' - _VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P[^/]+)/episode/(?P[^/?&#]+)' - _TEST = { - 'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA/episode/MzBlNWRlN2UtOWE4Yy00ODcwLTk2M2MtM2JlMmUyNmViOTRh', - 'md5': 'fa56b2ee8bd0703e27e42d4b104c4766', - 'info_dict': { - 'id': '30e5de7e-9a8c-4870-963c-3be2e26eb94a', - 'ext': 'mp3', - 'title': 'WWDTM New Year 2021', - 'description': 'We say goodbye to 2020 with Christine Baranksi, Doug Jones, Jonna Mendez, and Kellee Edwards.', - 'upload_date': '20210102', - 'timestamp': 1609606800, - 'duration': 2901, - 'series': "Wait Wait... Don't Tell Me!", - }, - } - - def _real_extract(self, url): - b64_feed_url, b64_guid = self._match_valid_url(url).groups() - episode = self._batch_execute( - 'oNjqVe', b64_guid, [b64_feed_url, b64_guid])[1] - return self._extract_episode(episode) - - -class GooglePodcastsFeedIE(GooglePodcastsBaseIE): - IE_NAME = 'google:podcasts:feed' - _VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P[^/?&#]+)/?(?:[?#&]|$)' - _TEST = { - 'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA', - 'info_dict': { - 'title': "Wait Wait... Don't Tell Me!", - 'description': "NPR's weekly current events quiz. Have a laugh and test your news knowledge while figuring out what's real and what we've made up.", - }, - 'playlist_mincount': 20, - } - - def _real_extract(self, url): - b64_feed_url = self._match_id(url) - data = self._batch_execute('ncqJEe', b64_feed_url, [b64_feed_url]) - - entries = [] - for episode in (try_get(data, lambda x: x[1][0]) or []): - entries.append(self._extract_episode(episode)) - - feed = try_get(data, lambda x: x[3]) or [] - return self.playlist_result( - entries, playlist_title=try_get(feed, lambda x: x[0]), - playlist_description=try_get(feed, lambda x: x[2])) diff --git a/yt_dlp/extractor/goshgay.py b/yt_dlp/extractor/goshgay.py deleted file mode 100644 index 7bcac9bdea..0000000000 --- a/yt_dlp/extractor/goshgay.py +++ /dev/null @@ -1,47 +0,0 @@ -import urllib.parse - -from .common import InfoExtractor -from ..utils import ( - parse_duration, -) - - -class GoshgayIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?goshgay\.com/video(?P\d+?)($|/)' - _TEST = { - 'url': 'http://www.goshgay.com/video299069/diesel_sfw_xxx_video', - 'md5': '4b6db9a0a333142eb9f15913142b0ed1', - 'info_dict': { - 'id': '299069', - 'ext': 'flv', - 'title': 'DIESEL SFW XXX Video', - 'thumbnail': r're:^http://.*\.jpg$', - 'duration': 80, - 'age_limit': 18, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - title = self._html_search_regex( - r'

(.*?)<', webpage, 'title') - duration = parse_duration(self._html_search_regex( - r'\s*-?\s*(.*?)', - webpage, 'duration', fatal=False)) - - flashvars = urllib.parse.parse_qs(self._html_search_regex( - r'\d+)\.html' - _TEST = { - 'url': 'http://on-demand.gputechconf.com/gtc/2015/video/S5156.html', - 'md5': 'a8862a00a0fd65b8b43acc5b8e33f798', - 'info_dict': { - 'id': '5156', - 'ext': 'mp4', - 'title': 'Coordinating More Than 3 Million CUDA Threads for Social Network Analysis', - 'duration': 1219, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - root_path = self._search_regex( - r'var\s+rootPath\s*=\s*"([^"]+)', webpage, 'root path', - default='http://evt.dispeak.com/nvidia/events/gtc15/') - xml_file_id = self._search_regex( - r'var\s+xmlFileId\s*=\s*"([^"]+)', webpage, 'xml file id') - - return { - '_type': 'url_transparent', - 'id': video_id, - 'url': f'{root_path}xml/{xml_file_id}.xml', - 'ie_key': 'DigitallySpeaking', - } diff --git a/yt_dlp/extractor/hketv.py b/yt_dlp/extractor/hketv.py deleted file mode 100644 index 3998abc121..0000000000 --- a/yt_dlp/extractor/hketv.py +++ /dev/null @@ -1,183 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - clean_html, - int_or_none, - merge_dicts, - parse_count, - str_or_none, - try_get, - unified_strdate, - urlencode_postdata, - urljoin, -) - - -class HKETVIE(InfoExtractor): - IE_NAME = 'hketv' - IE_DESC = '香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau' - _GEO_BYPASS = False - _GEO_COUNTRIES = ['HK'] - _VALID_URL = r'https?://(?:www\.)?hkedcity\.net/etv/resource/(?P[0-9]+)' - _TESTS = [{ - 'url': 'https://www.hkedcity.net/etv/resource/2932360618', - 'md5': 'f193712f5f7abb208ddef3c5ea6ed0b7', - 'info_dict': { - 'id': '2932360618', - 'ext': 'mp4', - 'title': '喜閱一生(共享閱讀樂) (中、英文字幕可供選擇)', - 'description': 'md5:d5286d05219ef50e0613311cbe96e560', - 'upload_date': '20181024', - 'duration': 900, - 'subtitles': 'count:2', - }, - 'skip': 'Geo restricted to HK', - }, { - 'url': 'https://www.hkedcity.net/etv/resource/972641418', - 'md5': '1ed494c1c6cf7866a8290edad9b07dc9', - 'info_dict': { - 'id': '972641418', - 'ext': 'mp4', - 'title': '衣冠楚楚 (天使系列之一)', - 'description': 'md5:10bb3d659421e74f58e5db5691627b0f', - 'upload_date': '20070109', - 'duration': 907, - 'subtitles': {}, - }, - 'skip': 'Geo restricted to HK', - }] - - _CC_LANGS = { - '中文(繁體中文)': 'zh-Hant', - '中文(简体中文)': 'zh-Hans', - 'English': 'en', - 'Bahasa Indonesia': 'id', - '\u0939\u093f\u0928\u094d\u0926\u0940': 'hi', - '\u0928\u0947\u092a\u093e\u0932\u0940': 'ne', - 'Tagalog': 'tl', - '\u0e44\u0e17\u0e22': 'th', - '\u0627\u0631\u062f\u0648': 'ur', - } - _FORMAT_HEIGHTS = { - 'SD': 360, - 'HD': 720, - } - _APPS_BASE_URL = 'https://apps.hkedcity.net' - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - title = ( - self._html_search_meta( - ('ed_title', 'search.ed_title'), webpage, default=None) - or self._search_regex( - r'data-favorite_title_(?:eng|chi)=(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'title', default=None, group='url') - or self._html_search_regex( - r'

([^<]+)

', webpage, 'title', default=None) - or self._og_search_title(webpage) - ) - - file_id = self._search_regex( - r'post_var\[["\']file_id["\']\s*\]\s*=\s*(.+?);', - webpage, 'file ID') - curr_url = self._search_regex( - r'post_var\[["\']curr_url["\']\s*\]\s*=\s*"(.+?)";', - webpage, 'curr URL') - data = { - 'action': 'get_info', - 'curr_url': curr_url, - 'file_id': file_id, - 'video_url': file_id, - } - - response = self._download_json( - self._APPS_BASE_URL + '/media/play/handler.php', video_id, - data=urlencode_postdata(data), - headers=merge_dicts({ - 'Content-Type': 'application/x-www-form-urlencoded'}, - self.geo_verification_headers())) - - result = response['result'] - - if not response.get('success') or not response.get('access'): - error = clean_html(response.get('access_err_msg')) - if 'Video streaming is not available in your country' in error: - self.raise_geo_restricted( - msg=error, countries=self._GEO_COUNTRIES) - else: - raise ExtractorError(error, expected=True) - - formats = [] - - width = int_or_none(result.get('width')) - height = int_or_none(result.get('height')) - - playlist0 = result['playlist'][0] - for fmt in playlist0['sources']: - file_url = urljoin(self._APPS_BASE_URL, fmt.get('file')) - if not file_url: - continue - # If we ever wanted to provide the final resolved URL that - # does not require cookies, albeit with a shorter lifespan: - # urlh = self._downloader.urlopen(file_url) - # resolved_url = urlh.url - label = fmt.get('label') - h = self._FORMAT_HEIGHTS.get(label) - w = h * width // height if h and width and height else None - formats.append({ - 'format_id': label, - 'ext': fmt.get('type'), - 'url': file_url, - 'width': w, - 'height': h, - }) - - subtitles = {} - tracks = try_get(playlist0, lambda x: x['tracks'], list) or [] - for track in tracks: - if not isinstance(track, dict): - continue - track_kind = str_or_none(track.get('kind')) - if not track_kind or not isinstance(track_kind, str): - continue - if track_kind.lower() not in ('captions', 'subtitles'): - continue - track_url = urljoin(self._APPS_BASE_URL, track.get('file')) - if not track_url: - continue - track_label = track.get('label') - subtitles.setdefault(self._CC_LANGS.get( - track_label, track_label), []).append({ - 'url': self._proto_relative_url(track_url), - 'ext': 'srt', - }) - - # Likes - emotion = self._download_json( - 'https://emocounter.hkedcity.net/handler.php', video_id, - data=urlencode_postdata({ - 'action': 'get_emotion', - 'data[bucket_id]': 'etv', - 'data[identifier]': video_id, - }), - headers={'Content-Type': 'application/x-www-form-urlencoded'}, - fatal=False) or {} - like_count = int_or_none(try_get( - emotion, lambda x: x['data']['emotion_data'][0]['count'])) - - return { - 'id': video_id, - 'title': title, - 'description': self._html_search_meta( - 'description', webpage, fatal=False), - 'upload_date': unified_strdate(self._html_search_meta( - 'ed_date', webpage, fatal=False), day_first=False), - 'duration': int_or_none(result.get('length')), - 'formats': formats, - 'subtitles': subtitles, - 'thumbnail': urljoin(self._APPS_BASE_URL, result.get('image')), - 'view_count': parse_count(result.get('view_count')), - 'like_count': like_count, - } diff --git a/yt_dlp/extractor/idolplus.py b/yt_dlp/extractor/idolplus.py deleted file mode 100644 index 3c905b0712..0000000000 --- a/yt_dlp/extractor/idolplus.py +++ /dev/null @@ -1,115 +0,0 @@ -from .common import InfoExtractor -from ..utils import traverse_obj, try_call, url_or_none - - -class IdolPlusIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?idolplus\.com/z[us]/(?:concert/|contents/?\?(?:[^#]+&)?albumId=)(?P\w+)' - _TESTS = [{ - 'url': 'https://idolplus.com/zs/contents?albumId=M012077298PPV00', - 'md5': '2ace3f4661c943a2f7e79f0b88cea1e7', - 'info_dict': { - 'id': 'M012077298PPV00', - 'ext': 'mp4', - 'title': '[MultiCam] Aegyo on Top of Aegyo (IZ*ONE EATING TRIP)', - 'release_date': '20200707', - 'formats': 'count:65', - }, - 'params': {'format': '532-KIM_MINJU'}, - }, { - 'url': 'https://idolplus.com/zs/contents?albumId=M01232H058PPV00&catId=E9TX5', - 'info_dict': { - 'id': 'M01232H058PPV00', - 'ext': 'mp4', - 'title': 'YENA (CIRCLE CHART MUSIC AWARDS 2022 RED CARPET)', - 'release_date': '20230218', - 'formats': 'count:5', - }, - 'params': {'skip_download': 'm3u8'}, - }, { - # live stream - 'url': 'https://idolplus.com/zu/contents?albumId=M012323174PPV00', - 'info_dict': { - 'id': 'M012323174PPV00', - 'ext': 'mp4', - 'title': 'Hanteo Music Awards 2022 DAY2', - 'release_date': '20230211', - 'formats': 'count:5', - }, - 'params': {'skip_download': 'm3u8'}, - }, { - 'url': 'https://idolplus.com/zs/concert/M012323039PPV00', - 'info_dict': { - 'id': 'M012323039PPV00', - 'ext': 'mp4', - 'title': 'CIRCLE CHART MUSIC AWARDS 2022', - 'release_date': '20230218', - 'formats': 'count:5', - }, - 'params': {'skip_download': 'm3u8'}, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - data_list = traverse_obj(self._download_json( - 'https://idolplus.com/api/zs/viewdata/ruleset/build', video_id, - headers={'App_type': 'web', 'Country_Code': 'KR'}, query={ - 'rulesetId': 'contents', - 'albumId': video_id, - 'distribute': 'PRD', - 'loggedIn': 'false', - 'region': 'zs', - 'countryGroup': '00010', - 'lang': 'en', - 'saId': '999999999998', - }), ('data', 'viewData', ...)) - - player_data = {} - while data_list: - player_data = data_list.pop() - if traverse_obj(player_data, 'type') == 'player': - break - elif traverse_obj(player_data, ('dataList', ...)): - data_list += player_data['dataList'] - - formats = self._extract_m3u8_formats(traverse_obj(player_data, ( - 'vodPlayerList', 'vodProfile', 0, 'vodServer', 0, 'video_url', {url_or_none})), video_id) - - subtitles = {} - for caption in traverse_obj(player_data, ('vodPlayerList', 'caption')) or []: - subtitles.setdefault(caption.get('lang') or 'und', []).append({ - 'url': caption.get('smi_url'), - 'ext': 'vtt', - }) - - # Add member multicams as alternative formats - if (traverse_obj(player_data, ('detail', 'has_cuesheet')) == 'Y' - and traverse_obj(player_data, ('detail', 'is_omni_member')) == 'Y'): - cuesheet = traverse_obj(self._download_json( - 'https://idolplus.com/gapi/contents/v1.0/content/cuesheet', video_id, - 'Downloading JSON metadata for member multicams', - headers={'App_type': 'web', 'Country_Code': 'KR'}, query={ - 'ALBUM_ID': video_id, - 'COUNTRY_GRP': '00010', - 'LANG': 'en', - 'SA_ID': '999999999998', - 'COUNTRY_CODE': 'KR', - }), ('data', 'cuesheet_item', 0)) - - for member in traverse_obj(cuesheet, ('members', ...)): - index = try_call(lambda: int(member['omni_view_index']) - 1) - member_video_url = traverse_obj(cuesheet, ('omni_view', index, 'cdn_url', 0, 'url', {url_or_none})) - if not member_video_url: - continue - member_formats = self._extract_m3u8_formats( - member_video_url, video_id, note=f'Downloading m3u8 for multicam {member["name"]}') - for mf in member_formats: - mf['format_id'] = f'{mf["format_id"]}-{member["name"].replace(" ", "_")}' - formats.extend(member_formats) - - return { - 'id': video_id, - 'title': traverse_obj(player_data, ('detail', 'albumName')), - 'formats': formats, - 'subtitles': subtitles, - 'release_date': traverse_obj(player_data, ('detail', 'broadcastDate')), - } diff --git a/yt_dlp/extractor/infoq.py b/yt_dlp/extractor/infoq.py index 5274c9339f..5335922293 100644 --- a/yt_dlp/extractor/infoq.py +++ b/yt_dlp/extractor/infoq.py @@ -1,7 +1,7 @@ import base64 import urllib.parse -from .bokecc import BokeCCBaseIE +from .common import InfoExtractor from ..utils import ( ExtractorError, determine_ext, @@ -10,7 +10,7 @@ from ..utils import ( ) -class InfoQIE(BokeCCBaseIE): +class InfoQIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?infoq\.com/(?:[^/]+/)+(?P[^/]+)' _TESTS = [{ @@ -117,14 +117,10 @@ class InfoQIE(BokeCCBaseIE): video_title = self._html_extract_title(webpage) video_description = self._html_search_meta('description', webpage, 'description') - if '/cn/' in url: - # for China videos, HTTP video URL exists but always fails with 403 - formats = self._extract_bokecc_formats(webpage, video_id) - else: - formats = ( - self._extract_rtmp_video(webpage) - + self._extract_http_video(webpage) - + self._extract_http_audio(webpage, video_id)) + formats = ( + self._extract_rtmp_video(webpage) + + self._extract_http_video(webpage) + + self._extract_http_audio(webpage, video_id)) return { 'id': video_id, diff --git a/yt_dlp/extractor/internetvideoarchive.py b/yt_dlp/extractor/internetvideoarchive.py deleted file mode 100644 index 9d2574cb06..0000000000 --- a/yt_dlp/extractor/internetvideoarchive.py +++ /dev/null @@ -1,58 +0,0 @@ -import json -import re - -from .common import InfoExtractor -from ..utils import parse_qs - - -class InternetVideoArchiveIE(InfoExtractor): - _VALID_URL = r'https?://video\.internetvideoarchive\.net/(?:player|flash/players)/.*?\?.*?publishedid.*?' - - _TEST = { - 'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?customerid=69249&publishedid=194487&reporttag=vdbetatitle&playerid=641&autolist=0&domain=www.videodetective.com&maxrate=high&minrate=low&socialplayer=false', - 'info_dict': { - 'id': '194487', - 'ext': 'mp4', - 'title': 'Kick-Ass 2', - 'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - - @staticmethod - def _build_json_url(query): - return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query - - def _real_extract(self, url): - query = parse_qs(url) - video_id = query['publishedid'][0] - data = self._download_json( - 'https://video.internetvideoarchive.net/videojs7/videojs7.ivasettings.ashx', - video_id, data=json.dumps({ - 'customerid': query['customerid'][0], - 'publishedid': video_id, - }).encode()) - title = data['Title'] - formats = self._extract_m3u8_formats( - data['VideoUrl'], video_id, 'mp4', - 'm3u8_native', m3u8_id='hls', fatal=False) - file_url = formats[0]['url'] - if '.ism/' in file_url: - replace_url = lambda x: re.sub(r'\.ism/[^?]+', '.ism/' + x, file_url) - formats.extend(self._extract_f4m_formats( - replace_url('.f4m'), video_id, f4m_id='hds', fatal=False)) - formats.extend(self._extract_mpd_formats( - replace_url('.mpd'), video_id, mpd_id='dash', fatal=False)) - formats.extend(self._extract_ism_formats( - replace_url('Manifest'), video_id, ism_id='mss', fatal=False)) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'thumbnail': data.get('PosterUrl'), - 'description': data.get('Description'), - } diff --git a/yt_dlp/extractor/izlesene.py b/yt_dlp/extractor/izlesene.py deleted file mode 100644 index cf2a269c38..0000000000 --- a/yt_dlp/extractor/izlesene.py +++ /dev/null @@ -1,111 +0,0 @@ -import urllib.parse - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - float_or_none, - get_element_by_id, - int_or_none, - parse_iso8601, - str_to_int, -) - - -class IzleseneIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?://(?:(?:www|m)\.)?izlesene\.com/ - (?:video|embedplayer)/(?:[^/]+/)?(?P[0-9]+) - ''' - _TESTS = [ - { - 'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694', - 'md5': '4384f9f0ea65086734b881085ee05ac2', - 'info_dict': { - 'id': '7599694', - 'ext': 'mp4', - 'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi', - 'description': 'md5:253753e2655dde93f59f74b572454f6d', - 'thumbnail': r're:^https?://.*\.jpg', - 'uploader_id': 'pelikzzle', - 'timestamp': int, - 'upload_date': '20140702', - 'duration': 95.395, - 'age_limit': 0, - }, - }, - { - 'url': 'http://www.izlesene.com/video/tarkan-dortmund-2006-konseri/17997', - 'md5': '97f09b6872bffa284cb7fa4f6910cb72', - 'info_dict': { - 'id': '17997', - 'ext': 'mp4', - 'title': 'Tarkan Dortmund 2006 Konseri', - 'thumbnail': r're:^https://.*\.jpg', - 'uploader_id': 'parlayankiz', - 'timestamp': int, - 'upload_date': '20061112', - 'duration': 253.666, - 'age_limit': 0, - }, - }, - ] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(f'http://www.izlesene.com/video/{video_id}', video_id) - - video = self._parse_json( - self._search_regex( - r'videoObj\s*=\s*({.+?})\s*;\s*\n', webpage, 'streams'), - video_id) - - title = video.get('videoTitle') or self._og_search_title(webpage) - - formats = [] - for stream in video['media']['level']: - source_url = stream.get('source') - if not source_url or not isinstance(source_url, str): - continue - ext = determine_ext(url, 'mp4') - quality = stream.get('value') - height = int_or_none(quality) - formats.append({ - 'format_id': f'{quality}p' if quality else 'sd', - 'url': urllib.parse.unquote(source_url), - 'ext': ext, - 'height': height, - }) - - description = self._og_search_description(webpage, default=None) - thumbnail = video.get('posterURL') or self._proto_relative_url( - self._og_search_thumbnail(webpage), scheme='http:') - - uploader = self._html_search_regex( - r"adduserUsername\s*=\s*'([^']+)';", - webpage, 'uploader', fatal=False) - timestamp = parse_iso8601(self._html_search_meta( - 'uploadDate', webpage, 'upload date')) - - duration = float_or_none(video.get('duration') or self._html_search_regex( - r'videoduration["\']?\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'duration', fatal=False, group='value'), scale=1000) - - view_count = str_to_int(get_element_by_id('videoViewCount', webpage)) - comment_count = self._html_search_regex( - r'comment_count\s*=\s*\'([^\']+)\';', - webpage, 'comment_count', fatal=False) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'uploader_id': uploader, - 'timestamp': timestamp, - 'duration': duration, - 'view_count': int_or_none(view_count), - 'comment_count': int_or_none(comment_count), - 'age_limit': self._family_friendly_search(webpage), - 'formats': formats, - } diff --git a/yt_dlp/extractor/kinja.py b/yt_dlp/extractor/kinja.py deleted file mode 100644 index 67c089104c..0000000000 --- a/yt_dlp/extractor/kinja.py +++ /dev/null @@ -1,206 +0,0 @@ -import urllib.parse - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_iso8601, - strip_or_none, - try_get, -) - - -class KinjaEmbedIE(InfoExtractor): - IE_NAME = 'kinja:embed' - _DOMAIN_REGEX = r'''(?:[^.]+\.)? - (?: - avclub| - clickhole| - deadspin| - gizmodo| - jalopnik| - jezebel| - kinja| - kotaku| - lifehacker| - splinternews| - the(?:inventory|onion|root|takeout) - )\.com''' - _COMMON_REGEX = r'''/ - (?: - ajax/inset| - embed/video - )/iframe\?.*?\bid=''' - _VALID_URL = rf'''(?x)https?://{_DOMAIN_REGEX}{_COMMON_REGEX} - (?P - fb| - imgur| - instagram| - jwp(?:layer)?-video| - kinjavideo| - mcp| - megaphone| - soundcloud(?:-playlist)?| - tumblr-post| - twitch-stream| - twitter| - ustream-channel| - vimeo| - vine| - youtube-(?:list|video) - )-(?P[^&]+)''' - _EMBED_REGEX = [rf'(?x)]+?src=(?P["\'])(?P(?:(?:https?:)?//{_DOMAIN_REGEX})?{_COMMON_REGEX}(?:(?!\1).)+)\1'] - _TESTS = [{ - 'url': 'https://kinja.com/ajax/inset/iframe?id=fb-10103303356633621', - 'only_matching': True, - }, { - 'url': 'https://kinja.com/ajax/inset/iframe?id=kinjavideo-100313', - 'only_matching': True, - }, { - 'url': 'https://kinja.com/ajax/inset/iframe?id=megaphone-PPY1300931075', - 'only_matching': True, - }, { - 'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-128574047', - 'only_matching': True, - }, { - 'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-playlist-317413750', - 'only_matching': True, - }, { - 'url': 'https://kinja.com/ajax/inset/iframe?id=tumblr-post-160130699814-daydreams-at-midnight', - 'only_matching': True, - }, { - 'url': 'https://kinja.com/ajax/inset/iframe?id=twitch-stream-libratus_extra', - 'only_matching': True, - }, { - 'url': 'https://kinja.com/ajax/inset/iframe?id=twitter-1068875942473404422', - 'only_matching': True, - }, { - 'url': 'https://kinja.com/ajax/inset/iframe?id=ustream-channel-10414700', - 'only_matching': True, - }, { - 'url': 'https://kinja.com/ajax/inset/iframe?id=vimeo-120153502', - 'only_matching': True, - }, { - 'url': 'https://kinja.com/ajax/inset/iframe?id=vine-5BlvV5qqPrD', - 'only_matching': True, - }, { - 'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-list-BCQ3KyrPjgA/PLE6509247C270A72E', - 'only_matching': True, - }, { - 'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-video-00QyL0AgPAE', - 'only_matching': True, - }] - _WEBPAGE_TESTS = [{ - 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537', - 'info_dict': { - 'id': '106351', - 'ext': 'mp4', - 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You', - }, - 'skip': 'Invalid URL', - }] - _JWPLATFORM_PROVIDER = ('cdn.jwplayer.com/v2/media/', 'JWPlatform') - _PROVIDER_MAP = { - 'fb': ('facebook.com/video.php?v=', 'Facebook'), - 'imgur': ('imgur.com/', 'Imgur'), - 'instagram': ('instagram.com/p/', 'Instagram'), - 'jwplayer-video': _JWPLATFORM_PROVIDER, - 'jwp-video': _JWPLATFORM_PROVIDER, - 'megaphone': ('player.megaphone.fm/', 'Generic'), - 'soundcloud': ('api.soundcloud.com/tracks/', 'Soundcloud'), - 'soundcloud-playlist': ('api.soundcloud.com/playlists/', 'SoundcloudPlaylist'), - 'tumblr-post': ('%s.tumblr.com/post/%s', 'Tumblr'), - 'twitch-stream': ('twitch.tv/', 'TwitchStream'), - 'twitter': ('twitter.com/i/cards/tfw/v1/', 'TwitterCard'), - 'ustream-channel': ('ustream.tv/embed/', 'Ustream'), - 'vimeo': ('vimeo.com/', 'Vimeo'), - 'vine': ('vine.co/v/', 'Vine'), - 'youtube-list': ('youtube.com/embed/%s?list=%s', 'YoutubePlaylist'), - 'youtube-video': ('youtube.com/embed/', 'Youtube'), - } - - def _real_extract(self, url): - video_type, video_id = self._match_valid_url(url).groups() - - provider = self._PROVIDER_MAP.get(video_type) - if provider: - video_id = urllib.parse.unquote(video_id) - if video_type == 'tumblr-post': - video_id, blog = video_id.split('-', 1) - result_url = provider[0] % (blog, video_id) - elif video_type == 'youtube-list': - video_id, playlist_id = video_id.split('/') - result_url = provider[0] % (video_id, playlist_id) - else: - result_url = provider[0] + video_id - return self.url_result('http://' + result_url, provider[1]) - - if video_type == 'kinjavideo': - data = self._download_json( - 'https://kinja.com/api/core/video/views/videoById', - video_id, query={'videoId': video_id})['data'] - title = data['title'] - - formats = [] - for k in ('signedPlaylist', 'streaming'): - m3u8_url = data.get(k + 'Url') - if m3u8_url: - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - - thumbnail = None - poster = data.get('poster') or {} - poster_id = poster.get('id') - if poster_id: - thumbnail = 'https://i.kinja-img.com/gawker-media/image/upload/{}.{}'.format(poster_id, poster.get('format') or 'jpg') - - return { - 'id': video_id, - 'title': title, - 'description': strip_or_none(data.get('description')), - 'formats': formats, - 'tags': data.get('tags'), - 'timestamp': int_or_none(try_get( - data, lambda x: x['postInfo']['publishTimeMillis']), 1000), - 'thumbnail': thumbnail, - 'uploader': data.get('network'), - } - else: - video_data = self._download_json( - 'https://api.vmh.univision.com/metadata/v1/content/' + video_id, - video_id)['videoMetadata'] - iptc = video_data['photoVideoMetadataIPTC'] - title = iptc['title']['en'] - fmg = video_data.get('photoVideoMetadata_fmg') or {} - tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com' - data = self._download_json( - tvss_domain + '/api/v3/video-auth/url-signature-tokens', - video_id, query={'mcpids': video_id})['data'][0] - formats = [] - - rendition_url = data.get('renditionUrl') - if rendition_url: - formats = self._extract_m3u8_formats( - rendition_url, video_id, 'mp4', - 'm3u8_native', m3u8_id='hls', fatal=False) - - fallback_rendition_url = data.get('fallbackRenditionUrl') - if fallback_rendition_url: - formats.append({ - 'format_id': 'fallback', - 'tbr': int_or_none(self._search_regex( - r'_(\d+)\.mp4', fallback_rendition_url, - 'bitrate', default=None)), - 'url': fallback_rendition_url, - }) - - return { - 'id': video_id, - 'title': title, - 'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], str), - 'uploader': fmg.get('network'), - 'duration': int_or_none(iptc.get('fileDuration')), - 'formats': formats, - 'description': try_get(iptc, lambda x: x['description']['en'], str), - 'timestamp': parse_iso8601(iptc.get('dateReleased')), - } diff --git a/yt_dlp/extractor/koo.py b/yt_dlp/extractor/koo.py deleted file mode 100644 index 6ec5b59f9a..0000000000 --- a/yt_dlp/extractor/koo.py +++ /dev/null @@ -1,115 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - clean_html, - try_get, -) - - -class KooIE(InfoExtractor): - _WORKING = False - _VALID_URL = r'https?://(?:www\.)?kooapp\.com/koo/[^/]+/(?P[^/&#$?]+)' - _TESTS = [{ # Test for video in the comments - 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/946c4189-bc2d-4524-b95b-43f641e2adde', - 'info_dict': { - 'id': '946c4189-bc2d-4524-b95b-43f641e2adde', - 'ext': 'mp4', - 'title': 'test for video in comment', - 'description': 'md5:daa77dc214add4da8b6ea7d2226776e7', - 'timestamp': 1632215195, - 'uploader_id': 'ytdlpTestAccount', - 'uploader': 'yt-dlpTestAccount', - 'duration': 7000, - 'upload_date': '20210921', - }, - 'params': {'skip_download': True}, - }, { # Test for koo with long title - 'url': 'https://www.kooapp.com/koo/laxman_kumarDBFEC/33decbf7-5e1e-4bb8-bfd7-04744a064361', - 'info_dict': { - 'id': '33decbf7-5e1e-4bb8-bfd7-04744a064361', - 'ext': 'mp4', - 'title': 'md5:47a71c2337295330c5a19a8af1bbf450', - 'description': 'md5:06a6a84e9321499486dab541693d8425', - 'timestamp': 1632106884, - 'uploader_id': 'laxman_kumarDBFEC', - 'uploader': 'Laxman Kumar 🇮🇳', - 'duration': 46000, - 'upload_date': '20210920', - }, - 'params': {'skip_download': True}, - }, { # Test for audio - 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/a2a9c88e-ce4b-4d2d-952f-d06361c5b602', - 'info_dict': { - 'id': 'a2a9c88e-ce4b-4d2d-952f-d06361c5b602', - 'ext': 'mp4', - 'title': 'Test for audio', - 'description': 'md5:ecb9a2b6a5d34b736cecb53788cb11e8', - 'timestamp': 1632211634, - 'uploader_id': 'ytdlpTestAccount', - 'uploader': 'yt-dlpTestAccount', - 'duration': 214000, - 'upload_date': '20210921', - }, - 'params': {'skip_download': True}, - }, { # Test for video - 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/a3e56c53-c1ed-4ac9-ac02-ed1630e6b1d1', - 'info_dict': { - 'id': 'a3e56c53-c1ed-4ac9-ac02-ed1630e6b1d1', - 'ext': 'mp4', - 'title': 'Test for video', - 'description': 'md5:7afc4eb839074ddeb2beea5dd6fe9500', - 'timestamp': 1632211468, - 'uploader_id': 'ytdlpTestAccount', - 'uploader': 'yt-dlpTestAccount', - 'duration': 14000, - 'upload_date': '20210921', - }, - 'params': {'skip_download': True}, - }, { # Test for link - 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/01bf5b94-81a5-4d8e-a387-5f732022e15a', - 'skip': 'No video/audio found at the provided url.', - 'info_dict': { - 'id': '01bf5b94-81a5-4d8e-a387-5f732022e15a', - 'title': 'Test for link', - 'ext': 'none', - }, - }, { # Test for images - 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/dc05d9cd-a61d-45fd-bb07-e8019d8ca8cb', - 'skip': 'No video/audio found at the provided url.', - 'info_dict': { - 'id': 'dc05d9cd-a61d-45fd-bb07-e8019d8ca8cb', - 'title': 'Test for images', - 'ext': 'none', - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - data_json = self._download_json( - f'https://www.kooapp.com/apiV1/ku/{video_id}?limit=20&offset=0&showSimilarKoos=true', video_id)['parentContent'] - item_json = next(content['items'][0] for content in data_json - if try_get(content, lambda x: x['items'][0]['id']) == video_id) - media_json = item_json['mediaMap'] - formats = [] - - mp4_url = media_json.get('videoMp4') - video_m3u8_url = media_json.get('videoHls') - if mp4_url: - formats.append({ - 'url': mp4_url, - 'ext': 'mp4', - }) - if video_m3u8_url: - formats.extend(self._extract_m3u8_formats(video_m3u8_url, video_id, fatal=False, ext='mp4')) - if not formats: - self.raise_no_formats('No video/audio found at the provided url.', expected=True) - - return { - 'id': video_id, - 'title': clean_html(item_json.get('title')), - 'description': f'{clean_html(item_json.get("title"))}\n\n{clean_html(item_json.get("enTransliteration"))}', - 'timestamp': item_json.get('createdAt'), - 'uploader_id': item_json.get('handle'), - 'uploader': item_json.get('name'), - 'duration': media_json.get('duration'), - 'formats': formats, - } diff --git a/yt_dlp/extractor/leeco.py b/yt_dlp/extractor/leeco.py index 58baa3fea7..efec5d7382 100644 --- a/yt_dlp/extractor/leeco.py +++ b/yt_dlp/extractor/leeco.py @@ -1,9 +1,6 @@ -import base64 import datetime as dt -import hashlib import re import time -import urllib.parse from .common import InfoExtractor from ..compat import compat_ord @@ -14,8 +11,6 @@ from ..utils import ( int_or_none, orderedSet, parse_iso8601, - str_or_none, - url_basename, urshift, ) @@ -248,114 +243,3 @@ class LePlaylistIE(InfoExtractor): return self.playlist_result(entries, playlist_id, playlist_title=title, playlist_description=description) - - -class LetvCloudIE(InfoExtractor): - # Most of *.letv.com is changed to *.le.com on 2016/01/02 - # but yuntv.letv.com is kept, so also keep the extractor name - IE_DESC = '乐视云' - _VALID_URL = r'https?://yuntv\.letv\.com/bcloud.html\?.+' - - _TESTS = [{ - 'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=467623dedf', - 'md5': '26450599afd64c513bc77030ad15db44', - 'info_dict': { - 'id': 'p7jnfw5hw9_467623dedf', - 'ext': 'mp4', - 'title': 'Video p7jnfw5hw9_467623dedf', - }, - }, { - 'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=ec93197892&pu=2c7cd40209&auto_play=1&gpcflag=1&width=640&height=360', - 'md5': 'e03d9cc8d9c13191e1caf277e42dbd31', - 'info_dict': { - 'id': 'p7jnfw5hw9_ec93197892', - 'ext': 'mp4', - 'title': 'Video p7jnfw5hw9_ec93197892', - }, - }, { - 'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=187060b6fd', - 'md5': 'cb988699a776b22d4a41b9d43acfb3ac', - 'info_dict': { - 'id': 'p7jnfw5hw9_187060b6fd', - 'ext': 'mp4', - 'title': 'Video p7jnfw5hw9_187060b6fd', - }, - }] - - @staticmethod - def sign_data(obj): - if obj['cf'] == 'flash': - salt = '2f9d6924b33a165a6d8b5d3d42f4f987' - items = ['cf', 'format', 'ran', 'uu', 'ver', 'vu'] - elif obj['cf'] == 'html5': - salt = 'fbeh5player12c43eccf2bec3300344' - items = ['cf', 'ran', 'uu', 'bver', 'vu'] - input_data = ''.join([item + obj[item] for item in items]) + salt - obj['sign'] = hashlib.md5(input_data.encode()).hexdigest() - - def _get_formats(self, cf, uu, vu, media_id): - def get_play_json(cf, timestamp): - data = { - 'cf': cf, - 'ver': '2.2', - 'bver': 'firefox44.0', - 'format': 'json', - 'uu': uu, - 'vu': vu, - 'ran': str(timestamp), - } - self.sign_data(data) - return self._download_json( - 'http://api.letvcloud.com/gpc.php?' + urllib.parse.urlencode(data), - media_id, f'Downloading playJson data for type {cf}') - - play_json = get_play_json(cf, time.time()) - # The server time may be different from local time - if play_json.get('code') == 10071: - play_json = get_play_json(cf, play_json['timestamp']) - - if not play_json.get('data'): - if play_json.get('message'): - raise ExtractorError('Letv cloud said: {}'.format(play_json['message']), expected=True) - elif play_json.get('code'): - raise ExtractorError('Letv cloud returned error %d' % play_json['code'], expected=True) - else: - raise ExtractorError('Letv cloud returned an unknown error') - - def b64decode(s): - return base64.b64decode(s).decode('utf-8') - - formats = [] - for media in play_json['data']['video_info']['media'].values(): - play_url = media['play_url'] - url = b64decode(play_url['main_url']) - decoded_url = b64decode(url_basename(url)) - formats.append({ - 'url': url, - 'ext': determine_ext(decoded_url), - 'format_id': str_or_none(play_url.get('vtype')), - 'format_note': str_or_none(play_url.get('definition')), - 'width': int_or_none(play_url.get('vwidth')), - 'height': int_or_none(play_url.get('vheight')), - }) - - return formats - - def _real_extract(self, url): - uu_mobj = re.search(r'uu=([\w]+)', url) - vu_mobj = re.search(r'vu=([\w]+)', url) - - if not uu_mobj or not vu_mobj: - raise ExtractorError(f'Invalid URL: {url}', expected=True) - - uu = uu_mobj.group(1) - vu = vu_mobj.group(1) - media_id = uu + '_' + vu - - formats = self._get_formats('flash', uu, vu, media_id) + self._get_formats('html5', uu, vu, media_id) - - return { - 'id': media_id, - 'title': f'Video {media_id}', - 'formats': formats, - } diff --git a/yt_dlp/extractor/livestream.py b/yt_dlp/extractor/livestream.py deleted file mode 100644 index 7f7947ee7b..0000000000 --- a/yt_dlp/extractor/livestream.py +++ /dev/null @@ -1,386 +0,0 @@ -import itertools -import re -import urllib.parse - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - find_xpath_attr, - float_or_none, - int_or_none, - orderedSet, - parse_iso8601, - traverse_obj, - update_url_query, - xpath_attr, - xpath_text, - xpath_with_ns, -) - - -class LivestreamIE(InfoExtractor): - IE_NAME = 'livestream' - _VALID_URL = r'''(?x) - https?://(?:new\.)?livestream\.com/ - (?:accounts/(?P\d+)|(?P[^/]+)) - (?:/events/(?P\d+)|/(?P[^/]+))? - (?:/videos/(?P\d+))? - ''' - _EMBED_REGEX = [r']+src="(?Phttps?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"'] - - _TESTS = [{ - 'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370', - 'md5': '7876c5f5dc3e711b6b73acce4aac1527', - 'info_dict': { - 'id': '4719370', - 'ext': 'mp4', - 'title': 'Live from Webster Hall NYC', - 'timestamp': 1350008072, - 'upload_date': '20121012', - 'duration': 5968.0, - 'like_count': int, - 'view_count': int, - 'comment_count': int, - 'thumbnail': r're:^http://.*\.jpg$', - }, - }, { - 'url': 'https://livestream.com/coheedandcambria/websterhall', - 'info_dict': { - 'id': '1585861', - 'title': 'Live From Webster Hall', - }, - 'playlist_mincount': 1, - }, { - 'url': 'https://livestream.com/dayananda/events/7954027', - 'info_dict': { - 'title': 'Live from Mevo', - 'id': '7954027', - }, - 'playlist_mincount': 4, - }, { - 'url': 'https://livestream.com/accounts/82', - 'info_dict': { - 'id': '253978', - 'view_count': int, - 'title': 'trsr', - 'comment_count': int, - 'like_count': int, - 'upload_date': '20120306', - 'timestamp': 1331042383, - 'thumbnail': 'http://img.new.livestream.com/videos/0000000000000372/cacbeed6-fb68-4b5e-ad9c-e148124e68a9_640x427.jpg', - 'duration': 15.332, - 'ext': 'mp4', - }, - }, { - 'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640', - 'only_matching': True, - }, { - 'url': 'http://livestream.com/bsww/concacafbeachsoccercampeonato2015', - 'only_matching': True, - }] - _API_URL_TEMPLATE = 'http://livestream.com/api/accounts/%s/events/%s' - - def _parse_smil_formats_and_subtitles( - self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): - base_ele = find_xpath_attr( - smil, self._xpath_ns('.//meta', namespace), 'name', 'httpBase') - base = base_ele.get('content') if base_ele is not None else 'http://livestreamvod-f.akamaihd.net/' - - formats = [] - video_nodes = smil.findall(self._xpath_ns('.//video', namespace)) - - for vn in video_nodes: - tbr = int_or_none(vn.attrib.get('system-bitrate'), 1000) - furl = ( - update_url_query(urllib.parse.urljoin(base, vn.attrib['src']), { - 'v': '3.0.3', - 'fp': 'WIN% 14,0,0,145', - })) - if 'clipBegin' in vn.attrib: - furl += '&ssek=' + vn.attrib['clipBegin'] - formats.append({ - 'url': furl, - 'format_id': 'smil_%d' % tbr, - 'ext': 'flv', - 'tbr': tbr, - 'preference': -1000, # Strictly inferior than all other formats? - }) - return formats, {} - - def _extract_video_info(self, video_data): - video_id = str(video_data['id']) - - FORMAT_KEYS = ( - ('sd', 'progressive_url'), - ('hd', 'progressive_url_hd'), - ) - - formats = [] - for format_id, key in FORMAT_KEYS: - video_url = video_data.get(key) - if video_url: - ext = determine_ext(video_url) - if ext == 'm3u8': - continue - bitrate = int_or_none(self._search_regex( - rf'(\d+)\.{ext}', video_url, 'bitrate', default=None)) - formats.append({ - 'url': video_url, - 'format_id': format_id, - 'tbr': bitrate, - 'ext': ext, - }) - - smil_url = video_data.get('smil_url') - if smil_url: - formats.extend(self._extract_smil_formats(smil_url, video_id, fatal=False)) - - m3u8_url = video_data.get('m3u8_url') - if m3u8_url: - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - - f4m_url = video_data.get('f4m_url') - if f4m_url: - formats.extend(self._extract_f4m_formats( - f4m_url, video_id, f4m_id='hds', fatal=False)) - - comments = [{ - 'author_id': comment.get('author_id'), - 'author': comment.get('author', {}).get('full_name'), - 'id': comment.get('id'), - 'text': comment['text'], - 'timestamp': parse_iso8601(comment.get('created_at')), - } for comment in video_data.get('comments', {}).get('data', [])] - - return { - 'id': video_id, - 'formats': formats, - 'title': video_data['caption'], - 'description': video_data.get('description'), - 'thumbnail': video_data.get('thumbnail_url'), - 'duration': float_or_none(video_data.get('duration'), 1000), - 'timestamp': parse_iso8601(video_data.get('publish_at')), - 'like_count': video_data.get('likes', {}).get('total'), - 'comment_count': video_data.get('comments', {}).get('total'), - 'view_count': video_data.get('views'), - 'comments': comments, - } - - def _extract_stream_info(self, stream_info): - broadcast_id = str(stream_info['broadcast_id']) - is_live = stream_info.get('is_live') - - formats = [] - smil_url = stream_info.get('play_url') - if smil_url: - formats.extend(self._extract_smil_formats(smil_url, broadcast_id)) - - m3u8_url = stream_info.get('m3u8_url') - if m3u8_url: - formats.extend(self._extract_m3u8_formats( - m3u8_url, broadcast_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - - rtsp_url = stream_info.get('rtsp_url') - if rtsp_url: - formats.append({ - 'url': rtsp_url, - 'format_id': 'rtsp', - }) - - return { - 'id': broadcast_id, - 'formats': formats, - 'title': stream_info['stream_title'], - 'thumbnail': stream_info.get('thumbnail_url'), - 'is_live': is_live, - } - - def _generate_event_playlist(self, event_data): - event_id = str(event_data['id']) - account_id = str(event_data['owner_account_id']) - feed_root_url = self._API_URL_TEMPLATE % (account_id, event_id) + '/feed.json' - - stream_info = event_data.get('stream_info') - if stream_info: - return self._extract_stream_info(stream_info) - - last_video = None - for i in itertools.count(1): - if last_video is None: - info_url = feed_root_url - else: - info_url = f'{feed_root_url}?&id={last_video}&newer=-1&type=video' - videos_info = self._download_json( - info_url, event_id, f'Downloading page {i}')['data'] - videos_info = [v['data'] for v in videos_info if v['type'] == 'video'] - if not videos_info: - break - for v in videos_info: - v_id = str(v['id']) - yield self.url_result( - f'http://livestream.com/accounts/{account_id}/events/{event_id}/videos/{v_id}', - LivestreamIE, v_id, v.get('caption')) - last_video = videos_info[-1]['id'] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - event = mobj.group('event_id') or mobj.group('event_name') - account = mobj.group('account_id') or mobj.group('account_name') - api_url = f'http://livestream.com/api/accounts/{account}' - - if video_id: - video_data = self._download_json( - f'{api_url}/events/{event}/videos/{video_id}', video_id) - return self._extract_video_info(video_data) - elif event: - event_data = self._download_json(f'{api_url}/events/{event}', None) - return self.playlist_result( - self._generate_event_playlist(event_data), str(event_data['id']), event_data['full_name']) - - account_data = self._download_json(api_url, None) - items = traverse_obj(account_data, (('upcoming_events', 'past_events'), 'data', ...)) - return self.playlist_result( - itertools.chain.from_iterable(map(self._generate_event_playlist, items)), - account_data.get('id'), account_data.get('full_name')) - - -# The original version of Livestream uses a different system -class LivestreamOriginalIE(InfoExtractor): - IE_NAME = 'livestream:original' - _VALID_URL = r'''(?x)https?://original\.livestream\.com/ - (?P[^/\?#]+)(?:/(?Pvideo|folder) - (?:(?:\?.*?Id=|/)(?P.*?)(&|$))?)? - ''' - _TESTS = [{ - 'url': 'http://original.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', - 'info_dict': { - 'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', - 'ext': 'mp4', - 'title': 'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital', - 'duration': 771.301, - 'view_count': int, - }, - }, { - 'url': 'https://original.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3', - 'info_dict': { - 'id': 'a07bf706-d0e4-4e75-a747-b021d84f2fd3', - }, - 'playlist_mincount': 4, - }, { - # live stream - 'url': 'http://original.livestream.com/znsbahamas', - 'only_matching': True, - }] - - def _extract_video_info(self, user, video_id): - api_url = f'http://x{user}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={video_id}' - info = self._download_xml(api_url, video_id) - - item = info.find('channel').find('item') - title = xpath_text(item, 'title') - media_ns = {'media': 'http://search.yahoo.com/mrss'} - thumbnail_url = xpath_attr( - item, xpath_with_ns('media:thumbnail', media_ns), 'url') - duration = float_or_none(xpath_attr( - item, xpath_with_ns('media:content', media_ns), 'duration')) - ls_ns = {'ls': 'http://api.channel.livestream.com/2.0'} - view_count = int_or_none(xpath_text( - item, xpath_with_ns('ls:viewsCount', ls_ns))) - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail_url, - 'duration': duration, - 'view_count': view_count, - } - - def _extract_video_formats(self, video_data, video_id): - formats = [] - - progressive_url = video_data.get('progressiveUrl') - if progressive_url: - formats.append({ - 'url': progressive_url, - 'format_id': 'http', - }) - - m3u8_url = video_data.get('httpUrl') - if m3u8_url: - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - - rtsp_url = video_data.get('rtspUrl') - if rtsp_url: - formats.append({ - 'url': rtsp_url, - 'format_id': 'rtsp', - }) - - return formats - - def _extract_folder(self, url, folder_id): - webpage = self._download_webpage(url, folder_id) - paths = orderedSet(re.findall( - r'''(?x)(?: - \s*.+)' - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - return self.url_result(self._og_search_url(webpage)) diff --git a/yt_dlp/extractor/lynda.py b/yt_dlp/extractor/lynda.py deleted file mode 100644 index f7cf9261a8..0000000000 --- a/yt_dlp/extractor/lynda.py +++ /dev/null @@ -1,325 +0,0 @@ -import itertools -import re -import urllib.parse - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - int_or_none, - urlencode_postdata, -) - - -class LyndaBaseIE(InfoExtractor): - _SIGNIN_URL = 'https://www.lynda.com/signin/lynda' - _PASSWORD_URL = 'https://www.lynda.com/signin/password' - _USER_URL = 'https://www.lynda.com/signin/user' - _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.' - _NETRC_MACHINE = 'lynda' - - @staticmethod - def _check_error(json_string, key_or_keys): - keys = [key_or_keys] if isinstance(key_or_keys, str) else key_or_keys - for key in keys: - error = json_string.get(key) - if error: - raise ExtractorError(f'Unable to login: {error}', expected=True) - - def _perform_login_step(self, form_html, fallback_action_url, extra_form_data, note, referrer_url): - action_url = self._search_regex( - r']+action=(["\'])(?P.+?)\1', form_html, - 'post url', default=fallback_action_url, group='url') - - if not action_url.startswith('http'): - action_url = urllib.parse.urljoin(self._SIGNIN_URL, action_url) - - form_data = self._hidden_inputs(form_html) - form_data.update(extra_form_data) - - response = self._download_json( - action_url, None, note, - data=urlencode_postdata(form_data), - headers={ - 'Referer': referrer_url, - 'X-Requested-With': 'XMLHttpRequest', - }, expected_status=(418, 500)) - - self._check_error(response, ('email', 'password', 'ErrorMessage')) - - return response, action_url - - def _perform_login(self, username, password): - # Step 1: download signin page - signin_page = self._download_webpage( - self._SIGNIN_URL, None, 'Downloading signin page') - - # Already logged in - if any(re.search(p, signin_page) for p in ( - r'isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')): - return - - # Step 2: submit email - signin_form = self._search_regex( - r'(?s)(]+data-form-name=["\']signin["\'][^>]*>.+?)', - signin_page, 'signin form') - signin_page, signin_url = self._login_step( - signin_form, self._PASSWORD_URL, {'email': username}, - 'Submitting email', self._SIGNIN_URL) - - # Step 3: submit password - password_form = signin_page['body'] - self._login_step( - password_form, self._USER_URL, {'email': username, 'password': password}, - 'Submitting password', signin_url) - - -class LyndaIE(LyndaBaseIE): - IE_NAME = 'lynda' - IE_DESC = 'lynda.com videos' - _VALID_URL = r'''(?x) - https?:// - (?:www\.)?(?:lynda\.com|educourse\.ga)/ - (?: - (?:[^/]+/){2,3}(?P\d+)| - player/embed - )/ - (?P\d+) - ''' - - _TIMECODE_REGEX = r'\[(?P\d+:\d+:\d+[\.,]\d+)\]' - - _TESTS = [{ - 'url': 'https://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', - # md5 is unstable - 'info_dict': { - 'id': '114408', - 'ext': 'mp4', - 'title': 'Using the exercise files', - 'duration': 68, - }, - }, { - 'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0', - 'only_matching': True, - }, { - 'url': 'https://educourse.ga/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', - 'only_matching': True, - }, { - 'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Willkommen-Grundlagen-guten-Gestaltung/393570/393572-4.html', - 'only_matching': True, - }, { - # Status="NotFound", Message="Transcript not found" - 'url': 'https://www.lynda.com/ASP-NET-tutorials/What-you-should-know/5034180/2811512-4.html', - 'only_matching': True, - }] - - def _raise_unavailable(self, video_id): - self.raise_login_required( - f'Video {video_id} is only available for members') - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - course_id = mobj.group('course_id') - - query = { - 'videoId': video_id, - 'type': 'video', - } - - video = self._download_json( - 'https://www.lynda.com/ajax/player', video_id, - 'Downloading video JSON', fatal=False, query=query) - - # Fallback scenario - if not video: - query['courseId'] = course_id - - play = self._download_json( - f'https://www.lynda.com/ajax/course/{course_id}/{video_id}/play', video_id, 'Downloading play JSON') - - if not play: - self._raise_unavailable(video_id) - - formats = [] - for formats_dict in play: - urls = formats_dict.get('urls') - if not isinstance(urls, dict): - continue - cdn = formats_dict.get('name') - for format_id, format_url in urls.items(): - if not format_url: - continue - formats.append({ - 'url': format_url, - 'format_id': f'{cdn}-{format_id}' if cdn else format_id, - 'height': int_or_none(format_id), - }) - - conviva = self._download_json( - 'https://www.lynda.com/ajax/player/conviva', video_id, - 'Downloading conviva JSON', query=query) - - return { - 'id': video_id, - 'title': conviva['VideoTitle'], - 'description': conviva.get('VideoDescription'), - 'release_year': int_or_none(conviva.get('ReleaseYear')), - 'duration': int_or_none(conviva.get('Duration')), - 'creator': conviva.get('Author'), - 'formats': formats, - } - - if 'Status' in video: - raise ExtractorError( - 'lynda returned error: {}'.format(video['Message']), expected=True) - - if video.get('HasAccess') is False: - self._raise_unavailable(video_id) - - video_id = str(video.get('ID') or video_id) - duration = int_or_none(video.get('DurationInSeconds')) - title = video['Title'] - - formats = [] - - fmts = video.get('Formats') - if fmts: - formats.extend([{ - 'url': f['Url'], - 'ext': f.get('Extension'), - 'width': int_or_none(f.get('Width')), - 'height': int_or_none(f.get('Height')), - 'filesize': int_or_none(f.get('FileSize')), - 'format_id': str(f.get('Resolution')) if f.get('Resolution') else None, - } for f in fmts if f.get('Url')]) - - prioritized_streams = video.get('PrioritizedStreams') - if prioritized_streams: - for prioritized_stream_id, prioritized_stream in prioritized_streams.items(): - formats.extend([{ - 'url': video_url, - 'height': int_or_none(format_id), - 'format_id': f'{prioritized_stream_id}-{format_id}', - } for format_id, video_url in prioritized_stream.items()]) - - self._check_formats(formats, video_id) - - subtitles = self.extract_subtitles(video_id) - - return { - 'id': video_id, - 'title': title, - 'duration': duration, - 'subtitles': subtitles, - 'formats': formats, - } - - def _fix_subtitles(self, subs): - srt = '' - seq_counter = 0 - for seq_current, seq_next in itertools.pairwise(subs): - m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode']) - if m_current is None: - continue - m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode']) - if m_next is None: - continue - appear_time = m_current.group('timecode') - disappear_time = m_next.group('timecode') - text = seq_current['Caption'].strip() - if text: - seq_counter += 1 - srt += f'{seq_counter}\r\n{appear_time} --> {disappear_time}\r\n{text}\r\n\r\n' - if srt: - return srt - - def _get_subtitles(self, video_id): - url = f'https://www.lynda.com/ajax/player?videoId={video_id}&type=transcript' - subs = self._download_webpage( - url, video_id, 'Downloading subtitles JSON', fatal=False) - if not subs or 'Status="NotFound"' in subs: - return {} - subs = self._parse_json(subs, video_id, fatal=False) - if not subs: - return {} - fixed_subs = self._fix_subtitles(subs) - if fixed_subs: - return {'en': [{'ext': 'srt', 'data': fixed_subs}]} - return {} - - -class LyndaCourseIE(LyndaBaseIE): - IE_NAME = 'lynda:course' - IE_DESC = 'lynda.com online courses' - - # Course link equals to welcome/introduction video link of same course - # We will recognize it as course link - _VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P(?:[^/]+/){2,3}(?P\d+))-2\.html' - - _TESTS = [{ - 'url': 'https://www.lynda.com/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html', - 'only_matching': True, - }, { - 'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - course_path = mobj.group('coursepath') - course_id = mobj.group('courseid') - - item_template = f'https://www.lynda.com/{course_path}/%s-4.html' - - course = self._download_json( - f'https://www.lynda.com/ajax/player?courseId={course_id}&type=course', - course_id, 'Downloading course JSON', fatal=False) - - if not course: - webpage = self._download_webpage(url, course_id) - entries = [ - self.url_result( - item_template % video_id, ie=LyndaIE.ie_key(), - video_id=video_id) - for video_id in re.findall( - r'data-video-id=["\'](\d+)', webpage)] - return self.playlist_result( - entries, course_id, - self._og_search_title(webpage, fatal=False), - self._og_search_description(webpage)) - - if course.get('Status') == 'NotFound': - raise ExtractorError( - f'Course {course_id} does not exist', expected=True) - - unaccessible_videos = 0 - entries = [] - - # Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided - # by single video API anymore - - for chapter in course['Chapters']: - for video in chapter.get('Videos', []): - if video.get('HasAccess') is False: - unaccessible_videos += 1 - continue - video_id = video.get('ID') - if video_id: - entries.append({ - '_type': 'url_transparent', - 'url': item_template % video_id, - 'ie_key': LyndaIE.ie_key(), - 'chapter': chapter.get('Title'), - 'chapter_number': int_or_none(chapter.get('ChapterIndex')), - 'chapter_id': str(chapter.get('ID')), - }) - - if unaccessible_videos > 0: - self.report_warning( - f'{unaccessible_videos} videos are only available for members (or paid members) ' - f'and will not be downloaded. {self._ACCOUNT_CREDENTIALS_HINT}') - - course_title = course.get('Title') - course_description = course.get('Description') - - return self.playlist_result(entries, course_id, course_title, course_description) diff --git a/yt_dlp/extractor/microsoftstream.py b/yt_dlp/extractor/microsoftstream.py deleted file mode 100644 index b138810838..0000000000 --- a/yt_dlp/extractor/microsoftstream.py +++ /dev/null @@ -1,121 +0,0 @@ -import base64 - -from .common import InfoExtractor -from ..utils import ( - merge_dicts, - parse_duration, - parse_iso8601, - parse_resolution, - try_get, - url_basename, -) - - -class MicrosoftStreamIE(InfoExtractor): - IE_NAME = 'microsoftstream' - IE_DESC = 'Microsoft Stream' - _VALID_URL = r'https?://(?:web|www|msit)\.microsoftstream\.com/video/(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' - - _TESTS = [{ - 'url': 'https://web.microsoftstream.com/video/6e51d928-4f46-4f1c-b141-369925e37b62?list=user&userId=f5491e02-e8fe-4e34-b67c-ec2e79a6ecc0', - 'only_matching': True, - }, { - 'url': 'https://msit.microsoftstream.com/video/b60f5987-aabd-4e1c-a42f-c559d138f2ca', - 'only_matching': True, - }] - - def _get_all_subtitles(self, api_url, video_id, headers): - subtitles = {} - automatic_captions = {} - text_tracks = self._download_json( - f'{api_url}/videos/{video_id}/texttracks', video_id, - note='Downloading subtitles JSON', fatal=False, headers=headers, - query={'api-version': '1.4-private'}).get('value') or [] - for track in text_tracks: - if not track.get('language') or not track.get('url'): - continue - sub_dict = automatic_captions if track.get('autoGenerated') else subtitles - sub_dict.setdefault(track['language'], []).append({ - 'ext': 'vtt', - 'url': track.get('url'), - }) - return { - 'subtitles': subtitles, - 'automatic_captions': automatic_captions, - } - - def extract_all_subtitles(self, *args, **kwargs): - if (self.get_param('writesubtitles', False) - or self.get_param('writeautomaticsub', False) - or self.get_param('listsubtitles')): - return self._get_all_subtitles(*args, **kwargs) - return {} - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - if 'Microsoft Stream' not in webpage: - self.raise_login_required(method='cookies') - - access_token = self._html_search_regex(r'"AccessToken":"(.+?)"', webpage, 'access token') - api_url = self._html_search_regex(r'"ApiGatewayUri":"(.+?)"', webpage, 'api url') - - headers = {'Authorization': f'Bearer {access_token}'} - - video_data = self._download_json( - f'{api_url}/videos/{video_id}', video_id, - headers=headers, query={ - '$expand': 'creator,tokens,status,liveEvent,extensions', - 'api-version': '1.4-private', - }) - video_id = video_data.get('id') or video_id - language = video_data.get('language') - - thumbnails = [] - for thumbnail_id in ('extraSmall', 'small', 'medium', 'large'): - thumbnail_url = try_get(video_data, lambda x: x['posterImage'][thumbnail_id]['url'], str) - if not thumbnail_url: - continue - thumb = { - 'id': thumbnail_id, - 'url': thumbnail_url, - } - thumb_name = url_basename(thumbnail_url) - thumb_name = str(base64.b64decode(thumb_name + '=' * (-len(thumb_name) % 4))) - thumb.update(parse_resolution(thumb_name)) - thumbnails.append(thumb) - - formats = [] - for playlist in video_data['playbackUrls']: - if playlist['mimeType'] == 'application/vnd.apple.mpegurl': - formats.extend(self._extract_m3u8_formats( - playlist['playbackUrl'], video_id, - ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls', - fatal=False, headers=headers)) - elif playlist['mimeType'] == 'application/dash+xml': - formats.extend(self._extract_mpd_formats( - playlist['playbackUrl'], video_id, mpd_id='dash', - fatal=False, headers=headers)) - elif playlist['mimeType'] == 'application/vnd.ms-sstr+xml': - formats.extend(self._extract_ism_formats( - playlist['playbackUrl'], video_id, ism_id='mss', - fatal=False, headers=headers)) - formats = [merge_dicts(f, {'language': language}) for f in formats] - - return { - 'id': video_id, - 'title': video_data['name'], - 'description': video_data.get('description'), - 'uploader': try_get(video_data, lambda x: x['creator']['name'], str), - 'uploader_id': try_get(video_data, (lambda x: x['creator']['mail'], - lambda x: x['creator']['id']), str), - 'thumbnails': thumbnails, - **self.extract_all_subtitles(api_url, video_id, headers), - 'timestamp': parse_iso8601(video_data.get('created')), - 'duration': parse_duration(try_get(video_data, lambda x: x['media']['duration'])), - 'webpage_url': f'https://web.microsoftstream.com/video/{video_id}', - 'view_count': try_get(video_data, lambda x: x['metrics']['views'], int), - 'like_count': try_get(video_data, lambda x: x['metrics']['likes'], int), - 'comment_count': try_get(video_data, lambda x: x['metrics']['comments'], int), - 'formats': formats, - } diff --git a/yt_dlp/extractor/minoto.py b/yt_dlp/extractor/minoto.py deleted file mode 100644 index 69832560d0..0000000000 --- a/yt_dlp/extractor/minoto.py +++ /dev/null @@ -1,45 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_codecs, -) - - -class MinotoIE(InfoExtractor): - _VALID_URL = r'(?:minoto:|https?://(?:play|iframe|embed)\.minoto-video\.com/(?P[0-9]+)/)(?P[a-zA-Z0-9]+)' - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - player_id = mobj.group('player_id') or '1' - video_id = mobj.group('id') - video_data = self._download_json(f'http://play.minoto-video.com/{player_id}/{video_id}.js', video_id) - video_metadata = video_data['video-metadata'] - formats = [] - for fmt in video_data['video-files']: - fmt_url = fmt.get('url') - if not fmt_url: - continue - container = fmt.get('container') - if container == 'hls': - formats.extend(self._extract_m3u8_formats(fmt_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) - else: - fmt_profile = fmt.get('profile') or {} - formats.append({ - 'format_id': fmt_profile.get('name-short'), - 'format_note': fmt_profile.get('name'), - 'url': fmt_url, - 'container': container, - 'tbr': int_or_none(fmt.get('bitrate')), - 'filesize': int_or_none(fmt.get('filesize')), - 'width': int_or_none(fmt.get('width')), - 'height': int_or_none(fmt.get('height')), - **parse_codecs(fmt.get('codecs')), - }) - - return { - 'id': video_id, - 'title': video_metadata['title'], - 'description': video_metadata.get('description'), - 'thumbnail': video_metadata.get('video-poster', {}).get('url'), - 'formats': formats, - } diff --git a/yt_dlp/extractor/mojvideo.py b/yt_dlp/extractor/mojvideo.py deleted file mode 100644 index 6bc362a53c..0000000000 --- a/yt_dlp/extractor/mojvideo.py +++ /dev/null @@ -1,52 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - parse_duration, -) - - -class MojvideoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?mojvideo\.com/video-(?P[^/]+)/(?P[a-f0-9]+)' - _TEST = { - 'url': 'http://www.mojvideo.com/video-v-avtu-pred-mano-rdecelaska-alfi-nipic/3d1ed4497707730b2906', - 'md5': 'f7fd662cc8ce2be107b0d4f2c0483ae7', - 'info_dict': { - 'id': '3d1ed4497707730b2906', - 'display_id': 'v-avtu-pred-mano-rdecelaska-alfi-nipic', - 'ext': 'mp4', - 'title': 'V avtu pred mano rdečelaska - Alfi Nipič', - 'thumbnail': r're:^http://.*\.jpg$', - 'duration': 242, - }, - } - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') - - # XML is malformed - playerapi = self._download_webpage( - f'http://www.mojvideo.com/playerapi.php?v={video_id}&t=1', display_id) - - if 'true' in playerapi: - error_desc = self._html_search_regex( - r'([^<]*)', playerapi, 'error description', fatal=False) - raise ExtractorError(f'{self.IE_NAME} said: {error_desc}', expected=True) - - title = self._html_extract_title(playerapi) - video_url = self._html_search_regex( - r'([^<]+)', playerapi, 'video URL') - thumbnail = self._html_search_regex( - r'([^<]+)', playerapi, 'thumbnail', fatal=False) - duration = parse_duration(self._html_search_regex( - r'([^<]+)', playerapi, 'duration', fatal=False)) - - return { - 'id': video_id, - 'display_id': display_id, - 'url': video_url, - 'title': title, - 'thumbnail': thumbnail, - 'duration': duration, - } diff --git a/yt_dlp/extractor/motherless.py b/yt_dlp/extractor/motherless.py deleted file mode 100644 index e236ec3db8..0000000000 --- a/yt_dlp/extractor/motherless.py +++ /dev/null @@ -1,289 +0,0 @@ -import datetime as dt -import re -import urllib.parse - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - OnDemandPagedList, - remove_end, - str_to_int, - unified_strdate, -) - - -class MotherlessIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/|G[VIG]?[A-F0-9]+/)?(?P[A-F0-9]+)' - _TESTS = [{ - 'url': 'http://motherless.com/EE97006', - 'md5': 'cb5e7438f7a3c4e886b7bccc1292a3bc', - 'info_dict': { - 'id': 'EE97006', - 'ext': 'mp4', - 'title': 'Dogging blond Brit getting glazed (comp)', - 'categories': ['UK', 'slag', 'whore', 'dogging', 'cunt', 'cumhound', 'big tits', 'Pearl Necklace'], - 'upload_date': '20230519', - 'uploader_id': 'deathbird', - 'thumbnail': r're:https?://.*\.jpg', - 'age_limit': 18, - 'comment_count': int, - 'view_count': int, - 'like_count': int, - }, - 'params': { - # Incomplete cert chains - 'nocheckcertificate': True, - }, - }, { - 'url': 'http://motherless.com/532291B', - 'md5': 'bc59a6b47d1f958e61fbd38a4d31b131', - 'info_dict': { - 'id': '532291B', - 'ext': 'mp4', - 'title': 'Amazing girl playing the omegle game, PERFECT!', - 'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen', - 'game', 'hairy'], - 'upload_date': '20140622', - 'uploader_id': 'Sulivana7x', - 'thumbnail': r're:https?://.*\.jpg', - 'age_limit': 18, - }, - 'skip': '404', - }, { - 'url': 'http://motherless.com/g/cosplay/633979F', - 'expected_exception': 'ExtractorError', - }, { - 'url': 'http://motherless.com/8B4BBC1', - 'info_dict': { - 'id': '8B4BBC1', - 'ext': 'mp4', - 'title': 'VIDEO00441.mp4', - 'categories': [], - 'upload_date': '20160214', - 'uploader_id': 'NMWildGirl', - 'thumbnail': r're:https?://.*\.jpg', - 'age_limit': 18, - 'like_count': int, - 'comment_count': int, - 'view_count': int, - }, - 'params': { - 'nocheckcertificate': True, - }, - }, { - # see https://motherless.com/videos/recent for recent videos with - # uploaded date in "ago" format - 'url': 'https://motherless.com/3C3E2CF', - 'info_dict': { - 'id': '3C3E2CF', - 'ext': 'mp4', - 'title': 'a/ Hot Teens', - 'categories': list, - 'upload_date': '20210104', - 'uploader_id': 'anonymous', - 'thumbnail': r're:https?://.*\.jpg', - 'age_limit': 18, - 'like_count': int, - 'comment_count': int, - 'view_count': int, - }, - 'params': { - 'nocheckcertificate': True, - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - if any(p in webpage for p in ( - '404 - MOTHERLESS.COM<', - ">The page you're looking for cannot be found.<", - '<div class="error-page', - )): - raise ExtractorError(f'Video {video_id} does not exist', expected=True) - - if '>The content you are trying to view is for friends only.' in webpage: - raise ExtractorError(f'Video {video_id} is for friends only', expected=True) - - title = self._html_search_regex( - (r'(?s)<div[^>]+\bclass=["\']media-meta-title[^>]+>(.+?)</div>', - r'id="view-upload-title">\s+([^<]+)<'), webpage, 'title') - video_url = (self._html_search_regex( - (r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', - r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'), - webpage, 'video URL', default=None, group='url') - or f'http://cdn4.videos.motherlessmedia.com/videos/{video_id}.mp4?fs=opencloud') - age_limit = self._rta_search(webpage) - view_count = str_to_int(self._html_search_regex( - (r'>([\d,.]+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'), - webpage, 'view count', fatal=False)) - like_count = str_to_int(self._html_search_regex( - (r'>([\d,.]+)\s+Favorites<', - r'<strong>Favorited</strong>\s+([^<]+)<'), - webpage, 'like count', fatal=False)) - - upload_date = unified_strdate(self._search_regex( - r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<', webpage, - 'upload date', default=None)) - if not upload_date: - uploaded_ago = self._search_regex( - r'>\s*(\d+[hd])\s+[aA]go\b', webpage, 'uploaded ago', - default=None) - if uploaded_ago: - delta = int(uploaded_ago[:-1]) - _AGO_UNITS = { - 'h': 'hours', - 'd': 'days', - } - kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta} - upload_date = (dt.datetime.now(dt.timezone.utc) - dt.timedelta(**kwargs)).strftime('%Y%m%d') - - comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage)) - uploader_id = self._html_search_regex( - (r'''<span\b[^>]+\bclass\s*=\s*["']username\b[^>]*>([^<]+)</span>''', - r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)'''), - webpage, 'uploader_id', fatal=False) - categories = self._html_search_meta('keywords', webpage, default='') - categories = [cat.strip() for cat in categories.split(',') if cat.strip()] - - return { - 'id': video_id, - 'title': title, - 'upload_date': upload_date, - 'uploader_id': uploader_id, - 'thumbnail': self._og_search_thumbnail(webpage), - 'categories': categories, - 'view_count': view_count, - 'like_count': like_count, - 'comment_count': comment_count, - 'age_limit': age_limit, - 'url': video_url, - } - - -class MotherlessPaginatedIE(InfoExtractor): - _EXTRA_QUERY = {} - _PAGE_SIZE = 60 - - def _correct_path(self, url, item_id): - raise NotImplementedError('This method must be implemented by subclasses') - - def _correct_title(self, title, /): - return title.partition(' - Videos')[0] if title else None - - def _extract_entries(self, webpage, base): - for mobj in re.finditer(r'href="[^"]*(?P<href>/[A-F0-9]+)"\s+title="(?P<title>[^"]+)', - webpage): - video_url = urllib.parse.urljoin(base, mobj.group('href')) - video_id = MotherlessIE.get_temp_id(video_url) - - if video_id: - yield self.url_result(video_url, MotherlessIE, video_id, mobj.group('title')) - - def _real_extract(self, url): - item_id = self._match_id(url) - real_url = self._correct_path(url, item_id) - webpage = self._download_webpage(real_url, item_id, 'Downloading page 1') - - def get_page(idx): - page = idx + 1 - current_page = webpage if not idx else self._download_webpage( - real_url, item_id, note=f'Downloading page {page}', query={'page': page, **self._EXTRA_QUERY}) - yield from self._extract_entries(current_page, real_url) - - return self.playlist_result( - OnDemandPagedList(get_page, self._PAGE_SIZE), item_id, - self._correct_title(self._html_extract_title(webpage))) - - -class MotherlessGroupIE(MotherlessPaginatedIE): - _VALID_URL = r'https?://(?:www\.)?motherless\.com/g[vifm]?/(?P<id>[a-z0-9_]+)/?(?:$|[#?])' - _TESTS = [{ - 'url': 'http://motherless.com/gv/movie_scenes', - 'info_dict': { - 'id': 'movie_scenes', - 'title': 'Movie Scenes', - }, - 'playlist_mincount': 540, - }, { - 'url': 'http://motherless.com/g/sex_must_be_funny', - 'info_dict': { - 'id': 'sex_must_be_funny', - 'title': 'Sex must be funny', - }, - 'playlist_count': 0, - }, { - 'url': 'https://motherless.com/gv/beautiful_cock', - 'info_dict': { - 'id': 'beautiful_cock', - 'title': 'Beautiful Cock', - }, - 'playlist_mincount': 371, - }] - - def _correct_path(self, url, item_id): - return urllib.parse.urljoin(url, f'/gv/{item_id}') - - -class MotherlessGalleryIE(MotherlessPaginatedIE): - _VALID_URL = r'https?://(?:www\.)?motherless\.com/G[VIG]?(?P<id>[A-F0-9]+)/?(?:$|[#?])' - _TESTS = [{ - 'url': 'https://motherless.com/GV338999F', - 'info_dict': { - 'id': '338999F', - 'title': 'Random', - }, - 'playlist_mincount': 100, - }, { - 'url': 'https://motherless.com/GVABD6213', - 'info_dict': { - 'id': 'ABD6213', - 'title': 'Cuties', - }, - 'playlist_mincount': 1, - }, { - 'url': 'https://motherless.com/GVBCF7622', - 'info_dict': { - 'id': 'BCF7622', - 'title': 'Vintage', - }, - 'playlist_count': 0, - }, { - 'url': 'https://motherless.com/G035DE2F', - 'info_dict': { - 'id': '035DE2F', - 'title': 'General', - }, - 'playlist_mincount': 234, - }] - - def _correct_title(self, title, /): - return remove_end(title, ' | MOTHERLESS.COM ™') - - def _correct_path(self, url, item_id): - return urllib.parse.urljoin(url, f'/GV{item_id}') - - -class MotherlessUploaderIE(MotherlessPaginatedIE): - _VALID_URL = r'https?://(?:www\.)?motherless\.com/u/(?P<id>\w+)/?(?:$|[?#])' - _TESTS = [{ - 'url': 'https://motherless.com/u/Mrgo4hrs2023', - 'info_dict': { - 'id': 'Mrgo4hrs2023', - 'title': "Mrgo4hrs2023's Uploads", - }, - 'playlist_mincount': 32, - }, { - 'url': 'https://motherless.com/u/Happy_couple?t=v', - 'info_dict': { - 'id': 'Happy_couple', - 'title': "Happy_couple's Uploads", - }, - 'playlist_mincount': 8, - }] - - _EXTRA_QUERY = {'t': 'v'} - - def _correct_path(self, url, item_id): - return urllib.parse.urljoin(url, f'/u/{item_id}?t=v') diff --git a/yt_dlp/extractor/moview.py b/yt_dlp/extractor/moview.py deleted file mode 100644 index 560154e1a1..0000000000 --- a/yt_dlp/extractor/moview.py +++ /dev/null @@ -1,43 +0,0 @@ -from .jixie import JixieBaseIE - - -class MoviewPlayIE(JixieBaseIE): - _VALID_URL = r'https?://www\.moview\.id/play/\d+/(?P<id>[\w-]+)' - _TESTS = [ - { - # drm hls, only use direct link - 'url': 'https://www.moview.id/play/174/Candy-Monster', - 'info_dict': { - 'id': '146182', - 'ext': 'mp4', - 'display_id': 'Candy-Monster', - 'uploader_id': 'Mo165qXUUf', - 'duration': 528.2, - 'title': 'Candy Monster', - 'description': 'Mengapa Candy Monster ingin mengambil permen Chloe?', - 'thumbnail': 'https://video.jixie.media/1034/146182/146182_1280x720.jpg', - }, - }, { - # non-drm hls - 'url': 'https://www.moview.id/play/75/Paris-Van-Java-Episode-16', - 'info_dict': { - 'id': '28210', - 'ext': 'mp4', - 'duration': 2595.666667, - 'display_id': 'Paris-Van-Java-Episode-16', - 'uploader_id': 'Mo165qXUUf', - 'thumbnail': 'https://video.jixie.media/1003/28210/28210_1280x720.jpg', - 'description': 'md5:2a5e18d98eef9b39d7895029cac96c63', - 'title': 'Paris Van Java Episode 16', - }, - }, - ] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - video_id = self._search_regex( - r'video_id\s*=\s*"(?P<video_id>[^"]+)', webpage, 'video_id') - - return self._extract_data_from_jixie_id(display_id, video_id, webpage) diff --git a/yt_dlp/extractor/moviezine.py b/yt_dlp/extractor/moviezine.py deleted file mode 100644 index 331a56234b..0000000000 --- a/yt_dlp/extractor/moviezine.py +++ /dev/null @@ -1,38 +0,0 @@ -from .common import InfoExtractor - - -class MoviezineIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?moviezine\.se/video/(?P<id>[^?#]+)' - - _TEST = { - 'url': 'http://www.moviezine.se/video/205866', - 'info_dict': { - 'id': '205866', - 'ext': 'mp4', - 'title': 'Oculus - Trailer 1', - 'description': 'md5:40cc6790fc81d931850ca9249b40e8a4', - 'thumbnail': r're:http://.*\.jpg', - }, - } - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - - webpage = self._download_webpage(url, video_id) - jsplayer = self._download_webpage(f'http://www.moviezine.se/api/player.js?video={video_id}', video_id, 'Downloading js api player') - - formats = [{ - 'format_id': 'sd', - 'url': self._html_search_regex(r'file: "(.+?)",', jsplayer, 'file'), - 'quality': 0, - 'ext': 'mp4', - }] - - return { - 'id': video_id, - 'title': self._search_regex(r'title: "(.+?)",', jsplayer, 'title'), - 'thumbnail': self._search_regex(r'image: "(.+?)",', jsplayer, 'image'), - 'formats': formats, - 'description': self._og_search_description(webpage), - } diff --git a/yt_dlp/extractor/musicdex.py b/yt_dlp/extractor/musicdex.py deleted file mode 100644 index 5ca390ef9a..0000000000 --- a/yt_dlp/extractor/musicdex.py +++ /dev/null @@ -1,174 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - date_from_str, - format_field, - try_get, - unified_strdate, -) - - -class MusicdexBaseIE(InfoExtractor): - def _return_info(self, track_json, album_json, video_id): - return { - 'id': str(video_id), - 'title': track_json.get('name'), - 'track': track_json.get('name'), - 'description': track_json.get('description'), - 'track_number': track_json.get('number'), - 'url': format_field(track_json, 'url', 'https://www.musicdex.org/%s'), - 'duration': track_json.get('duration'), - 'genres': [genre.get('name') for genre in track_json.get('genres') or []], - 'like_count': track_json.get('likes_count'), - 'view_count': track_json.get('plays'), - 'artists': [artist.get('name') for artist in track_json.get('artists') or []], - 'album_artists': [artist.get('name') for artist in album_json.get('artists') or []], - 'thumbnail': format_field(album_json, 'image', 'https://www.musicdex.org/%s'), - 'album': album_json.get('name'), - 'release_year': try_get(album_json, lambda x: date_from_str(unified_strdate(x['release_date'])).year), - 'extractor_key': MusicdexSongIE.ie_key(), - 'extractor': 'MusicdexSong', - } - - -class MusicdexSongIE(MusicdexBaseIE): - _VALID_URL = r'https?://(?:www\.)?musicdex\.org/track/(?P<id>\d+)' - - _TESTS = [{ - 'url': 'https://www.musicdex.org/track/306/dual-existence', - 'info_dict': { - 'id': '306', - 'ext': 'mp3', - 'title': 'dual existence', - 'description': '#NIPPONSEI @ IRC.RIZON.NET', - 'track': 'dual existence', - 'track_number': 1, - 'duration': 266000, - 'genres': ['Anime'], - 'like_count': int, - 'view_count': int, - 'artists': ['fripSide'], - 'album_artists': ['fripSide'], - 'thumbnail': 'https://www.musicdex.org/storage/album/9iDIam1DHTVqUG4UclFIEq1WAFGXfPW4y0TtZa91.png', - 'album': 'To Aru Kagaku no Railgun T OP2 Single - dual existence', - 'release_year': 2020, - }, - 'params': {'skip_download': True}, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - data_json = self._download_json( - f'https://www.musicdex.org/secure/tracks/{video_id}?defaultRelations=true', video_id)['track'] - return self._return_info(data_json, data_json.get('album') or {}, video_id) - - -class MusicdexAlbumIE(MusicdexBaseIE): - _VALID_URL = r'https?://(?:www\.)?musicdex\.org/album/(?P<id>\d+)' - - _TESTS = [{ - 'url': 'https://www.musicdex.org/album/56/tenmon-and-eiichiro-yanagi-minori/ef-a-tale-of-memories-original-soundtrack-2-fortissimo', - 'playlist_mincount': 28, - 'info_dict': { - 'id': '56', - 'genres': ['OST'], - 'view_count': int, - 'artists': ['TENMON & Eiichiro Yanagi / minori'], - 'title': 'ef - a tale of memories Original Soundtrack 2 ~fortissimo~', - 'release_year': 2008, - 'thumbnail': 'https://www.musicdex.org/storage/album/2rSHkyYBYfB7sbvElpEyTMcUn6toY7AohOgJuDlE.jpg', - }, - }] - - def _real_extract(self, url): - playlist_id = self._match_id(url) - data_json = self._download_json( - f'https://www.musicdex.org/secure/albums/{playlist_id}?defaultRelations=true', playlist_id)['album'] - entries = [self._return_info(track, data_json, track['id']) - for track in data_json.get('tracks') or [] if track.get('id')] - - return { - '_type': 'playlist', - 'id': playlist_id, - 'title': data_json.get('name'), - 'description': data_json.get('description'), - 'genres': [genre.get('name') for genre in data_json.get('genres') or []], - 'view_count': data_json.get('plays'), - 'artists': [artist.get('name') for artist in data_json.get('artists') or []], - 'thumbnail': format_field(data_json, 'image', 'https://www.musicdex.org/%s'), - 'release_year': try_get(data_json, lambda x: date_from_str(unified_strdate(x['release_date'])).year), - 'entries': entries, - } - - -class MusicdexPageIE(MusicdexBaseIE): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor - def _entries(self, playlist_id): - next_page_url = self._API_URL % playlist_id - while next_page_url: - data_json = self._download_json(next_page_url, playlist_id)['pagination'] - yield from data_json.get('data') or [] - next_page_url = data_json.get('next_page_url') - - -class MusicdexArtistIE(MusicdexPageIE): - _VALID_URL = r'https?://(?:www\.)?musicdex\.org/artist/(?P<id>\d+)' - _API_URL = 'https://www.musicdex.org/secure/artists/%s/albums?page=1' - - _TESTS = [{ - 'url': 'https://www.musicdex.org/artist/11/fripside', - 'playlist_mincount': 28, - 'info_dict': { - 'id': '11', - 'view_count': int, - 'title': 'fripSide', - 'thumbnail': 'https://www.musicdex.org/storage/artist/ZmOz0lN2vsweegB660em3xWffCjLPmTQHqJls5Xx.jpg', - }, - }] - - def _real_extract(self, url): - playlist_id = self._match_id(url) - data_json = self._download_json(f'https://www.musicdex.org/secure/artists/{playlist_id}', playlist_id)['artist'] - entries = [] - for album in self._entries(playlist_id): - entries.extend(self._return_info(track, album, track['id']) for track in album.get('tracks') or [] if track.get('id')) - - return { - '_type': 'playlist', - 'id': playlist_id, - 'title': data_json.get('name'), - 'view_count': data_json.get('plays'), - 'thumbnail': format_field(data_json, 'image_small', 'https://www.musicdex.org/%s'), - 'entries': entries, - } - - -class MusicdexPlaylistIE(MusicdexPageIE): - _VALID_URL = r'https?://(?:www\.)?musicdex\.org/playlist/(?P<id>\d+)' - _API_URL = 'https://www.musicdex.org/secure/playlists/%s/tracks?perPage=10000&page=1' - - _TESTS = [{ - 'url': 'https://www.musicdex.org/playlist/9/test', - 'playlist_mincount': 73, - 'info_dict': { - 'id': '9', - 'view_count': int, - 'title': 'Test', - 'thumbnail': 'https://www.musicdex.org/storage/album/jXATI79f0IbQ2sgsKYOYRCW3zRwF3XsfHhzITCuJ.jpg', - 'description': 'Test 123 123 21312 32121321321321312', - }, - }] - - def _real_extract(self, url): - playlist_id = self._match_id(url) - data_json = self._download_json(f'https://www.musicdex.org/secure/playlists/{playlist_id}', playlist_id)['playlist'] - entries = [self._return_info(track, track.get('album') or {}, track['id']) - for track in self._entries(playlist_id) or [] if track.get('id')] - - return { - '_type': 'playlist', - 'id': playlist_id, - 'title': data_json.get('name'), - 'description': data_json.get('description'), - 'view_count': data_json.get('plays'), - 'thumbnail': format_field(data_json, 'image', 'https://www.musicdex.org/%s'), - 'entries': entries, - } diff --git a/yt_dlp/extractor/nationalgeographic.py b/yt_dlp/extractor/nationalgeographic.py index 43f84a9527..d06680ccff 100644 --- a/yt_dlp/extractor/nationalgeographic.py +++ b/yt_dlp/extractor/nationalgeographic.py @@ -1,64 +1,4 @@ -from .common import InfoExtractor from .fox import FOXIE -from ..utils import ( - smuggle_url, - url_basename, -) - - -class NationalGeographicVideoIE(InfoExtractor): - IE_NAME = 'natgeo:video' - _VALID_URL = r'https?://video\.nationalgeographic\.com/.*?' - - _TESTS = [ - { - 'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo', - 'md5': '730855d559abbad6b42c2be1fa584917', - 'info_dict': { - 'id': '0000014b-70a1-dd8c-af7f-f7b559330001', - 'ext': 'mp4', - 'title': 'Mating Crabs Busted by Sharks', - 'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3', - 'timestamp': 1423523799, - 'upload_date': '20150209', - 'uploader': 'NAGS', - }, - 'add_ie': ['ThePlatform'], - 'skip': 'Redirects to main page', - }, - { - 'url': 'http://video.nationalgeographic.com/wild/when-sharks-attack/the-real-jaws', - 'md5': '6a3105eb448c070503b3105fb9b320b5', - 'info_dict': { - 'id': 'ngc-I0IauNSWznb_UV008GxSbwY35BZvgi2e', - 'ext': 'mp4', - 'title': 'The Real Jaws', - 'description': 'md5:8d3e09d9d53a85cd397b4b21b2c77be6', - 'timestamp': 1433772632, - 'upload_date': '20150608', - 'uploader': 'NAGS', - }, - 'add_ie': ['ThePlatform'], - 'skip': 'Redirects to main page', - }, - ] - - def _real_extract(self, url): - name = url_basename(url) - - webpage = self._download_webpage(url, name) - guid = self._search_regex( - r'id="(?:videoPlayer|player-container)"[^>]+data-guid="([^"]+)"', - webpage, 'guid') - - return { - '_type': 'url_transparent', - 'ie_key': 'ThePlatform', - 'url': smuggle_url( - f'http://link.theplatform.com/s/ngs/media/guid/2423130747/{guid}?mbr=true', - {'force_smil_url': True}), - 'id': guid, - } class NationalGeographicTVIE(FOXIE): # XXX: Do not subclass from concrete IE diff --git a/yt_dlp/extractor/naver.py b/yt_dlp/extractor/naver.py index fa7b56ca75..ffb4da2622 100644 --- a/yt_dlp/extractor/naver.py +++ b/yt_dlp/extractor/naver.py @@ -13,11 +13,9 @@ from ..utils import ( dict_get, int_or_none, join_nonempty, - merge_dicts, parse_iso8601, traverse_obj, try_get, - unified_timestamp, update_url_query, url_or_none, ) @@ -284,142 +282,3 @@ class NaverLiveIE(NaverBaseIE): }), get_all=False), 'is_live': True, } - - -class NaverNowIE(NaverBaseIE): - IE_NAME = 'navernow' - _VALID_URL = r'https?://now\.naver\.com/s/now\.(?P<id>\w+)' - _API_URL = 'https://apis.naver.com/now_web/oldnow_web/v4' - _TESTS = [{ - 'url': 'https://now.naver.com/s/now.4759?shareReplayId=26331132#replay=', - 'md5': 'e05854162c21c221481de16b2944a0bc', - 'info_dict': { - 'id': '4759-26331132', - 'title': '아이키X노제\r\n💖꽁냥꽁냥💖(1)', - 'ext': 'mp4', - 'thumbnail': r're:^https?://.*\.jpg', - 'timestamp': 1650369600, - 'upload_date': '20220419', - 'uploader_id': 'now', - 'view_count': int, - 'uploader_url': 'https://now.naver.com/show/4759', - 'uploader': '아이키의 떰즈업', - }, - 'params': { - 'noplaylist': True, - }, - }, { - 'url': 'https://now.naver.com/s/now.4759?shareHightlight=26601461#highlight=', - 'md5': '9f6118e398aa0f22b2152f554ea7851b', - 'info_dict': { - 'id': '4759-26601461', - 'title': '아이키: 나 리정한테 흔들렸어,,, 질투 폭발하는 노제 여보😾 [아이키의 떰즈업]ㅣ네이버 NOW.', - 'ext': 'mp4', - 'thumbnail': r're:^https?://.*\.jpg', - 'upload_date': '20220504', - 'timestamp': 1651648311, - 'uploader_id': 'now', - 'view_count': int, - 'uploader_url': 'https://now.naver.com/show/4759', - 'uploader': '아이키의 떰즈업', - }, - 'params': { - 'noplaylist': True, - }, - }, { - 'url': 'https://now.naver.com/s/now.4759', - 'info_dict': { - 'id': '4759', - 'title': '아이키의 떰즈업', - }, - 'playlist_mincount': 101, - }, { - 'url': 'https://now.naver.com/s/now.4759?shareReplayId=26331132#replay', - 'info_dict': { - 'id': '4759', - 'title': '아이키의 떰즈업', - }, - 'playlist_mincount': 101, - }, { - 'url': 'https://now.naver.com/s/now.4759?shareHightlight=26601461#highlight=', - 'info_dict': { - 'id': '4759', - 'title': '아이키의 떰즈업', - }, - 'playlist_mincount': 101, - }, { - 'url': 'https://now.naver.com/s/now.kihyunplay?shareReplayId=30573291#replay', - 'only_matching': True, - }] - - def _extract_replay(self, show_id, replay_id): - vod_info = self._download_json(f'{self._API_URL}/shows/now.{show_id}/vod/{replay_id}', replay_id) - in_key = self._download_json(f'{self._API_URL}/shows/now.{show_id}/vod/{replay_id}/inkey', replay_id)['inKey'] - return merge_dicts({ - 'id': f'{show_id}-{replay_id}', - 'title': traverse_obj(vod_info, ('episode', 'title')), - 'timestamp': unified_timestamp(traverse_obj(vod_info, ('episode', 'start_time'))), - 'thumbnail': vod_info.get('thumbnail_image_url'), - }, self._extract_video_info(replay_id, vod_info['video_id'], in_key)) - - def _extract_show_replays(self, show_id): - page_size = 15 - page = 1 - while True: - show_vod_info = self._download_json( - f'{self._API_URL}/vod-shows/now.{show_id}', show_id, - query={'page': page, 'page_size': page_size}, - note=f'Downloading JSON vod list for show {show_id} - page {page}', - )['response']['result'] - for v in show_vod_info.get('vod_list') or []: - yield self._extract_replay(show_id, v['id']) - - if len(show_vod_info.get('vod_list') or []) < page_size: - break - page += 1 - - def _extract_show_highlights(self, show_id, highlight_id=None): - page_size = 10 - page = 1 - while True: - highlights_videos = self._download_json( - f'{self._API_URL}/shows/now.{show_id}/highlights/videos/', show_id, - query={'page': page, 'page_size': page_size}, - note=f'Downloading JSON highlights for show {show_id} - page {page}') - - for highlight in highlights_videos.get('results') or []: - if highlight_id and highlight.get('clip_no') != int(highlight_id): - continue - yield merge_dicts({ - 'id': f'{show_id}-{highlight["clip_no"]}', - 'title': highlight.get('title'), - 'timestamp': unified_timestamp(highlight.get('regdate')), - 'thumbnail': highlight.get('thumbnail_url'), - }, self._extract_video_info(highlight['clip_no'], highlight['video_id'], highlight['video_inkey'])) - - if len(highlights_videos.get('results') or []) < page_size: - break - page += 1 - - def _extract_highlight(self, show_id, highlight_id): - try: - return next(self._extract_show_highlights(show_id, highlight_id)) - except StopIteration: - raise ExtractorError(f'Unable to find highlight {highlight_id} for show {show_id}') - - def _real_extract(self, url): - show_id = self._match_id(url) - qs = urllib.parse.parse_qs(urllib.parse.urlparse(url).query) - - if not self._yes_playlist(show_id, qs.get('shareHightlight')): - return self._extract_highlight(show_id, qs['shareHightlight'][0]) - elif not self._yes_playlist(show_id, qs.get('shareReplayId')): - return self._extract_replay(show_id, qs['shareReplayId'][0]) - - show_info = self._download_json( - f'{self._API_URL}/shows/now.{show_id}/', show_id, - note=f'Downloading JSON vod list for show {show_id}') - - return self.playlist_result( - itertools.chain(self._extract_show_replays(show_id), self._extract_show_highlights(show_id)), - show_id, show_info.get('title')) diff --git a/yt_dlp/extractor/nerdcubed.py b/yt_dlp/extractor/nerdcubed.py deleted file mode 100644 index 5f5607a20b..0000000000 --- a/yt_dlp/extractor/nerdcubed.py +++ /dev/null @@ -1,38 +0,0 @@ -from .common import InfoExtractor -from .youtube import YoutubeIE -from ..utils import parse_iso8601, url_or_none -from ..utils.traversal import traverse_obj - - -class NerdCubedFeedIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/?(?:$|[#?])' - _TEST = { - 'url': 'http://www.nerdcubed.co.uk/', - 'info_dict': { - 'id': 'nerdcubed-feed', - 'title': 'nerdcubed.co.uk feed', - }, - 'playlist_mincount': 5500, - } - - def _extract_video(self, feed_entry): - return self.url_result( - f'https://www.youtube.com/watch?v={feed_entry["id"]}', YoutubeIE, - **traverse_obj(feed_entry, { - 'id': ('id', {str}), - 'title': ('title', {str}), - 'description': ('description', {str}), - 'timestamp': ('publishedAt', {parse_iso8601}), - 'channel': ('source', 'name', {str}), - 'channel_id': ('source', 'id', {str}), - 'channel_url': ('source', 'url', {str}), - 'thumbnail': ('thumbnail', 'source', {url_or_none}), - }), url_transparent=True) - - def _real_extract(self, url): - video_id = 'nerdcubed-feed' - feed = self._download_json('https://www.nerdcubed.co.uk/_/cdn/videos.json', video_id) - - return self.playlist_result( - map(self._extract_video, traverse_obj(feed, ('videos', lambda _, v: v['id']))), - video_id, 'nerdcubed.co.uk feed') diff --git a/yt_dlp/extractor/netverse.py b/yt_dlp/extractor/netverse.py deleted file mode 100644 index 2ddec5c0ae..0000000000 --- a/yt_dlp/extractor/netverse.py +++ /dev/null @@ -1,281 +0,0 @@ -import itertools - -from .common import InfoExtractor, SearchInfoExtractor -from .dailymotion import DailymotionIE -from ..utils import smuggle_url, traverse_obj - - -class NetverseBaseIE(InfoExtractor): - _ENDPOINTS = { - 'watch': 'watchvideo', - 'video': 'watchvideo', - 'webseries': 'webseries', - 'season': 'webseason_videos', - } - - def _call_api(self, slug, endpoint, query={}, season_id='', display_id=None): - return self._download_json( - f'https://api.netverse.id/medias/api/v2/{self._ENDPOINTS[endpoint]}/{slug}/{season_id}', - display_id or slug, query=query) - - def _get_comments(self, video_id): - last_page_number = None - for i in itertools.count(1): - comment_data = self._download_json( - f'https://api.netverse.id/mediadetails/api/v3/videos/comments/{video_id}', - video_id, data=b'', fatal=False, query={'page': i}, - note=f'Downloading JSON comment metadata page {i}') or {} - yield from traverse_obj(comment_data, ('response', 'comments', 'data', ..., { - 'id': '_id', - 'text': 'comment', - 'author_id': 'customer_id', - 'author': ('customer', 'name'), - 'author_thumbnail': ('customer', 'profile_picture'), - })) - - if not last_page_number: - last_page_number = traverse_obj(comment_data, ('response', 'comments', 'last_page')) - if i >= (last_page_number or 0): - break - - -class NetverseIE(NetverseBaseIE): - _VALID_URL = r'https?://(?:\w+\.)?netverse\.id/(?P<type>watch|video)/(?P<display_id>[^/?#&]+)' - _TESTS = [{ - # Watch video - 'url': 'https://www.netverse.id/watch/waktu-indonesia-bercanda-edisi-spesial-lebaran-2016', - 'info_dict': { - 'id': 'k4yhqUwINAGtmHx3NkL', - 'title': 'Waktu Indonesia Bercanda - Edisi Spesial Lebaran 2016', - 'ext': 'mp4', - 'season': 'Season 2016', - 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', - 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080', - 'episode_number': 22, - 'episode': 'Episode 22', - 'uploader_id': 'x2ir3vq', - 'age_limit': 0, - 'tags': [], - 'view_count': int, - 'display_id': 'waktu-indonesia-bercanda-edisi-spesial-lebaran-2016', - 'duration': 2990, - 'upload_date': '20210722', - 'timestamp': 1626919804, - 'like_count': int, - 'uploader': 'Net Prime', - }, - }, { - # series - 'url': 'https://www.netverse.id/watch/jadoo-seorang-model', - 'info_dict': { - 'id': 'x88izwc', - 'title': 'Jadoo Seorang Model', - 'ext': 'mp4', - 'season': 'Season 2', - 'description': 'md5:8a74f70812cca267e19ee0635f0af835', - 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080', - 'episode_number': 2, - 'episode': 'Episode 2', - 'view_count': int, - 'like_count': int, - 'display_id': 'jadoo-seorang-model', - 'uploader_id': 'x2ir3vq', - 'duration': 635, - 'timestamp': 1646372927, - 'tags': ['PG069497-hellojadooseason2eps2'], - 'upload_date': '20220304', - 'uploader': 'Net Prime', - 'age_limit': 0, - }, - 'skip': 'video get Geo-blocked for some country', - }, { - # non www host - 'url': 'https://netverse.id/watch/tetangga-baru', - 'info_dict': { - 'id': 'k4CNGz7V0HJ7vfwZbXy', - 'ext': 'mp4', - 'title': 'Tetangga Baru', - 'season': 'Season 1', - 'description': 'md5:23fcf70e97d461d3029d25d59b2ccfb9', - 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080', - 'episode_number': 1, - 'episode': 'Episode 1', - 'timestamp': 1624538169, - 'view_count': int, - 'upload_date': '20210624', - 'age_limit': 0, - 'uploader_id': 'x2ir3vq', - 'like_count': int, - 'uploader': 'Net Prime', - 'tags': ['PG008534', 'tetangga', 'Baru'], - 'display_id': 'tetangga-baru', - 'duration': 1406, - }, - }, { - # /video url - 'url': 'https://www.netverse.id/video/pg067482-hellojadoo-season1', - 'title': 'Namaku Choi Jadoo', - 'info_dict': { - 'id': 'x887jzz', - 'ext': 'mp4', - 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080', - 'season': 'Season 1', - 'episode_number': 1, - 'description': 'md5:d4f627b3e7a3f9acdc55f6cdd5ea41d5', - 'title': 'Namaku Choi Jadoo', - 'episode': 'Episode 1', - 'age_limit': 0, - 'like_count': int, - 'view_count': int, - 'tags': ['PG067482', 'PG067482-HelloJadoo-season1'], - 'duration': 780, - 'display_id': 'pg067482-hellojadoo-season1', - 'uploader_id': 'x2ir3vq', - 'uploader': 'Net Prime', - 'timestamp': 1645764984, - 'upload_date': '20220225', - }, - 'skip': 'This video get Geo-blocked for some country', - }, { - # video with comments - 'url': 'https://netverse.id/video/episode-1-season-2016-ok-food', - 'info_dict': { - 'id': 'k6hetBPiQMljSxxvAy7', - 'ext': 'mp4', - 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080', - 'display_id': 'episode-1-season-2016-ok-food', - 'like_count': int, - 'description': '', - 'duration': 1471, - 'age_limit': 0, - 'timestamp': 1642405848, - 'episode_number': 1, - 'season': 'Season 2016', - 'uploader_id': 'x2ir3vq', - 'title': 'Episode 1 - Season 2016 - Ok Food', - 'upload_date': '20220117', - 'tags': [], - 'view_count': int, - 'episode': 'Episode 1', - 'uploader': 'Net Prime', - 'comment_count': int, - }, - 'params': { - 'getcomments': True, - }, - }, { - # video with multiple page comment - 'url': 'https://netverse.id/video/match-island-eps-1-fix', - 'info_dict': { - 'id': 'x8aznjc', - 'ext': 'mp4', - 'like_count': int, - 'tags': ['Match-Island', 'Pd00111'], - 'display_id': 'match-island-eps-1-fix', - 'view_count': int, - 'episode': 'Episode 1', - 'uploader': 'Net Prime', - 'duration': 4070, - 'timestamp': 1653068165, - 'description': 'md5:e9cf3b480ad18e9c33b999e3494f223f', - 'age_limit': 0, - 'title': 'Welcome To Match Island', - 'upload_date': '20220520', - 'episode_number': 1, - 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080', - 'uploader_id': 'x2ir3vq', - 'season': 'Season 1', - 'comment_count': int, - }, - 'params': { - 'getcomments': True, - }, - }] - - def _real_extract(self, url): - display_id, sites_type = self._match_valid_url(url).group('display_id', 'type') - program_json = self._call_api(display_id, sites_type) - videos = program_json['response']['videos'] - - return { - '_type': 'url_transparent', - 'ie_key': DailymotionIE.ie_key(), - 'url': smuggle_url(videos['dailymotion_url'], {'query': {'embedder': 'https://www.netverse.id'}}), - 'display_id': display_id, - 'title': videos.get('title'), - 'season': videos.get('season_name'), - 'thumbnail': traverse_obj(videos, ('program_detail', 'thumbnail_image')), - 'description': traverse_obj(videos, ('program_detail', 'description')), - 'episode_number': videos.get('episode_order'), - '__post_extractor': self.extract_comments(display_id), - } - - -class NetversePlaylistIE(NetverseBaseIE): - _VALID_URL = r'https?://(?:\w+\.)?netverse\.id/(?P<type>webseries)/(?P<display_id>[^/?#&]+)' - _TESTS = [{ - # multiple season - 'url': 'https://netverse.id/webseries/tetangga-masa-gitu', - 'info_dict': { - 'id': 'tetangga-masa-gitu', - 'title': 'Tetangga Masa Gitu', - }, - 'playlist_count': 519, - }, { - # single season - 'url': 'https://netverse.id/webseries/kelas-internasional', - 'info_dict': { - 'id': 'kelas-internasional', - 'title': 'Kelas Internasional', - }, - 'playlist_count': 203, - }] - - def parse_playlist(self, json_data, playlist_id): - slug_sample = traverse_obj(json_data, ('related', 'data', ..., 'slug'))[0] - for season in traverse_obj(json_data, ('seasons', ..., 'id')): - playlist_json = self._call_api( - slug_sample, 'season', display_id=playlist_id, season_id=season) - - for current_page in range(playlist_json['response']['season_list']['last_page']): - playlist_json = self._call_api(slug_sample, 'season', query={'page': current_page + 1}, - season_id=season, display_id=playlist_id) - for slug in traverse_obj(playlist_json, ('response', ..., 'data', ..., 'slug')): - yield self.url_result(f'https://www.netverse.id/video/{slug}', NetverseIE) - - def _real_extract(self, url): - playlist_id, sites_type = self._match_valid_url(url).group('display_id', 'type') - playlist_data = self._call_api(playlist_id, sites_type) - - return self.playlist_result( - self.parse_playlist(playlist_data['response'], playlist_id), - traverse_obj(playlist_data, ('response', 'webseries_info', 'slug')), - traverse_obj(playlist_data, ('response', 'webseries_info', 'title'))) - - -class NetverseSearchIE(SearchInfoExtractor): - _SEARCH_KEY = 'netsearch' - - _TESTS = [{ - 'url': 'netsearch10:tetangga', - 'info_dict': { - 'id': 'tetangga', - 'title': 'tetangga', - }, - 'playlist_count': 10, - }] - - def _search_results(self, query): - last_page = None - for i in itertools.count(1): - search_data = self._download_json( - 'https://api.netverse.id/search/elastic/search', query, - query={'q': query, 'page': i}, note=f'Downloading page {i}') - - videos = traverse_obj(search_data, ('response', 'data', ...)) - for video in videos: - yield self.url_result(f'https://netverse.id/video/{video["slug"]}', NetverseIE) - - last_page = last_page or traverse_obj(search_data, ('response', 'lastpage')) - if not videos or i >= (last_page or 0): - break diff --git a/yt_dlp/extractor/nuum.py b/yt_dlp/extractor/nuum.py deleted file mode 100644 index 697fc6b32e..0000000000 --- a/yt_dlp/extractor/nuum.py +++ /dev/null @@ -1,201 +0,0 @@ -import functools - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - OnDemandPagedList, - UserNotLive, - filter_dict, - int_or_none, - parse_iso8601, - str_or_none, - url_or_none, -) -from ..utils.traversal import traverse_obj - - -class NuumBaseIE(InfoExtractor): - def _call_api(self, path, video_id, description, query={}): - response = self._download_json( - f'https://nuum.ru/api/v2/{path}', video_id, query=query, - note=f'Downloading {description} metadata', - errnote=f'Unable to download {description} metadata') - if error := response.get('error'): - raise ExtractorError(f'API returned error: {error!r}') - return response['result'] - - def _get_channel_info(self, channel_name): - return self._call_api( - 'broadcasts/public', video_id=channel_name, description='channel', - query={ - 'with_extra': 'true', - 'channel_name': channel_name, - 'with_deleted': 'true', - }) - - def _parse_video_data(self, container, extract_formats=True): - stream = traverse_obj(container, ('media_container_streams', 0, {dict})) or {} - media = traverse_obj(stream, ('stream_media', 0, {dict})) or {} - media_url = traverse_obj(media, ( - 'media_meta', ('media_archive_url', 'media_url'), {url_or_none}), get_all=False) - - video_id = str(container['media_container_id']) - is_live = media.get('media_status') == 'RUNNING' - - formats, subtitles = None, None - headers = {'Referer': 'https://nuum.ru/'} - if extract_formats: - formats, subtitles = self._extract_m3u8_formats_and_subtitles( - media_url, video_id, 'mp4', live=is_live, headers=headers) - - return filter_dict({ - 'id': video_id, - 'is_live': is_live, - 'formats': formats, - 'subtitles': subtitles, - 'http_headers': headers, - **traverse_obj(container, { - 'title': ('media_container_name', {str}), - 'description': ('media_container_description', {str}), - 'timestamp': ('created_at', {parse_iso8601}), - 'channel': ('media_container_channel', 'channel_name', {str}), - 'channel_id': ('media_container_channel', 'channel_id', {str_or_none}), - }), - **traverse_obj(stream, { - 'view_count': ('stream_total_viewers', {int_or_none}), - 'concurrent_view_count': ('stream_current_viewers', {int_or_none}), - }), - **traverse_obj(media, { - 'duration': ('media_duration', {int_or_none}), - 'thumbnail': ('media_meta', ('media_preview_archive_url', 'media_preview_url'), {url_or_none}), - }, get_all=False), - }) - - -class NuumMediaIE(NuumBaseIE): - IE_NAME = 'nuum:media' - _VALID_URL = r'https?://nuum\.ru/(?:streams|videos|clips)/(?P<id>[\d]+)' - _TESTS = [{ - 'url': 'https://nuum.ru/streams/1592713-7-days-to-die', - 'only_matching': True, - }, { - 'url': 'https://nuum.ru/videos/1567547-toxi-hurtz', - 'md5': 'ce28837a5bbffe6952d7bfd3d39811b0', - 'info_dict': { - 'id': '1567547', - 'ext': 'mp4', - 'title': 'Toxi$ - Hurtz', - 'description': '', - 'timestamp': 1702631651, - 'upload_date': '20231215', - 'thumbnail': r're:^https?://.+\.jpg', - 'view_count': int, - 'concurrent_view_count': int, - 'channel_id': '6911', - 'channel': 'toxis', - 'duration': 116, - }, - }, { - 'url': 'https://nuum.ru/clips/1552564-pro-misu', - 'md5': 'b248ae1565b1e55433188f11beeb0ca1', - 'info_dict': { - 'id': '1552564', - 'ext': 'mp4', - 'title': 'Про Мису 🙃', - 'timestamp': 1701971828, - 'upload_date': '20231207', - 'thumbnail': r're:^https?://.+\.jpg', - 'view_count': int, - 'concurrent_view_count': int, - 'channel_id': '3320', - 'channel': 'Misalelik', - 'duration': 41, - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - video_data = self._call_api(f'media-containers/{video_id}', video_id, 'media') - - return self._parse_video_data(video_data) - - -class NuumLiveIE(NuumBaseIE): - IE_NAME = 'nuum:live' - _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/?(?:$|[#?])' - _TESTS = [{ - 'url': 'https://nuum.ru/channel/mts_live', - 'only_matching': True, - }] - - def _real_extract(self, url): - channel = self._match_id(url) - channel_info = self._get_channel_info(channel) - if traverse_obj(channel_info, ('channel', 'channel_is_live')) is False: - raise UserNotLive(video_id=channel) - - info = self._parse_video_data(channel_info['media_container']) - return { - 'webpage_url': f'https://nuum.ru/streams/{info["id"]}', - 'extractor_key': NuumMediaIE.ie_key(), - 'extractor': NuumMediaIE.IE_NAME, - **info, - } - - -class NuumTabIE(NuumBaseIE): - IE_NAME = 'nuum:tab' - _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/(?P<type>streams|videos|clips)' - _TESTS = [{ - 'url': 'https://nuum.ru/channel/dankon_/clips', - 'info_dict': { - 'id': 'dankon__clips', - 'title': 'Dankon_', - }, - 'playlist_mincount': 29, - }, { - 'url': 'https://nuum.ru/channel/dankon_/videos', - 'info_dict': { - 'id': 'dankon__videos', - 'title': 'Dankon_', - }, - 'playlist_mincount': 2, - }, { - 'url': 'https://nuum.ru/channel/dankon_/streams', - 'info_dict': { - 'id': 'dankon__streams', - 'title': 'Dankon_', - }, - 'playlist_mincount': 1, - }] - - _PAGE_SIZE = 50 - - def _fetch_page(self, channel_id, tab_type, tab_id, page): - CONTAINER_TYPES = { - 'clips': ['SHORT_VIDEO', 'REVIEW_VIDEO'], - 'videos': ['LONG_VIDEO'], - 'streams': ['SINGLE'], - } - - media_containers = self._call_api( - 'media-containers', video_id=tab_id, description=f'{tab_type} tab page {page + 1}', - query={ - 'limit': self._PAGE_SIZE, - 'offset': page * self._PAGE_SIZE, - 'channel_id': channel_id, - 'media_container_status': 'STOPPED', - 'media_container_type': CONTAINER_TYPES[tab_type], - }) - for container in traverse_obj(media_containers, (..., {dict})): - metadata = self._parse_video_data(container, extract_formats=False) - yield self.url_result(f'https://nuum.ru/videos/{metadata["id"]}', NuumMediaIE, **metadata) - - def _real_extract(self, url): - channel_name, tab_type = self._match_valid_url(url).group('id', 'type') - tab_id = f'{channel_name}_{tab_type}' - channel_data = self._get_channel_info(channel_name)['channel'] - - return self.playlist_result(OnDemandPagedList(functools.partial( - self._fetch_page, channel_data['channel_id'], tab_type, tab_id), self._PAGE_SIZE), - playlist_id=tab_id, playlist_title=channel_data.get('channel_name')) diff --git a/yt_dlp/extractor/onionstudios.py b/yt_dlp/extractor/onionstudios.py deleted file mode 100644 index 7e30b2d33d..0000000000 --- a/yt_dlp/extractor/onionstudios.py +++ /dev/null @@ -1,41 +0,0 @@ -from .common import InfoExtractor -from ..utils import js_to_json - - -class OnionStudiosIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:video(?:s/[^/]+-|/)|embed\?.*\bid=)(?P<id>\d+)(?!-)' - _EMBED_REGEX = [r'(?s)<(?:iframe|bulbs-video)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/(?:embed.+?|video/\d+\.json))\1'] - - _TESTS = [{ - 'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937', - 'md5': '5a118d466d62b5cd03647cf2c593977f', - 'info_dict': { - 'id': '3459881', - 'ext': 'mp4', - 'title': 'Hannibal charges forward, stops for a cocktail', - 'description': 'md5:545299bda6abf87e5ec666548c6a9448', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'a.v. club', - 'upload_date': '20150619', - 'timestamp': 1434728546, - }, - }, { - 'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true', - 'only_matching': True, - }, { - 'url': 'http://www.onionstudios.com/video/6139.json', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage( - 'http://onionstudios.com/embed/dc94dc2899fe644c0e7241fa04c1b732.js', - video_id) - mcp_id = str(self._parse_json(self._search_regex( - r'window\.mcpMapping\s*=\s*({.+?});', webpage, - 'MCP Mapping'), video_id, js_to_json)[video_id]['mcp_id']) - return self.url_result( - 'http://kinja.com/ajax/inset/iframe?id=mcp-' + mcp_id, - 'KinjaEmbed', mcp_id) diff --git a/yt_dlp/extractor/ora.py b/yt_dlp/extractor/ora.py deleted file mode 100644 index c6ba4b0d3e..0000000000 --- a/yt_dlp/extractor/ora.py +++ /dev/null @@ -1,72 +0,0 @@ -import re -import urllib.parse - -from .common import InfoExtractor -from ..utils import ( - get_element_by_attribute, - qualities, - unescapeHTML, -) - - -class OraTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:ora\.tv|unsafespeech\.com)/([^/]+/)*(?P<id>[^/\?#]+)' - _TESTS = [{ - 'url': 'https://www.ora.tv/larrykingnow/2015/12/16/vine-youtube-stars-zach-king-king-bach-on-their-viral-videos-0_36jupg6090pq', - 'md5': 'fa33717591c631ec93b04b0e330df786', - 'info_dict': { - 'id': '50178', - 'ext': 'mp4', - 'title': 'Vine & YouTube Stars Zach King & King Bach On Their Viral Videos!', - 'description': 'md5:ebbc5b1424dd5dba7be7538148287ac1', - }, - }, { - 'url': 'http://www.unsafespeech.com/video/2016/5/10/student-self-censorship-and-the-thought-police-on-university-campuses-0_6622bnkppw4d', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - video_data = self._search_regex( - r'"(?:video|current)"\s*:\s*({[^}]+?})', webpage, 'current video') - m3u8_url = self._search_regex( - r'hls_stream"?\s*:\s*"([^"]+)', video_data, 'm3u8 url', None) - if m3u8_url: - formats = self._extract_m3u8_formats( - m3u8_url, display_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False) - # similar to GameSpotIE - m3u8_path = urllib.parse.urlparse(m3u8_url).path - QUALITIES_RE = r'((,[a-z]+\d+)+,?)' - available_qualities = self._search_regex( - QUALITIES_RE, m3u8_path, 'qualities').strip(',').split(',') - http_path = m3u8_path[1:].split('/', 1)[1] - http_template = re.sub(QUALITIES_RE, r'%s', http_path) - http_template = http_template.replace('.csmil/master.m3u8', '') - http_template = urllib.parse.urljoin( - 'http://videocdn-pmd.ora.tv/', http_template) - preference = qualities( - ['mobile400', 'basic400', 'basic600', 'sd900', 'sd1200', 'sd1500', 'hd720', 'hd1080']) - for q in available_qualities: - formats.append({ - 'url': http_template % q, - 'format_id': q, - 'quality': preference(q), - }) - else: - return self.url_result(self._search_regex( - r'"youtube_id"\s*:\s*"([^"]+)', webpage, 'youtube id'), 'Youtube') - - return { - 'id': self._search_regex( - r'"id"\s*:\s*(\d+)', video_data, 'video id', default=display_id), - 'display_id': display_id, - 'title': unescapeHTML(self._og_search_title(webpage)), - 'description': get_element_by_attribute( - 'class', 'video_txt_decription', webpage), - 'thumbnail': self._proto_relative_url(self._search_regex( - r'"thumb"\s*:\s*"([^"]+)', video_data, 'thumbnail', None)), - 'formats': formats, - } diff --git a/yt_dlp/extractor/piramidetv.py b/yt_dlp/extractor/piramidetv.py deleted file mode 100644 index 29afa9b467..0000000000 --- a/yt_dlp/extractor/piramidetv.py +++ /dev/null @@ -1,99 +0,0 @@ -from .common import InfoExtractor -from ..utils import parse_iso8601, smuggle_url, unsmuggle_url, url_or_none -from ..utils.traversal import traverse_obj - - -class PiramideTVIE(InfoExtractor): - _VALID_URL = r'https?://piramide\.tv/video/(?P<id>[\w-]+)' - _TESTS = [{ - 'url': 'https://piramide.tv/video/wWtBAORdJUTh', - 'info_dict': { - 'id': 'wWtBAORdJUTh', - 'ext': 'mp4', - 'title': 'md5:79f9c8183ea6a35c836923142cf0abcc', - 'description': '', - 'thumbnail': 'https://cdn.jwplayer.com/v2/media/W86PgQDn/thumbnails/B9gpIxkH.jpg', - 'channel': 'León Picarón', - 'channel_id': 'leonpicaron', - 'timestamp': 1696460362, - 'upload_date': '20231004', - }, - }, { - 'url': 'https://piramide.tv/video/wcYn6li79NgN', - 'info_dict': { - 'id': 'wcYn6li79NgN', - 'ext': 'mp4', - 'title': 'ACEPTO TENER UN BEBE CON MI NOVIA\u2026? | Parte 1', - 'description': '', - 'channel': 'ARTA GAME', - 'channel_id': 'arta_game', - 'thumbnail': 'https://cdn.jwplayer.com/v2/media/cnEdGp5X/thumbnails/rHAaWfP7.jpg', - 'timestamp': 1703434976, - 'upload_date': '20231224', - }, - }] - - def _extract_video(self, video_id): - video_data = self._download_json( - f'https://hermes.piramide.tv/video/data/{video_id}', video_id, fatal=False) - formats, subtitles = self._extract_m3u8_formats_and_subtitles( - f'https://cdn.piramide.tv/video/{video_id}/manifest.m3u8', video_id, fatal=False) - next_video = traverse_obj(video_data, ('video', 'next_video', 'id', {str})) - return next_video, { - 'id': video_id, - 'formats': formats, - 'subtitles': subtitles, - **traverse_obj(video_data, ('video', { - 'id': ('id', {str}), - 'title': ('title', {str}), - 'description': ('description', {str}), - 'thumbnail': ('media', 'thumbnail', {url_or_none}), - 'channel': ('channel', 'name', {str}), - 'channel_id': ('channel', 'id', {str}), - 'timestamp': ('date', {parse_iso8601}), - })), - } - - def _entries(self, video_id): - visited = set() - while True: - visited.add(video_id) - next_video, info = self._extract_video(video_id) - yield info - if not next_video or next_video in visited: - break - video_id = next_video - - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - video_id = self._match_id(url) - if self._yes_playlist(video_id, video_id, smuggled_data): - return self.playlist_result(self._entries(video_id), video_id) - return self._extract_video(video_id)[1] - - -class PiramideTVChannelIE(InfoExtractor): - _VALID_URL = r'https?://piramide\.tv/channel/(?P<id>[\w-]+)' - _TESTS = [{ - 'url': 'https://piramide.tv/channel/thekalo', - 'playlist_mincount': 10, - 'info_dict': { - 'id': 'thekalo', - }, - }] - - def _entries(self, channel_name): - videos = self._download_json( - f'https://hermes.piramide.tv/channel/list/{channel_name}/date/100000', channel_name) - for video in traverse_obj(videos, ('videos', lambda _, v: v['id'])): - yield self.url_result(smuggle_url( - f'https://piramide.tv/video/{video["id"]}', {'force_noplaylist': True}), - **traverse_obj(video, { - 'id': ('id', {str}), - 'title': ('title', {str}), - 'description': ('description', {str}), - })) - - def _real_extract(self, url): - channel_name = self._match_id(url) - return self.playlist_result(self._entries(channel_name), channel_name) diff --git a/yt_dlp/extractor/planetmarathi.py b/yt_dlp/extractor/planetmarathi.py deleted file mode 100644 index 94861836a3..0000000000 --- a/yt_dlp/extractor/planetmarathi.py +++ /dev/null @@ -1,72 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - try_get, - unified_strdate, -) - - -class PlanetMarathiIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?planetmarathi\.com/titles/(?P<id>[^/#&?$]+)' - _TESTS = [{ - 'url': 'https://www.planetmarathi.com/titles/ek-unad-divas', - 'playlist_mincount': 2, - 'info_dict': { - 'id': 'ek-unad-divas', - }, - 'playlist': [{ - 'info_dict': { - 'id': 'ASSETS-MOVIE-ASSET-01_ek-unad-divas', - 'ext': 'mp4', - 'title': 'ek unad divas', - 'alt_title': 'चित्रपट', - 'description': 'md5:41c7ed6b041c2fea9820a3f3125bd881', - 'episode_number': 1, - 'duration': 5539, - 'upload_date': '20210829', - }, - }], # Trailer skipped - }, { - 'url': 'https://www.planetmarathi.com/titles/baap-beep-baap-season-1', - 'playlist_mincount': 10, - 'info_dict': { - 'id': 'baap-beep-baap-season-1', - }, - 'playlist': [{ - 'info_dict': { - 'id': 'ASSETS-CHARACTER-PROFILE-SEASON-01-ASSET-01_baap-beep-baap-season-1', - 'ext': 'mp4', - 'title': 'Manohar Kanhere', - 'alt_title': 'मनोहर कान्हेरे', - 'description': 'md5:285ed45d5c0ab5522cac9a043354ebc6', - 'season_number': 1, - 'episode_number': 1, - 'duration': 29, - 'upload_date': '20210829', - }, - }], # Trailers, Episodes, other Character profiles skipped - }] - - def _real_extract(self, url): - playlist_id = self._match_id(url) - entries = [] - json_data = self._download_json( - f'https://www.planetmarathi.com/api/v1/titles/{playlist_id}/assets', playlist_id)['assets'] - for asset in json_data: - asset_title = asset['mediaAssetName']['en'] - if asset_title == 'Movie': - asset_title = playlist_id.replace('-', ' ') - asset_id = f'{asset["sk"]}_{playlist_id}'.replace('#', '-') - formats, subtitles = self._extract_m3u8_formats_and_subtitles(asset['mediaAssetURL'], asset_id) - entries.append({ - 'id': asset_id, - 'title': asset_title, - 'alt_title': try_get(asset, lambda x: x['mediaAssetName']['mr']), - 'description': try_get(asset, lambda x: x['mediaAssetDescription']['en']), - 'season_number': asset.get('mediaAssetSeason'), - 'episode_number': asset.get('mediaAssetIndexForAssetType'), - 'duration': asset.get('mediaAssetDurationInSeconds'), - 'upload_date': unified_strdate(asset.get('created')), - 'formats': formats, - 'subtitles': subtitles, - }) - return self.playlist_result(entries, playlist_id=playlist_id) diff --git a/yt_dlp/extractor/playplustv.py b/yt_dlp/extractor/playplustv.py deleted file mode 100644 index a4439c8bc5..0000000000 --- a/yt_dlp/extractor/playplustv.py +++ /dev/null @@ -1,100 +0,0 @@ -import json - -from .common import InfoExtractor -from ..networking import PUTRequest -from ..networking.exceptions import HTTPError -from ..utils import ExtractorError, clean_html, int_or_none - - -class PlayPlusTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?playplus\.(?:com|tv)/VOD/(?P<project_id>[0-9]+)/(?P<id>[0-9a-f]{32})' - _TEST = { - 'url': 'https://www.playplus.tv/VOD/7572/db8d274a5163424e967f35a30ddafb8e', - 'md5': 'd078cb89d7ab6b9df37ce23c647aef72', - 'info_dict': { - 'id': 'db8d274a5163424e967f35a30ddafb8e', - 'ext': 'mp4', - 'title': 'Capítulo 179 - Final', - 'description': 'md5:01085d62d8033a1e34121d3c3cabc838', - 'timestamp': 1529992740, - 'upload_date': '20180626', - }, - 'skip': 'Requires account credential', - } - _NETRC_MACHINE = 'playplustv' - _GEO_COUNTRIES = ['BR'] - _token = None - _profile_id = None - - def _call_api(self, resource, video_id=None, query=None): - return self._download_json('https://api.playplus.tv/api/media/v2/get' + resource, video_id, headers={ - 'Authorization': 'Bearer ' + self._token, - }, query=query) - - def _perform_login(self, username, password): - req = PUTRequest( - 'https://api.playplus.tv/api/web/login', json.dumps({ - 'email': username, - 'password': password, - }).encode(), { - 'Content-Type': 'application/json; charset=utf-8', - }) - - try: - self._token = self._download_json(req, None)['token'] - except ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status == 401: - raise ExtractorError(self._parse_json( - e.cause.response.read(), None)['errorMessage'], expected=True) - raise - - self._profile = self._call_api('Profiles')['list'][0]['_id'] - - def _real_initialize(self): - if not self._token: - self.raise_login_required(method='password') - - def _real_extract(self, url): - project_id, media_id = self._match_valid_url(url).groups() - media = self._call_api( - 'Media', media_id, { - 'profileId': self._profile, - 'projectId': project_id, - 'mediaId': media_id, - })['obj'] - title = media['title'] - - formats = [] - for f in media.get('files', []): - f_url = f.get('url') - if not f_url: - continue - file_info = f.get('fileInfo') or {} - formats.append({ - 'url': f_url, - 'width': int_or_none(file_info.get('width')), - 'height': int_or_none(file_info.get('height')), - }) - - thumbnails = [] - for thumb in media.get('thumbs', []): - thumb_url = thumb.get('url') - if not thumb_url: - continue - thumbnails.append({ - 'url': thumb_url, - 'width': int_or_none(thumb.get('width')), - 'height': int_or_none(thumb.get('height')), - }) - - return { - 'id': media_id, - 'title': title, - 'formats': formats, - 'thumbnails': thumbnails, - 'description': clean_html(media.get('description')) or media.get('shortDescription'), - 'timestamp': int_or_none(media.get('publishDate'), 1000), - 'view_count': int_or_none(media.get('numberOfViews')), - 'comment_count': int_or_none(media.get('numberOfComments')), - 'tags': media.get('tags'), - } diff --git a/yt_dlp/extractor/playwire.py b/yt_dlp/extractor/playwire.py deleted file mode 100644 index 2323bd0cf9..0000000000 --- a/yt_dlp/extractor/playwire.py +++ /dev/null @@ -1,79 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - dict_get, - float_or_none, -) - - -class PlaywireIE(InfoExtractor): - _VALID_URL = r'https?://(?:config|cdn)\.playwire\.com(?:/v2)?/(?P<publisher_id>\d+)/(?:videos/v2|embed|config)/(?P<id>\d+)' - _EMBED_REGEX = [r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1'] - - _TESTS = [{ - 'url': 'http://config.playwire.com/14907/videos/v2/3353705/player.json', - 'md5': 'e6398701e3595888125729eaa2329ed9', - 'info_dict': { - 'id': '3353705', - 'ext': 'mp4', - 'title': 'S04_RM_UCL_Rus', - 'thumbnail': r're:^https?://.*\.png$', - 'duration': 145.94, - }, - 'skip': 'Invalid URL', - }, { - # m3u8 in f4m - 'url': 'http://config.playwire.com/21772/videos/v2/4840492/zeus.json', - 'info_dict': { - 'id': '4840492', - 'ext': 'mp4', - 'title': 'ITV EL SHOW FULL', - }, - 'skip': 'Invalid URL', - }, { - # Multiple resolutions while bitrates missing - 'url': 'http://cdn.playwire.com/11625/embed/85228.html', - 'only_matching': True, - }, { - 'url': 'http://config.playwire.com/12421/videos/v2/3389892/zeus.json', - 'only_matching': True, - }, { - 'url': 'http://cdn.playwire.com/v2/12342/config/1532636.json', - 'only_matching': True, - }] - _WEBPAGE_TESTS = [{ - 'url': 'https://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html', - 'info_dict': { - 'id': '3519514', - 'ext': 'mp4', - 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer', - }, - 'skip': 'Site no longer embeds Playwire', - }] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - publisher_id, video_id = mobj.group('publisher_id'), mobj.group('id') - - player = self._download_json( - f'http://config.playwire.com/{publisher_id}/videos/v2/{video_id}/zeus.json', - video_id) - - title = player['settings']['title'] - duration = float_or_none(player.get('duration'), 1000) - - content = player['content'] - thumbnail = content.get('poster') - src = content['media']['f4m'] - - formats = self._extract_f4m_formats(src, video_id, m3u8_id='hls') - for a_format in formats: - if not dict_get(a_format, ['tbr', 'width', 'height']): - a_format['quality'] = 1 if '-hd.' in a_format['url'] else 0 - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats, - } diff --git a/yt_dlp/extractor/plvideo.py b/yt_dlp/extractor/plvideo.py deleted file mode 100644 index 27009ca4cd..0000000000 --- a/yt_dlp/extractor/plvideo.py +++ /dev/null @@ -1,130 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - float_or_none, - int_or_none, - parse_iso8601, - parse_resolution, - url_or_none, -) -from ..utils.traversal import traverse_obj - - -class PlVideoIE(InfoExtractor): - IE_DESC = 'Платформа' - _VALID_URL = r'https?://(?:www\.)?plvideo\.ru/(?:watch\?(?:[^#]+&)?v=|shorts/)(?P<id>[\w-]+)' - _TESTS = [{ - 'url': 'https://plvideo.ru/watch?v=Y5JzUzkcQTMK', - 'md5': 'fe8e18aca892b3b31f3bf492169f8a26', - 'info_dict': { - 'id': 'Y5JzUzkcQTMK', - 'ext': 'mp4', - 'thumbnail': 'https://img.plvideo.ru/images/fp-2024-images/v/cover/37/dd/37dd00a4c96c77436ab737e85947abd7/original663a4a3bb713e5.33151959.jpg', - 'title': 'Presidente de Cuba llega a Moscú en una visita de trabajo', - 'channel': 'RT en Español', - 'channel_id': 'ZH4EKqunVDvo', - 'media_type': 'video', - 'comment_count': int, - 'tags': ['rusia', 'cuba', 'russia', 'miguel díaz-canel'], - 'description': 'md5:a1a395d900d77a86542a91ee0826c115', - 'release_timestamp': 1715096124, - 'channel_is_verified': True, - 'like_count': int, - 'timestamp': 1715095911, - 'duration': 44320, - 'view_count': int, - 'dislike_count': int, - 'upload_date': '20240507', - 'modified_date': '20240701', - 'channel_follower_count': int, - 'modified_timestamp': 1719824073, - }, - }, { - 'url': 'https://plvideo.ru/shorts/S3Uo9c-VLwFX', - 'md5': '7d8fa2279406c69d2fd2a6fc548a9805', - 'info_dict': { - 'id': 'S3Uo9c-VLwFX', - 'ext': 'mp4', - 'channel': 'Romaatom', - 'tags': 'count:22', - 'dislike_count': int, - 'upload_date': '20241130', - 'description': 'md5:452e6de219bf2f32bb95806c51c3b364', - 'duration': 58433, - 'modified_date': '20241130', - 'thumbnail': 'https://img.plvideo.ru/images/fp-2024-11-cover/S3Uo9c-VLwFX/f9318999-a941-482b-b700-2102a7049366.jpg', - 'media_type': 'shorts', - 'like_count': int, - 'modified_timestamp': 1732961458, - 'channel_is_verified': True, - 'channel_id': 'erJyyTIbmUd1', - 'timestamp': 1732961355, - 'comment_count': int, - 'title': 'Белоусов отменил приказы о кадровом резерве на гражданской службе', - 'channel_follower_count': int, - 'view_count': int, - 'release_timestamp': 1732961458, - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - video_data = self._download_json( - f'https://api.g1.plvideo.ru/v1/videos/{video_id}?Aud=18', video_id) - - is_live = False - formats = [] - subtitles = {} - automatic_captions = {} - for quality, data in traverse_obj(video_data, ('item', 'profiles', {dict.items}, lambda _, v: url_or_none(v[1]['hls']))): - formats.append({ - 'format_id': quality, - 'ext': 'mp4', - 'protocol': 'm3u8_native', - **traverse_obj(data, { - 'url': 'hls', - 'fps': ('fps', {float_or_none}), - 'aspect_ratio': ('aspectRatio', {float_or_none}), - }), - **parse_resolution(quality), - }) - if livestream_url := traverse_obj(video_data, ('item', 'livestream', 'url', {url_or_none})): - is_live = True - formats.extend(self._extract_m3u8_formats(livestream_url, video_id, 'mp4', live=True)) - for lang, url in traverse_obj(video_data, ('item', 'subtitles', {dict.items}, lambda _, v: url_or_none(v[1]))): - if lang.endswith('-auto'): - automatic_captions.setdefault(lang[:-5], []).append({ - 'url': url, - }) - else: - subtitles.setdefault(lang, []).append({ - 'url': url, - }) - - return { - 'id': video_id, - 'formats': formats, - 'subtitles': subtitles, - 'automatic_captions': automatic_captions, - 'is_live': is_live, - **traverse_obj(video_data, ('item', { - 'id': ('id', {str}), - 'title': ('title', {str}), - 'description': ('description', {str}), - 'thumbnail': ('cover', 'paths', 'original', 'src', {url_or_none}), - 'duration': ('uploadFile', 'videoDuration', {int_or_none}), - 'channel': ('channel', 'name', {str}), - 'channel_id': ('channel', 'id', {str}), - 'channel_follower_count': ('channel', 'stats', 'subscribers', {int_or_none}), - 'channel_is_verified': ('channel', 'verified', {bool}), - 'tags': ('tags', ..., {str}), - 'timestamp': ('createdAt', {parse_iso8601}), - 'release_timestamp': ('publishedAt', {parse_iso8601}), - 'modified_timestamp': ('updatedAt', {parse_iso8601}), - 'view_count': ('stats', 'viewTotalCount', {int_or_none}), - 'like_count': ('stats', 'likeCount', {int_or_none}), - 'dislike_count': ('stats', 'dislikeCount', {int_or_none}), - 'comment_count': ('stats', 'commentCount', {int_or_none}), - 'media_type': ('type', {str}), - })), - } diff --git a/yt_dlp/extractor/prosiebensat1.py b/yt_dlp/extractor/prosiebensat1.py deleted file mode 100644 index e8a4712051..0000000000 --- a/yt_dlp/extractor/prosiebensat1.py +++ /dev/null @@ -1,496 +0,0 @@ -import hashlib -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - determine_ext, - float_or_none, - int_or_none, - join_nonempty, - merge_dicts, - unified_strdate, -) - - -class ProSiebenSat1BaseIE(InfoExtractor): - _GEO_BYPASS = False - _ACCESS_ID = None - _SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear' - _V4_BASE_URL = 'https://vas-v4.p7s1video.net/4.0/get' - - def _extract_video_info(self, url, clip_id): - client_location = url - - video = self._download_json( - 'http://vas.sim-technik.de/vas/live/v2/videos', - clip_id, 'Downloading videos JSON', query={ - 'access_token': self._TOKEN, - 'client_location': client_location, - 'client_name': self._CLIENT_NAME, - 'ids': clip_id, - })[0] - - if not self.get_param('allow_unplayable_formats') and video.get('is_protected') is True: - self.report_drm(clip_id) - - formats = [] - if self._ACCESS_ID: - raw_ct = self._ENCRYPTION_KEY + clip_id + self._IV + self._ACCESS_ID - protocols = self._download_json( - self._V4_BASE_URL + 'protocols', clip_id, - 'Downloading protocols JSON', - headers=self.geo_verification_headers(), query={ - 'access_id': self._ACCESS_ID, - 'client_token': hashlib.sha1((raw_ct).encode()).hexdigest(), - 'video_id': clip_id, - }, fatal=False, expected_status=(403,)) or {} - error = protocols.get('error') or {} - if error.get('title') == 'Geo check failed': - self.raise_geo_restricted(countries=['AT', 'CH', 'DE']) - server_token = protocols.get('server_token') - if server_token: - urls = (self._download_json( - self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={ - 'access_id': self._ACCESS_ID, - 'client_token': hashlib.sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(), - 'protocols': self._SUPPORTED_PROTOCOLS, - 'server_token': server_token, - 'video_id': clip_id, - }, fatal=False) or {}).get('urls') or {} - for protocol, variant in urls.items(): - source_url = variant.get('clear', {}).get('url') - if not source_url: - continue - if protocol == 'dash': - formats.extend(self._extract_mpd_formats( - source_url, clip_id, mpd_id=protocol, fatal=False)) - elif protocol == 'hls': - formats.extend(self._extract_m3u8_formats( - source_url, clip_id, 'mp4', 'm3u8_native', - m3u8_id=protocol, fatal=False)) - else: - formats.append({ - 'url': source_url, - 'format_id': protocol, - }) - if not formats: - source_ids = [str(source['id']) for source in video['sources']] - - client_id = self._SALT[:2] + hashlib.sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode()).hexdigest() - - sources = self._download_json( - f'http://vas.sim-technik.de/vas/live/v2/videos/{clip_id}/sources', - clip_id, 'Downloading sources JSON', query={ - 'access_token': self._TOKEN, - 'client_id': client_id, - 'client_location': client_location, - 'client_name': self._CLIENT_NAME, - }) - server_id = sources['server_id'] - - def fix_bitrate(bitrate): - bitrate = int_or_none(bitrate) - if not bitrate: - return None - return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate - - for source_id in source_ids: - client_id = self._SALT[:2] + hashlib.sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode()).hexdigest() - urls = self._download_json( - f'http://vas.sim-technik.de/vas/live/v2/videos/{clip_id}/sources/url', - clip_id, 'Downloading urls JSON', fatal=False, query={ - 'access_token': self._TOKEN, - 'client_id': client_id, - 'client_location': client_location, - 'client_name': self._CLIENT_NAME, - 'server_id': server_id, - 'source_ids': source_id, - }) - if not urls: - continue - if urls.get('status_code') != 0: - raise ExtractorError('This video is unavailable', expected=True) - urls_sources = urls['sources'] - if isinstance(urls_sources, dict): - urls_sources = urls_sources.values() - for source in urls_sources: - source_url = source.get('url') - if not source_url: - continue - protocol = source.get('protocol') - mimetype = source.get('mimetype') - if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m': - formats.extend(self._extract_f4m_formats( - source_url, clip_id, f4m_id='hds', fatal=False)) - elif mimetype == 'application/x-mpegURL': - formats.extend(self._extract_m3u8_formats( - source_url, clip_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - elif mimetype == 'application/dash+xml': - formats.extend(self._extract_mpd_formats( - source_url, clip_id, mpd_id='dash', fatal=False)) - else: - tbr = fix_bitrate(source['bitrate']) - if protocol in ('rtmp', 'rtmpe'): - mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url) - if not mobj: - continue - path = mobj.group('path') - mp4colon_index = path.rfind('mp4:') - app = path[:mp4colon_index] - play_path = path[mp4colon_index:] - formats.append({ - 'url': '{}/{}'.format(mobj.group('url'), app), - 'app': app, - 'play_path': play_path, - 'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf', - 'page_url': 'http://www.prosieben.de', - 'tbr': tbr, - 'ext': 'flv', - 'format_id': join_nonempty('rtmp', tbr), - }) - else: - formats.append({ - 'url': source_url, - 'tbr': tbr, - 'format_id': join_nonempty('http', tbr), - }) - - return { - 'duration': float_or_none(video.get('duration')), - 'formats': formats, - } - - -class ProSiebenSat1IE(ProSiebenSat1BaseIE): - IE_NAME = 'prosiebensat1' - IE_DESC = 'ProSiebenSat.1 Digital' - _VALID_URL = r'''(?x) - https?:// - (?:www\.)? - (?: - (?:beta\.)? - (?: - prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|advopedia - )\.(?:de|at|ch)| - ran\.de|fem\.com|advopedia\.de|galileo\.tv/video - ) - /(?P<id>.+) - ''' - - _TESTS = [ - { - # Tests changes introduced in https://github.com/ytdl-org/youtube-dl/pull/6242 - # in response to fixing https://github.com/ytdl-org/youtube-dl/issues/6215: - # - malformed f4m manifest support - # - proper handling of URLs starting with `https?://` in 2.0 manifests - # - recursive child f4m manifests extraction - 'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge', - 'info_dict': { - 'id': '2104602', - 'ext': 'mp4', - 'title': 'CIRCUS HALLIGALLI - Episode 18 - Staffel 2', - 'description': 'md5:8733c81b702ea472e069bc48bb658fc1', - 'upload_date': '20131231', - 'duration': 5845.04, - 'series': 'CIRCUS HALLIGALLI', - 'season_number': 2, - 'episode': 'Episode 18 - Staffel 2', - 'episode_number': 18, - }, - }, - { - 'url': 'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html', - 'info_dict': { - 'id': '2570327', - 'ext': 'mp4', - 'title': 'Lady-Umstyling für Audrina', - 'description': 'md5:4c16d0c17a3461a0d43ea4084e96319d', - 'upload_date': '20131014', - 'duration': 606.76, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - 'skip': 'Seems to be broken', - }, - { - 'url': 'http://www.prosiebenmaxx.de/tv/experience/video/144-countdown-fuer-die-autowerkstatt-ganze-folge', - 'info_dict': { - 'id': '2429369', - 'ext': 'mp4', - 'title': 'Countdown für die Autowerkstatt', - 'description': 'md5:809fc051a457b5d8666013bc40698817', - 'upload_date': '20140223', - 'duration': 2595.04, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - 'skip': 'This video is unavailable', - }, - { - 'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip', - 'info_dict': { - 'id': '2904997', - 'ext': 'mp4', - 'title': 'Sexy laufen in Ugg Boots', - 'description': 'md5:edf42b8bd5bc4e5da4db4222c5acb7d6', - 'upload_date': '20140122', - 'duration': 245.32, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - 'skip': 'This video is unavailable', - }, - { - 'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip', - 'info_dict': { - 'id': '2906572', - 'ext': 'mp4', - 'title': 'Im Interview: Kai Wiesinger', - 'description': 'md5:e4e5370652ec63b95023e914190b4eb9', - 'upload_date': '20140203', - 'duration': 522.56, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - 'skip': 'This video is unavailable', - }, - { - 'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge', - 'info_dict': { - 'id': '2992323', - 'ext': 'mp4', - 'title': 'Jagd auf Fertigkost im Elsthal - Teil 2', - 'description': 'md5:2669cde3febe9bce13904f701e774eb6', - 'upload_date': '20141014', - 'duration': 2410.44, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - 'skip': 'This video is unavailable', - }, - { - 'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge', - 'info_dict': { - 'id': '3004256', - 'ext': 'mp4', - 'title': 'Schalke: Tönnies möchte Raul zurück', - 'description': 'md5:4b5b271d9bcde223b54390754c8ece3f', - 'upload_date': '20140226', - 'duration': 228.96, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - 'skip': 'This video is unavailable', - }, - { - 'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip', - 'info_dict': { - 'id': '2572814', - 'ext': 'mp4', - 'title': 'The Voice of Germany - Andreas Kümmert: Rocket Man', - 'description': 'md5:6ddb02b0781c6adf778afea606652e38', - 'timestamp': 1382041620, - 'upload_date': '20131017', - 'duration': 469.88, - }, - 'params': { - 'skip_download': True, - }, - }, - { - 'url': 'http://www.fem.com/videos/beauty-lifestyle/kurztrips-zum-valentinstag', - 'info_dict': { - 'id': '2156342', - 'ext': 'mp4', - 'title': 'Kurztrips zum Valentinstag', - 'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.', - 'duration': 307.24, - }, - 'params': { - 'skip_download': True, - }, - }, - { - 'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist', - 'info_dict': { - 'id': '439664', - 'title': 'Episode 8 - Ganze Folge - Playlist', - 'description': 'md5:63b8963e71f481782aeea877658dec84', - }, - 'playlist_count': 2, - 'skip': 'This video is unavailable', - }, - { - # title in <h2 class="subtitle"> - 'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip', - 'info_dict': { - 'id': '4895826', - 'ext': 'mp4', - 'title': 'Jetzt erst enthüllt: Das Geheimnis von Emma Stones Oscar-Robe', - 'description': 'md5:e5ace2bc43fadf7b63adc6187e9450b9', - 'upload_date': '20170302', - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'geo restricted to Germany', - }, - { - # geo restricted to Germany - 'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge', - 'only_matching': True, - }, - { - # geo restricted to Germany - 'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge', - 'only_matching': True, - }, - { - # geo restricted to Germany - 'url': 'https://www.galileo.tv/video/diese-emojis-werden-oft-missverstanden', - 'only_matching': True, - }, - { - 'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel', - 'only_matching': True, - }, - { - 'url': 'http://www.advopedia.de/videos/lenssen-klaert-auf/lenssen-klaert-auf-folge-8-staffel-3-feiertage-und-freie-tage', - 'only_matching': True, - }, - ] - - _TOKEN = 'prosieben' - _SALT = '01!8d8F_)r9]4s[qeuXfP%' - _CLIENT_NAME = 'kolibri-2.0.19-splec4' - - _ACCESS_ID = 'x_prosiebenmaxx-de' - _ENCRYPTION_KEY = 'Eeyeey9oquahthainoofashoyoikosag' - _IV = 'Aeluchoc6aevechuipiexeeboowedaok' - - _CLIPID_REGEXES = [ - r'"clip_id"\s*:\s+"(\d+)"', - r'clipid: "(\d+)"', - r'clip[iI]d=(\d+)', - r'clip[iI][dD]\s*=\s*["\'](\d+)', - r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)", - r'proMamsId"\s*:\s*"(\d+)', - r'proMamsId"\s*:\s*"(\d+)', - ] - _TITLE_REGEXES = [ - r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>', - r'<header class="clearfix">\s*<h3>(.+?)</h3>', - r'<!-- start video -->\s*<h1>(.+?)</h1>', - r'<h1 class="att-name">\s*(.+?)</h1>', - r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>', - r'<h2 class="video-title" itemprop="name">\s*(.+?)</h2>', - r'<div[^>]+id="veeseoTitle"[^>]*>(.+?)</div>', - r'<h2[^>]+class="subtitle"[^>]*>([^<]+)</h2>', - ] - _DESCRIPTION_REGEXES = [ - r'<p itemprop="description">\s*(.+?)</p>', - r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>', - r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>', - r'<p class="att-description">\s*(.+?)\s*</p>', - r'<p class="video-description" itemprop="description">\s*(.+?)</p>', - r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>', - ] - _UPLOAD_DATE_REGEXES = [ - r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"', - r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr', - r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>', - r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>', - ] - _PAGE_TYPE_REGEXES = [ - r'<meta name="page_type" content="([^"]+)">', - r"'itemType'\s*:\s*'([^']*)'", - ] - _PLAYLIST_ID_REGEXES = [ - r'content[iI]d=(\d+)', - r"'itemId'\s*:\s*'([^']*)'", - ] - _PLAYLIST_CLIP_REGEXES = [ - r'(?s)data-qvt=.+?<a href="([^"]+)"', - ] - - def _extract_clip(self, url, webpage): - clip_id = self._html_search_regex( - self._CLIPID_REGEXES, webpage, 'clip id') - title = self._html_search_regex( - self._TITLE_REGEXES, webpage, 'title', - default=None) or self._og_search_title(webpage) - info = self._extract_video_info(url, clip_id) - description = self._html_search_regex( - self._DESCRIPTION_REGEXES, webpage, 'description', default=None) - if description is None: - description = self._og_search_description(webpage) - thumbnail = self._og_search_thumbnail(webpage) - upload_date = unified_strdate( - self._html_search_meta('og:published_time', webpage, - 'upload date', default=None) - or self._html_search_regex(self._UPLOAD_DATE_REGEXES, - webpage, 'upload date', default=None)) - - json_ld = self._search_json_ld(webpage, clip_id, default={}) - - return merge_dicts(info, { - 'id': clip_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'upload_date': upload_date, - }, json_ld) - - def _extract_playlist(self, url, webpage): - playlist_id = self._html_search_regex( - self._PLAYLIST_ID_REGEXES, webpage, 'playlist id') - playlist = self._parse_json( - self._search_regex( - r'var\s+contentResources\s*=\s*(\[.+?\]);\s*</script', - webpage, 'playlist'), - playlist_id) - entries = [] - for item in playlist: - clip_id = item.get('id') or item.get('upc') - if not clip_id: - continue - info = self._extract_video_info(url, clip_id) - info.update({ - 'id': clip_id, - 'title': item.get('title') or item.get('teaser', {}).get('headline'), - 'description': item.get('teaser', {}).get('description'), - 'thumbnail': item.get('poster'), - 'duration': float_or_none(item.get('duration')), - 'series': item.get('tvShowTitle'), - 'uploader': item.get('broadcastPublisher'), - }) - entries.append(info) - return self.playlist_result(entries, playlist_id) - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - page_type = self._search_regex( - self._PAGE_TYPE_REGEXES, webpage, - 'page type', default='clip').lower() - if page_type == 'clip': - return self._extract_clip(url, webpage) - elif page_type == 'playlist': - return self._extract_playlist(url, webpage) - else: - raise ExtractorError( - f'Unsupported page type {page_type}', expected=True) diff --git a/yt_dlp/extractor/puls4.py b/yt_dlp/extractor/puls4.py deleted file mode 100644 index b43f0352b9..0000000000 --- a/yt_dlp/extractor/puls4.py +++ /dev/null @@ -1,50 +0,0 @@ -from .prosiebensat1 import ProSiebenSat1BaseIE -from ..utils import parse_duration, unified_strdate - - -class Puls4IE(ProSiebenSat1BaseIE): - _VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>[^?#&]+)' - _TESTS = [{ - 'url': 'http://www.puls4.com/2-minuten-2-millionen/staffel-3/videos/2min2miotalk/Tobias-Homberger-von-myclubs-im-2min2miotalk-118118', - 'md5': 'fd3c6b0903ac72c9d004f04bc6bb3e03', - 'info_dict': { - 'id': '118118', - 'ext': 'flv', - 'title': 'Tobias Homberger von myclubs im #2min2miotalk', - 'description': 'md5:f9def7c5e8745d6026d8885487d91955', - 'upload_date': '20160830', - 'uploader': 'PULS_4', - }, - }, { - 'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident.-Norbert-Hofer', - 'only_matching': True, - }, { - 'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident-Analyse-des-Interviews-mit-Norbert-Hofer-416598', - 'only_matching': True, - }] - _TOKEN = 'puls4' - _SALT = '01!kaNgaiNgah1Ie4AeSha' - _CLIENT_NAME = '' - - def _real_extract(self, url): - path = self._match_id(url) - content_path = self._download_json( - 'http://www.puls4.com/api/json-fe/page/' + path, path)['content'][0]['url'] - media = self._download_json( - 'http://www.puls4.com' + content_path, - content_path)['mediaCurrent'] - player_content = media['playerContent'] - info = self._extract_video_info(url, player_content['id']) - info.update({ - 'id': str(media['objectId']), - 'title': player_content['title'], - 'description': media.get('description'), - 'thumbnail': media.get('previewLink'), - 'upload_date': unified_strdate(media.get('date')), - 'duration': parse_duration(player_content.get('duration')), - 'episode': player_content.get('episodePartName'), - 'show': media.get('channel'), - 'season_id': player_content.get('seasonId'), - 'uploader': player_content.get('sourceCompany'), - }) - return info diff --git a/yt_dlp/extractor/radiocomercial.py b/yt_dlp/extractor/radiocomercial.py deleted file mode 100644 index 7e4609a62d..0000000000 --- a/yt_dlp/extractor/radiocomercial.py +++ /dev/null @@ -1,154 +0,0 @@ -import itertools - -from .common import InfoExtractor -from ..networking.exceptions import HTTPError -from ..utils import ( - ExtractorError, - extract_attributes, - get_element_by_class, - get_element_html_by_class, - get_element_text_and_html_by_tag, - get_elements_html_by_class, - int_or_none, - join_nonempty, - try_call, - unified_strdate, - update_url, - urljoin, -) -from ..utils.traversal import traverse_obj - - -class RadioComercialIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?radiocomercial\.pt/podcasts/[^/?#]+/t?(?P<season>\d+)/(?P<id>[\w-]+)' - _TESTS = [{ - 'url': 'https://radiocomercial.pt/podcasts/o-homem-que-mordeu-o-cao/t6/taylor-swift-entranhando-se-que-nem-uma-espada-no-ventre-dos-fas#page-content-wrapper', - 'md5': '5f4fe8e485b29d2e8fd495605bc2c7e4', - 'info_dict': { - 'id': 'taylor-swift-entranhando-se-que-nem-uma-espada-no-ventre-dos-fas', - 'ext': 'mp3', - 'title': 'Taylor Swift entranhando-se que nem uma espada no ventre dos fãs.', - 'release_date': '20231025', - 'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg', - 'season': 'Season 6', - 'season_number': 6, - }, - }, { - 'url': 'https://radiocomercial.pt/podcasts/convenca-me-num-minuto/t3/convenca-me-num-minuto-que-os-lobisomens-existem', - 'md5': '47e96c273aef96a8eb160cd6cf46d782', - 'info_dict': { - 'id': 'convenca-me-num-minuto-que-os-lobisomens-existem', - 'ext': 'mp3', - 'title': 'Convença-me num minuto que os lobisomens existem', - 'release_date': '20231026', - 'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg', - 'season': 'Season 3', - 'season_number': 3, - }, - }, { - 'url': 'https://radiocomercial.pt/podcasts/inacreditavel-by-ines-castel-branco/t2/o-desastre-de-aviao', - 'md5': '69be64255420fec23b7259955d771e54', - 'info_dict': { - 'id': 'o-desastre-de-aviao', - 'ext': 'mp3', - 'title': 'O desastre de avião', - 'description': 'md5:8a82beeb372641614772baab7246245f', - 'release_date': '20231101', - 'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg', - 'season': 'Season 2', - 'season_number': 2, - }, - 'params': { - # inconsistant md5 - 'skip_download': True, - }, - }, { - 'url': 'https://radiocomercial.pt/podcasts/tnt-todos-no-top/2023/t-n-t-29-de-outubro', - 'md5': '91d32d4d4b1407272068b102730fc9fa', - 'info_dict': { - 'id': 't-n-t-29-de-outubro', - 'ext': 'mp3', - 'title': 'T.N.T 29 de outubro', - 'release_date': '20231029', - 'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg', - 'season': 'Season 2023', - 'season_number': 2023, - }, - }] - - def _real_extract(self, url): - video_id, season = self._match_valid_url(url).group('id', 'season') - webpage = self._download_webpage(url, video_id) - return { - 'id': video_id, - 'title': self._html_extract_title(webpage), - 'description': self._og_search_description(webpage, default=None), - 'release_date': unified_strdate(get_element_by_class( - 'date', get_element_html_by_class('descriptions', webpage) or '')), - 'thumbnail': self._og_search_thumbnail(webpage), - 'season_number': int_or_none(season), - 'url': extract_attributes(get_element_html_by_class('audiofile', webpage) or '').get('href'), - } - - -class RadioComercialPlaylistIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?radiocomercial\.pt/podcasts/(?P<id>[\w-]+)(?:/t?(?P<season>\d+))?/?(?:$|[?#])' - _TESTS = [{ - 'url': 'https://radiocomercial.pt/podcasts/convenca-me-num-minuto/t3', - 'info_dict': { - 'id': 'convenca-me-num-minuto_t3', - 'title': 'Convença-me num Minuto - Temporada 3', - }, - 'playlist_mincount': 32, - }, { - 'url': 'https://radiocomercial.pt/podcasts/o-homem-que-mordeu-o-cao', - 'info_dict': { - 'id': 'o-homem-que-mordeu-o-cao', - 'title': 'O Homem Que Mordeu o Cão', - }, - 'playlist_mincount': 19, - }, { - 'url': 'https://radiocomercial.pt/podcasts/as-minhas-coisas-favoritas', - 'info_dict': { - 'id': 'as-minhas-coisas-favoritas', - 'title': 'As Minhas Coisas Favoritas', - }, - 'playlist_mincount': 131, - }, { - 'url': 'https://radiocomercial.pt/podcasts/tnt-todos-no-top/t2023', - 'info_dict': { - 'id': 'tnt-todos-no-top_t2023', - 'title': 'TNT - Todos No Top - Temporada 2023', - }, - 'playlist_mincount': 39, - }] - - def _entries(self, url, playlist_id): - for page in itertools.count(1): - try: - webpage = self._download_webpage( - f'{url}/{page}', playlist_id, f'Downloading page {page}') - except ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status == 404: - break - raise - - episodes = get_elements_html_by_class('tm-ouvir-podcast', webpage) - if not episodes: - break - for url_path in traverse_obj(episodes, (..., {extract_attributes}, 'href')): - episode_url = urljoin(url, url_path) - if RadioComercialIE.suitable(episode_url): - yield episode_url - - def _real_extract(self, url): - podcast, season = self._match_valid_url(url).group('id', 'season') - playlist_id = join_nonempty(podcast, season, delim='_t') - url = update_url(url, query=None, fragment=None) - webpage = self._download_webpage(url, playlist_id) - - name = try_call(lambda: get_element_text_and_html_by_tag('h1', webpage)[0]) - title = name if name == season else join_nonempty(name, season, delim=' - Temporada ') - - return self.playlist_from_matches( - self._entries(url, playlist_id), playlist_id, title, ie=RadioComercialIE) diff --git a/yt_dlp/extractor/redge.py b/yt_dlp/extractor/redge.py deleted file mode 100644 index 5ae09a096b..0000000000 --- a/yt_dlp/extractor/redge.py +++ /dev/null @@ -1,134 +0,0 @@ - -from .common import InfoExtractor -from ..networking import HEADRequest -from ..utils import ( - float_or_none, - int_or_none, - join_nonempty, - parse_qs, - update_url_query, -) -from ..utils.traversal import traverse_obj - - -class RedCDNLivxIE(InfoExtractor): - _VALID_URL = r'https?://[^.]+\.(?:dcs\.redcdn|atmcdn)\.pl/(?:live(?:dash|hls|ss)|nvr)/o2/(?P<tenant>[^/?#]+)/(?P<id>[^?#]+)\.livx' - IE_NAME = 'redcdnlivx' - - _TESTS = [{ - 'url': 'https://r.dcs.redcdn.pl/livedash/o2/senat/ENC02/channel.livx?indexMode=true&startTime=638272860000&stopTime=638292544000', - 'info_dict': { - 'id': 'ENC02-638272860000-638292544000', - 'ext': 'mp4', - 'title': 'ENC02', - 'duration': 19683.982, - 'live_status': 'was_live', - }, - }, { - 'url': 'https://r.dcs.redcdn.pl/livedash/o2/sejm/ENC18/live.livx?indexMode=true&startTime=722333096000&stopTime=722335562000', - 'info_dict': { - 'id': 'ENC18-722333096000-722335562000', - 'ext': 'mp4', - 'title': 'ENC18', - 'duration': 2463.995, - 'live_status': 'was_live', - }, - }, { - 'url': 'https://r.dcs.redcdn.pl/livehls/o2/sportevolution/live/triathlon2018/warsaw.livx/playlist.m3u8?startTime=550305000000&stopTime=550327620000', - 'info_dict': { - 'id': 'triathlon2018-warsaw-550305000000-550327620000', - 'ext': 'mp4', - 'title': 'triathlon2018/warsaw', - 'duration': 22619.98, - 'live_status': 'was_live', - }, - }, { - 'url': 'https://n-25-12.dcs.redcdn.pl/nvr/o2/sejm/Migacz-ENC01/1.livx?startTime=722347200000&stopTime=722367345000', - 'only_matching': True, - }, { - 'url': 'https://redir.atmcdn.pl/nvr/o2/sejm/ENC08/1.livx?startTime=503831270000&stopTime=503840040000', - 'only_matching': True, - }] - - ''' - Known methods (first in url path): - - `livedash` - DASH MPD - - `livehls` - HTTP Live Streaming - - `livess` - IIS Smooth Streaming - - `nvr` - CCTV mode, directly returns a file, typically flv, avc1, aac - - `sc` - shoutcast/icecast (audio streams, like radio) - ''' - - def _real_extract(self, url): - tenant, path = self._match_valid_url(url).group('tenant', 'id') - qs = parse_qs(url) - start_time = traverse_obj(qs, ('startTime', 0, {int_or_none})) - stop_time = traverse_obj(qs, ('stopTime', 0, {int_or_none})) - - def livx_mode(mode): - suffix = '' - if mode == 'livess': - suffix = '/manifest' - elif mode == 'livehls': - suffix = '/playlist.m3u8' - file_qs = {} - if start_time: - file_qs['startTime'] = start_time - if stop_time: - file_qs['stopTime'] = stop_time - if mode == 'nvr': - file_qs['nolimit'] = 1 - elif mode != 'sc': - file_qs['indexMode'] = 'true' - return update_url_query(f'https://r.dcs.redcdn.pl/{mode}/o2/{tenant}/{path}.livx{suffix}', file_qs) - - # no id or title for a transmission. making ones up. - title = path \ - .replace('/live', '').replace('live/', '') \ - .replace('/channel', '').replace('channel/', '') \ - .strip('/') - video_id = join_nonempty(title.replace('/', '-'), start_time, stop_time) - - formats = [] - # downloading the manifest separately here instead of _extract_ism_formats to also get some stream metadata - ism_res = self._download_xml_handle( - livx_mode('livess'), video_id, - note='Downloading ISM manifest', - errnote='Failed to download ISM manifest', - fatal=False) - ism_doc = None - if ism_res is not False: - ism_doc, ism_urlh = ism_res - formats, _ = self._parse_ism_formats_and_subtitles(ism_doc, ism_urlh.url, 'ss') - - nvr_urlh = self._request_webpage( - HEADRequest(livx_mode('nvr')), video_id, 'Follow flv file redirect', fatal=False, - expected_status=lambda _: True) - if nvr_urlh and nvr_urlh.status == 200: - formats.append({ - 'url': nvr_urlh.url, - 'ext': 'flv', - 'format_id': 'direct-0', - 'preference': -1, # might be slow - }) - formats.extend(self._extract_mpd_formats(livx_mode('livedash'), video_id, mpd_id='dash', fatal=False)) - formats.extend(self._extract_m3u8_formats( - livx_mode('livehls'), video_id, m3u8_id='hls', ext='mp4', fatal=False)) - - time_scale = traverse_obj(ism_doc, ('@TimeScale', {int_or_none})) or 10000000 - duration = traverse_obj( - ism_doc, ('@Duration', {float_or_none(scale=time_scale)})) or None - - live_status = None - if traverse_obj(ism_doc, '@IsLive') == 'TRUE': - live_status = 'is_live' - elif duration: - live_status = 'was_live' - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'duration': duration, - 'live_status': live_status, - } diff --git a/yt_dlp/extractor/rheinmaintv.py b/yt_dlp/extractor/rheinmaintv.py deleted file mode 100644 index c3b352dede..0000000000 --- a/yt_dlp/extractor/rheinmaintv.py +++ /dev/null @@ -1,94 +0,0 @@ -from .common import InfoExtractor -from ..utils import extract_attributes, merge_dicts, remove_end - - -class RheinMainTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?rheinmaintv\.de/sendungen/(?:[\w-]+/)*(?P<video_id>(?P<display_id>[\w-]+)/vom-\d{2}\.\d{2}\.\d{4}(?:/\d+)?)' - _TESTS = [{ - 'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/auf-dem-weg-zur-deutschen-meisterschaft/vom-07.11.2022/', - 'info_dict': { - 'id': 'auf-dem-weg-zur-deutschen-meisterschaft-vom-07.11.2022', - 'ext': 'ismv', # ismv+isma will be merged into mp4 - 'alt_title': 'Auf dem Weg zur Deutschen Meisterschaft', - 'title': 'Auf dem Weg zur Deutschen Meisterschaft', - 'upload_date': '20221108', - 'view_count': int, - 'display_id': 'auf-dem-weg-zur-deutschen-meisterschaft', - 'thumbnail': r're:^https://.+\.jpg', - 'description': 'md5:48c59b74192bc819a9b34af1d5ed1eb9', - 'timestamp': 1667933057, - 'duration': 243.0, - }, - 'params': {'skip_download': 'ism'}, - }, { - 'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften/vom-14.11.2022/', - 'info_dict': { - 'id': 'formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften-vom-14.11.2022', - 'ext': 'ismv', - 'title': 'Formationsgemeinschaft Rhein-Main bei den Deutschen Meisterschaften', - 'timestamp': 1668526214, - 'display_id': 'formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften', - 'alt_title': 'Formationsgemeinschaft Rhein-Main bei den Deutschen Meisterschaften', - 'view_count': int, - 'thumbnail': r're:^https://.+\.jpg', - 'duration': 345.0, - 'description': 'md5:9370ba29526984006c2cba1372e5c5a0', - 'upload_date': '20221115', - }, - 'params': {'skip_download': 'ism'}, - }, { - 'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/casino-mainz-bei-den-deutschen-meisterschaften/vom-14.11.2022/', - 'info_dict': { - 'id': 'casino-mainz-bei-den-deutschen-meisterschaften-vom-14.11.2022', - 'ext': 'ismv', - 'title': 'Casino Mainz bei den Deutschen Meisterschaften', - 'view_count': int, - 'timestamp': 1668527402, - 'alt_title': 'Casino Mainz bei den Deutschen Meisterschaften', - 'upload_date': '20221115', - 'display_id': 'casino-mainz-bei-den-deutschen-meisterschaften', - 'duration': 348.0, - 'thumbnail': r're:^https://.+\.jpg', - 'description': 'md5:70fc1660eeba96da17199e5bdff4c0aa', - }, - 'params': {'skip_download': 'ism'}, - }, { - 'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/bricks4kids/vom-22.06.2022/', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - display_id = mobj.group('display_id') - video_id = mobj.group('video_id').replace('/', '-') - webpage = self._download_webpage(url, video_id) - - source, img = self._search_regex(r'(?s)(?P<source><source[^>]*>)(?P<img><img[^>]*>)', - webpage, 'video', group=('source', 'img')) - source = extract_attributes(source) - img = extract_attributes(img) - - raw_json_ld = list(self._yield_json_ld(webpage, video_id)) - json_ld = self._json_ld(raw_json_ld, video_id) - json_ld.pop('url', None) - - ism_manifest_url = ( - source.get('src') - or next(json_ld.get('embedUrl') for json_ld in raw_json_ld if json_ld.get('@type') == 'VideoObject') - ) - formats, subtitles = self._extract_ism_formats_and_subtitles(ism_manifest_url, video_id) - - return merge_dicts({ - 'id': video_id, - 'display_id': display_id, - 'title': - self._html_search_regex(r'<h1><span class="title">([^<]*)</span>', - webpage, 'headline', default=None) - or img.get('title') or json_ld.get('title') or self._og_search_title(webpage) - or remove_end(self._html_extract_title(webpage), ' -'), - 'alt_title': img.get('alt'), - 'description': json_ld.get('description') or self._og_search_description(webpage), - 'formats': formats, - 'subtitles': subtitles, - 'thumbnails': [{'url': img['src']}] if 'src' in img else json_ld.get('thumbnails'), - }, json_ld) diff --git a/yt_dlp/extractor/rmcdecouverte.py b/yt_dlp/extractor/rmcdecouverte.py deleted file mode 100644 index e7b1e22978..0000000000 --- a/yt_dlp/extractor/rmcdecouverte.py +++ /dev/null @@ -1,69 +0,0 @@ -import urllib.parse - -from .brightcove import BrightcoveLegacyIE -from .common import InfoExtractor -from ..utils import smuggle_url - - -class RMCDecouverteIE(InfoExtractor): - _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/(?:[^?#]*_(?P<id>\d+)|mediaplayer-direct)/?(?:[#?]|$)' - - _TESTS = [{ - 'url': 'https://rmcdecouverte.bfmtv.com/vestiges-de-guerre_22240/les-bunkers-secrets-domaha-beach_25303/', - 'info_dict': { - 'id': '6250879771001', - 'ext': 'mp4', - 'title': 'LES BUNKERS SECRETS D´OMAHA BEACH', - 'uploader_id': '1969646226001', - 'description': 'md5:aed573ca24abde62a148e0eba909657d', - 'timestamp': 1619622984, - 'upload_date': '20210428', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://rmcdecouverte.bfmtv.com/wheeler-dealers-occasions-a-saisir/program_2566/', - 'info_dict': { - 'id': '5983675500001', - 'ext': 'mp4', - 'title': 'CORVETTE', - 'description': 'md5:c1e8295521e45ffebf635d6a7658f506', - 'uploader_id': '1969646226001', - 'upload_date': '20181226', - 'timestamp': 1545861635, - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'only available for a week', - }, { - 'url': 'https://rmcdecouverte.bfmtv.com/avions-furtifs-la-technologie-de-lextreme_10598', - 'only_matching': True, - }, { - # The website accepts any URL as long as it has _\d+ at the end - 'url': 'https://rmcdecouverte.bfmtv.com/any/thing/can/go/here/_10598', - 'only_matching': True, - }, { - # live, geo restricted, bypassable - 'url': 'https://rmcdecouverte.bfmtv.com/mediaplayer-direct/', - 'only_matching': True, - }] - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s' - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - display_id = mobj.group('id') or 'direct' - webpage = self._download_webpage(url, display_id) - brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) - if brightcove_legacy_url: - brightcove_id = urllib.parse.parse_qs(urllib.parse.urlparse( - brightcove_legacy_url).query)['@videoPlayer'][0] - else: - brightcove_id = self._search_regex( - r'data-video-id=["\'](\d+)', webpage, 'brightcove id') - return self.url_result( - smuggle_url( - self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, - {'geo_countries': ['FR']}), - 'BrightcoveNew', brightcove_id) diff --git a/yt_dlp/extractor/sejmpl.py b/yt_dlp/extractor/sejmpl.py index eb433d2ac3..6f3b79622c 100644 --- a/yt_dlp/extractor/sejmpl.py +++ b/yt_dlp/extractor/sejmpl.py @@ -1,7 +1,6 @@ import datetime as dt from .common import InfoExtractor -from .redge import RedCDNLivxIE from ..utils import ( clean_html, join_nonempty, @@ -27,6 +26,7 @@ def rfc3339_to_atende(date): class SejmIE(InfoExtractor): + _WORKING = False _VALID_URL = ( r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp(?:\?[^#]*)?#(?P<id>[\dA-F]+)', r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp\?(?:[^#]+&)?unid=(?P<id>[\dA-F]+)', @@ -185,7 +185,7 @@ class SejmIE(InfoExtractor): entries.append({ **common_info, '_type': 'url_transparent', - 'ie_key': RedCDNLivxIE.ie_key(), + 'ie_key': 'redcdnlivx', 'id': stream_id, 'title': join_nonempty(title, stream_id, delim=' - '), }) diff --git a/yt_dlp/extractor/sendtonews.py b/yt_dlp/extractor/sendtonews.py deleted file mode 100644 index ecf4b27d7a..0000000000 --- a/yt_dlp/extractor/sendtonews.py +++ /dev/null @@ -1,105 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ( - determine_protocol, - float_or_none, - int_or_none, - parse_iso8601, - unescapeHTML, - update_url_query, -) - - -class SendtoNewsIE(InfoExtractor): - _WORKING = False - _VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P<id>[0-9A-Za-z-]+)' - - _TEST = { - # From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/ - 'url': 'http://embed.sendtonews.com/player2/embedplayer.php?SC=GxfCe0Zo7D-175909-5588&type=single&autoplay=on&sound=YES', - 'info_dict': { - 'id': 'GxfCe0Zo7D-175909-5588', - }, - 'playlist_count': 8, - # test the first video only to prevent lengthy tests - 'playlist': [{ - 'info_dict': { - 'id': '240385', - 'ext': 'mp4', - 'title': 'Indians introduce Encarnacion', - 'description': 'Indians president of baseball operations Chris Antonetti and Edwin Encarnacion discuss the slugger\'s three-year contract with Cleveland', - 'duration': 137.898, - 'thumbnail': r're:https?://.*\.jpg$', - 'upload_date': '20170105', - 'timestamp': 1483649762, - }, - }], - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - - _URL_TEMPLATE = '//embed.sendtonews.com/player2/embedplayer.php?SC=%s' - - @classmethod - def _extract_embed_urls(cls, url, webpage): - mobj = re.search(r'''(?x)<script[^>]+src=([\'"]) - (?:https?:)?//embed\.sendtonews\.com/player/responsiveembed\.php\? - .*\bSC=(?P<SC>[0-9a-zA-Z-]+).* - \1>''', webpage) - if mobj: - sc = mobj.group('SC') - yield cls._URL_TEMPLATE % sc - - def _real_extract(self, url): - playlist_id = self._match_id(url) - - data_url = update_url_query( - url.replace('embedplayer.php', 'data_read.php'), - {'cmd': 'loadInitial'}) - playlist_data = self._download_json(data_url, playlist_id) - - entries = [] - for video in playlist_data['playlistData'][0]: - info_dict = self._parse_jwplayer_data( - video['jwconfiguration'], - require_title=False, m3u8_id='hls', rtmp_params={'no_resume': True}) - - for f in info_dict['formats']: - if f.get('tbr'): - continue - tbr = int_or_none(self._search_regex( - r'/(\d+)k/', f['url'], 'bitrate', default=None)) - if not tbr: - continue - f.update({ - 'format_id': f'{determine_protocol(f)}-{tbr}', - 'tbr': tbr, - }) - - thumbnails = [] - if video.get('thumbnailUrl'): - thumbnails.append({ - 'id': 'normal', - 'url': video['thumbnailUrl'], - }) - if video.get('smThumbnailUrl'): - thumbnails.append({ - 'id': 'small', - 'url': video['smThumbnailUrl'], - }) - info_dict.update({ - 'title': video['S_headLine'].strip(), - 'description': unescapeHTML(video.get('S_fullStory')), - 'thumbnails': thumbnails, - 'duration': float_or_none(video.get('SM_length')), - 'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '), - # 'tbr' was explicitly set to be preferred over 'height' originally, - # So this is being kept unless someone can confirm this is unnecessary - '_format_sort_fields': ('tbr', 'res'), - }) - entries.append(info_dict) - - return self.playlist_result(entries, playlist_id) diff --git a/yt_dlp/extractor/sharevideos.py b/yt_dlp/extractor/sharevideos.py deleted file mode 100644 index 3132c7a82b..0000000000 --- a/yt_dlp/extractor/sharevideos.py +++ /dev/null @@ -1,6 +0,0 @@ -from .common import InfoExtractor - - -class ShareVideosEmbedIE(InfoExtractor): - _VALID_URL = False - _EMBED_REGEX = [r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1'] diff --git a/yt_dlp/extractor/snotr.py b/yt_dlp/extractor/snotr.py deleted file mode 100644 index 859e5e8376..0000000000 --- a/yt_dlp/extractor/snotr.py +++ /dev/null @@ -1,68 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - parse_duration, - parse_filesize, - str_to_int, -) - - -class SnotrIE(InfoExtractor): - _VALID_URL = r'http?://(?:www\.)?snotr\.com/video/(?P<id>\d+)/([\w]+)' - _TESTS = [{ - 'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks', - 'info_dict': { - 'id': '13708', - 'ext': 'mp4', - 'title': 'Drone flying through fireworks!', - 'duration': 248, - 'filesize_approx': 40700000, - 'description': 'A drone flying through Fourth of July Fireworks', - 'thumbnail': r're:^https?://.*\.jpg$', - }, - 'expected_warnings': ['description'], - }, { - 'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10', - 'info_dict': { - 'id': '530', - 'ext': 'mp4', - 'title': 'David Letteman - George W. Bush Top 10', - 'duration': 126, - 'filesize_approx': 8500000, - 'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!', - 'thumbnail': r're:^https?://.*\.jpg$', - }, - }] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - - webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage) - - description = self._og_search_description(webpage) - info_dict = self._parse_html5_media_entries( - url, webpage, video_id, m3u8_entry_protocol='m3u8_native')[0] - - view_count = str_to_int(self._html_search_regex( - r'<p[^>]*>\s*<strong[^>]*>Views:</strong>\s*<span[^>]*>([\d,\.]+)', - webpage, 'view count', fatal=False)) - - duration = parse_duration(self._html_search_regex( - r'<p[^>]*>\s*<strong[^>]*>Length:</strong>\s*<span[^>]*>([\d:]+)', - webpage, 'duration', fatal=False)) - - filesize_approx = parse_filesize(self._html_search_regex( - r'<p[^>]*>\s*<strong[^>]*>Filesize:</strong>\s*<span[^>]*>([^<]+)', - webpage, 'filesize', fatal=False)) - - info_dict.update({ - 'id': video_id, - 'description': description, - 'title': title, - 'view_count': view_count, - 'duration': duration, - 'filesize_approx': filesize_approx, - }) - - return info_dict diff --git a/yt_dlp/extractor/springboardplatform.py b/yt_dlp/extractor/springboardplatform.py deleted file mode 100644 index 9a37dd6752..0000000000 --- a/yt_dlp/extractor/springboardplatform.py +++ /dev/null @@ -1,122 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - int_or_none, - unescapeHTML, - unified_timestamp, - xpath_attr, - xpath_element, - xpath_text, -) - - -class SpringboardPlatformIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?:// - cms\.springboardplatform\.com/ - (?: - (?:previews|embed_iframe)/(?P<index>\d+)/video/(?P<id>\d+)| - xml_feeds_advanced/index/(?P<index_2>\d+)/rss3/(?P<id_2>\d+) - ) - ''' - _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cms\.springboardplatform\.com/embed_iframe/\d+/video/\d+.*?)\1'] - _TESTS = [{ - 'url': 'http://cms.springboardplatform.com/previews/159/video/981017/0/0/1', - 'md5': '5c3cb7b5c55740d482561099e920f192', - 'info_dict': { - 'id': '981017', - 'ext': 'mp4', - 'title': 'Redman "BUD like YOU" "Usher Good Kisser" REMIX', - 'description': 'Redman "BUD like YOU" "Usher Good Kisser" REMIX', - 'thumbnail': r're:https?://.+\.jpg', - 'timestamp': 1409132328, - 'upload_date': '20140827', - 'duration': 193, - }, - 'skip': 'Invalid URL', - }, { - 'url': 'http://cms.springboardplatform.com/embed_iframe/159/video/981017/rab007/rapbasement.com/1/1', - 'only_matching': True, - }, { - 'url': 'http://cms.springboardplatform.com/embed_iframe/20/video/1731611/ki055/kidzworld.com/10', - 'only_matching': True, - }, { - 'url': 'http://cms.springboardplatform.com/xml_feeds_advanced/index/159/rss3/981017/0/0/1/', - 'only_matching': True, - }] - _WEBPAGE_TESTS = [{ - 'url': 'https://www.kidzworld.com/article/30935-trolls-the-beat-goes-on-interview-skylar-astin-and-amanda-leighton', - 'info_dict': { - 'id': '1731611', - 'ext': 'mp4', - 'title': 'Official Trailer | TROLLS: THE BEAT GOES ON!', - }, - 'skip': 'Invalid URL', - }] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') or mobj.group('id_2') - index = mobj.group('index') or mobj.group('index_2') - - video = self._download_xml( - f'http://cms.springboardplatform.com/xml_feeds_advanced/index/{index}/rss3/{video_id}', video_id) - - item = xpath_element(video, './/item', 'item', fatal=True) - - content = xpath_element( - item, './{http://search.yahoo.com/mrss/}content', 'content', - fatal=True) - title = unescapeHTML(xpath_text(item, './title', 'title', fatal=True)) - - video_url = content.attrib['url'] - - if 'error_video.mp4' in video_url: - raise ExtractorError( - f'Video {video_id} no longer exists', expected=True) - - duration = int_or_none(content.get('duration')) - tbr = int_or_none(content.get('bitrate')) - filesize = int_or_none(content.get('fileSize')) - width = int_or_none(content.get('width')) - height = int_or_none(content.get('height')) - - description = unescapeHTML(xpath_text( - item, './description', 'description')) - thumbnail = xpath_attr( - item, './{http://search.yahoo.com/mrss/}thumbnail', 'url', - 'thumbnail') - - timestamp = unified_timestamp(xpath_text( - item, './{http://cms.springboardplatform.com/namespaces.html}created', - 'timestamp')) - - formats = [{ - 'url': video_url, - 'format_id': 'http', - 'tbr': tbr, - 'filesize': filesize, - 'width': width, - 'height': height, - }] - - m3u8_format = formats[0].copy() - m3u8_format.update({ - 'url': re.sub(r'(https?://)cdn\.', r'\1hls.', video_url) + '.m3u8', - 'ext': 'mp4', - 'format_id': 'hls', - 'protocol': 'm3u8_native', - }) - formats.append(m3u8_format) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'duration': duration, - 'formats': formats, - } diff --git a/yt_dlp/extractor/stanfordoc.py b/yt_dlp/extractor/stanfordoc.py deleted file mode 100644 index ab41091cff..0000000000 --- a/yt_dlp/extractor/stanfordoc.py +++ /dev/null @@ -1,89 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - orderedSet, - unescapeHTML, -) - - -class StanfordOpenClassroomIE(InfoExtractor): - IE_NAME = 'stanfordoc' - IE_DESC = 'Stanford Open ClassRoom' - _VALID_URL = r'https?://openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$' - _TEST = { - 'url': 'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100', - 'md5': '544a9468546059d4e80d76265b0443b8', - 'info_dict': { - 'id': 'PracticalUnix_intro-environment', - 'ext': 'mp4', - 'title': 'Intro Environment', - }, - } - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - - if mobj.group('course') and mobj.group('video'): # A specific video - course = mobj.group('course') - video = mobj.group('video') - info = { - 'id': course + '_' + video, - 'uploader': None, - 'upload_date': None, - } - - base_url = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/' - xml_url = base_url + video + '.xml' - mdoc = self._download_xml(xml_url, info['id']) - try: - info['title'] = mdoc.findall('./title')[0].text - info['url'] = base_url + mdoc.findall('./videoFile')[0].text - except IndexError: - raise ExtractorError('Invalid metadata XML file') - return info - elif mobj.group('course'): # A course page - course = mobj.group('course') - info = { - 'id': course, - '_type': 'playlist', - 'uploader': None, - 'upload_date': None, - } - - coursepage = self._download_webpage( - url, info['id'], - note='Downloading course info page', - errnote='Unable to download course info page') - - info['title'] = self._html_search_regex( - r'<h1>([^<]+)</h1>', coursepage, 'title', default=info['id']) - - info['description'] = self._html_search_regex( - r'(?s)<description>([^<]+)</description>', - coursepage, 'description', fatal=False) - - links = orderedSet(re.findall(r'<a href="(VideoPage\.php\?[^"]+)">', coursepage)) - info['entries'] = [self.url_result( - f'http://openclassroom.stanford.edu/MainFolder/{unescapeHTML(l)}', - ) for l in links] - return info - else: # Root page - info = { - 'id': 'Stanford OpenClassroom', - '_type': 'playlist', - 'uploader': None, - 'upload_date': None, - } - info['title'] = info['id'] - - root_url = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php' - rootpage = self._download_webpage(root_url, info['id'], - errnote='Unable to download course info page') - - links = orderedSet(re.findall(r'<a href="(CoursePage\.php\?[^"]+)">', rootpage)) - info['entries'] = [self.url_result( - f'http://openclassroom.stanford.edu/MainFolder/{unescapeHTML(l)}', - ) for l in links] - return info diff --git a/yt_dlp/extractor/stitcher.py b/yt_dlp/extractor/stitcher.py deleted file mode 100644 index 09ebabb289..0000000000 --- a/yt_dlp/extractor/stitcher.py +++ /dev/null @@ -1,141 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - clean_html, - clean_podcast_url, - int_or_none, - str_or_none, - try_get, - url_or_none, -) - - -class StitcherBaseIE(InfoExtractor): - _VALID_URL_BASE = r'https?://(?:www\.)?stitcher\.com/(?:podcast|show)/' - - def _call_api(self, path, video_id, query): - resp = self._download_json( - 'https://api.prod.stitcher.com/' + path, - video_id, query=query) - error_massage = try_get(resp, lambda x: x['errors'][0]['message']) - if error_massage: - raise ExtractorError(error_massage, expected=True) - return resp['data'] - - def _extract_description(self, data): - return clean_html(data.get('html_description') or data.get('description')) - - def _extract_audio_url(self, episode): - return url_or_none(episode.get('audio_url') or episode.get('guid')) - - def _extract_show_info(self, show): - return { - 'thumbnail': show.get('image_base_url'), - 'series': show.get('title'), - } - - def _extract_episode(self, episode, audio_url, show_info): - info = { - 'id': str(episode['id']), - 'display_id': episode.get('slug'), - 'title': episode['title'].strip(), - 'description': self._extract_description(episode), - 'duration': int_or_none(episode.get('duration')), - 'url': clean_podcast_url(audio_url), - 'vcodec': 'none', - 'timestamp': int_or_none(episode.get('date_published')), - 'season_number': int_or_none(episode.get('season')), - 'season_id': str_or_none(episode.get('season_id')), - } - info.update(show_info) - return info - - -class StitcherIE(StitcherBaseIE): - _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?:[^/]+/)+e(?:pisode)?/(?:[^/#?&]+-)?(?P<id>\d+)' - _TESTS = [{ - 'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true', - 'md5': 'e9635098e0da10b21a0e2b85585530f6', - 'info_dict': { - 'id': '40789481', - 'ext': 'mp3', - 'title': 'Machine Learning Mastery and Cancer Clusters', - 'description': 'md5:547adb4081864be114ae3831b4c2b42f', - 'duration': 1604, - 'thumbnail': r're:^https?://.*\.jpg', - 'upload_date': '20151008', - 'timestamp': 1444285800, - 'series': 'Talking Machines', - }, - }, { - 'url': 'http://www.stitcher.com/podcast/panoply/vulture-tv/e/the-rare-hourlong-comedy-plus-40846275?autoplay=true', - 'info_dict': { - 'id': '40846275', - 'display_id': 'the-rare-hourlong-comedy-plus', - 'ext': 'mp3', - 'title': "The CW's 'Crazy Ex-Girlfriend'", - 'description': 'md5:04f1e2f98eb3f5cbb094cea0f9e19b17', - 'duration': 2235, - 'thumbnail': r're:^https?://.*\.jpg', - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'Page Not Found', - }, { - # escaped title - 'url': 'http://www.stitcher.com/podcast/marketplace-on-stitcher/e/40910226?autoplay=true', - 'only_matching': True, - }, { - 'url': 'http://www.stitcher.com/podcast/panoply/getting-in/e/episode-2a-how-many-extracurriculars-should-i-have-40876278?autoplay=true', - 'only_matching': True, - }, { - 'url': 'https://www.stitcher.com/show/threedom/episode/circles-on-a-stick-200212584', - 'only_matching': True, - }] - - def _real_extract(self, url): - audio_id = self._match_id(url) - data = self._call_api( - 'shows/episodes', audio_id, {'episode_ids': audio_id}) - episode = data['episodes'][0] - audio_url = self._extract_audio_url(episode) - if not audio_url: - self.raise_login_required() - show = try_get(data, lambda x: x['shows'][0], dict) or {} - return self._extract_episode( - episode, audio_url, self._extract_show_info(show)) - - -class StitcherShowIE(StitcherBaseIE): - _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?P<id>[^/#?&]+)/?(?:[?#&]|$)' - _TESTS = [{ - 'url': 'http://www.stitcher.com/podcast/the-talking-machines', - 'info_dict': { - 'id': 'the-talking-machines', - 'title': 'Talking Machines', - 'description': 'md5:831f0995e40f26c10231af39cf1ebf0b', - }, - 'playlist_mincount': 106, - }, { - 'url': 'https://www.stitcher.com/show/the-talking-machines', - 'only_matching': True, - }] - - def _real_extract(self, url): - show_slug = self._match_id(url) - data = self._call_api( - f'search/show/{show_slug}/allEpisodes', show_slug, {'count': 10000}) - show = try_get(data, lambda x: x['shows'][0], dict) or {} - show_info = self._extract_show_info(show) - - entries = [] - for episode in (data.get('episodes') or []): - audio_url = self._extract_audio_url(episode) - if not audio_url: - continue - entries.append(self._extract_episode(episode, audio_url, show_info)) - - return self.playlist_result( - entries, show_slug, show.get('title'), - self._extract_description(show)) diff --git a/yt_dlp/extractor/stretchinternet.py b/yt_dlp/extractor/stretchinternet.py deleted file mode 100644 index 232837d8d5..0000000000 --- a/yt_dlp/extractor/stretchinternet.py +++ /dev/null @@ -1,35 +0,0 @@ -from .common import InfoExtractor - - -class StretchInternetIE(InfoExtractor): - _VALID_URL = r'https?://portal\.stretchinternet\.com/[^/]+/(?:portal|full)\.htm\?.*?\beventId=(?P<id>\d+)' - _TEST = { - 'url': 'https://portal.stretchinternet.com/umary/portal.htm?eventId=573272&streamType=video', - 'info_dict': { - 'id': '573272', - 'ext': 'mp4', - 'title': 'UNIVERSITY OF MARY WRESTLING VS UPPER IOWA', - # 'timestamp': 1575668361, - # 'upload_date': '20191206', - 'uploader_id': '99997', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - media_url = self._download_json( - 'https://core.stretchlive.com/trinity/event/tcg/' + video_id, - video_id)[0]['media'][0]['url'] - event = self._download_json( - 'https://neo-client.stretchinternet.com/portal-ws/getEvent.json', - video_id, query={'eventID': video_id, 'token': 'asdf'})['event'] - - return { - 'id': video_id, - 'title': event['title'], - # TODO: parse US timezone abbreviations - # 'timestamp': event.get('dateTimeString'), - 'url': 'https://' + media_url, - 'uploader_id': event.get('ownerID'), - } diff --git a/yt_dlp/extractor/swearnet.py b/yt_dlp/extractor/swearnet.py deleted file mode 100644 index 2d6fb3eb47..0000000000 --- a/yt_dlp/extractor/swearnet.py +++ /dev/null @@ -1,45 +0,0 @@ -from .vidyard import VidyardBaseIE -from ..utils import ExtractorError, int_or_none, make_archive_id - - -class SwearnetEpisodeIE(VidyardBaseIE): - _VALID_URL = r'https?://www\.swearnet\.com/shows/(?P<id>[\w-]+)/seasons/(?P<season_num>\d+)/episodes/(?P<episode_num>\d+)' - _TESTS = [{ - 'url': 'https://www.swearnet.com/shows/gettin-learnt-with-ricky/seasons/1/episodes/1', - 'info_dict': { - 'id': 'wicK2EOzjOdxkUXGDIgcPw', - 'display_id': '232819', - 'ext': 'mp4', - 'episode_number': 1, - 'episode': 'Episode 1', - 'duration': 719, - 'description': r're:Are you drunk and high and craving a grilled cheese sandwich.+', - 'season': 'Season 1', - 'title': 'Episode 1 - Grilled Cheese Sammich', - 'season_number': 1, - 'thumbnail': 'https://cdn.vidyard.com/thumbnails/custom/0dd74f9b-388a-452e-b570-b407fb64435b_small.jpg', - 'tags': ['Getting Learnt with Ricky', 'drunk', 'grilled cheese', 'high'], - '_old_archive_ids': ['swearnetepisode 232819'], - }, - }] - - def _real_extract(self, url): - slug, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num') - webpage = self._download_webpage(url, slug) - - try: - external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid') - except ExtractorError: - if 'Upgrade Now' in webpage: - self.raise_login_required() - raise - - info = self._process_video_json(self._fetch_video_json(external_id)['chapters'][0], external_id) - if info.get('display_id'): - info['_old_archive_ids'] = [make_archive_id(self, info['display_id'])] - - return { - **info, - 'season_number': int_or_none(season_number), - 'episode_number': int_or_none(episode_number), - } diff --git a/yt_dlp/extractor/syvdk.py b/yt_dlp/extractor/syvdk.py deleted file mode 100644 index ec166831cd..0000000000 --- a/yt_dlp/extractor/syvdk.py +++ /dev/null @@ -1,33 +0,0 @@ -from .common import InfoExtractor -from ..utils import traverse_obj - - -class SYVDKIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?24syv\.dk/episode/(?P<id>[\w-]+)' - - _TESTS = [{ - 'url': 'https://24syv.dk/episode/isabella-arendt-stiller-op-for-de-konservative-2', - 'md5': '429ce5a423dd4b1e1d0bf3a569558089', - 'info_dict': { - 'id': '12215', - 'display_id': 'isabella-arendt-stiller-op-for-de-konservative-2', - 'ext': 'mp3', - 'title': 'Isabella Arendt stiller op for De Konservative', - 'description': 'md5:f5fa6a431813bf37284f3412ad7c6c06', - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - info_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['episodeDetails'][0] - - return { - 'id': str(info_data['id']), - 'vcodec': 'none', - 'ext': 'mp3', - 'url': info_data['details']['enclosure'], - 'display_id': video_id, - 'title': traverse_obj(info_data, ('title', 'rendered')), - 'description': traverse_obj(info_data, ('details', 'post_title')), - } diff --git a/yt_dlp/extractor/tempo.py b/yt_dlp/extractor/tempo.py deleted file mode 100644 index 4cd16f240d..0000000000 --- a/yt_dlp/extractor/tempo.py +++ /dev/null @@ -1,114 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_iso8601, - traverse_obj, - try_call, -) - - -class IVXPlayerIE(InfoExtractor): - _VALID_URL = r'ivxplayer:(?P<video_id>\d+):(?P<player_key>\w+)' - _TESTS = [{ - 'url': 'ivxplayer:2366065:4a89dfe6bc8f002596b1dfbd600730b1', - 'info_dict': { - 'id': '2366065', - 'ext': 'mp4', - 'duration': 112, - 'upload_date': '20221204', - 'title': 'Film Indonesia di Disney Content Showcase Asia Pacific 2022', - 'timestamp': 1670151746, - 'thumbnail': 'https://ivx-image.ivideosmart.com/serve/image/video/2366065?width=300', - }, - }] - _WEBPAGE_TESTS = [{ - 'url': 'https://www.cantika.com/video/31737/film-indonesia-di-disney-content-showcase-asia-pacific-2022', - 'info_dict': { - 'id': '2374200', - 'ext': 'mp4', - 'duration': 110, - 'title': 'Serial Indonesia di Disney Content Showcase Asia Pacific 2022', - 'timestamp': 1670639416, - 'upload_date': '20221210', - 'thumbnail': 'https://ivx-image.ivideosmart.com/serve/image/video/2374200?width=300', - }, - }, { - 'url': 'https://www.gooto.com/video/11437/wuling-suv-ramai-dikunjungi-di-giias-2018', - 'info_dict': { - 'id': '892109', - 'ext': 'mp4', - 'title': 'Wuling SUV Ramai Dikunjungi di GIIAS 2018', - 'upload_date': '20180811', - 'description': 'md5:6d901483d0aacc664aecb4489719aafa', - 'duration': 75, - 'timestamp': 1534011263, - 'thumbnail': 'https://ivx-image.ivideosmart.com/serve/image/video/892109?width=300', - }, - }] - - @classmethod - def _extract_embed_urls(cls, url, webpage): - # more info at https://player.ivideosmart.com/ivsplayer/v4/dist/js/loader.js - mobj = re.search( - r'<ivs-player\s*[^>]+data-ivs-key\s*=\s*"(?P<player_key>[\w]+)\s*[^>]+\bdata-ivs-vid="(?P<video_id>[\w-]+)', - webpage) - if mobj: - yield f'ivxplayer:{mobj.group("video_id")}:{mobj.group("player_key")}' - raise cls.StopExtraction - - def _real_extract(self, url): - video_id, player_key = self._match_valid_url(url).group('video_id', 'player_key') - json_data = self._download_json( - f'https://ivxplayer.ivideosmart.com/prod/video/{video_id}?key={player_key}', video_id) - - formats, subtitles = self._extract_m3u8_formats_and_subtitles( - json_data['player']['video_url'], video_id) - - return { - 'id': str(json_data['ivx']['id']), - 'title': traverse_obj(json_data, ('ivx', 'name')), - 'description': traverse_obj(json_data, ('ivx', 'description')), - 'duration': int_or_none(traverse_obj(json_data, ('ivx', 'duration'))), - 'timestamp': parse_iso8601(traverse_obj(json_data, ('ivx', 'published_at'))), - 'formats': formats, - 'subtitles': subtitles, - 'thumbnail': traverse_obj(json_data, ('ivx', 'thumbnail_url')), - } - - -class TempoIE(InfoExtractor): - _VALID_URL = r'https?://video\.tempo\.co/\w+/\d+/(?P<id>[\w-]+)' - _TESTS = [{ - 'url': 'https://video.tempo.co/read/30058/anies-baswedan-ajukan-banding-putusan-ptun-batalkan-ump-dki', - 'info_dict': { - 'id': '2144275', - 'display_id': 'anies-baswedan-ajukan-banding-putusan-ptun-batalkan-ump-dki', - 'ext': 'mp4', - 'title': 'Anies Baswedan Ajukan Banding Putusan PTUN Batalkan UMP DKI', - 'duration': 85, - 'description': 'md5:a6822b7c4c874fa7e5bd63e96a387b66', - 'thumbnail': 'https://statik.tempo.co/data/2022/07/27/id_1128287/1128287_720.jpg', - 'timestamp': 1658907970, - 'upload_date': '20220727', - 'tags': ['Anies Baswedan', ' PTUN', ' PTUN | Pengadilan Tata Usaha Negara', ' PTUN Batalkan UMP DKI', ' UMP DKI'], - }, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - _, video_id, player_key = next(IVXPlayerIE._extract_embed_urls(url, webpage)).split(':') - - json_ld_data = self._search_json_ld(webpage, display_id) - - return self.url_result( - f'ivxplayer:{video_id}:{player_key}', display_id=display_id, - thumbnail=self._html_search_meta('twitter:image:src', webpage) or self._og_search_thumbnail(webpage), - tags=try_call(lambda: self._html_search_meta('keywords', webpage).split(',')), - description=(json_ld_data.get('description') - or self._html_search_meta(('description', 'twitter:description'), webpage) - or self._og_search_description(webpage)), - url_transparent=True) diff --git a/yt_dlp/extractor/theholetv.py b/yt_dlp/extractor/theholetv.py deleted file mode 100644 index a3a7024288..0000000000 --- a/yt_dlp/extractor/theholetv.py +++ /dev/null @@ -1,35 +0,0 @@ -from .common import InfoExtractor -from ..utils import extract_attributes, remove_end - - -class TheHoleTvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?the-hole\.tv/episodes/(?P<id>[\w-]+)' - _TESTS = [{ - 'url': 'https://the-hole.tv/episodes/gromkii-vopros-sergey-orlov', - 'md5': 'fea6682f47786f3ae5a6cbd635ec4bf9', - 'info_dict': { - 'id': 'gromkii-vopros-sergey-orlov', - 'ext': 'mp4', - 'title': 'Сергей Орлов — Громкий вопрос', - 'thumbnail': 'https://assets-cdn.the-hole.tv/images/t8gan4n6zn627e7wni11b2uemqts', - 'description': 'md5:45741a9202331f995d9fb76996759379', - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - player_attrs = extract_attributes(self._search_regex( - r'(<div[^>]*\bdata-controller="player"[^>]*>)', webpage, 'video player')) - formats, subtitles = self._extract_m3u8_formats_and_subtitles( - player_attrs['data-player-source-value'], video_id, 'mp4') - - return { - 'id': video_id, - 'title': remove_end(self._html_extract_title(webpage), ' — The Hole'), - 'description': self._og_search_description(webpage), - 'thumbnail': player_attrs.get('data-player-poster-value'), - 'formats': formats, - 'subtitles': subtitles, - } diff --git a/yt_dlp/extractor/traileraddict.py b/yt_dlp/extractor/traileraddict.py deleted file mode 100644 index 81c9365585..0000000000 --- a/yt_dlp/extractor/traileraddict.py +++ /dev/null @@ -1,61 +0,0 @@ -import re - -from .common import InfoExtractor - - -class TrailerAddictIE(InfoExtractor): - _WORKING = False - _VALID_URL = r'(?:https?://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)' - _TEST = { - 'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer', - 'md5': '41365557f3c8c397d091da510e73ceb4', - 'info_dict': { - 'id': '76184', - 'ext': 'mp4', - 'title': 'Prince Avalanche Trailer', - 'description': 'Trailer for Prince Avalanche.\n\nTwo highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind.', - }, - } - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - name = mobj.group('movie') + '/' + mobj.group('trailer_name') - webpage = self._download_webpage(url, name) - - title = self._html_extract_title(webpage, 'video title').replace(' - Trailer Addict', '') - view_count_str = self._search_regex( - r'<span class="views_n">([0-9,.]+)</span>', - webpage, 'view count', fatal=False) - view_count = ( - None if view_count_str is None - else int(view_count_str.replace(',', ''))) - video_id = self._search_regex( - r'<param\s+name="movie"\s+value="/emb/([0-9]+)"\s*/>', - webpage, 'video id') - - # Presence of (no)watchplus function indicates HD quality is available - if re.search(r'function (no)?watchplus()', webpage): - fvar = 'fvarhd' - else: - fvar = 'fvar' - - info_url = f'http://www.traileraddict.com/{fvar}.php?tid={video_id!s}' - info_webpage = self._download_webpage(info_url, video_id, 'Downloading the info webpage') - - final_url = self._search_regex(r'&fileurl=(.+)', - info_webpage, 'Download url').replace('%3F', '?') - thumbnail_url = self._search_regex(r'&image=(.+?)&', - info_webpage, 'thumbnail url') - - description = self._html_search_regex( - r'(?s)<div class="synopsis">.*?<div class="movie_label_info"[^>]*>(.*?)</div>', - webpage, 'description', fatal=False) - - return { - 'id': video_id, - 'url': final_url, - 'title': title, - 'thumbnail': thumbnail_url, - 'description': description, - 'view_count': view_count, - } diff --git a/yt_dlp/extractor/triller.py b/yt_dlp/extractor/triller.py deleted file mode 100644 index 3bdeedd43e..0000000000 --- a/yt_dlp/extractor/triller.py +++ /dev/null @@ -1,329 +0,0 @@ -import itertools -import json -import re - -from .common import InfoExtractor -from ..networking import HEADRequest -from ..utils import ( - ExtractorError, - UnsupportedError, - determine_ext, - int_or_none, - parse_resolution, - str_or_none, - traverse_obj, - unified_timestamp, - url_basename, - url_or_none, - urljoin, -) - - -class TrillerBaseIE(InfoExtractor): - _NETRC_MACHINE = 'triller' - _API_BASE_URL = 'https://social.triller.co/v1.5' - _API_HEADERS = {'Origin': 'https://triller.co'} - - def _perform_login(self, username, password): - if self._API_HEADERS.get('Authorization'): - return - - headers = {**self._API_HEADERS, 'Content-Type': 'application/json'} - user_check = traverse_obj(self._download_json( - f'{self._API_BASE_URL}/api/user/is-valid-username', None, note='Checking username', - fatal=False, expected_status=400, headers=headers, - data=json.dumps({'username': username}, separators=(',', ':')).encode()), 'status') - - if user_check: # endpoint returns `"status":false` if username exists - raise ExtractorError('Unable to login: Invalid username', expected=True) - - login = self._download_json( - f'{self._API_BASE_URL}/user/auth', None, note='Logging in', fatal=False, - expected_status=400, headers=headers, data=json.dumps({ - 'username': username, - 'password': password, - }, separators=(',', ':')).encode()) or {} - - if not login.get('auth_token'): - if login.get('error') == 1008: - raise ExtractorError('Unable to login: Incorrect password', expected=True) - raise ExtractorError('Unable to login') - - self._API_HEADERS['Authorization'] = f'Bearer {login["auth_token"]}' - - def _get_comments(self, video_id, limit=15): - comment_info = self._download_json( - f'{self._API_BASE_URL}/api/videos/{video_id}/comments_v2', - video_id, fatal=False, note='Downloading comments API JSON', - headers=self._API_HEADERS, query={'limit': limit}) or {} - if not comment_info.get('comments'): - return - yield from traverse_obj(comment_info, ('comments', ..., { - 'id': ('id', {str_or_none}), - 'text': 'body', - 'author': ('author', 'username'), - 'author_id': ('author', 'user_id'), - 'timestamp': ('timestamp', {unified_timestamp}), - })) - - def _parse_video_info(self, video_info, username, user_id, display_id=None): - video_id = str(video_info['id']) - display_id = display_id or video_info.get('video_uuid') - - if traverse_obj(video_info, ( - None, ('transcoded_url', 'video_url', 'stream_url', 'audio_url'), - {lambda x: re.search(r'/copyright/', x)}), get_all=False): - self.raise_no_formats('This video has been removed due to licensing restrictions', expected=True) - - def format_info(url): - return { - 'url': url, - 'ext': determine_ext(url), - 'format_id': url_basename(url).split('.')[0], - } - - formats = [] - - if determine_ext(video_info.get('transcoded_url')) == 'm3u8': - formats.extend(self._extract_m3u8_formats( - video_info['transcoded_url'], video_id, 'mp4', m3u8_id='hls', fatal=False)) - - for video in traverse_obj(video_info, ('video_set', lambda _, v: url_or_none(v['url']))): - formats.append({ - **format_info(video['url']), - **parse_resolution(video.get('resolution')), - 'vcodec': video.get('codec'), - 'vbr': int_or_none(video.get('bitrate'), 1000), - }) - - video_url = traverse_obj(video_info, 'video_url', 'stream_url', expected_type=url_or_none) - if video_url: - formats.append({ - **format_info(video_url), - 'vcodec': 'h264', - **traverse_obj(video_info, { - 'width': 'width', - 'height': 'height', - 'filesize': 'filesize', - }, expected_type=int_or_none), - }) - - audio_url = url_or_none(video_info.get('audio_url')) - if audio_url: - formats.append(format_info(audio_url)) - - comment_count = traverse_obj(video_info, ('comment_count', {int_or_none})) - - return { - 'id': video_id, - 'display_id': display_id, - 'uploader': username, - 'uploader_id': user_id or traverse_obj(video_info, ('user', 'user_id', {str_or_none})), - 'webpage_url': urljoin(f'https://triller.co/@{username}/video/', display_id), - 'uploader_url': f'https://triller.co/@{username}', - 'extractor_key': TrillerIE.ie_key(), - 'extractor': TrillerIE.IE_NAME, - 'formats': formats, - 'comment_count': comment_count, - '__post_extractor': self.extract_comments(video_id, comment_count), - **traverse_obj(video_info, { - 'title': ('description', {lambda x: x.replace('\r\n', ' ')}), - 'description': 'description', - 'creator': ((('user'), ('users', lambda _, v: str(v['user_id']) == user_id)), 'name'), - 'thumbnail': ('thumbnail_url', {url_or_none}), - 'timestamp': ('timestamp', {unified_timestamp}), - 'duration': ('duration', {int_or_none}), - 'view_count': ('play_count', {int_or_none}), - 'like_count': ('likes_count', {int_or_none}), - 'artist': 'song_artist', - 'track': 'song_title', - }, get_all=False), - } - - -class TrillerIE(TrillerBaseIE): - _VALID_URL = r'''(?x) - https?://(?:www\.)?triller\.co/ - @(?P<username>[\w.]+)/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}) - ''' - _TESTS = [{ - 'url': 'https://triller.co/@theestallion/video/2358fcd7-3df2-4c77-84c8-1d091610a6cf', - 'md5': '228662d783923b60d78395fedddc0a20', - 'info_dict': { - 'id': '71595734', - 'ext': 'mp4', - 'title': 'md5:9a2bf9435c5c4292678996a464669416', - 'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$', - 'description': 'md5:9a2bf9435c5c4292678996a464669416', - 'uploader': 'theestallion', - 'uploader_id': '18992236', - 'creator': 'Megan Thee Stallion', - 'timestamp': 1660598222, - 'upload_date': '20220815', - 'duration': 47, - 'view_count': int, - 'like_count': int, - 'artist': 'Megan Thee Stallion', - 'track': 'Her', - 'uploader_url': 'https://triller.co/@theestallion', - 'comment_count': int, - }, - 'skip': 'This video has been removed due to licensing restrictions', - }, { - 'url': 'https://triller.co/@charlidamelio/video/46c6fcfa-aa9e-4503-a50c-68444f44cddc', - 'md5': '874055f462af5b0699b9dbb527a505a0', - 'info_dict': { - 'id': '71621339', - 'ext': 'mp4', - 'title': 'md5:4c91ea82760fe0fffb71b8c3aa7295fc', - 'display_id': '46c6fcfa-aa9e-4503-a50c-68444f44cddc', - 'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$', - 'description': 'md5:4c91ea82760fe0fffb71b8c3aa7295fc', - 'uploader': 'charlidamelio', - 'uploader_id': '1875551', - 'creator': 'charli damelio', - 'timestamp': 1660773354, - 'upload_date': '20220817', - 'duration': 16, - 'view_count': int, - 'like_count': int, - 'artist': 'Dixie', - 'track': 'Someone to Blame', - 'uploader_url': 'https://triller.co/@charlidamelio', - 'comment_count': int, - }, - }, { - 'url': 'https://triller.co/@theestallion/video/07f35f38-1f51-48e2-8c5f-f7a8e829988f', - 'md5': 'af7b3553e4b8bfca507636471ee2eb41', - 'info_dict': { - 'id': '71837829', - 'ext': 'mp4', - 'title': 'UNGRATEFUL VIDEO OUT NOW 👏🏾👏🏾👏🏾 💙💙 link my bio #womeninhiphop', - 'display_id': '07f35f38-1f51-48e2-8c5f-f7a8e829988f', - 'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$', - 'description': 'UNGRATEFUL VIDEO OUT NOW 👏🏾👏🏾👏🏾 💙💙 link my bio\r\n #womeninhiphop', - 'uploader': 'theestallion', - 'uploader_id': '18992236', - 'creator': 'Megan Thee Stallion', - 'timestamp': 1662486178, - 'upload_date': '20220906', - 'duration': 30, - 'view_count': int, - 'like_count': int, - 'artist': 'Unknown', - 'track': 'Unknown', - 'uploader_url': 'https://triller.co/@theestallion', - 'comment_count': int, - }, - }] - - def _real_extract(self, url): - username, display_id = self._match_valid_url(url).group('username', 'id') - - video_info = self._download_json( - f'{self._API_BASE_URL}/api/videos/{display_id}', display_id, - headers=self._API_HEADERS)['videos'][0] - - return self._parse_video_info(video_info, username, None, display_id) - - -class TrillerUserIE(TrillerBaseIE): - _VALID_URL = r'https?://(?:www\.)?triller\.co/@(?P<id>[\w.]+)/?(?:$|[#?])' - _TESTS = [{ - 'url': 'https://triller.co/@theestallion', - 'playlist_mincount': 12, - 'info_dict': { - 'id': '18992236', - 'title': 'theestallion', - 'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$', - }, - }, { - 'url': 'https://triller.co/@charlidamelio', - 'playlist_mincount': 150, - 'info_dict': { - 'id': '1875551', - 'title': 'charlidamelio', - 'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$', - }, - }] - - def _real_initialize(self): - if not self._API_HEADERS.get('Authorization'): - guest = self._download_json( - f'{self._API_BASE_URL}/user/create_guest', None, - note='Creating guest session', data=b'', headers=self._API_HEADERS, query={ - 'platform': 'Web', - 'app_version': '', - }) - if not guest.get('auth_token'): - raise ExtractorError('Unable to fetch required auth token for user extraction') - - self._API_HEADERS['Authorization'] = f'Bearer {guest["auth_token"]}' - - def _entries(self, username, user_id, limit=6): - query = {'limit': limit} - for page in itertools.count(1): - videos = self._download_json( - f'{self._API_BASE_URL}/api/users/{user_id}/videos', - username, note=f'Downloading user video list page {page}', - headers=self._API_HEADERS, query=query) - - for video in traverse_obj(videos, ('videos', ...)): - yield self._parse_video_info(video, username, user_id) - - query['before_time'] = traverse_obj(videos, ('videos', -1, 'timestamp')) - if not query['before_time']: - break - - def _real_extract(self, url): - username = self._match_id(url) - - user_info = traverse_obj(self._download_json( - f'{self._API_BASE_URL}/api/users/by_username/{username}', - username, note='Downloading user info', headers=self._API_HEADERS), ('user', {dict})) or {} - - if user_info.get('private') and user_info.get('followed_by_me') not in (True, 'true'): - raise ExtractorError('This user profile is private', expected=True) - elif traverse_obj(user_info, (('blocked_by_user', 'blocking_user'), {bool}), get_all=False): - raise ExtractorError('The author of the video is blocked', expected=True) - - user_id = str_or_none(user_info.get('user_id')) - if not user_id: - raise ExtractorError('Unable to extract user ID') - - return self.playlist_result( - self._entries(username, user_id), user_id, username, thumbnail=user_info.get('avatar_url')) - - -class TrillerShortIE(InfoExtractor): - _VALID_URL = r'https?://v\.triller\.co/(?P<id>\w+)' - _TESTS = [{ - 'url': 'https://v.triller.co/WWZNWk', - 'md5': '5eb8dc2c971bd8cd794ec9e8d5e9d101', - 'info_dict': { - 'id': '66210052', - 'ext': 'mp4', - 'title': 'md5:2dfc89d154cd91a4a18cd9582ba03e16', - 'display_id': 'f4480e1f-fb4e-45b9-a44c-9e6c679ce7eb', - 'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$', - 'description': 'md5:2dfc89d154cd91a4a18cd9582ba03e16', - 'uploader': 'statefairent', - 'uploader_id': '487545193', - 'creator': 'Official Summer Fair of LA', - 'timestamp': 1629655457, - 'upload_date': '20210822', - 'duration': 19, - 'view_count': int, - 'like_count': int, - 'artist': 'Unknown', - 'track': 'Unknown', - 'uploader_url': 'https://triller.co/@statefairent', - 'comment_count': int, - }, - }] - - def _real_extract(self, url): - real_url = self._request_webpage(HEADRequest(url), self._match_id(url)).url - if self.suitable(real_url): # Prevent infinite loop in case redirect fails - raise UnsupportedError(real_url) - return self.url_result(real_url) diff --git a/yt_dlp/extractor/tvplayer.py b/yt_dlp/extractor/tvplayer.py deleted file mode 100644 index 7c47bc78e3..0000000000 --- a/yt_dlp/extractor/tvplayer.py +++ /dev/null @@ -1,79 +0,0 @@ -from .common import InfoExtractor -from ..networking.exceptions import HTTPError -from ..utils import ( - ExtractorError, - extract_attributes, - try_get, - urlencode_postdata, -) - - -class TVPlayerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tvplayer\.com/watch/(?P<id>[^/?#]+)' - _TEST = { - 'url': 'http://tvplayer.com/watch/bbcone', - 'info_dict': { - 'id': '89', - 'ext': 'mp4', - 'title': r're:^BBC One [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - current_channel = extract_attributes(self._search_regex( - r'(<div[^>]+class="[^"]*current-channel[^"]*"[^>]*>)', - webpage, 'channel element')) - title = current_channel['data-name'] - - resource_id = current_channel['data-id'] - - token = self._search_regex( - r'data-token=(["\'])(?P<token>(?!\1).+)\1', webpage, - 'token', group='token') - - context = self._download_json( - 'https://tvplayer.com/watch/context', display_id, - 'Downloading JSON context', query={ - 'resource': resource_id, - 'gen': token, - }) - - validate = context['validate'] - platform = try_get( - context, lambda x: x['platform']['key'], str) or 'firefox' - - try: - response = self._download_json( - 'http://api.tvplayer.com/api/v2/stream/live', - display_id, 'Downloading JSON stream', headers={ - 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', - }, data=urlencode_postdata({ - 'id': resource_id, - 'service': 1, - 'platform': platform, - 'validate': validate, - }))['tvplayer']['response'] - except ExtractorError as e: - if isinstance(e.cause, HTTPError): - response = self._parse_json( - e.cause.response.read().decode(), resource_id)['tvplayer']['response'] - raise ExtractorError( - '{} said: {}'.format(self.IE_NAME, response['error']), expected=True) - raise - - formats = self._extract_m3u8_formats(response['stream'], display_id, 'mp4') - - return { - 'id': resource_id, - 'display_id': display_id, - 'title': title, - 'formats': formats, - 'is_live': True, - } diff --git a/yt_dlp/extractor/ufctv.py b/yt_dlp/extractor/ufctv.py index 2c1c5e0ff1..13b256cd3f 100644 --- a/yt_dlp/extractor/ufctv.py +++ b/yt_dlp/extractor/ufctv.py @@ -5,9 +5,3 @@ class UFCTVIE(ImgGamingBaseIE): _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:(?:app|www)\.)?(?:ufc\.tv|(?:ufc)?fightpass\.com)|ufcfightpass\.img(?:dge|gaming)\.com' _NETRC_MACHINE = 'ufctv' _REALM = 'ufc' - - -class UFCArabiaIE(ImgGamingBaseIE): - _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:(?:app|www)\.)?ufcarabia\.(?:ae|com)' - _NETRC_MACHINE = 'ufcarabia' - _REALM = 'admufc' diff --git a/yt_dlp/extractor/uktvplay.py b/yt_dlp/extractor/uktvplay.py deleted file mode 100644 index 9abe3436f2..0000000000 --- a/yt_dlp/extractor/uktvplay.py +++ /dev/null @@ -1,36 +0,0 @@ -from .common import InfoExtractor - - -class UKTVPlayIE(InfoExtractor): - _VALID_URL = r'https?://uktvplay\.(?:uktv\.)?co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*)(?P<id>\d+)' - _TESTS = [{ - 'url': 'https://uktvplay.uktv.co.uk/shows/world-at-war/c/200/watch-online/?video=2117008346001', - 'info_dict': { - 'id': '2117008346001', - 'ext': 'mp4', - 'title': 'Pincers', - 'description': 'Pincers', - 'uploader_id': '1242911124001', - 'upload_date': '20130124', - 'timestamp': 1359049267, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - 'expected_warnings': ['Failed to download MPD manifest'], - }, { - 'url': 'https://uktvplay.uktv.co.uk/shows/africa/watch-online/5983349675001', - 'only_matching': True, - }, { - 'url': 'https://uktvplay.co.uk/shows/hornby-a-model-world/series-1/episode-1/6276739790001?autoplaying=true', - 'only_matching': True, - }] - # BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/1242911124001/OrCyvJ2gyL_default/index.html?videoId=%s' - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911124001/H1xnMOqP_default/index.html?videoId=%s' - - def _real_extract(self, url): - video_id = self._match_id(url) - return self.url_result( - self.BRIGHTCOVE_URL_TEMPLATE % video_id, - 'BrightcoveNew', video_id) diff --git a/yt_dlp/extractor/unsupported.py b/yt_dlp/extractor/unsupported.py index 1e6f8c56ed..5d965ec3a2 100644 --- a/yt_dlp/extractor/unsupported.py +++ b/yt_dlp/extractor/unsupported.py @@ -71,6 +71,8 @@ class KnownDRMIE(UnsupportedInfoExtractor): r'watch\.telusoriginals\.com', r'video\.unext\.jp', r'www\.web\.nhk', + r'fod\.fujitv\.co\.jp', + r'zee5\.com', ) _TESTS = [{ @@ -252,6 +254,14 @@ class KnownDRMIE(UnsupportedInfoExtractor): # https://github.com/yt-dlp/yt-dlp/issues/14620 'url': 'https://www.web.nhk/tv/an/72hours/pl/series-tep-W3W8WRN8M3/ep/QW8ZY6146V', 'only_matching': True, + }, { + # https://github.com/yt-dlp/yt-dlp/issues/7064 + # https://github.com/yt-dlp/yt-dlp/issues/10264 + 'url': 'https://fod.fujitv.co.jp/title/709f/709f130001/', + 'only_matching': True, + }, { + 'url': 'https://www.zee5.com/', + 'only_matching': True, }] def _real_extract(self, url): @@ -291,6 +301,8 @@ class KnownPiracyIE(UnsupportedInfoExtractor): r'einthusan\.(?:tv|com|ca)', r'yourupload\.com', r'xanimu\.com', + r'musicdex\.org', + r'duboku\.io', ) _TESTS = [{ diff --git a/yt_dlp/extractor/utreon.py b/yt_dlp/extractor/utreon.py deleted file mode 100644 index fc44df97b0..0000000000 --- a/yt_dlp/extractor/utreon.py +++ /dev/null @@ -1,98 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - dict_get, - int_or_none, - str_or_none, - try_get, - unified_strdate, - url_or_none, -) - - -class UtreonIE(InfoExtractor): - IE_NAME = 'playeur' - _VALID_URL = r'https?://(?:www\.)?(?:utreon|playeur)\.com/v/(?P<id>[\w-]+)' - _TESTS = [{ - 'url': 'https://utreon.com/v/z_I7ikQbuDw', - 'info_dict': { - 'id': 'z_I7ikQbuDw', - 'ext': 'mp4', - 'title': 'Freedom Friday meditation - Rising in the wind', - 'description': 'md5:a9bf15a42434a062fe313b938343ad1b', - 'uploader': 'Heather Dawn Elemental Health', - 'thumbnail': r're:^https?://.+\.jpg', - 'release_date': '20210723', - 'duration': 586, - }, - }, { - 'url': 'https://utreon.com/v/jerJw5EOOVU', - 'info_dict': { - 'id': 'jerJw5EOOVU', - 'ext': 'mp4', - 'title': 'When I\'m alone, I love to reflect in peace, to make my dreams come true... [Quotes and Poems]', - 'description': 'md5:4026aa3a2c10169c3649926ac8ef62b6', - 'uploader': 'Frases e Poemas Quotes and Poems', - 'thumbnail': r're:^https?://.+\.jpg', - 'release_date': '20210723', - 'duration': 60, - }, - }, { - 'url': 'https://utreon.com/v/C4ZxXhYBBmE', - 'info_dict': { - 'id': 'C4ZxXhYBBmE', - 'ext': 'mp4', - 'title': 'Biden’s Capital Gains Tax Rate to Test World’s Highest', - 'description': 'md5:995aa9ad0733c0e5863ebdeff954f40e', - 'uploader': 'Nomad Capitalist', - 'thumbnail': r're:^https?://.+\.jpg', - 'release_date': '20210723', - 'duration': 884, - }, - }, { - 'url': 'https://utreon.com/v/Y-stEH-FBm8', - 'info_dict': { - 'id': 'Y-stEH-FBm8', - 'ext': 'mp4', - 'title': 'Creeper-Chan Pranks Steve! 💚 [MINECRAFT ANIME]', - 'description': 'md5:7a48450b0d761b96dec194be0c5ecb5f', - 'uploader': 'Merryweather Comics', - 'thumbnail': r're:^https?://.+\.jpg', - 'release_date': '20210718', - 'duration': 151, - }, - }, { - 'url': 'https://playeur.com/v/Wzqp-UrxSeu', - 'info_dict': { - 'id': 'Wzqp-UrxSeu', - 'ext': 'mp4', - 'title': 'Update: Clockwork Basilisk Books on the Way!', - 'description': 'md5:d9756b0b1884c904655b0e170d17cea5', - 'uploader': 'Forgotten Weapons', - 'release_date': '20240208', - 'thumbnail': r're:^https?://.+\.jpg', - 'duration': 262, - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - json_data = self._download_json( - 'https://api.playeur.com/v1/videos/' + video_id, - video_id) - videos_json = json_data['videos'] - formats = [{ - 'url': format_url, - 'format_id': format_key.split('_')[1], - 'height': int(format_key.split('_')[1][:-1]), - } for format_key, format_url in videos_json.items() if url_or_none(format_url)] - thumbnail = url_or_none(dict_get(json_data, ('cover_image_url', 'preview_image_url'))) - return { - 'id': video_id, - 'title': json_data['title'], - 'formats': formats, - 'description': str_or_none(json_data.get('description')), - 'duration': int_or_none(json_data.get('duration')), - 'uploader': str_or_none(try_get(json_data, lambda x: x['channel']['title'])), - 'thumbnail': thumbnail, - 'release_date': unified_strdate(json_data.get('published_datetime')), - } diff --git a/yt_dlp/extractor/vgtv.py b/yt_dlp/extractor/vgtv.py index 1eb25530f8..dcf48c4ccc 100644 --- a/yt_dlp/extractor/vgtv.py +++ b/yt_dlp/extractor/vgtv.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor -from .xstream import XstreamIE from ..utils import ( ExtractorError, float_or_none, @@ -9,7 +8,8 @@ from ..utils import ( ) -class VGTVIE(XstreamIE): # XXX: Do not subclass from concrete IE +class VGTVIE(InfoExtractor): + _WORKING = False IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet' _GEO_BYPASS = False diff --git a/yt_dlp/extractor/videodetective.py b/yt_dlp/extractor/videodetective.py deleted file mode 100644 index 7928a41c21..0000000000 --- a/yt_dlp/extractor/videodetective.py +++ /dev/null @@ -1,27 +0,0 @@ -from .common import InfoExtractor -from .internetvideoarchive import InternetVideoArchiveIE - - -class VideoDetectiveIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)' - - _TEST = { - 'url': 'http://www.videodetective.com/movies/kick-ass-2/194487', - 'info_dict': { - 'id': '194487', - 'ext': 'mp4', - 'title': 'Kick-Ass 2', - 'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - query = 'customerid=69249&publishedid=' + video_id - return self.url_result( - InternetVideoArchiveIE._build_json_url(query), - ie=InternetVideoArchiveIE.ie_key()) diff --git a/yt_dlp/extractor/videofyme.py b/yt_dlp/extractor/videofyme.py deleted file mode 100644 index 6cdda75b2f..0000000000 --- a/yt_dlp/extractor/videofyme.py +++ /dev/null @@ -1,51 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_iso8601, -) - - -class VideofyMeIE(InfoExtractor): - _WORKING = False - _VALID_URL = r'https?://(?:www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)' - IE_NAME = 'videofy.me' - - _TEST = { - 'url': 'http://www.videofy.me/thisisvideofyme/1100701', - 'md5': 'c77d700bdc16ae2e9f3c26019bd96143', - 'info_dict': { - 'id': '1100701', - 'ext': 'mp4', - 'title': 'This is VideofyMe', - 'description': '', - 'upload_date': '20130326', - 'timestamp': 1364288959, - 'uploader': 'VideofyMe', - 'uploader_id': 'thisisvideofyme', - 'view_count': int, - 'like_count': int, - 'comment_count': int, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - config = self._download_json(f'http://vf-player-info-loader.herokuapp.com/{video_id}.json', video_id)['videoinfo'] - - video = config.get('video') - blog = config.get('blog', {}) - - return { - 'id': video_id, - 'title': video['title'], - 'url': video['sources']['source']['url'], - 'thumbnail': video.get('thumb'), - 'description': video.get('description'), - 'timestamp': parse_iso8601(video.get('date')), - 'uploader': blog.get('name'), - 'uploader_id': blog.get('identifier'), - 'view_count': int_or_none(self._search_regex(r'([0-9]+)', video.get('views'), 'view count', fatal=False)), - 'like_count': int_or_none(video.get('likes')), - 'comment_count': int_or_none(video.get('nrOfComments')), - } diff --git a/yt_dlp/extractor/videoken.py b/yt_dlp/extractor/videoken.py index dc1dcf12bd..e4d14fe488 100644 --- a/yt_dlp/extractor/videoken.py +++ b/yt_dlp/extractor/videoken.py @@ -1,190 +1,15 @@ -import base64 -import functools -import math -import re -import time import urllib.parse from .common import InfoExtractor from .slideslive import SlidesLiveIE from ..utils import ( - ExtractorError, - InAdvancePagedList, - int_or_none, remove_start, - traverse_obj, update_url_query, url_or_none, ) -class VideoKenBaseIE(InfoExtractor): - _ORGANIZATIONS = { - 'videos.icts.res.in': 'icts', - 'videos.cncf.io': 'cncf', - 'videos.neurips.cc': 'neurips', - } - _BASE_URL_RE = rf'https?://(?P<host>{"|".join(map(re.escape, _ORGANIZATIONS))})/' - - _PAGE_SIZE = 12 - - def _get_org_id_and_api_key(self, org, video_id): - details = self._download_json( - f'https://analytics.videoken.com/api/videolake/{org}/details', video_id, - note='Downloading organization ID and API key', headers={ - 'Accept': 'application/json', - }) - return details['id'], details['apikey'] - - def _create_slideslive_url(self, video_url, video_id, referer): - if not video_url and not video_id: - return - elif not video_url or 'embed/sign-in' in video_url: - video_url = f'https://slideslive.com/embed/{remove_start(video_id, "slideslive-")}' - if url_or_none(referer): - return update_url_query(video_url, { - 'embed_parent_url': referer, - 'embed_container_origin': f'https://{urllib.parse.urlparse(referer).hostname}', - }) - return video_url - - def _extract_videos(self, videos, url): - for video in traverse_obj(videos, (('videos', 'results'), ...)): - video_id = traverse_obj(video, 'youtube_id', 'videoid') - if not video_id: - continue - ie_key = None - if traverse_obj(video, 'type', 'source') == 'youtube': - video_url = video_id - ie_key = 'Youtube' - else: - video_url = traverse_obj(video, 'embed_url', 'embeddableurl', expected_type=url_or_none) - if not video_url: - continue - elif urllib.parse.urlparse(video_url).hostname == 'slideslive.com': - ie_key = SlidesLiveIE - video_url = self._create_slideslive_url(video_url, video_id, url) - yield self.url_result(video_url, ie_key, video_id) - - -class VideoKenIE(VideoKenBaseIE): - _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'(?:(?:topic|category)/[^/#?]+/)?video/(?P<id>[\w-]+)' - _TESTS = [{ - # neurips -> videoken -> slideslive - 'url': 'https://videos.neurips.cc/video/slideslive-38922815', - 'info_dict': { - 'id': '38922815', - 'ext': 'mp4', - 'title': 'Efficient Processing of Deep Neural Network: from Algorithms to Hardware Architectures', - 'timestamp': 1630939331, - 'upload_date': '20210906', - 'thumbnail': r're:^https?://.*\.(?:jpg|png)', - 'thumbnails': 'count:330', - 'chapters': 'count:329', - }, - 'params': { - 'skip_download': 'm3u8', - }, - 'expected_warnings': ['Failed to download VideoKen API JSON'], - }, { - # neurips -> videoken -> slideslive -> youtube - 'url': 'https://videos.neurips.cc/topic/machine%20learning/video/slideslive-38923348', - 'info_dict': { - 'id': '2Xa_dt78rJE', - 'ext': 'mp4', - 'display_id': '38923348', - 'title': 'Machine Education', - 'description': 'Watch full version of this video at https://slideslive.com/38923348.', - 'channel': 'SlidesLive Videos - G2', - 'channel_id': 'UCOExahQQ588Da8Nft_Ltb9w', - 'channel_url': 'https://www.youtube.com/channel/UCOExahQQ588Da8Nft_Ltb9w', - 'uploader': 'SlidesLive Videos - G2', - 'uploader_id': 'UCOExahQQ588Da8Nft_Ltb9w', - 'uploader_url': 'http://www.youtube.com/channel/UCOExahQQ588Da8Nft_Ltb9w', - 'duration': 2504, - 'timestamp': 1618922125, - 'upload_date': '20200131', - 'age_limit': 0, - 'channel_follower_count': int, - 'view_count': int, - 'availability': 'unlisted', - 'live_status': 'not_live', - 'playable_in_embed': True, - 'categories': ['People & Blogs'], - 'tags': [], - 'thumbnail': r're:^https?://.*\.(?:jpg|webp)', - 'thumbnails': 'count:78', - 'chapters': 'count:77', - }, - 'params': { - 'skip_download': 'm3u8', - }, - 'expected_warnings': ['Failed to download VideoKen API JSON'], - }, { - # icts -> videoken -> youtube - 'url': 'https://videos.icts.res.in/topic/random%20variable/video/zysIsojYdvc', - 'info_dict': { - 'id': 'zysIsojYdvc', - 'ext': 'mp4', - 'title': 'Small-worlds, complex networks and random graphs (Lecture 3) by Remco van der Hofstad', - 'description': 'md5:87433069d79719eeadc1962cc2ace00b', - 'channel': 'International Centre for Theoretical Sciences', - 'channel_id': 'UCO3xnVTHzB7l-nc8mABUJIQ', - 'channel_url': 'https://www.youtube.com/channel/UCO3xnVTHzB7l-nc8mABUJIQ', - 'uploader': 'International Centre for Theoretical Sciences', - 'uploader_id': 'ICTStalks', - 'uploader_url': 'http://www.youtube.com/user/ICTStalks', - 'duration': 3372, - 'upload_date': '20191004', - 'age_limit': 0, - 'live_status': 'not_live', - 'availability': 'public', - 'playable_in_embed': True, - 'channel_follower_count': int, - 'like_count': int, - 'view_count': int, - 'categories': ['Science & Technology'], - 'tags': [], - 'thumbnail': r're:^https?://.*\.(?:jpg|webp)', - 'thumbnails': 'count:42', - 'chapters': 'count:20', - }, - 'params': { - 'skip_download': 'm3u8', - }, - }, { - 'url': 'https://videos.cncf.io/category/478/video/IL4nxbmUIX8', - 'only_matching': True, - }, { - 'url': 'https://videos.cncf.io/topic/kubernetes/video/YAM2d7yTrrI', - 'only_matching': True, - }, { - 'url': 'https://videos.icts.res.in/video/d7HuP_abpKU', - 'only_matching': True, - }] - - def _real_extract(self, url): - hostname, video_id = self._match_valid_url(url).group('host', 'id') - org_id, _ = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], video_id) - details = self._download_json( - 'https://analytics.videoken.com/api/videoinfo_private', video_id, query={ - 'videoid': video_id, - 'org_id': org_id, - }, headers={'Accept': 'application/json'}, note='Downloading VideoKen API JSON', - errnote='Failed to download VideoKen API JSON', fatal=False) - if details: - return next(self._extract_videos({'videos': [details]}, url)) - # fallback for API error 400 response - elif video_id.startswith('slideslive-'): - return self.url_result( - self._create_slideslive_url(None, video_id, url), SlidesLiveIE, video_id) - elif re.match(r'^[\w-]{11}$', video_id): - return self.url_result(video_id, 'Youtube', video_id) - else: - raise ExtractorError('Unable to extract without VideoKen API response') - - -class VideoKenPlayerIE(VideoKenBaseIE): +class VideoKenPlayerIE(InfoExtractor): _VALID_URL = r'https?://player\.videoken\.com/embed/slideslive-(?P<id>\d+)' _TESTS = [{ 'url': 'https://player.videoken.com/embed/slideslive-38968434', @@ -203,135 +28,19 @@ class VideoKenPlayerIE(VideoKenBaseIE): }, }] + def _create_slideslive_url(self, video_url, video_id, referer): + if not video_url and not video_id: + return + elif not video_url or 'embed/sign-in' in video_url: + video_url = f'https://slideslive.com/embed/{remove_start(video_id, "slideslive-")}' + if url_or_none(referer): + return update_url_query(video_url, { + 'embed_parent_url': referer, + 'embed_container_origin': f'https://{urllib.parse.urlparse(referer).hostname}', + }) + return video_url + def _real_extract(self, url): video_id = self._match_id(url) return self.url_result( self._create_slideslive_url(None, video_id, url), SlidesLiveIE, video_id) - - -class VideoKenPlaylistIE(VideoKenBaseIE): - _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'(?:category/\d+/)?playlist/(?P<id>\d+)' - _TESTS = [{ - 'url': 'https://videos.icts.res.in/category/1822/playlist/381', - 'playlist_mincount': 117, - 'info_dict': { - 'id': '381', - 'title': 'Cosmology - The Next Decade', - }, - }] - - def _real_extract(self, url): - hostname, playlist_id = self._match_valid_url(url).group('host', 'id') - org_id, _ = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], playlist_id) - videos = self._download_json( - f'https://analytics.videoken.com/api/{org_id}/playlistitems/{playlist_id}/', - playlist_id, headers={'Accept': 'application/json'}, note='Downloading API JSON') - return self.playlist_result(self._extract_videos(videos, url), playlist_id, videos.get('title')) - - -class VideoKenCategoryIE(VideoKenBaseIE): - _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'category/(?P<id>\d+)/?(?:$|[?#])' - _TESTS = [{ - 'url': 'https://videos.icts.res.in/category/1822/', - 'playlist_mincount': 500, - 'info_dict': { - 'id': '1822', - 'title': 'Programs', - }, - }, { - 'url': 'https://videos.neurips.cc/category/350/', - 'playlist_mincount': 34, - 'info_dict': { - 'id': '350', - 'title': 'NeurIPS 2018', - }, - }, { - 'url': 'https://videos.cncf.io/category/479/', - 'playlist_mincount': 328, - 'info_dict': { - 'id': '479', - 'title': 'KubeCon + CloudNativeCon Europe\'19', - }, - }] - - def _get_category_page(self, category_id, org_id, page=1, note=None): - return self._download_json( - f'https://analytics.videoken.com/api/videolake/{org_id}/category_videos', category_id, - fatal=False, note=note if note else f'Downloading category page {page}', - query={ - 'category_id': category_id, - 'page_number': page, - 'length': self._PAGE_SIZE, - }, headers={'Accept': 'application/json'}) or {} - - def _entries(self, category_id, org_id, url, page): - videos = self._get_category_page(category_id, org_id, page + 1) - yield from self._extract_videos(videos, url) - - def _real_extract(self, url): - hostname, category_id = self._match_valid_url(url).group('host', 'id') - org_id, _ = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], category_id) - category_info = self._get_category_page(category_id, org_id, note='Downloading category info') - category = category_info['category_name'] - total_pages = math.ceil(int(category_info['recordsTotal']) / self._PAGE_SIZE) - return self.playlist_result(InAdvancePagedList( - functools.partial(self._entries, category_id, org_id, url), - total_pages, self._PAGE_SIZE), category_id, category) - - -class VideoKenTopicIE(VideoKenBaseIE): - _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'topic/(?P<id>[^/#?]+)/?(?:$|[?#])' - _TESTS = [{ - 'url': 'https://videos.neurips.cc/topic/machine%20learning/', - 'playlist_mincount': 500, - 'info_dict': { - 'id': 'machine_learning', - 'title': 'machine learning', - }, - }, { - 'url': 'https://videos.icts.res.in/topic/gravitational%20waves/', - 'playlist_mincount': 77, - 'info_dict': { - 'id': 'gravitational_waves', - 'title': 'gravitational waves', - }, - }, { - 'url': 'https://videos.cncf.io/topic/prometheus/', - 'playlist_mincount': 134, - 'info_dict': { - 'id': 'prometheus', - 'title': 'prometheus', - }, - }] - - def _get_topic_page(self, topic, org_id, search_id, api_key, page=1, note=None): - return self._download_json( - 'https://es.videoken.com/api/v1.0/get_results', topic, fatal=False, query={ - 'orgid': org_id, - 'size': self._PAGE_SIZE, - 'query': topic, - 'page': page, - 'sort': 'upload_desc', - 'filter': 'all', - 'token': api_key, - 'is_topic': 'true', - 'category': '', - 'searchid': search_id, - }, headers={'Accept': 'application/json'}, - note=note if note else f'Downloading topic page {page}') or {} - - def _entries(self, topic, org_id, search_id, api_key, url, page): - videos = self._get_topic_page(topic, org_id, search_id, api_key, page + 1) - yield from self._extract_videos(videos, url) - - def _real_extract(self, url): - hostname, topic_id = self._match_valid_url(url).group('host', 'id') - topic = urllib.parse.unquote(topic_id) - topic_id = topic.replace(' ', '_') - org_id, api_key = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], topic) - search_id = base64.b64encode(f':{topic}:{int(time.time())}:transient'.encode()).decode() - total_pages = int_or_none(self._get_topic_page( - topic, org_id, search_id, api_key, note='Downloading topic info')['total_no_of_pages']) - return self.playlist_result(InAdvancePagedList( - functools.partial(self._entries, topic, org_id, search_id, api_key, url), - total_pages, self._PAGE_SIZE), topic_id, topic) diff --git a/yt_dlp/extractor/videomore.py b/yt_dlp/extractor/videomore.py deleted file mode 100644 index c41d3d9845..0000000000 --- a/yt_dlp/extractor/videomore.py +++ /dev/null @@ -1,304 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_qs, -) - - -class VideomoreBaseIE(InfoExtractor): - _API_BASE_URL = 'https://more.tv/api/v3/web/' - _VALID_URL_BASE = r'https?://(?:videomore\.ru|more\.tv)/' - - def _download_page_data(self, display_id): - return self._download_json( - self._API_BASE_URL + 'PageData', display_id, query={ - 'url': '/' + display_id, - })['attributes']['response']['data'] - - def _track_url_result(self, track): - track_vod = track['trackVod'] - video_url = track_vod.get('playerLink') or track_vod['link'] - return self.url_result( - video_url, VideomoreIE.ie_key(), track_vod.get('hubId')) - - -class VideomoreIE(InfoExtractor): - IE_NAME = 'videomore' - _VALID_URL = r'''(?x) - videomore:(?P<sid>\d+)$| - https?:// - (?: - videomore\.ru/ - (?: - embed| - [^/]+/[^/]+ - )/| - (?: - (?:player\.)?videomore\.ru| - siren\.more\.tv/player - )/[^/]*\?.*?\btrack_id=| - odysseus\.more.tv/player/(?P<partner_id>\d+)/ - ) - (?P<id>\d+) - (?:[/?#&]|\.(?:xml|json)|$) - ''' - _EMBED_REGEX = [r'''(?x) - (?: - <iframe[^>]+src=([\'"])| - <object[^>]+data=(["\'])https?://videomore\.ru/player\.swf\?.*config= - )(?P<url>https?://videomore\.ru/[^?#"']+/\d+(?:\.xml)?) - '''] - _TESTS = [{ - 'url': 'http://videomore.ru/kino_v_detalayah/5_sezon/367617', - 'md5': '44455a346edc0d509ac5b5a5b531dc35', - 'info_dict': { - 'id': '367617', - 'ext': 'flv', - 'title': 'Кино в деталях 5 сезон В гостях Алексей Чумаков и Юлия Ковальчук', - 'series': 'Кино в деталях', - 'episode': 'В гостях Алексей Чумаков и Юлия Ковальчук', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 2910, - 'view_count': int, - 'comment_count': int, - 'age_limit': 16, - }, - 'skip': 'The video is not available for viewing.', - }, { - 'url': 'http://videomore.ru/embed/259974', - 'info_dict': { - 'id': '259974', - 'ext': 'mp4', - 'title': 'Молодежка 2 сезон 40 серия', - 'series': 'Молодежка', - 'season': '2 сезон', - 'episode': '40 серия', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 2789, - 'view_count': int, - 'age_limit': 16, - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://videomore.ru/molodezhka/sezon_promo/341073', - 'info_dict': { - 'id': '341073', - 'ext': 'flv', - 'title': 'Промо Команда проиграла из-за Бакина?', - 'episode': 'Команда проиграла из-за Бакина?', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 29, - 'age_limit': 16, - 'view_count': int, - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'The video is not available for viewing.', - }, { - 'url': 'http://videomore.ru/elki_3?track_id=364623', - 'only_matching': True, - }, { - 'url': 'http://videomore.ru/embed/364623', - 'only_matching': True, - }, { - 'url': 'http://videomore.ru/video/tracks/364623.xml', - 'only_matching': True, - }, { - 'url': 'http://videomore.ru/video/tracks/364623.json', - 'only_matching': True, - }, { - 'url': 'http://videomore.ru/video/tracks/158031/quotes/33248', - 'only_matching': True, - }, { - 'url': 'videomore:367617', - 'only_matching': True, - }, { - 'url': 'https://player.videomore.ru/?partner_id=97&track_id=736234&autoplay=0&userToken=', - 'only_matching': True, - }, { - 'url': 'https://odysseus.more.tv/player/1788/352317', - 'only_matching': True, - }, { - 'url': 'https://siren.more.tv/player/config?track_id=352317&partner_id=1788&user_token=', - 'only_matching': True, - }] - _GEO_BYPASS = False - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('sid') or mobj.group('id') - partner_id = mobj.group('partner_id') or parse_qs(url).get('partner_id', [None])[0] or '97' - - item = self._download_json( - 'https://siren.more.tv/player/config', video_id, query={ - 'partner_id': partner_id, - 'track_id': video_id, - })['data']['playlist']['items'][0] - - title = item.get('title') - series = item.get('project_name') - season = item.get('season_name') - episode = item.get('episode_name') - if not title: - title = [] - for v in (series, season, episode): - if v: - title.append(v) - title = ' '.join(title) - - streams = item.get('streams') or [] - for protocol in ('DASH', 'HLS'): - stream_url = item.get(protocol.lower() + '_url') - if stream_url: - streams.append({'protocol': protocol, 'url': stream_url}) - - formats = [] - for stream in streams: - stream_url = stream.get('url') - if not stream_url: - continue - protocol = stream.get('protocol') - if protocol == 'DASH': - formats.extend(self._extract_mpd_formats( - stream_url, video_id, mpd_id='dash', fatal=False)) - elif protocol == 'HLS': - formats.extend(self._extract_m3u8_formats( - stream_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - elif protocol == 'MSS': - formats.extend(self._extract_ism_formats( - stream_url, video_id, ism_id='mss', fatal=False)) - - if not formats: - error = item.get('error') - if error: - if error in ('Данное видео недоступно для просмотра на территории этой страны', 'Данное видео доступно для просмотра только на территории России'): - self.raise_geo_restricted(countries=['RU'], metadata_available=True) - self.raise_no_formats(error, expected=True) - - return { - 'id': video_id, - 'title': title, - 'series': series, - 'season': season, - 'episode': episode, - 'thumbnail': item.get('thumbnail_url'), - 'duration': int_or_none(item.get('duration')), - 'view_count': int_or_none(item.get('views')), - 'age_limit': int_or_none(item.get('min_age')), - 'formats': formats, - } - - -class VideomoreVideoIE(VideomoreBaseIE): - IE_NAME = 'videomore:video' - _VALID_URL = VideomoreBaseIE._VALID_URL_BASE + r'(?P<id>(?:(?:[^/]+/){2})?[^/?#&]+)(?:/*|[?#&].*?)$' - _TESTS = [{ - # single video with og:video:iframe - 'url': 'http://videomore.ru/elki_3', - 'info_dict': { - 'id': '364623', - 'ext': 'flv', - 'title': 'Ёлки 3', - 'description': '', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 5579, - 'age_limit': 6, - 'view_count': int, - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'Requires logging in', - }, { - # season single series with og:video:iframe - 'url': 'http://videomore.ru/poslednii_ment/1_sezon/14_seriya', - 'info_dict': { - 'id': '352317', - 'ext': 'mp4', - 'title': 'Последний мент 1 сезон 14 серия', - 'series': 'Последний мент', - 'season': '1 сезон', - 'episode': '14 серия', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 2464, - 'age_limit': 16, - 'view_count': int, - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://videomore.ru/sejchas_v_seti/serii_221-240/226_vypusk', - 'only_matching': True, - }, { - # single video without og:video:iframe - 'url': 'http://videomore.ru/marin_i_ego_druzya', - 'info_dict': { - 'id': '359073', - 'ext': 'flv', - 'title': '1 серия. Здравствуй, Аквавилль!', - 'description': 'md5:c6003179538b5d353e7bcd5b1372b2d7', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 754, - 'age_limit': 6, - 'view_count': int, - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'redirects to https://more.tv/', - }, { - 'url': 'https://videomore.ru/molodezhka/6_sezon/29_seriya?utm_so', - 'only_matching': True, - }, { - 'url': 'https://more.tv/poslednii_ment/1_sezon/14_seriya', - 'only_matching': True, - }] - - @classmethod - def suitable(cls, url): - return False if VideomoreIE.suitable(url) else super().suitable(url) - - def _real_extract(self, url): - display_id = self._match_id(url) - return self._track_url_result(self._download_page_data(display_id)) - - -class VideomoreSeasonIE(VideomoreBaseIE): - IE_NAME = 'videomore:season' - _VALID_URL = VideomoreBaseIE._VALID_URL_BASE + r'(?!embed)(?P<id>[^/]+/[^/?#&]+)(?:/*|[?#&].*?)$' - _TESTS = [{ - 'url': 'http://videomore.ru/molodezhka/film_o_filme', - 'info_dict': { - 'id': 'molodezhka/film_o_filme', - 'title': 'Фильм о фильме', - }, - 'playlist_mincount': 3, - }, { - 'url': 'http://videomore.ru/molodezhka/sezon_promo?utm_so', - 'only_matching': True, - }, { - 'url': 'https://more.tv/molodezhka/film_o_filme', - 'only_matching': True, - }] - - @classmethod - def suitable(cls, url): - return (False if (VideomoreIE.suitable(url) or VideomoreVideoIE.suitable(url)) - else super().suitable(url)) - - def _real_extract(self, url): - display_id = self._match_id(url) - season = self._download_page_data(display_id) - season_id = str(season['id']) - tracks = self._download_json( - self._API_BASE_URL + f'seasons/{season_id}/tracks', - season_id)['data'] - entries = [] - for track in tracks: - entries.append(self._track_url_result(track)) - return self.playlist_result(entries, display_id, season.get('title')) diff --git a/yt_dlp/extractor/vimm.py b/yt_dlp/extractor/vimm.py deleted file mode 100644 index 7097149a55..0000000000 --- a/yt_dlp/extractor/vimm.py +++ /dev/null @@ -1,66 +0,0 @@ -from .common import InfoExtractor - - -class VimmIE(InfoExtractor): - IE_NAME = 'Vimm:stream' - _VALID_URL = r'https?://(?:www\.)?vimm\.tv/(?:c/)?(?P<id>[0-9a-z-]+)$' - _TESTS = [{ - 'url': 'https://www.vimm.tv/c/calimeatwagon', - 'info_dict': { - 'id': 'calimeatwagon', - 'ext': 'mp4', - 'title': 're:^calimeatwagon [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'live_status': 'is_live', - }, - 'skip': 'Live', - }, { - 'url': 'https://www.vimm.tv/octaafradio', - 'only_matching': True, - }] - - def _real_extract(self, url): - channel_id = self._match_id(url) - - formats, subs = self._extract_m3u8_formats_and_subtitles( - f'https://www.vimm.tv/hls/{channel_id}.m3u8', channel_id, 'mp4', m3u8_id='hls', live=True) - - return { - 'id': channel_id, - 'title': channel_id, - 'is_live': True, - 'formats': formats, - 'subtitles': subs, - } - - -class VimmRecordingIE(InfoExtractor): - IE_NAME = 'Vimm:recording' - _VALID_URL = r'https?://(?:www\.)?vimm\.tv/c/(?P<channel_id>[0-9a-z-]+)\?v=(?P<video_id>[0-9A-Za-z]+)' - _TESTS = [{ - 'url': 'https://www.vimm.tv/c/kaldewei?v=2JZsrPTFxsSz', - 'md5': '15122ee95baa32a548e4a3e120b598f1', - 'info_dict': { - 'id': '2JZsrPTFxsSz', - 'ext': 'mp4', - 'title': 'VIMM - [DE/GER] Kaldewei Live - In Farbe und Bunt', - 'uploader_id': 'kaldewei', - }, - }] - - def _real_extract(self, url): - channel_id, video_id = self._match_valid_url(url).groups() - - webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage) - - formats, subs = self._extract_m3u8_formats_and_subtitles( - f'https://d211qfrkztakg3.cloudfront.net/{channel_id}/{video_id}/index.m3u8', video_id, 'mp4', m3u8_id='hls', live=False) - - return { - 'id': video_id, - 'title': title, - 'is_live': False, - 'uploader_id': channel_id, - 'formats': formats, - 'subtitles': subs, - } diff --git a/yt_dlp/extractor/vodpl.py b/yt_dlp/extractor/vodpl.py deleted file mode 100644 index 8af1572d07..0000000000 --- a/yt_dlp/extractor/vodpl.py +++ /dev/null @@ -1,29 +0,0 @@ -from .onet import OnetBaseIE - - -class VODPlIE(OnetBaseIE): - _VALID_URL = r'https?://vod\.pl/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)' - - _TESTS = [{ - 'url': 'https://vod.pl/filmy/chlopaki-nie-placza/3ep3jns', - 'md5': 'a7dc3b2f7faa2421aefb0ecaabf7ec74', - 'info_dict': { - 'id': '3ep3jns', - 'ext': 'mp4', - 'title': 'Chłopaki nie płaczą', - 'description': 'md5:f5f03b84712e55f5ac9f0a3f94445224', - 'timestamp': 1463415154, - 'duration': 5765, - 'upload_date': '20160516', - }, - }, { - 'url': 'https://vod.pl/seriale/belfer-na-planie-praca-kamery-online/2c10heh', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - info_dict = self._extract_from_id(self._search_mvp_id(webpage), webpage) - info_dict['id'] = video_id - return info_dict diff --git a/yt_dlp/extractor/vuclip.py b/yt_dlp/extractor/vuclip.py deleted file mode 100644 index ad7eab351e..0000000000 --- a/yt_dlp/extractor/vuclip.py +++ /dev/null @@ -1,66 +0,0 @@ -import re -import urllib.parse - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - parse_duration, - remove_end, -) - - -class VuClipIE(InfoExtractor): - _VALID_URL = r'https?://(?:m\.)?vuclip\.com/w\?.*?cid=(?P<id>[0-9]+)' - - _TEST = { - 'url': 'http://m.vuclip.com/w?cid=1129900602&bu=8589892792&frm=w&z=34801&op=0&oc=843169247§ion=recommend', - 'info_dict': { - 'id': '1129900602', - 'ext': '3gp', - 'title': 'Top 10 TV Convicts', - 'duration': 733, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - ad_m = re.search( - r'''value="No.*?" onClick="location.href='([^"']+)'"''', webpage) - if ad_m: - urlr = urllib.parse.urlparse(url) - adfree_url = urlr.scheme + '://' + urlr.netloc + ad_m.group(1) - webpage = self._download_webpage( - adfree_url, video_id, note='Download post-ad page') - - error_msg = self._html_search_regex( - r'<p class="message">(.*?)</p>', webpage, 'error message', - default=None) - if error_msg: - raise ExtractorError( - f'{self.IE_NAME} said: {error_msg}', expected=True) - - # These clowns alternate between two page types - video_url = self._search_regex( - r'<a[^>]+href="([^"]+)"[^>]*><img[^>]+src="[^"]*/play\.gif', - webpage, 'video URL', default=None) - if video_url: - formats = [{ - 'url': video_url, - }] - else: - formats = self._parse_html5_media_entries(url, webpage, video_id)[0]['formats'] - - title = remove_end(self._html_search_regex( - r'<title>(.*?)-\s*Vuclip', webpage, 'title').strip(), ' - Video') - - duration = parse_duration(self._html_search_regex( - r'[(>]([0-9]+:[0-9]+)(?:\d+)/[^/]+/(?P\d+)/(?P[0-9]+)' - _TESTS = [{ - # video_type == 'video/vvvvid' - 'url': 'https://www.vvvvid.it/show/498/the-power-of-computing/518/505692/playstation-vr-cambiera-il-nostro-modo-di-giocare', - 'info_dict': { - 'id': '505692', - 'ext': 'mp4', - 'title': 'Playstation VR cambierà il nostro modo di giocare', - 'duration': 93, - 'series': 'The Power of Computing', - 'season_id': '518', - 'episode': 'Playstation VR cambierà il nostro modo di giocare', - 'episode_id': '4747', - 'view_count': int, - 'like_count': int, - 'repost_count': int, - 'thumbnail': 'https://static.vvvvid.it/img/zoomin/28CA2409-E663-34F0-2B02E72356556EA3_500k.jpg', - }, - 'params': { - 'skip_download': True, - }, - }, { - # video_type == 'video/rcs' - 'url': 'https://www.vvvvid.it/#!show/376/death-note-live-action/377/482493/episodio-01', - 'info_dict': { - 'id': '482493', - 'ext': 'mp4', - 'title': 'Episodio 01', - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'Every video/rcs is not working even in real website', - }, { - # video_type == 'video/youtube' - 'url': 'https://www.vvvvid.it/show/404/one-punch-man/406/486683/trailer', - 'md5': '33e0edfba720ad73a8782157fdebc648', - 'info_dict': { - 'id': 'RzmFKUDOUgw', - 'ext': 'mp4', - 'title': 'Trailer', - 'upload_date': '20150906', - 'description': 'md5:a5e802558d35247fee285875328c0b80', - 'uploader_id': '@EMOTIONLabelChannel', - 'uploader': 'EMOTION Label Channel', - 'episode_id': '3115', - 'view_count': int, - 'like_count': int, - 'repost_count': int, - 'availability': str, - 'categories': list, - 'age_limit': 0, - 'channel': 'EMOTION Label Channel', - 'channel_follower_count': int, - 'channel_id': 'UCQ5URCSs1f5Cz9rh-cDGxNQ', - 'channel_url': 'https://www.youtube.com/channel/UCQ5URCSs1f5Cz9rh-cDGxNQ', - 'comment_count': int, - 'duration': 133, - 'episode': 'Trailer', - 'heatmap': list, - 'live_status': 'not_live', - 'playable_in_embed': True, - 'season_id': '406', - 'series': 'One-Punch Man', - 'tags': list, - 'uploader_url': 'https://www.youtube.com/@EMOTIONLabelChannel', - 'thumbnail': 'https://i.ytimg.com/vi/RzmFKUDOUgw/maxresdefault.jpg', - }, - 'params': { - 'skip_download': True, - }, - }, { - # video_type == 'video/dash' - 'url': 'https://www.vvvvid.it/show/844/le-bizzarre-avventure-di-jojo-vento-aureo/938/527551/golden-wind', - 'info_dict': { - 'id': '527551', - 'ext': 'mp4', - 'title': 'Golden Wind', - 'duration': 1430, - 'series': 'Le bizzarre avventure di Jojo - Vento Aureo', - 'season_id': '938', - 'episode': 'Golden Wind', - 'episode_number': 1, - 'episode_id': '9089', - 'view_count': int, - 'like_count': int, - 'repost_count': int, - 'thumbnail': 'https://static.vvvvid.it/img/thumbs/Dynit/Jojo/Jojo_S05Ep01-t.jpg', - 'season': 'Season 5', - 'season_number': 5, - }, - 'params': { - 'skip_download': True, - 'format': 'mp4', - }, - }, { - 'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048', - 'only_matching': True, - }] - _conn_id = None - - @functools.cached_property - def _headers(self): - return { - **self.geo_verification_headers(), - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.50 Safari/537.37', - } - - def _real_initialize(self): - self._conn_id = self._download_json( - 'https://www.vvvvid.it/user/login', - None, headers=self._headers)['data']['conn_id'] - - def _download_info(self, show_id, path, video_id, fatal=True, query=None): - q = { - 'conn_id': self._conn_id, - } - if query: - q.update(query) - response = self._download_json( - f'https://www.vvvvid.it/vvvvid/ondemand/{show_id}/{path}', - video_id, headers=self._headers, query=q, fatal=fatal) - if not (response or fatal): - return - if response.get('result') == 'error': - raise ExtractorError('{} said: {}'.format( - self.IE_NAME, response['message']), expected=True) - return response['data'] - - def _extract_common_video_info(self, video_data): - return { - 'thumbnail': video_data.get('thumbnail'), - 'episode_id': str_or_none(video_data.get('id')), - } - - def _real_extract(self, url): - show_id, season_id, video_id = self._match_valid_url(url).groups() - - response = self._download_info( - show_id, f'season/{season_id}', - video_id, query={'video_id': video_id}) - - vid = int(video_id) - video_data = next(filter( - lambda episode: episode.get('video_id') == vid, response)) - title = video_data['title'] - formats = [] - - # vvvvid embed_info decryption algorithm is reverse engineered from function $ds(h) at vvvvid.js - def ds(h): - g = 'MNOPIJKL89+/4567UVWXQRSTEFGHABCDcdefYZabstuvopqr0123wxyzklmnghij' - - def f(m): - l = [] - o = 0 - b = False - m_len = len(m) - while ((not b) and o < m_len): - n = m[o] << 2 - o += 1 - k = -1 - j = -1 - if o < m_len: - n += m[o] >> 4 - o += 1 - if o < m_len: - k = (m[o - 1] << 4) & 255 - k += m[o] >> 2 - o += 1 - if o < m_len: - j = (m[o - 1] << 6) & 255 - j += m[o] - o += 1 - else: - b = True - else: - b = True - else: - b = True - l.append(n) - if k != -1: - l.append(k) - if j != -1: - l.append(j) - return l - - c = [] - for e in h: - c.append(g.index(e)) - - c_len = len(c) - for e in range(c_len * 2 - 1, -1, -1): - a = c[e % c_len] ^ c[(e + 1) % c_len] - c[e % c_len] = a - - c = f(c) - d = '' - for e in c: - d += chr(e) - - return d - - info = {} - - def metadata_from_url(r_url): - if not info and r_url: - mobj = re.search(r'_(?:S(\d+))?Ep(\d+)', r_url) - if mobj: - info['episode_number'] = int(mobj.group(2)) - season_number = mobj.group(1) - if season_number: - info['season_number'] = int(season_number) - - video_type = video_data.get('video_type') - is_youtube = False - for quality in ('', '_sd'): - embed_code = video_data.get('embed_info' + quality) - if not embed_code: - continue - embed_code = ds(embed_code) - if video_type == 'video/kenc': - embed_code = re.sub(r'https?(://[^/]+)/z/', r'https\1/i/', embed_code).replace('/manifest.f4m', '/master.m3u8') - kenc = self._download_json( - 'https://www.vvvvid.it/kenc', video_id, query={ - 'action': 'kt', - 'conn_id': self._conn_id, - 'url': embed_code, - }, fatal=False) or {} - kenc_message = kenc.get('message') - if kenc_message: - embed_code += '?' + ds(kenc_message) - formats.extend(self._extract_m3u8_formats( - embed_code, video_id, 'mp4', m3u8_id='hls', fatal=False)) - elif video_type == 'video/rcs': - formats.extend(self._extract_akamai_formats(embed_code, video_id)) - elif video_type == 'video/youtube': - info.update({ - '_type': 'url_transparent', - 'ie_key': YoutubeIE.ie_key(), - 'url': embed_code, - }) - is_youtube = True - break - elif video_type == 'video/dash': - formats.extend(self._extract_m3u8_formats( - embed_code, video_id, 'mp4', m3u8_id='hls', fatal=False)) - else: - formats.extend(self._extract_wowza_formats( - f'http://sb.top-ix.org/videomg/_definst_/mp4:{embed_code}/playlist.m3u8', video_id, skip_protocols=['f4m'])) - metadata_from_url(embed_code) - - if not is_youtube: - info['formats'] = formats - - metadata_from_url(video_data.get('thumbnail')) - info.update(self._extract_common_video_info(video_data)) - info.update({ - 'id': video_id, - 'title': title, - 'duration': int_or_none(video_data.get('length')), - 'series': video_data.get('show_title'), - 'season_id': season_id, - 'episode': title, - 'view_count': int_or_none(video_data.get('views')), - 'like_count': int_or_none(video_data.get('video_likes')), - 'repost_count': int_or_none(video_data.get('video_shares')), - }) - return info - - -class VVVVIDShowIE(VVVVIDIE): # XXX: Do not subclass from concrete IE - _VALID_URL = rf'(?P{VVVVIDIE._VALID_URL_BASE}(?P\d+)(?:/(?P[^/?&#]+))?)/?(?:[?#&]|$)' - _TESTS = [{ - 'url': 'https://www.vvvvid.it/show/156/psyco-pass', - 'info_dict': { - 'id': '156', - 'title': 'Psycho-Pass', - 'description': 'md5:94d572c0bd85894b193b8aebc9a3a806', - }, - 'playlist_count': 46, - }, { - 'url': 'https://www.vvvvid.it/show/156', - 'only_matching': True, - }] - - def _real_extract(self, url): - base_url, show_id, show_title = self._match_valid_url(url).groups() - - seasons = self._download_info( - show_id, 'seasons/', show_title) - - show_info = self._download_info( - show_id, 'info/', show_title, fatal=False) - - if not show_title: - base_url += '/title' - - entries = [] - for season in (seasons or []): - episodes = season.get('episodes') or [] - playlist_title = season.get('name') or show_info.get('title') - for episode in episodes: - if episode.get('playable') is False: - continue - season_id = str_or_none(episode.get('season_id')) - video_id = str_or_none(episode.get('video_id')) - if not (season_id and video_id): - continue - info = self._extract_common_video_info(episode) - info.update({ - '_type': 'url_transparent', - 'ie_key': VVVVIDIE.ie_key(), - 'url': '/'.join([base_url, season_id, video_id]), - 'title': episode.get('title'), - 'description': episode.get('description'), - 'season_id': season_id, - 'playlist_title': playlist_title, - }) - entries.append(info) - - return self.playlist_result( - entries, show_id, show_info.get('title'), show_info.get('description')) diff --git a/yt_dlp/extractor/wdr.py b/yt_dlp/extractor/wdr.py index 1c1f0ed052..d906605d19 100644 --- a/yt_dlp/extractor/wdr.py +++ b/yt_dlp/extractor/wdr.py @@ -347,35 +347,3 @@ class WDRElefantIE(InfoExtractor): raise ExtractorError( f'{display_id} is not a video', expected=True) return self.url_result(zmdb_url_element.text, ie=WDRIE.ie_key()) - - -class WDRMobileIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?://mobile-ondemand\.wdr\.de/ - .*?/fsk(?P[0-9]+) - /[0-9]+/[0-9]+/ - (?P[0-9]+)_(?P[0-9]+)''' - IE_NAME = 'wdr:mobile' - _WORKING = False # no such domain - _TEST = { - 'url': 'http://mobile-ondemand.wdr.de/CMS2010/mdb/ondemand/weltweit/fsk0/42/421735/421735_4283021.mp4', - 'info_dict': { - 'title': '4283021', - 'id': '421735', - 'ext': 'mp4', - 'age_limit': 0, - }, - 'skip': 'Problems with loading data.', - } - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - return { - 'id': mobj.group('id'), - 'title': mobj.group('title'), - 'age_limit': int(mobj.group('age_limit')), - 'url': url, - 'http_headers': { - 'User-Agent': 'mobile', - }, - } diff --git a/yt_dlp/extractor/weyyak.py b/yt_dlp/extractor/weyyak.py deleted file mode 100644 index ef12be871f..0000000000 --- a/yt_dlp/extractor/weyyak.py +++ /dev/null @@ -1,86 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - float_or_none, - int_or_none, - parse_age_limit, - traverse_obj, - unified_timestamp, - url_or_none, -) - - -class WeyyakIE(InfoExtractor): - _VALID_URL = r'https?://weyyak\.com/(?P<lang>\w+)/(?:player/)?(?P<type>episode|movie)/(?P<id>\d+)' - _TESTS = [ - { - 'url': 'https://weyyak.com/en/player/episode/1341952/Ribat-Al-Hob-Episode49', - 'md5': '0caf55c1a615531c8fe60f146ae46849', - 'info_dict': { - 'id': '1341952', - 'ext': 'mp4', - 'title': 'Ribat Al Hob', - 'duration': 2771, - 'alt_title': 'رباط الحب', - 'season': 'Season 1', - 'season_number': 1, - 'episode': 'Episode 49', - 'episode_number': 49, - 'timestamp': 1485907200, - 'upload_date': '20170201', - 'thumbnail': r're:^https://content\.weyyak\.com/.+/poster-image', - 'categories': ['Drama', 'Thrillers', 'Romance'], - 'tags': 'count:8', - }, - }, - { - 'url': 'https://weyyak.com/en/movie/233255/8-Seconds', - 'md5': 'fe740ae0f63e4d1c8a7fc147a410c564', - 'info_dict': { - 'id': '233255', - 'ext': 'mp4', - 'title': '8 Seconds', - 'duration': 6490, - 'alt_title': '8 ثواني', - 'description': 'md5:45b83a155c30b49950624c7e99600b9d', - 'age_limit': 15, - 'release_year': 2015, - 'timestamp': 1683106031, - 'upload_date': '20230503', - 'thumbnail': r're:^https://content\.weyyak\.com/.+/poster-image', - 'categories': ['Drama', 'Social'], - 'cast': ['Ceylin Adiyaman', 'Esra Inal'], - }, - }, - ] - - def _real_extract(self, url): - video_id, lang, type_ = self._match_valid_url(url).group('id', 'lang', 'type') - - path = 'episode/' if type_ == 'episode' else 'contents/moviedetails?contentkey=' - data = self._download_json( - f'https://msapifo-prod-me.weyyak.z5.com/v1/{lang}/{path}{video_id}', video_id)['data'] - m3u8_url = self._download_json( - f'https://api-weyyak.akamaized.net/get_info/{data["video_id"]}', - video_id, 'Extracting video details')['url_video'] - formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id) - - return { - 'id': video_id, - 'formats': formats, - 'subtitles': subtitles, - **traverse_obj(data, { - 'title': ('title', {str}), - 'alt_title': ('translated_title', {str}), - 'description': ('synopsis', {str}), - 'duration': ('length', {float_or_none}), - 'age_limit': ('age_rating', {parse_age_limit}), - 'season_number': ('season_number', {int_or_none}), - 'episode_number': ('episode_number', {int_or_none}), - 'thumbnail': ('imagery', 'thumbnail', {url_or_none}), - 'categories': ('genres', ..., {str}), - 'tags': ('tags', ..., {str}), - 'cast': (('main_actor', 'main_actress'), {str}), - 'timestamp': ('insertedAt', {unified_timestamp}), - 'release_year': ('production_year', {int_or_none}), - }), - } diff --git a/yt_dlp/extractor/xstream.py b/yt_dlp/extractor/xstream.py deleted file mode 100644 index f7b48322cd..0000000000 --- a/yt_dlp/extractor/xstream.py +++ /dev/null @@ -1,114 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ( - find_xpath_attr, - int_or_none, - parse_iso8601, - xpath_text, - xpath_with_ns, -) - - -class XstreamIE(InfoExtractor): - _VALID_URL = r'''(?x) - (?: - xstream:| - https?://frontend\.xstream\.(?:dk|net)/ - ) - (?P<partner_id>[^/]+) - (?: - :| - /feed/video/\?.*?\bid= - ) - (?P<id>\d+) - ''' - _TESTS = [{ - 'url': 'http://frontend.xstream.dk/btno/feed/video/?platform=web&id=86588', - 'md5': 'd7d17e3337dc80de6d3a540aefbe441b', - 'info_dict': { - 'id': '86588', - 'ext': 'mov', - 'title': 'Otto Wollertsen', - 'description': 'Vestlendingen Otto Fredrik Wollertsen', - 'timestamp': 1430473209, - 'upload_date': '20150501', - }, - }, { - 'url': 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=21039', - 'only_matching': True, - }] - - def _extract_video_info(self, partner_id, video_id): - data = self._download_xml( - f'http://frontend.xstream.dk/{partner_id}/feed/video/?platform=web&id={video_id}', - video_id) - - NS_MAP = { - 'atom': 'http://www.w3.org/2005/Atom', - 'xt': 'http://xstream.dk/', - 'media': 'http://search.yahoo.com/mrss/', - } - - entry = data.find(xpath_with_ns('./atom:entry', NS_MAP)) - - title = xpath_text( - entry, xpath_with_ns('./atom:title', NS_MAP), 'title') - description = xpath_text( - entry, xpath_with_ns('./atom:summary', NS_MAP), 'description') - timestamp = parse_iso8601(xpath_text( - entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date')) - - formats = [] - media_group = entry.find(xpath_with_ns('./media:group', NS_MAP)) - for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)): - media_url = media_content.get('url') - if not media_url: - continue - tbr = int_or_none(media_content.get('bitrate')) - mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url) - if mobj: - formats.append({ - 'url': mobj.group('url'), - 'play_path': 'mp4:{}'.format(mobj.group('playpath')), - 'app': mobj.group('app'), - 'ext': 'flv', - 'tbr': tbr, - 'format_id': 'rtmp-%d' % tbr, - }) - else: - formats.append({ - 'url': media_url, - 'tbr': tbr, - }) - - link = find_xpath_attr( - entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original') - if link is not None: - formats.append({ - 'url': link.get('href'), - 'format_id': link.get('rel'), - 'quality': 1, - }) - - thumbnails = [{ - 'url': splash.get('url'), - 'width': int_or_none(splash.get('width')), - 'height': int_or_none(splash.get('height')), - } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))] - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'timestamp': timestamp, - 'formats': formats, - 'thumbnails': thumbnails, - } - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - partner_id = mobj.group('partner_id') - video_id = mobj.group('id') - - return self._extract_video_info(partner_id, video_id) diff --git a/yt_dlp/extractor/zee5.py b/yt_dlp/extractor/zee5.py deleted file mode 100644 index fb523de03b..0000000000 --- a/yt_dlp/extractor/zee5.py +++ /dev/null @@ -1,269 +0,0 @@ -import json -import time -import uuid - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - int_or_none, - jwt_decode_hs256, - parse_age_limit, - str_or_none, - try_call, - try_get, - unified_strdate, - unified_timestamp, - url_or_none, -) - - -class Zee5IE(InfoExtractor): - _VALID_URL = r'''(?x) - (?: - zee5:| - https?://(?:www\.)?zee5\.com/(?:[^#?]+/)? - (?: - (?:tv-shows|kids|web-series|zee5originals)(?:/[^#/?]+){3} - |(?:movies|kids|videos|news|music-videos)/(?!kids-shows)[^#/?]+ - )/(?P<display_id>[^#/?]+)/ - ) - (?P<id>[^#/?]+)/?(?:$|[?#]) - ''' - _TESTS = [{ - 'url': 'https://www.zee5.com/movies/details/adavari-matalaku-ardhale-verule/0-0-movie_1143162669', - 'info_dict': { - 'id': '0-0-movie_1143162669', - 'ext': 'mp4', - 'display_id': 'adavari-matalaku-ardhale-verule', - 'title': 'Adavari Matalaku Ardhale Verule', - 'duration': 9360, - 'description': str, - 'alt_title': 'Adavari Matalaku Ardhale Verule', - 'uploader': 'Zee Entertainment Enterprises Ltd', - 'release_date': '20070427', - 'upload_date': '20070427', - 'timestamp': 1177632000, - 'thumbnail': r're:^https?://.*\.jpg$', - 'episode_number': 0, - 'episode': 'Episode 0', - 'tags': list, - }, - 'params': { - 'format': 'bv', - }, - }, { - 'url': 'https://www.zee5.com/kids/kids-shows/bandbudh-aur-budbak/0-6-1899/yoga-se-hoga-bandbudh-aur-budbak/0-1-239839', - 'info_dict': { - 'id': '0-1-239839', - 'ext': 'mp4', - 'display_id': 'yoga-se-hoga-bandbudh-aur-budbak', - 'title': 'Yoga Se Hoga-Bandbudh aur Budbak', - 'duration': 659, - 'description': str, - 'alt_title': 'Yoga Se Hoga-Bandbudh aur Budbak', - 'uploader': 'Zee Entertainment Enterprises Ltd', - 'release_date': '20150101', - 'upload_date': '20150101', - 'timestamp': 1420070400, - 'thumbnail': r're:^https?://.*\.jpg$', - 'series': 'Bandbudh Aur Budbak', - 'season_number': 1, - 'episode_number': 1, - 'episode': 'Episode 1', - 'season': 'Season 1', - 'tags': list, - }, - 'params': { - 'format': 'bv', - }, - }, { - 'url': 'https://www.zee5.com/hi/tv-shows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730?country=IN', - 'only_matching': True, - }, { - 'url': 'https://www.zee5.com/global/hi/tv-shows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730', - 'only_matching': True, - }, { - 'url': 'https://www.zee5.com/web-series/details/mithya/0-6-4z587408/maine-dekhi-hai-uski-mrityu/0-1-6z587412', - 'only_matching': True, - }, { - 'url': 'https://www.zee5.com/kids/kids-movies/maya-bommalu/0-0-movie_1040370005', - 'only_matching': True, - }, { - 'url': 'https://www.zee5.com/news/details/jana-sena-chief-pawan-kalyan-shows-slippers-to-ysrcp-leaders/0-0-newsauto_6ettj4242oo0', - 'only_matching': True, - }, { - 'url': 'https://www.zee5.com/music-videos/details/adhento-gaani-vunnapaatuga-jersey-nani-shraddha-srinath/0-0-56973', - 'only_matching': True, - }] - _DEVICE_ID = str(uuid.uuid4()) - _USER_TOKEN = None - _LOGIN_HINT = 'Use "--username <mobile_number>" to login using otp or "--username token" and "--password <user_token>" to login using user token.' - _NETRC_MACHINE = 'zee5' - _GEO_COUNTRIES = ['IN'] - _USER_COUNTRY = None - - def _perform_login(self, username, password): - if len(username) == 10 and username.isdigit() and self._USER_TOKEN is None: - self.report_login() - otp_request_json = self._download_json(f'https://b2bapi.zee5.com/device/sendotp_v1.php?phoneno=91{username}', - None, note='Sending OTP') - if otp_request_json['code'] == 0: - self.to_screen(otp_request_json['message']) - else: - raise ExtractorError(otp_request_json['message'], expected=True) - otp_code = self._get_tfa_info('OTP') - otp_verify_json = self._download_json(f'https://b2bapi.zee5.com/device/verifyotp_v1.php?phoneno=91{username}&otp={otp_code}&guest_token={self._DEVICE_ID}&platform=web', - None, note='Verifying OTP', fatal=False) - if not otp_verify_json: - raise ExtractorError('Unable to verify OTP.', expected=True) - self._USER_TOKEN = otp_verify_json.get('token') - if not self._USER_TOKEN: - raise ExtractorError(otp_request_json['message'], expected=True) - elif username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)): - self._USER_TOKEN = password - else: - raise ExtractorError(self._LOGIN_HINT, expected=True) - - token = jwt_decode_hs256(self._USER_TOKEN) - if token.get('exp', 0) <= int(time.time()): - raise ExtractorError('User token has expired', expected=True) - self._USER_COUNTRY = token.get('current_country') - - def _real_extract(self, url): - video_id, display_id = self._match_valid_url(url).group('id', 'display_id') - access_token_request = self._download_json( - 'https://launchapi.zee5.com/launch?platform_name=web_app', - video_id, note='Downloading access token')['platform_token'] - data = { - 'x-access-token': access_token_request['token'], - } - if self._USER_TOKEN: - data['Authorization'] = f'bearer {self._USER_TOKEN}' - else: - data['X-Z5-Guest-Token'] = self._DEVICE_ID - - json_data = self._download_json( - 'https://spapi.zee5.com/singlePlayback/getDetails/secure', video_id, query={ - 'content_id': video_id, - 'device_id': self._DEVICE_ID, - 'platform_name': 'desktop_web', - 'country': self._USER_COUNTRY or self.get_param('geo_bypass_country') or 'IN', - 'check_parental_control': False, - }, headers={'content-type': 'application/json'}, data=json.dumps(data).encode()) - asset_data = json_data['assetDetails'] - show_data = json_data.get('showDetails', {}) - if 'premium' in asset_data['business_type']: - raise ExtractorError('Premium content is DRM protected.', expected=True) - if not asset_data.get('hls_url'): - self.raise_login_required(self._LOGIN_HINT, metadata_available=True, method=None) - formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(asset_data['hls_url'], video_id, 'mp4', fatal=False) - - subtitles = {} - for sub in asset_data.get('subtitle_url', []): - sub_url = sub.get('url') - if not sub_url: - continue - subtitles.setdefault(sub.get('language', 'en'), []).append({ - 'url': self._proto_relative_url(sub_url), - }) - subtitles = self._merge_subtitles(subtitles, m3u8_subs) - return { - 'id': video_id, - 'display_id': display_id, - 'title': asset_data['title'], - 'formats': formats, - 'subtitles': subtitles, - 'duration': int_or_none(asset_data.get('duration')), - 'description': str_or_none(asset_data.get('description')), - 'alt_title': str_or_none(asset_data.get('original_title')), - 'uploader': str_or_none(asset_data.get('content_owner')), - 'age_limit': parse_age_limit(asset_data.get('age_rating')), - 'release_date': unified_strdate(asset_data.get('release_date')), - 'timestamp': unified_timestamp(asset_data.get('release_date')), - 'thumbnail': url_or_none(asset_data.get('image_url')), - 'series': str_or_none(asset_data.get('tvshow_name')), - 'season': try_get(show_data, lambda x: x['seasons']['title'], str), - 'season_number': int_or_none(try_get(show_data, lambda x: x['seasons'][0]['orderid'])), - 'episode_number': int_or_none(try_get(asset_data, lambda x: x['orderid'])), - 'tags': try_get(asset_data, lambda x: x['tags'], list), - } - - -class Zee5SeriesIE(InfoExtractor): - IE_NAME = 'zee5:series' - _VALID_URL = r'''(?x) - (?: - zee5:series:| - https?://(?:www\.)?zee5\.com/(?:[^#?]+/)? - (?:tv-shows|web-series|kids|zee5originals)/(?!kids-movies)(?:[^#/?]+/){2} - ) - (?P<id>[^#/?]+)(?:/episodes)?/?(?:$|[?#]) - ''' - _TESTS = [{ - 'url': 'https://www.zee5.com/kids/kids-shows/bandbudh-aur-budbak/0-6-1899', - 'playlist_mincount': 156, - 'info_dict': { - 'id': '0-6-1899', - }, - }, { - 'url': 'https://www.zee5.com/tv-shows/details/bhabi-ji-ghar-par-hai/0-6-199', - 'playlist_mincount': 1500, - 'info_dict': { - 'id': '0-6-199', - }, - }, { - 'url': 'https://www.zee5.com/tv-shows/details/agent-raghav-crime-branch/0-6-965', - 'playlist_mincount': 24, - 'info_dict': { - 'id': '0-6-965', - }, - }, { - 'url': 'https://www.zee5.com/ta/tv-shows/details/nagabhairavi/0-6-3201', - 'playlist_mincount': 3, - 'info_dict': { - 'id': '0-6-3201', - }, - }, { - 'url': 'https://www.zee5.com/global/hi/tv-shows/details/khwaabon-ki-zamin-par/0-6-270', - 'playlist_mincount': 150, - 'info_dict': { - 'id': '0-6-270', - }, - }, { - 'url': 'https://www.zee5.com/tv-shows/details/chala-hawa-yeu-dya-ladies-zindabaad/0-6-2943/episodes', - 'only_matching': True, - }, { - 'url': 'https://www.zee5.com/web-series/details/mithya/0-6-4z587408', - 'only_matching': True, - }] - - def _entries(self, show_id): - access_token_request = self._download_json( - 'https://launchapi.zee5.com/launch?platform_name=web_app', - show_id, note='Downloading access token')['platform_token'] - headers = { - 'X-Access-Token': access_token_request['token'], - 'Referer': 'https://www.zee5.com/', - } - show_url = f'https://gwapi.zee5.com/content/tvshow/{show_id}?translation=en&country=IN' - - page_num = 0 - show_json = self._download_json(show_url, video_id=show_id, headers=headers) - for season in show_json.get('seasons') or []: - season_id = try_get(season, lambda x: x['id'], str) - next_url = f'https://gwapi.zee5.com/content/tvshow/?season_id={season_id}&type=episode&translation=en&country=IN&on_air=false&asset_subtype=tvshow&page=1&limit=100' - while next_url: - page_num += 1 - episodes_json = self._download_json( - next_url, video_id=show_id, headers=headers, - note=f'Downloading JSON metadata page {page_num}') - for episode in try_get(episodes_json, lambda x: x['episode'], list) or []: - video_id = episode.get('id') - yield self.url_result( - f'zee5:{video_id}', - ie=Zee5IE.ie_key(), video_id=video_id) - next_url = url_or_none(episodes_json.get('next_episode_api')) - - def _real_extract(self, url): - show_id = self._match_id(url) - return self.playlist_result(self._entries(show_id), playlist_id=show_id)