[cleanup] Remove dead extractors (#16137)

Closes #2623
Closes #2679
Closes #2821
Closes #3416
Closes #4828
Closes #4939
Closes #5421
Closes #7064
Closes #7264
Closes #7654
Closes #8075
Closes #8798
Closes #9313
Closes #9617
Closes #10162
Closes #10252
Closes #10264
Closes #15640

Authored by: doe1080, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
This commit is contained in:
doe1080 2026-06-10 07:35:57 +09:00 committed by GitHub
parent e85da3b985
commit 3ba1534fa3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
96 changed files with 39 additions and 10739 deletions

View File

@ -16,7 +16,6 @@ from yt_dlp.extractor import (
CeskaTelevizeIE,
DailymotionIE,
DemocracynowIE,
LyndaIE,
RaiPlayIE,
RTVEALaCartaIE,
TedTalkIE,
@ -250,20 +249,6 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
self.assertFalse(subtitles)
@is_download_test
@unittest.skip('IE broken')
class TestLyndaSubtitles(BaseTestSubtitles):
    """Subtitle download test for the lynda.com extractor (skipped: 'IE broken')."""

    url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
    IE = LyndaIE

    def test_allsubtitles(self):
        """Request all subtitles and expect exactly one English track with a known hash."""
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True

        found = self.getSubtitles()
        self.assertEqual(set(found.keys()), {'en'})
        self.assertEqual(md5(found['en']), '09bbe67222259bed60deaa26997d73a7')
@is_download_test
@unittest.skip('IE broken')
class TestNPOSubtitles(BaseTestSubtitles):

View File

@ -54,7 +54,6 @@ from .agora import (
WyborczaPodcastIE,
WyborczaVideoIE,
)
from .airtv import AirTVIE
from .aitube import AitubeKZVideoIE
from .alibaba import AlibabaIE
from .aliexpress import AliExpressLiveIE
@ -65,10 +64,6 @@ from .allstar import (
AllstarProfileIE,
)
from .alphaporno import AlphaPornoIE
from .alsace20tv import (
Alsace20TVEmbedIE,
Alsace20TVIE,
)
from .altcensored import (
AltCensoredChannelIE,
AltCensoredIE,
@ -93,7 +88,6 @@ from .americastestkitchen import (
AmericasTestKitchenIE,
AmericasTestKitchenSeasonIE,
)
from .anchorfm import AnchorFMEpisodeIE
from .angel import AngelIE
from .antenna import (
Ant1NewsGrArticleIE,
@ -106,10 +100,6 @@ from .apa import APAIE
from .aparat import AparatIE
from .appleconnect import AppleConnectIE
from .applepodcasts import ApplePodcastsIE
from .appletrailers import (
AppleTrailersIE,
AppleTrailersSectionIE,
)
from .archiveorg import (
ArchiveOrgIE,
YoutubeWebArchiveIE,
@ -140,7 +130,6 @@ from .asobichannel import (
from .asobistage import AsobiStageIE
from .atresplayer import AtresPlayerIE
from .atscaleconf import AtScaleConfEventIE
from .atvat import ATVAtIE
from .audimedia import AudiMediaIE
from .audioboom import AudioBoomIE
from .audiodraft import (
@ -157,13 +146,6 @@ from .audius import (
AudiusProfileIE,
AudiusTrackIE,
)
from .awaan import (
AWAANIE,
AWAANLiveIE,
AWAANSeasonIE,
AWAANVideoIE,
)
from .axs import AxsIE
from .azmedien import AZMedienIE
from .baidu import BaiduVideoIE
from .banbye import (
@ -190,10 +172,6 @@ from .bbc import (
BBCCoUkPlaylistIE,
)
from .beacon import BeaconTvIE
from .beatbump import (
BeatBumpPlaylistIE,
BeatBumpVideoIE,
)
from .beatport import BeatportIE
from .beeg import BeegIE
from .behindkink import BehindKinkIE
@ -210,7 +188,6 @@ from .bibeltv import (
BibelTVSeriesIE,
BibelTVVideoIE,
)
from .bigflix import BigflixIE
from .bigo import BigoIE
from .bild import BildIE
from .bilibili import (
@ -255,7 +232,6 @@ from .blerp import BlerpIE
from .blogger import BloggerIE
from .bloomberg import BloombergIE
from .bluesky import BlueskyIE
from .bokecc import BokeCCIE
from .bongacams import BongaCamsIE
from .boosty import BoostyIE
from .bostonglobe import BostonGlobeIE
@ -288,14 +264,8 @@ from .businessinsider import BusinessInsiderIE
from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE
from .c56 import C56IE
from .caffeinetv import CaffeineTVIE
from .callin import CallinIE
from .caltrans import CaltransIE
from .cam4 import CAM4IE
from .camdemy import (
CamdemyFolderIE,
CamdemyIE,
)
from .camfm import (
CamFMEpisodeIE,
CamFMShowIE,
@ -371,7 +341,6 @@ from .ciscolive import (
from .ciscowebex import CiscoWebexIE
from .cjsw import CJSWIE
from .clipchamp import ClipchampIE
from .clippit import ClippitIE
from .cliprs import ClipRsIE
from .closertotruth import CloserToTruthIE
from .cloudflarestream import CloudflareStreamIE
@ -395,7 +364,6 @@ from .commonprotocols import (
ViewSourceIE,
)
from .condenast import CondeNastIE
from .contv import CONtvIE
from .corus import CorusIE
from .coub import CoubIE
from .cozytv import CozyTVIE
@ -510,7 +478,6 @@ from .dplay import (
)
from .drbonanza import DRBonanzaIE
from .dreisat import DreiSatIE
from .drooble import DroobleIE
from .dropbox import DropboxIE
from .dropout import (
DropoutIE,
@ -525,10 +492,6 @@ from .drtv import (
DRTVSeriesIE,
)
from .dtube import DTubeIE
from .duboku import (
DubokuIE,
DubokuPlaylistIE,
)
from .dumpert import DumpertIE
from .duoplay import DuoplayIE
from .dvtv import DVTVIE
@ -546,8 +509,6 @@ from .eggs import (
EggsArtistIE,
EggsIE,
)
from .eighttracks import EightTracksIE
from .eitb import EitbIE
from .elementorembed import ElementorEmbedIE
from .elonet import ElonetIE
from .elpais import ElPaisIE
@ -591,7 +552,6 @@ from .europeantour import EuropeanTourIE
from .eurosport import EurosportIE
from .euscreen import EUScreenIE
from .expressen import ExpressenIE
from .eyedotv import EyedoTVIE
from .facebook import (
FacebookAdsIE,
FacebookIE,
@ -655,7 +615,6 @@ from .foxnews import (
from .foxsports import FoxSportsIE
from .fptplay import FptplayIE
from .francaisfacile import FrancaisFacileIE
from .franceinter import FranceInterIE
from .francetv import (
FranceTVIE,
FranceTVInfoIE,
@ -672,14 +631,10 @@ from .frontendmasters import (
FrontendMastersIE,
FrontendMastersLessonIE,
)
from .fujitv import FujiTVFODPlus7IE
from .funk import FunkIE
from .funker530 import Funker530IE
from .fuyintv import FuyinTVIE
from .gab import (
GabIE,
GabTVIE,
)
from .gab import GabIE
from .gaia import GaiaIE
from .gamedevtv import GameDevTVDashboardIE
from .gamejolt import (
@ -743,16 +698,10 @@ from .googledrive import (
GoogleDriveFolderIE,
GoogleDriveIE,
)
from .googlepodcasts import (
GooglePodcastsFeedIE,
GooglePodcastsIE,
)
from .googlesearch import GoogleSearchIE
from .goplay import GoPlayIE
from .gopro import GoProIE
from .goshgay import GoshgayIE
from .gotostage import GoToStageIE
from .gputechconf import GPUTechConfIE
from .graspop import GraspopIE
from .gronkh import (
GronkhFeedIE,
@ -769,7 +718,6 @@ from .hgtv import HGTVComShowIE
from .hidive import HiDiveIE
from .historicfilms import HistoricFilmsIE
from .hitrecord import HitRecordIE
from .hketv import HKETVIE
from .hollywoodreporter import (
HollywoodReporterIE,
HollywoodReporterPlaylistIE,
@ -818,7 +766,6 @@ from .idagio import (
IdagioRecordingIE,
IdagioTrackIE,
)
from .idolplus import IdolPlusIE
from .ign import (
IGNIE,
IGNArticleIE,
@ -851,7 +798,6 @@ from .instagram import (
InstagramUserIE,
)
from .internazionale import InternazionaleIE
from .internetvideoarchive import InternetVideoArchiveIE
from .iprima import (
IPrimaCNNIE,
IPrimaIE,
@ -886,7 +832,6 @@ from .iwara import (
IwaraUserIE,
)
from .ixigua import IxiguaIE
from .izlesene import IzleseneIE
from .jamendo import (
JamendoAlbumIE,
JamendoIE,
@ -939,11 +884,9 @@ from .kika import (
KikaIE,
KikaPlaylistIE,
)
from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE
from .kommunetv import KommunetvIE
from .kompas import KompasVideoIE
from .koo import KooIE
from .krasview import KrasViewIE
from .kth import KTHIE
from .ku6 import Ku6IE
@ -991,7 +934,6 @@ from .lecturio import (
from .leeco import (
LeIE,
LePlaylistIE,
LetvCloudIE,
)
from .lefigaro import (
LeFigaroVideoEmbedIE,
@ -1020,11 +962,6 @@ from .liputan6 import Liputan6IE
from .listennotes import ListenNotesIE
from .litv import LiTVIE
from .livejournal import LiveJournalIE
from .livestream import (
LivestreamIE,
LivestreamOriginalIE,
LivestreamShortenerIE,
)
from .livestreamfails import LivestreamfailsIE
from .lnk import LnkIE
from .locipo import (
@ -1048,10 +985,6 @@ from .lsm import (
LSMReplayIE,
)
from .lumni import LumniIE
from .lynda import (
LyndaCourseIE,
LyndaIE,
)
from .maariv import MaarivIE
from .magellantv import MagellanTVIE
from .magentamusik import MagentaMusikIE
@ -1117,13 +1050,11 @@ from .microsoftembed import (
MicrosoftLearnSessionIE,
MicrosoftMediusIE,
)
from .microsoftstream import MicrosoftStreamIE
from .minds import (
MindsChannelIE,
MindsGroupIE,
MindsIE,
)
from .minoto import MinotoIE
from .mir24tv import Mir24TvIE
from .mirrativ import (
MirrativIE,
@ -1157,18 +1088,9 @@ from .mlb import (
from .mlssoccer import MLSSoccerIE
from .mocha import MochaVideoIE
from .mojevideo import MojevideoIE
from .mojvideo import MojvideoIE
from .monstercat import MonstercatIE
from .motherless import (
MotherlessGalleryIE,
MotherlessGroupIE,
MotherlessIE,
MotherlessUploaderIE,
)
from .motorsport import MotorsportIE
from .moviepilot import MoviepilotIE
from .moview import MoviewPlayIE
from .moviezine import MoviezineIE
from .movingimage import MovingImageIE
from .msn import MSNIE
from .mtv import MTVIE
@ -1179,12 +1101,6 @@ from .murrtube import (
)
from .museai import MuseAIIE
from .musescore import MuseScoreIE
from .musicdex import (
MusicdexAlbumIE,
MusicdexArtistIE,
MusicdexPlaylistIE,
MusicdexSongIE,
)
from .mux import MuxIE
from .mx3 import (
Mx3IE,
@ -1212,14 +1128,10 @@ from .nate import (
NateIE,
NateProgramIE,
)
from .nationalgeographic import (
NationalGeographicTVIE,
NationalGeographicVideoIE,
)
from .nationalgeographic import NationalGeographicTVIE
from .naver import (
NaverIE,
NaverLiveIE,
NaverNowIE,
)
from .nba import (
NBAIE,
@ -1257,7 +1169,6 @@ from .nebula import (
NebulaSubscriptionsIE,
)
from .nekohacker import NekoHackerIE
from .nerdcubed import NerdCubedFeedIE
from .nest import (
NestClipIE,
NestIE,
@ -1275,11 +1186,6 @@ from .neteasemusic import (
NetEaseMusicProgramIE,
NetEaseMusicSingerIE,
)
from .netverse import (
NetverseIE,
NetversePlaylistIE,
NetverseSearchIE,
)
from .netzkino import NetzkinoIE
from .newgrounds import (
NewgroundsIE,
@ -1389,11 +1295,6 @@ from .ntvcojp import NTVCoJpCUIE
from .ntvde import NTVDeIE
from .ntvru import NTVRuIE
from .nubilesporn import NubilesPornIE
from .nuum import (
NuumLiveIE,
NuumMediaIE,
NuumTabIE,
)
from .nuvid import NuvidIE
from .nytimes import (
NYTimesArticleIE,
@ -1426,7 +1327,6 @@ from .onet import (
OnetMVPIE,
OnetPlIE,
)
from .onionstudios import OnionStudiosIE
from .onsen import OnsenIE
from .opencast import (
OpencastIE,
@ -1437,7 +1337,6 @@ from .openrec import (
OpenRecIE,
OpenRecMovieIE,
)
from .ora import OraTVIE
from .orf import (
ORFIPTVIE,
ORFONIE,
@ -1511,26 +1410,18 @@ from .pinterest import (
PinterestCollectionIE,
PinterestIE,
)
from .piramidetv import (
PiramideTVChannelIE,
PiramideTVIE,
)
from .planetmarathi import PlanetMarathiIE
from .platzi import (
PlatziCourseIE,
PlatziIE,
)
from .playerfm import PlayerFmIE
from .playplustv import PlayPlusTVIE
from .playsuisse import PlaySuisseIE
from .playtvak import PlaytvakIE
from .playwire import PlaywireIE
from .pluralsight import (
PluralsightCourseIE,
PluralsightIE,
)
from .plutotv import PlutoTVIE
from .plvideo import PlVideoIE
from .plyr import PlyrEmbedIE
from .podbayfm import (
PodbayFMChannelIE,
@ -1574,7 +1465,6 @@ from .prankcast import (
from .premiershiprugby import PremiershipRugbyIE
from .presstv import PressTVIE
from .projectveritas import ProjectVeritasIE
from .prosiebensat1 import ProSiebenSat1IE
from .prx import (
PRXAccountIE,
PRXSeriesIE,
@ -1586,7 +1476,6 @@ from .puhutv import (
PuhuTVIE,
PuhuTVSerieIE,
)
from .puls4 import Puls4IE
from .pyvideo import PyvideoIE
from .qdance import QDanceIE
from .qingting import QingTingIE
@ -1610,10 +1499,6 @@ from .radiocanada import (
RadioCanadaAudioVideoIE,
RadioCanadaIE,
)
from .radiocomercial import (
RadioComercialIE,
RadioComercialPlaylistIE,
)
from .radiode import RadioDeIE
from .radiofrance import (
FranceCultureIE,
@ -1678,7 +1563,6 @@ from .redbulltv import (
RedBullTVRrnContentIE,
)
from .reddit import RedditIE
from .redge import RedCDNLivxIE
from .redgifs import (
RedGifsIE,
RedGifsSearchIE,
@ -1692,13 +1576,11 @@ from .rentv import (
from .restudy import RestudyIE
from .reuters import ReutersIE
from .reverbnation import ReverbNationIE
from .rheinmaintv import RheinMainTVIE
from .ridehome import RideHomeIE
from .rinsefm import (
RinseFMArtistPlaylistIE,
RinseFMIE,
)
from .rmcdecouverte import RMCDecouverteIE
from .rockstargames import RockstarGamesIE
from .rokfin import (
RokfinChannelIE,
@ -1815,7 +1697,6 @@ from .senategov import (
SenateGovIE,
SenateISVPIE,
)
from .sendtonews import SendtoNewsIE
from .servus import ServusIE
from .sevenplus import SevenPlusIE
from .sexu import SexuIE
@ -1828,7 +1709,6 @@ from .shahid import (
ShahidShowIE,
)
from .sharepoint import SharePointIE
from .sharevideos import ShareVideosEmbedIE
from .shemaroome import ShemarooMeIE
from .shiey import ShieyIE
from .showroomlive import ShowRoomLiveIE
@ -1873,7 +1753,6 @@ from .smotrim import (
SmotrimPlaylistIE,
)
from .snapchat import SnapchatSpotlightIE
from .snotr import SnotrIE
from .softwhiteunderbelly import SoftWhiteUnderbellyIE
from .sohu import (
SohuIE,
@ -1923,7 +1802,6 @@ from .spreaker import (
SpreakerIE,
SpreakerShowIE,
)
from .springboardplatform import SpringboardPlatformIE
from .sproutvideo import (
SproutVideoIE,
VidsIoIE,
@ -1940,7 +1818,6 @@ from .stacommu import (
TheaterComplexTownVODIE,
)
from .stageplus import StagePlusVODConcertIE
from .stanfordoc import StanfordOpenClassroomIE
from .startrek import StarTrekIE
from .startv import StarTVIE
from .steam import (
@ -1948,10 +1825,6 @@ from .steam import (
SteamCommunityIE,
SteamIE,
)
from .stitcher import (
StitcherIE,
StitcherShowIE,
)
from .storyfire import (
StoryFireIE,
StoryFireSeriesIE,
@ -1961,7 +1834,6 @@ from .streaks import StreaksIE
from .streamable import StreamableIE
from .streamcz import StreamCZIE
from .streetvoice import StreetVoiceIE
from .stretchinternet import StretchInternetIE
from .stripchat import StripchatIE
from .stv import STVPlayerIE
from .subsplash import (
@ -1979,8 +1851,6 @@ from .svt import (
SVTPlayIE,
SVTSeriesIE,
)
from .swearnet import SwearnetEpisodeIE
from .syvdk import SYVDKIE
from .sztvhu import SztvHuIE
from .tagesschau import TagesschauIE
from .taptap import (
@ -2039,10 +1909,6 @@ from .telequebec import (
)
from .teletask import TeleTaskIE
from .telewebion import TelewebionIE
from .tempo import (
IVXPlayerIE,
TempoIE,
)
from .tencent import (
IflixEpisodeIE,
IflixSeriesIE,
@ -2068,7 +1934,6 @@ from .theguardian import (
TheGuardianPodcastPlaylistIE,
)
from .thehighwire import TheHighWireIE
from .theholetv import TheHoleTvIE
from .theintercept import TheInterceptIE
from .theplatform import (
ThePlatformFeedIE,
@ -2120,12 +1985,6 @@ from .toypics import (
ToypicsIE,
ToypicsUserIE,
)
from .traileraddict import TrailerAddictIE
from .triller import (
TrillerIE,
TrillerShortIE,
TrillerUserIE,
)
from .trovo import (
TrovoChannelClipIE,
TrovoChannelVodIE,
@ -2208,7 +2067,6 @@ from .tvplay import (
TVPlayHomeIE,
TVPlayIE,
)
from .tvplayer import TVPlayerIE
from .tvw import (
TvwIE,
TvwNewsIE,
@ -2248,12 +2106,8 @@ from .udemy import (
UdemyIE,
)
from .udn import UDNEmbedIE
from .ufctv import (
UFCTVIE,
UFCArabiaIE,
)
from .ufctv import UFCTVIE
from .ukcolumn import UkColumnIE
from .uktvplay import UKTVPlayIE
from .uliza import (
UlizaPlayerIE,
UlizaPortalIE,
@ -2283,7 +2137,6 @@ from .ustudio import (
UstudioEmbedIE,
UstudioIE,
)
from .utreon import UtreonIE
from .varzesh3 import Varzesh3IE
from .vbox7 import Vbox7IE
from .veo import VeoIE
@ -2308,20 +2161,7 @@ from .videocampus_sachsen import (
VideocampusSachsenIE,
ViMPPlaylistIE,
)
from .videodetective import VideoDetectiveIE
from .videofyme import VideofyMeIE
from .videoken import (
VideoKenCategoryIE,
VideoKenIE,
VideoKenPlayerIE,
VideoKenPlaylistIE,
VideoKenTopicIE,
)
from .videomore import (
VideomoreIE,
VideomoreSeasonIE,
VideomoreVideoIE,
)
from .videoken import VideoKenPlayerIE
from .videopress import VideoPressIE
from .vidflex import VidflexIE
from .vidio import (
@ -2351,10 +2191,6 @@ from .vimeo import (
VimeoUserIE,
VimeoWatchLaterIE,
)
from .vimm import (
VimmIE,
VimmRecordingIE,
)
from .viously import ViouslyIE
from .viqeo import ViqeoIE
from .visir import VisirIE
@ -2372,7 +2208,6 @@ from .vk import (
VKWallPostIE,
)
from .vocaroo import VocarooIE
from .vodpl import VODPlIE
from .vodplatform import VODPlatformIE
from .voicy import (
VoicyChannelIE,
@ -2404,11 +2239,6 @@ from .vtv import (
VTVIE,
VTVGoIE,
)
from .vuclip import VuClipIE
from .vvvvid import (
VVVVIDIE,
VVVVIDShowIE,
)
from .walla import WallaIE
from .washingtonpost import (
WashingtonPostArticleIE,
@ -2418,7 +2248,6 @@ from .wat import WatIE
from .wdr import (
WDRIE,
WDRElefantIE,
WDRMobileIE,
WDRPageIE,
)
from .webcamerapl import WebcameraplIE
@ -2445,7 +2274,6 @@ from .weverse import (
WeverseMomentIE,
)
from .wevidi import WeVidiIE
from .weyyak import WeyyakIE
from .whowatch import WhoWatchIE
from .whyp import WhypIE
from .wikimedia import WikimediaIE
@ -2494,7 +2322,6 @@ from .ximalaya import (
from .xinpianchang import XinpianchangIE
from .xminus import XMinusIE
from .xnxx import XNXXIE
from .xstream import XstreamIE
from .xvideos import (
XVideosIE,
XVideosQuickiesIE,
@ -2618,10 +2445,6 @@ from .zdf import (
ZDFIE,
ZDFChannelIE,
)
from .zee5 import (
Zee5IE,
Zee5SeriesIE,
)
from .zeenews import ZeeNewsIE
from .zenporn import ZenPornIE
from .zetland import ZetlandDKArticleIE

View File

@ -1,96 +0,0 @@
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
determine_ext,
int_or_none,
mimetype2ext,
parse_iso8601,
traverse_obj,
)
class AirTVIE(InfoExtractor):
    """Extractor for air.tv watch pages.

    Video data is read from the page's Next.js state. When that data carries
    a ``youtube_id`` the result is delegated to the YouTube extractor.
    """

    _VALID_URL = r'https?://www\.air\.tv/watch\?v=(?P<id>\w+)'
    _TESTS = [{
        # without youtube_id
        'url': 'https://www.air.tv/watch?v=W87jcWleSn2hXZN47zJZsQ',
        'info_dict': {
            'id': 'W87jcWleSn2hXZN47zJZsQ',
            'ext': 'mp4',
            'release_date': '20221003',
            'release_timestamp': 1664792603,
            'channel_id': 'vgfManQlRQKgoFQ8i8peFQ',
            'title': 'md5:c12d49ed367c3dadaa67659aff43494c',
            'upload_date': '20221003',
            'duration': 151,
            'view_count': int,
            'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg',
            'timestamp': 1664792603,
        },
    }, {
        # with youtube_id
        'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q',
        'info_dict': {
            'id': '2ZTqmpee-bQ',
            'ext': 'mp4',
            'comment_count': int,
            'tags': 'count:11',
            'channel_follower_count': int,
            'like_count': int,
            'uploader': 'Newsflare',
            'thumbnail': 'https://i.ytimg.com/vi_webp/2ZTqmpee-bQ/maxresdefault.webp',
            'availability': 'public',
            'title': 'Geese Chase Alligator Across Golf Course',
            'uploader_id': 'NewsflareBreaking',
            'channel_url': 'https://www.youtube.com/channel/UCzSSoloGEz10HALUAbYhngQ',
            'description': 'md5:99b21d9cea59330149efbd9706e208f5',
            'age_limit': 0,
            'channel_id': 'UCzSSoloGEz10HALUAbYhngQ',
            'uploader_url': 'http://www.youtube.com/user/NewsflareBreaking',
            'view_count': int,
            'categories': ['News & Politics'],
            'live_status': 'not_live',
            'playable_in_embed': True,
            'channel': 'Newsflare',
            'duration': 37,
            'upload_date': '20180511',
        },
    }]

    def _get_formats_and_subtitle(self, json_data, video_id):
        """Return ``(formats, subtitles)`` built from the source entries of *json_data*.

        Entries whose extension resolves to ``m3u8`` are expanded as HLS
        manifests; every other entry becomes a single direct-URL format.
        """
        formats = []
        subtitles = {}
        for entry in traverse_obj(json_data, 'sources', 'sources_desktop', ...):
            ext = determine_ext(entry.get('src'), mimetype2ext(entry.get('type')))
            if ext != 'm3u8':
                formats.append({'url': entry.get('src'), 'ext': ext})
                continue
            hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles(entry.get('src'), video_id)
            formats.extend(hls_formats)
            self._merge_subtitles(hls_subs, target=subtitles)
        return formats, subtitles

    def _real_extract(self, url):
        """Extract the video at *url*, or hand off to YouTube if the page references one."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        page_props = self._search_nextjs_data(webpage, display_id)['props']['pageProps']
        video = page_props['initialState']['videos'][display_id]

        if video.get('youtube_id'):
            return self.url_result(
                f'https://www.youtube.com/watch?v={video.get("youtube_id")}', YoutubeIE)

        formats, subtitles = self._get_formats_and_subtitle(video, display_id)
        return {
            'id': display_id,
            # Fall back to the page's og:title when the state has no title
            'title': video.get('title') or self._html_search_meta('og:title', webpage),
            'formats': formats,
            'subtitles': subtitles,
            'description': video.get('description') or None,
            'duration': int_or_none(video.get('duration')),
            'thumbnails': [
                {'url': thumb_url}
                for thumb_url in traverse_obj(video, ('default_thumbnails', ...))],
            'channel_id': traverse_obj(video, 'channel', 'channel_slug'),
            'timestamp': parse_iso8601(video.get('created')),
            'release_timestamp': parse_iso8601(video.get('published')),
            'view_count': int_or_none(video.get('views')),
        }

View File

@ -1,83 +0,0 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
dict_get,
get_element_by_class,
int_or_none,
unified_strdate,
url_or_none,
)
class Alsace20TVBaseIE(InfoExtractor):
    """Shared extraction logic for the alsace20.tv extractors."""

    def _extract_video(self, video_id, url=None):
        """Build the info dict for *video_id*.

        Metadata and format URLs come from the site's player endpoint. When
        *url* is given, the page is also downloaded (non-fatally) and used
        for the description, duration and a fallback thumbnail.
        """
        info = self._download_json(
            f'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key={video_id}&habillage=0&mode=html',
            video_id) or {}
        title = info.get('titre')

        formats = []
        for res, fmt_url in (info.get('files') or {}).items():
            # URLs containing '/smil:_' are handled as SMIL, all others as DASH manifests
            if '/smil:_' in fmt_url:
                extracted = self._extract_smil_formats(fmt_url, video_id, fatal=False)
            else:
                extracted = self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False)
            formats.extend(extracted)

        webpage = ''
        if url:
            webpage = self._download_webpage(url, video_id, fatal=False) or ''

        thumbnail = url_or_none(dict_get(info, ('image', 'preview')) or self._og_search_thumbnail(webpage))

        # A six-digit group after a slash in the thumbnail URL is read as YYMMDD
        date_digits = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None)
        upload_date = None
        if date_digits:
            upload_date = unified_strdate(f'20{date_digits[:2]}-{date_digits[2:4]}-{date_digits[4:]}')

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': clean_html(get_element_by_class('wysiwyg', webpage)),
            'upload_date': upload_date,
            'thumbnail': thumbnail,
            'duration': int_or_none(self._og_search_property('video:duration', webpage) if webpage else None),
            'view_count': int_or_none(info.get('nb_vues')),
        }
class Alsace20TVIE(Alsace20TVBaseIE):
    """Extractor for regular alsace20.tv video pages."""

    _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/(?:[\w-]+/)+[\w-]+-(?P<id>[\w]+)'
    _TESTS = [{
        'url': 'https://www.alsace20.tv/VOD/Actu/JT/Votre-JT-jeudi-3-fevrier-lyNHCXpYJh.html',
        'info_dict': {
            'id': 'lyNHCXpYJh',
            'ext': 'mp4',
            'description': 'md5:fc0bc4a0692d3d2dba4524053de4c7b7',
            'title': 'Votre JT du jeudi 3 février',
            'upload_date': '20220203',
            'thumbnail': r're:https?://.+\.jpg',
            'duration': 1073,
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Extract the video, passing the page URL so page metadata is also used."""
        return self._extract_video(self._match_id(url), url)
class Alsace20TVEmbedIE(Alsace20TVBaseIE):
    """Extractor for alsace20.tv ``/emb/`` embed URLs."""

    _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/emb/(?P<id>[\w]+)'
    _TESTS = [{
        'url': 'https://www.alsace20.tv/emb/lyNHCXpYJh',
        # 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
        'info_dict': {
            'id': 'lyNHCXpYJh',
            'ext': 'mp4',
            'title': 'Votre JT du jeudi 3 février',
            'upload_date': '20220203',
            'thumbnail': r're:https?://.+\.jpg',
            'view_count': int,
        },
        'params': {
            'format': 'bestvideo',
        },
    }]

    def _real_extract(self, url):
        """Extract the video from the player endpoint only; no page URL is passed on."""
        return self._extract_video(self._match_id(url))

View File

@ -1,98 +0,0 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
float_or_none,
int_or_none,
str_or_none,
traverse_obj,
unified_timestamp,
)
class AnchorFMEpisodeIE(InfoExtractor):
    """Extractor for anchor.fm podcast episodes, including ``/embed/`` URLs and iframes."""

    _VALID_URL = r'https?://anchor\.fm/(?P<channel_name>\w+)/(?:embed/)?episodes/[\w-]+-(?P<episode_id>\w+)'
    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'https://anchor.fm/lovelyti/episodes/Chrisean-Rock-takes-to-twitter-to-announce-shes-pregnant--Blueface-denies-he-is-the-father-e1tpt3d',
        'info_dict': {
            'id': 'e1tpt3d',
            'ext': 'mp3',
            'title': ' Chrisean Rock takes to twitter to announce she\'s pregnant, Blueface denies he is the father!',
            'description': 'md5:207d167de3e28ceb4ddc1ebf5a30044c',
            'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_nologo/1034827/1034827-1658438968460-5f3bfdf3601e8.jpg',
            'duration': 624.718,
            'uploader': 'Lovelyti ',
            'uploader_id': '991541',
            'channel': 'lovelyti',
            'modified_date': '20230121',
            'modified_timestamp': 1674285178,
            'release_date': '20230121',
            'release_timestamp': 1674285179,
            'episode_id': 'e1tpt3d',
        },
    }, {
        # embed url
        'url': 'https://anchor.fm/apakatatempo/embed/episodes/S2E75-Perang-Bintang-di-Balik-Kasus-Ferdy-Sambo-dan-Ismail-Bolong-e1shjqd',
        'info_dict': {
            'id': 'e1shjqd',
            'ext': 'mp3',
            'title': 'S2E75 Perang Bintang di Balik Kasus Ferdy Sambo dan Ismail Bolong',
            'description': 'md5:9e95ad9293bf00178bf8d33e9cb92c41',
            'duration': 1042.008,
            'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg',
            'release_date': '20221221',
            'release_timestamp': 1671595916,
            'modified_date': '20221221',
            'modified_timestamp': 1671590834,
            'channel': 'apakatatempo',
            'uploader': 'Podcast Tempo',
            'uploader_id': '2585461',
            'season': 'Season 2',
            'season_number': 2,
            'episode_id': 'e1shjqd',
        },
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://podcast.tempo.co/podcast/192/perang-bintang-di-balik-kasus-ferdy-sambo-dan-ismail-bolong',
        'info_dict': {
            'id': 'e1shjqd',
            'ext': 'mp3',
            'release_date': '20221221',
            'duration': 1042.008,
            'season': 'Season 2',
            'modified_timestamp': 1671590834,
            'uploader_id': '2585461',
            'modified_date': '20221221',
            'description': 'md5:9e95ad9293bf00178bf8d33e9cb92c41',
            'season_number': 2,
            'title': 'S2E75 Perang Bintang di Balik Kasus Ferdy Sambo dan Ismail Bolong',
            'release_timestamp': 1671595916,
            'episode_id': 'e1shjqd',
            'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg',
            'uploader': 'Podcast Tempo',
            'channel': 'apakatatempo',
        },
    }]

    def _real_extract(self, url):
        """Fetch the episode's API record and map it to an audio-only info dict."""
        mobj = self._match_valid_url(url)
        channel_name = mobj.group('channel_name')
        episode_id = mobj.group('episode_id')
        api_data = self._download_json(f'https://anchor.fm/api/v3/episodes/{episode_id}', episode_id)

        def episode_field(key):
            # Look up a single key under the top-level 'episode' object
            return traverse_obj(api_data, ('episode', key))

        def creator_field(key):
            # Look up a single key under the top-level 'creator' object
            return traverse_obj(api_data, ('creator', key))

        return {
            'id': episode_id,
            'title': episode_field('title'),
            'url': traverse_obj(api_data, ('episode', 'episodeEnclosureUrl'), ('episodeAudios', 0, 'url')),
            'ext': 'mp3',
            'vcodec': 'none',
            'thumbnail': episode_field('episodeImage'),
            'description': clean_html(traverse_obj(api_data, ('episode', ('description', 'descriptionPreview')), get_all=False)),
            # The raw duration is divided by 1000 before being reported
            'duration': float_or_none(episode_field('duration'), 1000),
            'modified_timestamp': unified_timestamp(episode_field('modified')),
            'release_timestamp': int_or_none(episode_field('publishOnUnixTimestamp')),
            'episode_id': episode_id,
            'uploader': creator_field('name'),
            'uploader_id': str_or_none(creator_field('userId')),
            'season_number': int_or_none(episode_field('podcastSeasonNumber')),
            'channel': channel_name or creator_field('vanitySlug'),
        }

View File

@ -1,277 +0,0 @@
import json
import re
import urllib.parse
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_duration,
unified_strdate,
)
class AppleTrailersIE(InfoExtractor):
    """Extractor for movie pages on trailers.apple.com.

    The per-film JSON feed is tried first. If it cannot be downloaded, the
    extractor falls back to parsing the page's ``itunes.inc`` playlist include.
    """

    IE_NAME = 'appletrailers'
    _VALID_URL = r'https?://(?:www\.|movie)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
    _TESTS = [{
        'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
        'info_dict': {
            'id': '5111',
            'title': 'Man of Steel',
        },
        'playlist': [
            {
                'md5': 'd97a8e575432dbcb81b7c3acb741f8a8',
                'info_dict': {
                    'id': 'manofsteel-trailer4',
                    'ext': 'mov',
                    'duration': 111,
                    'title': 'Trailer 4',
                    'upload_date': '20130523',
                    'uploader_id': 'wb',
                },
            },
            {
                'md5': 'b8017b7131b721fb4e8d6f49e1df908c',
                'info_dict': {
                    'id': 'manofsteel-trailer3',
                    'ext': 'mov',
                    'duration': 182,
                    'title': 'Trailer 3',
                    'upload_date': '20130417',
                    'uploader_id': 'wb',
                },
            },
            {
                'md5': 'd0f1e1150989b9924679b441f3404d48',
                'info_dict': {
                    'id': 'manofsteel-trailer',
                    'ext': 'mov',
                    'duration': 148,
                    'title': 'Trailer',
                    'upload_date': '20121212',
                    'uploader_id': 'wb',
                },
            },
            {
                'md5': '5fe08795b943eb2e757fa95cb6def1cb',
                'info_dict': {
                    'id': 'manofsteel-teaser',
                    'ext': 'mov',
                    'duration': 93,
                    'title': 'Teaser',
                    'upload_date': '20120721',
                    'uploader_id': 'wb',
                },
            },
        ],
    }, {
        'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
        'info_dict': {
            'id': '4489',
            'title': 'Blackthorn',
        },
        'playlist_mincount': 2,
        'expected_warnings': ['Unable to download JSON metadata'],
    }, {
        # json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json
        'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/',
        'info_dict': {
            'id': '15881',
            'title': 'Kung Fu Panda 3',
        },
        'playlist_mincount': 4,
    }, {
        'url': 'http://trailers.apple.com/ca/metropole/autrui/',
        'only_matching': True,
    }, {
        'url': 'http://movietrailers.apple.com/trailers/focus_features/kuboandthetwostrings/',
        'only_matching': True,
    }]

    _JSON_RE = r'iTunes.playURL\((.*?)\);'

    def _real_extract(self, url):
        """Return a playlist of the trailers/clips listed for the movie at *url*."""
        uploader_id, movie = self._match_valid_url(url).group('company', 'movie')

        webpage = self._download_webpage(url, movie)
        film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')

        film_data = self._download_json(
            f'http://trailers.apple.com/trailers/feeds/data/{film_id}.json',
            film_id, fatal=False)

        if film_data:
            # Preferred path: every clip and its sizes are described by the JSON feed
            entries = []
            for clip in film_data.get('clips', []):
                clip_title = clip['title']

                formats = []
                for version, version_data in clip.get('versions', {}).items():
                    for size, size_data in version_data.get('sizes', {}).items():
                        src = size_data.get('src')
                        if src:
                            formats.append({
                                'format_id': f'{version}-{size}',
                                'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
                                'width': int_or_none(size_data.get('width')),
                                'height': int_or_none(size_data.get('height')),
                                'language': version[:2],
                            })

                entries.append({
                    'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(),
                    'formats': formats,
                    'title': clip_title,
                    'thumbnail': clip.get('screen') or clip.get('thumb'),
                    'duration': parse_duration(clip.get('runtime') or clip.get('faded')),
                    'upload_date': unified_strdate(clip.get('posted')),
                    'uploader_id': uploader_id,
                })

            page_data = film_data.get('page', {})
            return self.playlist_result(entries, film_id, page_data.get('movie_title'))

        # Fallback path: parse the playlist include as XML
        playlist_url = urllib.parse.urljoin(url, 'includes/playlists/itunes.inc')

        def fix_html(s):
            s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
            s = re.sub(r'<img ([^<]*?)/?>', r'<img \1/>', s)

            # The ' in the onClick attributes are not escaped, it couldn't be parsed
            # like: http://trailers.apple.com/trailers/wb/gravity/
            def escape_quotes(match):
                return 'iTunes.playURL({});'.format(match.group(1).replace('\'', '&#39;'))

            s = re.sub(self._JSON_RE, escape_quotes, s)
            return f'<html>{s}</html>'

        doc = self._download_xml(playlist_url, movie, transform_source=fix_html)

        playlist = []
        for li in doc.findall('./div/ul/li'):
            on_click = li.find('.//a').attrib['onClick']
            trailer_info = json.loads(
                self._search_regex(self._JSON_RE, on_click, 'trailer info'))
            first_url = trailer_info.get('url')
            if not first_url:
                continue

            title = trailer_info['title']
            video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
            thumbnail = li.find('.//img').attrib['src']
            upload_date = trailer_info['posted'].replace('-', '')

            # Runtime is matched as "<minutes>:<seconds>" and converted to seconds
            runtime_match = re.search(
                r'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', trailer_info['runtime'])
            duration = (
                60 * int(runtime_match.group('minutes')) + int(runtime_match.group('seconds'))
                if runtime_match else None)

            trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
            settings = self._download_json(
                urllib.parse.urljoin(url, f'includes/settings/{trailer_id}.json'),
                trailer_id, 'Downloading settings json')

            formats = [{
                # The src is a file pointing to the real video file
                'url': re.sub(r'_(\d*p\.mov)', r'_h\1', fmt['src']),
                'format': fmt['type'],
                'width': int_or_none(fmt['width']),
                'height': int_or_none(fmt['height']),
            } for fmt in settings['metadata']['sizes']]

            playlist.append({
                '_type': 'video',
                'id': video_id,
                'formats': formats,
                'title': title,
                'duration': duration,
                'thumbnail': thumbnail,
                'upload_date': upload_date,
                'uploader_id': uploader_id,
                'http_headers': {
                    'User-Agent': 'QuickTime compatible (yt-dlp)',
                },
            })

        return {
            '_type': 'playlist',
            'id': movie,
            'entries': playlist,
        }
class AppleTrailersSectionIE(InfoExtractor):
    """Playlist extractor for the ``#section=<name>`` views of trailers.apple.com."""

    IE_NAME = 'appletrailers:section'
    # Section name (as used in the URL fragment) -> JSON feed path and playlist title
    _SECTIONS = {
        'justadded': {
            'feed_path': 'just_added',
            'title': 'Just Added',
        },
        'exclusive': {
            'feed_path': 'exclusive',
            'title': 'Exclusive',
        },
        'justhd': {
            'feed_path': 'just_hd',
            'title': 'Just HD',
        },
        'mostpopular': {
            'feed_path': 'most_pop',
            'title': 'Most Popular',
        },
        'moviestudios': {
            'feed_path': 'studios',
            'title': 'Movie Studios',
        },
    }
    # Only the section names listed in _SECTIONS are accepted
    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>{})'.format('|'.join(_SECTIONS))
    _TESTS = [{
        'url': 'http://trailers.apple.com/#section=justadded',
        'info_dict': {
            'title': 'Just Added',
            'id': 'justadded',
        },
        'playlist_mincount': 80,
    }, {
        'url': 'http://trailers.apple.com/#section=exclusive',
        'info_dict': {
            'title': 'Exclusive',
            'id': 'exclusive',
        },
        'playlist_mincount': 80,
    }, {
        'url': 'http://trailers.apple.com/#section=justhd',
        'info_dict': {
            'title': 'Just HD',
            'id': 'justhd',
        },
        'playlist_mincount': 80,
    }, {
        'url': 'http://trailers.apple.com/#section=mostpopular',
        'info_dict': {
            'title': 'Most Popular',
            'id': 'mostpopular',
        },
        'playlist_mincount': 30,
    }, {
        'url': 'http://trailers.apple.com/#section=moviestudios',
        'info_dict': {
            'title': 'Movie Studios',
            'id': 'moviestudios',
        },
        'playlist_mincount': 80,
    }]

    def _real_extract(self, url):
        """Download the section's JSON feed and return one url_result per feed item."""
        section = self._match_id(url)
        section_info = self._SECTIONS[section]
        feed_items = self._download_json(
            'http://trailers.apple.com/trailers/home/feeds/{}.json'.format(section_info['feed_path']),
            section)

        entries = []
        for item in feed_items:
            # 'location' is a site-relative path to the trailer page
            entries.append(self.url_result('http://trailers.apple.com' + item['location']))

        return self.playlist_result(entries, section, section_info['title'])

View File

@ -1,107 +0,0 @@
import datetime as dt
from .common import InfoExtractor
from ..utils import (
ExtractorError,
float_or_none,
jwt_encode,
try_get,
)
class ATVAtIE(InfoExtractor):
    """Extractor for atv.at TV pages; yields a ``multi_video`` built from the page's content resources."""

    _VALID_URL = r'https?://(?:www\.)?atv\.at/tv/(?:[^/]+/){2,3}(?P<id>.*)'
    _TESTS = [{
        'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/bauer-sucht-frau/bauer-sucht-frau-staffel-18-folge-3-die-hofwochen',
        'md5': '3c3b4aaca9f63e32b35e04a9c2515903',
        'info_dict': {
            'id': 'v-ce9cgn1e70n5-1',
            'ext': 'mp4',
            'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen',
        },
    }, {
        'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1',
        'only_matching': True,
    }]

    # extracted from bootstrap.js function (search for e.encryption_key and use your browser's debugger)
    _ACCESS_ID = 'x_atv'
    _ENCRYPTION_KEY = 'Hohnaekeishoogh2omaeghooquooshia'

    def _extract_video_info(self, url, content, video):
        """Build the info dict for a single clip.

        ``content`` is the page's content-resource entry for the clip and
        ``video`` is the matching entry from the getsources response; its
        ``urls`` mapping is keyed by protocol name.
        """
        clip_id = content.get('splitId', content['id'])
        formats = []
        for protocol, variant in video['urls'].items():
            source_url = try_get(variant, lambda x: x['clear']['url'])
            if not source_url:
                # no clear URL for this protocol
                continue
            if protocol == 'dash':
                formats.extend(self._extract_mpd_formats(
                    source_url, clip_id, mpd_id=protocol, fatal=False))
                continue
            if protocol == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    source_url, clip_id, 'mp4', 'm3u8_native',
                    m3u8_id=protocol, fatal=False))
                continue
            formats.append({
                'url': source_url,
                'format_id': protocol,
            })

        return {
            'id': clip_id,
            'title': content.get('title'),
            'duration': float_or_none(content.get('duration')),
            'series': content.get('tvShowTitle'),
            'formats': formats,
        }

    def _real_extract(self, url):
        """Read the page state JSON, request the sources with a signed token and assemble the clips."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        state_json = self._search_regex(
            r'<script id="state" type="text/plain">(.*)</script>', webpage, 'json_data')
        json_data = self._parse_json(state_json, video_id=video_id)

        page = json_data['views']['default']['page']
        video_title = page['title']
        content_resource = page['contentResource']
        content_id = content_resource[0]['id']
        # Each clip is referenced by its position in the contentResource list
        content_ids = []
        for index, content in enumerate(content_resource):
            content_ids.append({
                'id': index,
                'subclip_start': content['start'],
                'subclip_end': content['end'],
            })

        # The token is valid from five minutes before to five minutes after the request time
        time_of_request = dt.datetime.now()
        validity_margin = dt.timedelta(minutes=5)
        payload = {
            'content_ids': {
                content_id: content_ids,
            },
            'secure_delivery': True,
            'iat': int(time_of_request.timestamp()),
            'nbf': int((time_of_request - validity_margin).timestamp()),
            'exp': int((time_of_request + validity_margin).timestamp()),
        }
        token = jwt_encode(payload, self._ENCRYPTION_KEY, headers={'kid': self._ACCESS_ID})

        videos = self._download_json(
            'https://vas-v4.p7s1video.net/4.0/getsources',
            content_id, 'Downloading videos JSON', query={
                'token': token,
            })

        # Only the first item of the 'data' mapping is used
        video_id, videos_data = next(iter(videos['data'].items()))

        error_msg = try_get(videos_data, lambda x: x['error']['title'])
        if error_msg == 'Geo check failed':
            self.raise_geo_restricted(error_msg)
        elif error_msg:
            raise ExtractorError(error_msg)

        entries = []
        for video in videos_data:
            entries.append(
                self._extract_video_info(url, content_resource[video['id']], video))

        return {
            '_type': 'multi_video',
            'id': video_id,
            'title': video_title,
            'entries': entries,
        }

View File

@ -1,181 +0,0 @@
import base64
import urllib.parse
from .common import InfoExtractor
from ..utils import (
format_field,
int_or_none,
parse_iso8601,
smuggle_url,
unsmuggle_url,
urlencode_postdata,
)
class AWAANIE(InfoExtractor):
    """Dispatcher for awaan.ae / dcndigital.ae ``show`` URLs; delegates to the video or season extractor."""

    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<id>\d+)/(?P<season_id>\d+))?'

    def _real_extract(self, url):
        """Pick the most specific target available: video, then season, then the whole show."""
        show_id, video_id, season_id = self._match_valid_url(url).groups()

        # A positive video id wins over everything else
        if video_id and int(video_id) > 0:
            return self.url_result(
                f'http://awaan.ae/media/{video_id}', 'AWAANVideo')

        # Otherwise a positive season id; the show id is smuggled along for the season extractor
        if season_id and int(season_id) > 0:
            season_url = smuggle_url(
                f'http://awaan.ae/program/season/{season_id}',
                {'show_id': show_id})
            return self.url_result(season_url, 'AWAANSeason')

        return self.url_result(
            f'http://awaan.ae/program/{show_id}', 'AWAANSeason')
class AWAANBaseIE(InfoExtractor):
    """Shared metadata parsing for the AWAAN video and live extractors."""

    def _parse_video_data(self, video_data, video_id, is_live):
        """Map an API metadata dict to common info-dict fields.

        English title/description are preferred, with the Arabic ones as
        fallback; the Arabic title is required when no English title is set.
        """
        title = video_data.get('title_en') or video_data['title_ar']
        img = video_data.get('img')
        description = video_data.get('description_en') or video_data.get('description_ar')
        thumbnail = format_field(img, None, 'http://admin.mangomolo.com/analytics/%s')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': int_or_none(video_data.get('duration')),
            'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
            'is_live': is_live,
            'uploader_id': video_data.get('user_id'),
        }
class AWAANVideoIE(AWAANBaseIE):
    """Extractor for single AWAAN videos; hands the embed URL over to ``MangomoloVideo``."""

    IE_NAME = 'awaan:video'
    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375',
        'md5': '5f61c33bfc7794315c671a62d43116aa',
        'info_dict': {
            'id': '17375',
            'ext': 'mp4',
            'title': 'رحلة العمر : الحلقة 1',
            'description': 'md5:0156e935d870acb8ef0a66d24070c6d6',
            'duration': 2041,
            'timestamp': 1227504126,
            'upload_date': '20081124',
            'uploader_id': '71',
        },
    }, {
        'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the video metadata and return a url_transparent result pointing at the embed page."""
        video_id = self._match_id(url)

        video_data = self._download_json(
            f'http://admin.mangomolo.com/analytics/index.php/plus/video?id={video_id}',
            video_id, headers={'Origin': 'http://awaan.ae'})
        info = self._parse_video_data(video_data, video_id, False)

        embed_query = urllib.parse.urlencode({
            'id': video_data['id'],
            'user_id': video_data['user_id'],
            'signature': video_data['signature'],
            'countries': 'Q0M=',
            'filter': 'DENY',
        })
        embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + embed_query

        # Parsed metadata first, then the hand-over fields for the embed extractor
        return {
            **info,
            '_type': 'url_transparent',
            'url': embed_url,
            'ie_key': 'MangomoloVideo',
        }
class AWAANLiveIE(AWAANBaseIE):
    """Extractor for AWAAN live channels; hands the embed URL over to ``MangomoloLive``."""

    IE_NAME = 'awaan:live'
    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)'
    _TEST = {
        'url': 'http://awaan.ae/live/6/dubai-tv',
        'info_dict': {
            'id': '6',
            'ext': 'mp4',
            'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'upload_date': '20150107',
            'timestamp': 1420588800,
            'uploader_id': '71',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Fetch the channel details and return a url_transparent result pointing at the embed page."""
        channel_id = self._match_id(url)

        channel_data = self._download_json(
            f'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id={channel_id}',
            channel_id, headers={'Origin': 'http://awaan.ae'})
        info = self._parse_video_data(channel_data, channel_id, True)

        # The live embed endpoint takes the user and channel ids base64-encoded
        encoded_user_id = base64.b64encode(channel_data['user_id'].encode()).decode()
        encoded_channel_id = base64.b64encode(channel_data['id'].encode()).decode()
        embed_query = urllib.parse.urlencode({
            'id': encoded_user_id,
            'channelid': encoded_channel_id,
            'signature': channel_data['signature'],
            'countries': 'Q0M=',
            'filter': 'DENY',
        })
        embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + embed_query

        return {
            **info,
            '_type': 'url_transparent',
            'url': embed_url,
            'ie_key': 'MangomoloLive',
        }
class AWAANSeasonIE(InfoExtractor):
    """Playlist extractor for AWAAN programs, addressed either by show id or by season id."""

    IE_NAME = 'awaan:season'
    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
    _TEST = {
        'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A',
        'info_dict': {
            'id': '7910',
            'title': 'محاضرات الشيخ الشعراوي',
        },
        'playlist_mincount': 27,
    }

    def _real_extract(self, url):
        """Return a playlist of ``AWAANVideo`` entries for the requested show/season.

        For season URLs the show id is taken from the smuggled data when
        present and looked up through the ``season_info`` endpoint otherwise.
        The playlist id is the season id (the show's ``default_season`` for
        show URLs). The title comes from the matching entry of
        ``show['seasons']`` and is ``None`` when no entry matches.
        """
        url, smuggled_data = unsmuggle_url(url, {})
        show_id, season_id = self._match_valid_url(url).groups()

        data = {}
        if season_id:
            data['season'] = season_id
            show_id = smuggled_data.get('show_id')
            if show_id is None:
                season = self._download_json(
                    f'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id={season_id}',
                    season_id, headers={'Origin': 'http://awaan.ae'})
                show_id = season['id']
        data['show_id'] = show_id

        show = self._download_json(
            'http://admin.mangomolo.com/analytics/index.php/plus/show',
            show_id, data=urlencode_postdata(data), headers={
                'Origin': 'http://awaan.ae',
                'Content-Type': 'application/x-www-form-urlencoded',
            })
        if not season_id:
            season_id = show['default_season']

        # First season whose id matches. Previously the result depended on a
        # match being found: without one the method never produced a playlist
        # even though the video list had already been downloaded.
        matching_season = next(
            (season for season in show['seasons'] if season['id'] == season_id), None)
        title = None
        if matching_season is not None:
            title = matching_season.get('title_en') or matching_season['title_ar']

        entries = []
        for video in show['videos']:
            video_id = str(video['id'])
            entries.append(self.url_result(
                f'http://awaan.ae/media/{video_id}', 'AWAANVideo', video_id))

        return self.playlist_result(entries, season_id, title)

View File

@ -1,89 +0,0 @@
from .common import InfoExtractor
from ..utils import (
float_or_none,
js_to_json,
parse_iso8601,
traverse_obj,
url_or_none,
)
class AxsIE(InfoExtractor):
    """Extractor for axs.tv video pages, backed by the myspotlight.tv dotplayer API."""

    IE_NAME = 'axs.tv'
    _VALID_URL = r'https?://(?:www\.)?axs\.tv/(?:channel/(?:[^/?#]+/)+)?video/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.axs.tv/video/5f4dc776b70e4f1c194f22ef/',
        'md5': '8d97736ae8e50c64df528e5e676778cf',
        'info_dict': {
            'id': '5f4dc776b70e4f1c194f22ef',
            'title': 'Small Town',
            'ext': 'mp4',
            'description': 'md5:e314d28bfaa227a4d7ec965fae19997f',
            'upload_date': '20230602',
            'timestamp': 1685729564,
            'duration': 1284.216,
            'series': 'Rock & Roll Road Trip with Sammy Hagar',
            'season': 'Season 2',
            'season_number': 2,
            'episode': '3',
            'thumbnail': 'https://images.dotstudiopro.com/5f4e9d330a0c3b295a7e8394',
        },
    }, {
        'url': 'https://www.axs.tv/channel/rock-star-interview/video/daryl-hall',
        'md5': '300ae795cd8f9984652c0949734ffbdc',
        'info_dict': {
            'id': '5f488148b70e4f392572977c',
            'display_id': 'daryl-hall',
            'title': 'Daryl Hall',
            'ext': 'mp4',
            'description': 'md5:e54ecaa0f4b5683fc9259e9e4b196628',
            'upload_date': '20230214',
            'timestamp': 1676403615,
            'duration': 2570.668,
            'series': 'The Big Interview with Dan Rather',
            'season': 'Season 3',
            'season_number': 3,
            'episode': '5',
            'thumbnail': 'https://images.dotstudiopro.com/5f4d1901f340b50d937cec32',
        },
    }]

    def _real_extract(self, url):
        """Resolve the page's ``mountObj`` to video/company ids and query the player API."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The page embeds a JS object literal holding the ids needed for the API call
        mount_obj = self._search_json(
            r'mountObj\s*=', webpage, 'video ID data', display_id,
            transform_source=js_to_json)
        video_id = mount_obj['video_id']
        company_id = mount_obj['company_id']

        api_response = self._download_json(
            f'https://api.myspotlight.tv/dotplayer/video/{company_id}/{video_id}',
            video_id, query={'device_type': 'desktop_web'})
        meta = api_response['video']

        formats = self._extract_m3u8_formats(
            meta['video_m3u8'], video_id, 'mp4', m3u8_id='hls')

        # Only captions with a valid srtPath URL are kept; the language defaults to 'en'
        subtitles = {}
        captions = traverse_obj(meta, ('closeCaption', lambda _, v: url_or_none(v['srtPath'])))
        for cc in captions:
            lang = cc.get('srtShortLang') or 'en'
            subtitles.setdefault(lang, []).append(
                {'ext': cc.get('srtExt'), 'url': cc['srtPath']})

        metadata = traverse_obj(meta, {
            'title': ('title', {str}),
            'description': ('description', {str}),
            'series': ('seriestitle', {str}),
            'season_number': ('season', {int}),
            'episode': ('episode', {str}),
            'duration': ('duration', {float_or_none}),
            'timestamp': ('updated_at', {parse_iso8601}),
            'thumbnail': ('thumb', {url_or_none}),
        })

        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            **metadata,
            'subtitles': subtitles,
        }

View File

@ -1,111 +0,0 @@
from .common import InfoExtractor
from .youtube import YoutubeIE, YoutubeTabIE
class BeatBumpVideoIE(InfoExtractor):
    """Redirects beatbump ``listen`` URLs to the matching YouTube Music watch URL."""

    _VALID_URL = r'https?://beatbump\.(?:ml|io)/listen\?id=(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://beatbump.ml/listen?id=MgNrAu2pzNs',
        'md5': '5ff3fff41d3935b9810a9731e485fe66',
        'info_dict': {
            'id': 'MgNrAu2pzNs',
            'ext': 'mp4',
            'artist': 'Stephen',
            'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
            'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
            'upload_date': '20190312',
            'categories': ['Music'],
            'playable_in_embed': True,
            'duration': 169,
            'like_count': int,
            'alt_title': 'Voyeur Girl',
            'view_count': int,
            'track': 'Voyeur Girl',
            'uploader': 'Stephen',
            'title': 'Voyeur Girl',
            'channel_follower_count': int,
            'age_limit': 0,
            'availability': 'public',
            'live_status': 'not_live',
            'album': 'it\'s too much love to know my dear',
            'channel': 'Stephen',
            'comment_count': int,
            'description': 'md5:7ae382a65843d6df2685993e90a8628f',
            'tags': 'count:11',
            'creator': 'Stephen',
            'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
            'channel_is_verified': True,
            'heatmap': 'count:100',
        },
    }, {
        'url': 'https://beatbump.io/listen?id=LDGZAprNGWo',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to ``YoutubeIE`` using the id taken from the query string."""
        youtube_id = self._match_id(url)
        watch_url = f'https://music.youtube.com/watch?v={youtube_id}'
        return self.url_result(watch_url, YoutubeIE, youtube_id)
class BeatBumpPlaylistIE(InfoExtractor):
    """Redirects beatbump release/artist/playlist URLs to the matching YouTube Music browse URL."""

    _VALID_URL = r'https?://beatbump\.(?:ml|io)/(?:release\?id=|artist/|playlist/)(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://beatbump.ml/release?id=MPREb_gTAcphH99wE',
        'playlist_count': 50,
        'info_dict': {
            'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
            'availability': 'unlisted',
            'view_count': int,
            'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
            'description': '',
            'tags': [],
            'modified_date': '20231110',
        },
        'expected_warnings': ['YouTube Music is not directly supported'],
    }, {
        'url': 'https://beatbump.ml/artist/UC_aEa8K-EOJ3D6gOs7HcyNg',
        'playlist_mincount': 1,
        'params': {'flatplaylist': True},
        'info_dict': {
            'id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
            'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
            'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
            'uploader_id': '@NoCopyrightSounds',
            'channel_follower_count': int,
            'title': 'NoCopyrightSounds',
            'uploader': 'NoCopyrightSounds',
            'description': 'md5:cd4fd53d81d363d05eee6c1b478b491a',
            'channel': 'NoCopyrightSounds',
            'tags': 'count:65',
            'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
            'channel_is_verified': True,
        },
        'expected_warnings': ['YouTube Music is not directly supported'],
    }, {
        'url': 'https://beatbump.ml/playlist/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
        'playlist_mincount': 1,
        'params': {'flatplaylist': True},
        'info_dict': {
            'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
            'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
            'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
            'uploader_id': '@NoCopyrightSounds',
            'title': 'NCS : All Releases 💿',
            'uploader': 'NoCopyrightSounds',
            'availability': 'public',
            'channel': 'NoCopyrightSounds',
            'tags': [],
            'modified_date': '20231112',
            'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
        },
        'expected_warnings': ['YouTube Music is not directly supported'],
    }, {
        'url': 'https://beatbump.io/playlist/VLPLFCHGavqRG-q_2ZhmgU2XB2--ZY6irT1c',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to ``YoutubeTabIE`` using the id taken from the URL."""
        browse_id = self._match_id(url)
        browse_url = f'https://music.youtube.com/browse/{browse_id}'
        return self.url_result(browse_url, YoutubeTabIE, browse_id)

View File

@ -1,71 +0,0 @@
import base64
import re
import urllib.parse
from .common import InfoExtractor
class BigflixIE(InfoExtractor):
    """Extractor for bigflix.com pages; media URLs are embedded URL-quoted and base64-encoded."""

    _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
    _TESTS = [{
        # 2 formats
        'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070',
        'info_dict': {
            'id': '16070',
            'ext': 'mp4',
            'title': 'Madarasapatinam',
            'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b',
            'formats': 'mincount:2',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # multiple formats
        'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Collect the per-height ``ContentURL_*`` formats plus the optional ``file=`` fallback."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<div[^>]+class=["\']pagetitle["\'][^>]*>(.+?)</div>',
            webpage, 'title')

        def decode_url(quoted_b64_url):
            # Undo the URL quoting first, then the base64 layer
            unquoted = urllib.parse.unquote(quoted_b64_url)
            return base64.b64decode(unquoted).decode('utf-8')

        formats = []
        content_urls = re.findall(
            r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage)
        for height, encoded_url in content_urls:
            video_url = decode_url(encoded_url)
            fmt = {
                'url': video_url,
                'format_id': f'{height}p',
                'height': int(height),
            }
            if video_url.startswith('rtmp'):
                fmt['ext'] = 'flv'
            formats.append(fmt)

        file_url = self._search_regex(
            r'file=([^&]+)', webpage, 'video url', default=None)
        if file_url:
            video_url = decode_url(file_url)
            # Add the fallback only when it is not already among the formats
            known_urls = [fmt['url'] for fmt in formats]
            if video_url not in known_urls:
                formats.append({
                    'url': video_url,
                })

        description = self._html_search_meta('description', webpage)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
        }

View File

@ -1,52 +0,0 @@
import urllib.parse
from .common import InfoExtractor
from ..utils import ExtractorError
class BokeCCBaseIE(InfoExtractor):
    """Shared helper for extracting formats from pages that embed the bokecc.com player."""

    def _extract_bokecc_formats(self, webpage, video_id, format_id=None):
        """Return one format per ``quality`` element of the playinfo XML.

        The player parameters (``siteid`` and ``vid``) are read from the query
        string of the player script/embed ``src`` found in ``webpage``.
        """
        player_params_str = self._html_search_regex(
            r'<(?:script|embed)[^>]+src=(?P<q>["\'])(?:https?:)?//p\.bokecc\.com/(?:player|flash/player\.swf)\?(?P<query>.+?)(?P=q)',
            webpage, 'player params', group='query')
        player_params = urllib.parse.parse_qs(player_params_str)

        playinfo_url = 'http://p.bokecc.com/servlet/playinfo?uid={}&vid={}&m=1'.format(
            player_params['siteid'][0], player_params['vid'][0])
        info_xml = self._download_xml(playinfo_url, video_id)

        formats = []
        for quality in info_xml.findall('./video/quality'):
            formats.append({
                'format_id': format_id,
                'url': quality.find('./copy').attrib['playurl'],
                'quality': int(quality.attrib['value']),
            })
        return formats
class BokeCCIE(BokeCCBaseIE):
    """Extractor for union.bokecc.com ``playvideo.bo`` pages."""

    IE_DESC = 'CC视频'
    _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
    _TESTS = [{
        'url': 'http://union.bokecc.com/playvideo.bo?vid=E0ABAE9D4F509B189C33DC5901307461&uid=FE644790DE9D154A',
        'info_dict': {
            'id': 'FE644790DE9D154A_E0ABAE9D4F509B189C33DC5901307461',
            'ext': 'flv',
            'title': 'BokeCC Video',
        },
    }]

    def _real_extract(self, url):
        """Build the id as ``<uid>_<vid>`` from the query string and extract the player formats.

        Raises an expected ExtractorError when ``vid`` or ``uid`` is absent.
        """
        query_string = self._match_valid_url(url).group('query')
        qs = urllib.parse.parse_qs(query_string)
        has_required_params = qs.get('vid') and qs.get('uid')
        if not has_required_params:
            raise ExtractorError('Invalid URL', expected=True)

        video_id = '{}_{}'.format(qs['uid'][0], qs['vid'][0])
        webpage = self._download_webpage(url, video_id)
        formats = self._extract_bokecc_formats(webpage, video_id)

        return {
            'id': video_id,
            'title': 'BokeCC Video',  # no title provided in the webpage
            'formats': formats,
        }

View File

@ -1,74 +0,0 @@
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
parse_iso8601,
traverse_obj,
urljoin,
)
class CaffeineTVIE(InfoExtractor):
    """Extractor for caffeine.tv video pages, backed by the public activity API."""

    _VALID_URL = r'https?://(?:www\.)?caffeine\.tv/[^/?#]+/video/(?P<id>[\da-f-]+)'
    _TESTS = [{
        'url': 'https://www.caffeine.tv/TsuSurf/video/cffc0a00-e73f-11ec-8080-80017d29f26e',
        'info_dict': {
            'id': 'cffc0a00-e73f-11ec-8080-80017d29f26e',
            'ext': 'mp4',
            'title': 'GOOOOD MORNINNNNN #highlights',
            'timestamp': 1654702180,
            'upload_date': '20220608',
            'uploader': 'RahJON Wicc',
            'uploader_id': 'TsuSurf',
            'duration': 3145,
            'age_limit': 17,
            'thumbnail': 'https://www.caffeine.tv/broadcasts/776b6f84-9cd5-42e3-af1d-4a776eeed697/replay/lobby.jpg',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'tags': ['highlights', 'battlerap'],
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }]

    def _real_extract(self, url):
        """Fetch the activity JSON and build formats and metadata from it and its ``broadcast_info``."""
        video_id = self._match_id(url)
        json_data = self._download_json(
            f'https://api.caffeine.tv/social/public/activity/{video_id}', video_id)
        broadcast_info = traverse_obj(json_data, ('broadcast_info', {dict})) or {}

        video_url = broadcast_info['video_url']
        # HLS manifests are expanded; anything else is used as a direct URL
        is_hls = determine_ext(video_url) == 'm3u8'
        formats = (
            self._extract_m3u8_formats(video_url, video_id, 'mp4')
            if is_hls else [{'url': video_url}])

        activity_fields = traverse_obj(json_data, {
            'like_count': ('like_count', {int_or_none}),
            'view_count': ('view_count', {int_or_none}),
            'comment_count': ('comment_count', {int_or_none}),
            'tags': ('tags', ..., {str}, filter),
            'uploader': ('user', 'name', {str}),
            'uploader_id': (((None, 'user'), 'username'), {str}, any),
            'is_live': ('is_live', {bool}),
        })
        broadcast_fields = traverse_obj(broadcast_info, {
            'title': ('broadcast_title', {str}),
            'duration': ('content_duration', {int_or_none}),
            'timestamp': ('broadcast_start_time', {parse_iso8601}),
            'thumbnail': ('preview_image_path', {urljoin(url)}),
        })

        age_limits = {
            # assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system
            'FOUR_PLUS': 0,
            'NINE_PLUS': 9,
            'TWELVE_PLUS': 12,
            'SEVENTEEN_PLUS': 17,
        }
        # Missing or unknown ratings fall back to 17
        age_limit = age_limits.get(broadcast_info.get('content_rating'), 17)

        return {
            'id': video_id,
            'formats': formats,
            **activity_fields,
            **broadcast_fields,
            'age_limit': age_limit,
        }

View File

@ -1,155 +0,0 @@
from .common import InfoExtractor
from ..utils import float_or_none, int_or_none, make_archive_id, traverse_obj
class CallinIE(InfoExtractor):
    """Extractor for callin.com episode pages, based on the page's Next.js data."""

    _VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?P<id>[-a-zA-Z]+)'
    _TESTS = [{
        'url': 'https://www.callin.com/episode/the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
        'info_dict': {
            'id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd',
            'title': 'The Title IX Regime and the Long March Through and Beyond the Institutions',
            'ext': 'ts',
            'display_id': 'the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
            'thumbnail': 're:https://.+\\.png',
            'description': 'First episode',
            'uploader': 'Wesley Yang',
            'timestamp': 1639404128.65,
            'upload_date': '20211213',
            'uploader_id': 'wesyang',
            'uploader_url': 'http://wesleyyang.substack.com',
            'channel': 'Conversations in Year Zero',
            'channel_id': '436d1f82ddeb30cd2306ea9156044d8d2cfdc3f1f1552d245117a42173e78553',
            'channel_url': 'https://callin.com/show/conversations-in-year-zero-oJNllRFSfx',
            'duration': 9951.936,
            'view_count': int,
            'categories': ['News & Politics', 'History', 'Technology'],
            'cast': ['Wesley Yang', 'KC Johnson', 'Gabi Abramovich'],
            'series': 'Conversations in Year Zero',
            'series_id': '436d1f82ddeb30cd2306ea9156044d8d2cfdc3f1f1552d245117a42173e78553',
            'episode': 'The Title IX Regime and the Long March Through and Beyond the Institutions',
            'episode_number': 1,
            'episode_id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd',
        },
    }, {
        'url': 'https://www.callin.com/episode/fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
        'md5': '14ede27ee2c957b7e4db93140fc0745c',
        'info_dict': {
            'id': 'c3dab47f237bf953d180d3f243477a84302798be0e0b29bc9ade6d60a69f04f5',
            'ext': 'ts',
            'title': 'FCC Commissioner Brendan Carr on Elons Starlink',
            'description': 'Or, why the government doesnt like SpaceX',
            'channel': 'The Pull Request',
            'channel_url': 'https://callin.com/show/the-pull-request-ucnDJmEKAa',
            'duration': 3182.472,
            'series_id': '7e9c23156e4aecfdcaef46bfb2ed7ca268509622ec006c0f0f25d90e34496638',
            'uploader_url': 'http://thepullrequest.com',
            'upload_date': '20220902',
            'episode': 'FCC Commissioner Brendan Carr on Elons Starlink',
            'display_id': 'fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
            'series': 'The Pull Request',
            'channel_id': '7e9c23156e4aecfdcaef46bfb2ed7ca268509622ec006c0f0f25d90e34496638',
            'view_count': int,
            'uploader': 'Antonio García Martínez',
            'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/1ade9142625344045dc17cf523469ced1d93610762f4c886d06aa190a2f979e8.png',
            'episode_id': 'c3dab47f237bf953d180d3f243477a84302798be0e0b29bc9ade6d60a69f04f5',
            'timestamp': 1662100688.005,
        },
    }, {
        'url': 'https://www.callin.com/episode/episode-81-elites-melt-down-over-student-debt-lzxMidUnjA',
        'md5': '16f704ddbf82a27e3930533b12062f07',
        'info_dict': {
            'id': '8d06f869798f93a7814e380bceabea72d501417e620180416ff6bd510596e83c',
            'ext': 'ts',
            'title': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?',
            'description': 'Lets talk todays episode about the primary election shake up in NYC and the elites melting down over student debt cancelation.',
            'channel': 'The DEBRIEF With Briahna Joy Gray',
            'channel_url': 'https://callin.com/show/the-debrief-with-briahna-joy-gray-siiFDzGegm',
            'duration': 10043.16,
            'series_id': '61cea58444465fd26674069703bd8322993bc9e5b4f1a6d0872690554a046ff7',
            'uploader_url': 'http://patreon.com/badfaithpodcast',
            'upload_date': '20220826',
            'episode': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?',
            'display_id': 'episode-',
            'series': 'The DEBRIEF With Briahna Joy Gray',
            'channel_id': '61cea58444465fd26674069703bd8322993bc9e5b4f1a6d0872690554a046ff7',
            'view_count': int,
            'uploader': 'Briahna Gray',
            'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/461ea0d86172cb6aff7d6c80fd49259cf5e64bdf737a4650f8bc24cf392ca218.png',
            'episode_id': '8d06f869798f93a7814e380bceabea72d501417e620180416ff6bd510596e83c',
            'timestamp': 1661476708.282,
        },
    }]

    def try_get_user_name(self, d):
        """Return a display name built from ``d['first']`` and ``d['last']``.

        When both are present they are joined with a space. When either is
        missing, the first non-empty one is returned, or ``None`` if there is
        none.
        """
        names = [d.get(n) for n in ('first', 'last')]
        if None in names:
            # The default must be positional: the builtin next() rejects
            # keyword arguments, so `default=None` raised TypeError here.
            return next((n for n in names if n), None)
        return ' '.join(names)

    def _real_extract(self, url):
        """Extract the episode from the page's Next.js data, enriched by the show JSON when available."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        next_data = self._search_nextjs_data(webpage, display_id)
        episode = next_data['props']['pageProps']['episode']

        video_id = episode['id']
        title = episode.get('title') or self._generic_title('', webpage)
        url = episode['m3u8']
        formats = self._extract_m3u8_formats(url, display_id, ext='ts')

        show = traverse_obj(episode, ('show', 'title'))
        show_id = traverse_obj(episode, ('show', 'id'))

        # The show JSON is optional; it needs the app build slug and the show
        # slug, and its download is non-fatal
        show_json = None
        app_slug = (self._html_search_regex(
            '<script\\s+src=["\']/_next/static/([-_a-zA-Z0-9]+)/_',
            webpage, 'app slug', fatal=False) or next_data.get('buildId'))
        show_slug = traverse_obj(episode, ('show', 'linkObj', 'resourceUrl'))
        if app_slug and show_slug and '/' in show_slug:
            show_slug = show_slug.rsplit('/', 1)[1]
            show_json_url = f'https://www.callin.com/_next/data/{app_slug}/show/{show_slug}.json'
            show_json = self._download_json(show_json_url, display_id, fatal=False)

        # Prefer the show's first host; fall back to the episode's first speaker
        host = (traverse_obj(show_json, ('pageProps', 'show', 'hosts', 0))
                or traverse_obj(episode, ('speakers', 0)))

        host_nick = traverse_obj(host, ('linkObj', 'resourceUrl'))
        host_nick = host_nick.rsplit('/', 1)[1] if (host_nick and '/' in host_nick) else None

        cast = list(filter(None, [
            self.try_get_user_name(u) for u in
            traverse_obj(episode, (('speakers', 'callerTags'), ...)) or []
        ]))

        # The episode number is derived from the episode's position counted
        # from the end of the show's episode list
        episode_list = traverse_obj(show_json, ('pageProps', 'show', 'episodes')) or []
        episode_number = next(
            (len(episode_list) - i for i, e in enumerate(episode_list) if e.get('id') == video_id),
            None)

        return {
            'id': video_id,
            '_old_archive_ids': [make_archive_id(self, display_id.rsplit('-', 1)[-1])],
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'thumbnail': traverse_obj(episode, ('show', 'photo')),
            'description': episode.get('description'),
            'uploader': self.try_get_user_name(host) if host else None,
            'timestamp': episode.get('publishedAt'),
            'uploader_id': host_nick,
            'uploader_url': traverse_obj(show_json, ('pageProps', 'show', 'url')),
            'channel': show,
            'channel_id': show_id,
            'channel_url': traverse_obj(episode, ('show', 'linkObj', 'resourceUrl')),
            'duration': float_or_none(episode.get('runtime')),
            'view_count': int_or_none(episode.get('plays')),
            'categories': traverse_obj(episode, ('show', 'categorizations', ..., 'name')),
            'cast': cast if cast else None,
            'series': show,
            'series_id': show_id,
            'episode': title,
            'episode_number': episode_number,
            'episode_id': video_id,
        }

View File

@ -1,155 +0,0 @@
import re
import urllib.parse
from .common import InfoExtractor
from ..utils import (
clean_html,
parse_duration,
str_to_int,
unified_strdate,
)
class CamdemyIE(InfoExtractor):
    """Extractor for camdemy.com media pages, using the oembed endpoint and the media's file list."""

    _VALID_URL = r'https?://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
    _TESTS = [{
        # single file
        'url': 'http://www.camdemy.com/media/5181/',
        'md5': '5a5562b6a98b37873119102e052e311b',
        'info_dict': {
            'id': '5181',
            'ext': 'mp4',
            'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
            'thumbnail': r're:^https?://.*\.jpg$',
            'creator': 'ss11spring',
            'duration': 1591,
            'upload_date': '20130114',
            'view_count': int,
        },
    }, {
        # With non-empty description
        # webpage returns "No permission or not login"
        'url': 'http://www.camdemy.com/media/13885',
        'md5': '4576a3bb2581f86c61044822adbd1249',
        'info_dict': {
            'id': '13885',
            'ext': 'mp4',
            'title': 'EverCam + Camdemy QuickStart',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:2a9f989c2b153a2342acee579c6e7db6',
            'creator': 'evercam',
            'duration': 318,
        },
    }, {
        # External source (YouTube)
        'url': 'http://www.camdemy.com/media/14842',
        'info_dict': {
            'id': '2vsYQzNIsJo',
            'ext': 'mp4',
            'title': 'Excel 2013 Tutorial - How to add Password Protection',
            'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
            'upload_date': '20130211',
            'uploader': 'Hun Kim',
            'uploader_id': 'hunkimtutorials',
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Return a url_result for externally sourced media, otherwise a direct-URL info dict."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Media hosted elsewhere links to its source; hand that URL over as-is
        external_source = self._html_search_regex(
            r"class=['\"]srcFrom['\"][^>]*>Sources?(?:\s+from)?\s*:\s*<a[^>]+(?:href|title)=(['\"])(?P<url>(?:(?!\1).)+)\1",
            webpage, 'external source', default=None, group='url')
        if external_source:
            return self.url_result(external_source)

        oembed_obj = self._download_json(
            'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)
        title = oembed_obj['title']
        thumb_url = oembed_obj['thumbnail_url']

        # The file list sits in a 'video/' folder resolved relative to the thumbnail URL
        video_folder = urllib.parse.urljoin(thumb_url, 'video/')
        file_list_url = urllib.parse.urljoin(video_folder, 'fileList.xml')
        file_list_doc = self._download_xml(
            file_list_url, video_id, 'Downloading filelist XML')
        file_name = file_list_doc.find('./video/item/fileName').text
        video_url = urllib.parse.urljoin(video_folder, file_name)

        # Some URLs return "No permission or not login" in a webpage despite being
        # freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885)
        published_on = self._search_regex(
            r'>published on ([^<]+)<', webpage,
            'upload date', default=None)
        upload_date = unified_strdate(published_on)

        views_text = self._search_regex(
            r'role=["\']viewCnt["\'][^>]*>([\d,.]+) views',
            webpage, 'view count', default=None)
        view_count = str_to_int(views_text)

        # Page meta description first, oembed description as fallback
        description = self._html_search_meta(
            'description', webpage, default=None)
        if not description:
            description = clean_html(oembed_obj.get('description'))

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumb_url,
            'description': description,
            'creator': oembed_obj.get('author_name'),
            'duration': parse_duration(oembed_obj.get('duration')),
            'upload_date': upload_date,
            'view_count': view_count,
        }
class CamdemyFolderIE(InfoExtractor):
    """Turn a camdemy.com folder page into a playlist of its media pages."""

    _VALID_URL = r'https?://(?:www\.)?camdemy\.com/folder/(?P<id>\d+)'
    _TESTS = [{
        # links with trailing slash
        'url': 'http://www.camdemy.com/folder/450',
        'info_dict': {
            'id': '450',
            'title': '信號與系統 2012 & 2011 (Signals and Systems)',
        },
        'playlist_mincount': 145,
    }, {
        # links without trailing slash
        # and multi-page
        'url': 'http://www.camdemy.com/folder/853',
        'info_dict': {
            'id': '853',
            'title': '科學計算 - 使用 Matlab',
        },
        'playlist_mincount': 20,
    }, {
        # with displayMode parameter. For testing the codes to add parameters
        'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg',
        'info_dict': {
            'id': '853',
            'title': '科學計算 - 使用 Matlab',
        },
        'playlist_mincount': 20,
    }]

    def _real_extract(self, url):
        """Download the folder in list mode and collect every /media/ link."""
        folder_id = self._match_id(url)

        # Force displayMode=list (overriding any value already present) so
        # that all links are displayed in a single page.
        url_parts = urllib.parse.urlparse(url)
        params = dict(urllib.parse.parse_qsl(url_parts.query))
        params['displayMode'] = 'list'
        listing_url = urllib.parse.urlunparse(
            url_parts._replace(query=urllib.parse.urlencode(params)))

        page = self._download_webpage(listing_url, folder_id)

        entries = []
        for media_path in re.findall(r"href='(/media/\d+/?)'", page):
            entries.append(self.url_result('http://www.camdemy.com' + media_path))

        # The folder title is read from the page's "keywords" meta tag.
        folder_title = self._html_search_meta('keywords', page)
        return self.playlist_result(entries, folder_id, folder_title)

View File

@ -1,70 +0,0 @@
import re
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
qualities,
)
class ClippitIE(InfoExtractor):
    """Extractor for clips hosted on clippituser.tv."""

    _VALID_URL = r'https?://(?:www\.)?clippituser\.tv/c/(?P<id>[a-z]+)'
    _TEST = {
        'url': 'https://www.clippituser.tv/c/evmgm',
        'md5': '963ae7a59a2ec4572ab8bf2f2d2c5f09',
        'info_dict': {
            'id': 'evmgm',
            'ext': 'mp4',
            'title': 'Bye bye Brutus. #BattleBots - Clippit',
            'uploader': 'lizllove',
            'uploader_url': 'https://www.clippituser.tv/p/lizllove',
            'timestamp': 1472183818,
            'upload_date': '20160826',
            'description': 'BattleBots | ABC',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        """Scrape the clip page for its SD/HD files and basic metadata."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(r'<title.*>(.+?)</title>', webpage, 'title')

        # Format ids in ascending order of preference.
        format_ids = ('sd', 'hd')
        rank = qualities(format_ids)

        formats = []
        for format_id in format_ids:
            media_url = self._html_search_regex(
                rf'data-{format_id}-file="(.+?)"', webpage, 'url', fatal=False)
            if media_url:
                # The height, when present, is the file name: ".../<height>.mp4"
                height_match = re.search(r'/(?P<height>\d+)\.mp4', media_url)
                height = int(height_match.group('height')) if height_match else None
                formats.append({
                    'url': media_url,
                    'format_id': format_id,
                    'quality': rank(format_id),
                    'height': height,
                })

        uploader = self._html_search_regex(
            r'class="username".*>\s+(.+?)\n', webpage, 'uploader', fatal=False)
        uploader_url = None
        if uploader:
            uploader_url = 'https://www.clippituser.tv/p/' + uploader

        timestamp = self._html_search_regex(
            r'datetime="(.+?)"', webpage, 'date', fatal=False)
        thumbnail = self._html_search_regex(
            r'data-image="(.+?)"', webpage, 'thumbnail', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'uploader': uploader,
            'uploader_url': uploader_url,
            'timestamp': parse_iso8601(timestamp),
            'description': self._og_search_description(webpage),
            'thumbnail': thumbnail,
        }

View File

@ -1,113 +0,0 @@
from .common import InfoExtractor
from ..utils import (
float_or_none,
int_or_none,
)
class CONtvIE(InfoExtractor):
    """Extractor for contv.com movies and episodic series."""

    _VALID_URL = r'https?://(?:www\.)?contv\.com/details-movie/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'https://www.contv.com/details-movie/CEG10022949/days-of-thrills-&-laughter',
        'info_dict': {
            'id': 'CEG10022949',
            'ext': 'mp4',
            'title': 'Days Of Thrills & Laughter',
            'description': 'md5:5d6b3d0b1829bb93eb72898c734802eb',
            'upload_date': '20180703',
            'timestamp': 1530634789.61,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://www.contv.com/details-movie/CLIP-show_fotld_bts/fight-of-the-living-dead:-behind-the-scenes-bites',
        'info_dict': {
            'id': 'CLIP-show_fotld_bts',
            'title': 'Fight of the Living Dead: Behind the Scenes Bites',
        },
        'playlist_mincount': 7,
    }]

    def _real_extract(self, url):
        """Return a playlist for episodic items, otherwise a single video."""
        video_id = self._match_id(url)
        details = self._download_json(
            'http://metax.contv.live.junctiontv.net/metax/2.5/details/' + video_id,
            video_id, query={'device': 'web'})

        if details.get('type') == 'episodic':
            seasons = self._download_json(
                'http://metax.contv.live.junctiontv.net/metax/2.5/seriesfeed/json/' + video_id,
                video_id)
            # Flatten every season's episode list; episodes without an id
            # are dropped.
            episode_ids = (
                episode.get('id')
                for season in seasons
                for episode in season.get('episodes', []))
            entries = [
                self.url_result(
                    'https://www.contv.com/details-movie/' + episode_id,
                    CONtvIE.ie_key(), episode_id)
                for episode_id in episode_ids if episode_id]
            return self.playlist_result(entries, video_id, details.get('title'))

        m_details = details['details']
        title = details['title']

        formats = []
        media_hls_url = m_details.get('media_hls_url')
        if media_hls_url:
            formats.extend(self._extract_m3u8_formats(
                media_hls_url, video_id, 'mp4',
                m3u8_id='hls', fatal=False))
        media_mp4_url = m_details.get('media_mp4_url')
        if media_mp4_url:
            formats.append({
                'format_id': 'http',
                'url': media_mp4_url,
            })

        # Every caption URL is filed under 'en', whatever its key is.
        caption_urls = list((m_details.get('captions') or {}).values())
        subtitles = {}
        if caption_urls:
            subtitles['en'] = [{'url': caption_url} for caption_url in caption_urls]

        thumbnails = []
        for image in m_details.get('images', []):
            image_url = image.get('url')
            if image_url:
                thumbnails.append({
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

        # Use the first non-empty description, trying the longest variant first.
        candidates = (
            m_details.get(prefix + 'description')
            for prefix in ('large_', 'medium_', 'small_', ''))
        description = next((text for text in candidates if text), None)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': description,
            'timestamp': float_or_none(details.get('metax_added_on'), 1000),
            'subtitles': subtitles,
            'duration': float_or_none(m_details.get('duration'), 1000),
            'view_count': int_or_none(details.get('num_watched')),
            'like_count': int_or_none(details.get('num_fav')),
            'categories': details.get('category'),
            'tags': details.get('tags'),
            'season_number': int_or_none(details.get('season')),
            'episode_number': int_or_none(details.get('episode')),
            'release_year': int_or_none(details.get('pub_year')),
        }

View File

@ -1,113 +0,0 @@
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
try_get,
)
class DroobleIE(InfoExtractor):
    """Extractor for drooble.com songs, videos, albums and user listings."""

    _VALID_URL = r'''(?x)https?://drooble\.com/(?:
        (?:(?P<user>[^/]+)/)?(?P<kind>song|videos|music/albums)/(?P<id>\d+)|
        (?P<user_2>[^/]+)/(?P<kind_2>videos|music))
    '''
    _TESTS = [{
        'url': 'https://drooble.com/song/2858030',
        'md5': '5ffda90f61c7c318dc0c3df4179eb064',
        'info_dict': {
            'id': '2858030',
            'ext': 'mp3',
            'title': 'Skankocillin',
            'upload_date': '20200801',
            'timestamp': 1596241390,
            'uploader_id': '95894',
            'uploader': 'Bluebeat Shelter',
        },
    }, {
        'url': 'https://drooble.com/karl340758/videos/2859183',
        'info_dict': {
            'id': 'J6QCQY_I5Tk',
            'ext': 'mp4',
            'title': 'Skankocillin',
            'uploader_id': 'UCrSRoI5vVyeYihtWEYua7rg',
            'description': 'md5:ffc0bd8ba383db5341a86a6cd7d9bcca',
            'upload_date': '20200731',
            'uploader': 'Bluebeat Shelter',
        },
    }, {
        'url': 'https://drooble.com/karl340758/music/albums/2858031',
        'info_dict': {
            'id': '2858031',
        },
        'playlist_mincount': 8,
    }, {
        'url': 'https://drooble.com/karl340758/music',
        'info_dict': {
            'id': 'karl340758',
        },
        'playlist_mincount': 8,
    }, {
        'url': 'https://drooble.com/karl340758/videos',
        'info_dict': {
            'id': 'karl340758',
        },
        'playlist_mincount': 8,
    }]

    def _call_api(self, method, video_id, data=None):
        """POST `data` as JSON to the given API method and return its payload.

        The response is indexed as a two-item sequence: a success flag
        followed by the payload. Raises ExtractorError when the flag is falsy.
        """
        response = self._download_json(
            f'https://drooble.com/api/dt/{method}', video_id, data=json.dumps(data).encode())
        if not response[0]:
            raise ExtractorError('Unable to download JSON metadata')
        return response[1]

    def _real_extract(self, url):
        """Return a single entry, or a playlist when several media are found.

        Raises ExtractorError when the API returns no usable media.
        """
        mobj = self._match_valid_url(url)
        user = mobj.group('user') or mobj.group('user_2')
        kind = mobj.group('kind') or mobj.group('kind_2')
        display_id = mobj.group('id') or user
        is_music = kind in ('music/albums', 'music')

        if mobj.group('kind_2') == 'videos':
            data = {'from_user': display_id, 'album': -1, 'limit': 18, 'offset': 0, 'order': 'new2old', 'type': 'video'}
        elif is_music:
            data = {'user': user, 'public_only': True, 'individual_limit': {'singles': 1, 'albums': 1, 'playlists': 1}}
        else:
            data = {'url_slug': display_id, 'children': 10, 'order': 'old2new'}

        method = 'getMusicOverview' if is_music else 'getElements'
        json_data = self._call_api(method, display_id, data=data)
        if is_music:
            json_data = json_data['singles']['list']

        entries = []
        for media in json_data:
            media_url = media.get('external_media_url') or media.get('link')
            if not media_url:
                # Nothing to download for this item; previously this crashed
                # with AttributeError on `None.startswith`.
                continue
            if media_url.startswith('https://www.youtube.com'):
                entries.append({
                    '_type': 'url',
                    'url': media_url,
                    'ie_key': 'Youtube',
                })
                continue
            is_audio = (media.get('type') or '').lower() == 'audio'
            entries.append({
                'url': media_url,
                'id': media['id'],
                'title': media['title'],
                'duration': int_or_none(media.get('duration')),
                'timestamp': int_or_none(media.get('timestamp')),
                'album': try_get(media, lambda x: x['album']['title']),
                'uploader': try_get(media, lambda x: x['creator']['display_name']),
                'uploader_id': try_get(media, lambda x: x['creator']['id']),
                'thumbnail': media.get('image_comment'),
                'like_count': int_or_none(media.get('likes')),
                'vcodec': 'none' if is_audio else None,
                'ext': 'mp3' if is_audio else None,
            })

        if not entries:
            # Report an explicit extraction failure instead of an IndexError.
            raise ExtractorError('Unable to find any media')
        if len(entries) > 1:
            return self.playlist_result(entries, display_id)
        return entries[0]

View File

@ -1,246 +0,0 @@
import base64
import re
import urllib.parse
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
extract_attributes,
get_elements_by_class,
int_or_none,
js_to_json,
smuggle_url,
unescapeHTML,
)
def _get_elements_by_tag_and_attrib(html, tag=None, attribute=None, value=None, escape_value=True):
    """Return a list of regex matches for elements with the given tag/attribute.

    Each item is an `re.Match` exposing the named groups ``tag`` and
    ``content`` (the text between the opening and closing tag), plus
    ``attribute`` and ``value`` when those arguments were supplied.

    tag          -- regex for the tag name; None matches any tag name
    attribute    -- literal attribute name the element must carry, or None
    value        -- attribute value to match, or None for no value check
    escape_value -- when True `value` is matched literally, otherwise it is
                    used as a regex fragment
    """
    if tag is None:
        tag = '[a-zA-Z0-9:._-]+'
    if attribute is None:
        attribute = ''
    else:
        attribute = rf'\s+(?P<attribute>{re.escape(attribute)})'
    if value is None:
        value = ''
    else:
        value = re.escape(value) if escape_value else value
        value = f'=[\'"]?(?P<value>{value})[\'"]?'

    # Verbose pattern: opening tag, any leading attributes, the wanted
    # attribute/value, any trailing attributes, then the lazily-matched
    # content up to the first closing tag with the same name.
    return list(re.finditer(rf'''(?xs)
        <(?P<tag>{tag})
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        {attribute}{value}
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </(?P=tag)>
    ''', html))
def _get_element_by_tag_and_attrib(html, tag=None, attribute=None, value=None, escape_value=True):
    """Return the first result of _get_elements_by_tag_and_attrib, or None if it is empty."""
    matches = _get_elements_by_tag_and_attrib(html, tag, attribute, value, escape_value)
    if not matches:
        return None
    return matches[0]
class DubokuIE(InfoExtractor):
    """Extractor for single episodes on duboku.io."""

    IE_NAME = 'duboku'
    IE_DESC = 'www.duboku.io'

    _VALID_URL = r'(?:https?://[^/]+\.duboku\.io/vodplay/)(?P<id>[0-9]+-[0-9-]+)\.html.*'
    _TESTS = [{
        'url': 'https://w.duboku.io/vodplay/1575-1-1.html',
        'info_dict': {
            'id': '1575-1-1',
            'ext': 'mp4',
            'series': '白色月光',
            'title': 'contains:白色月光',
            'season_number': 1,
            'episode_number': 1,
            'season': 'Season 1',
            'episode_id': '1',
            'season_id': '1',
            'episode': 'Episode 1',
        },
        'params': {
            'skip_download': 'm3u8 download',
        },
    }, {
        'url': 'https://w.duboku.io/vodplay/1588-1-1.html',
        'info_dict': {
            'id': '1588-1-1',
            'ext': 'mp4',
            'series': '亲爱的自己',
            'title': 'contains:第1集',
            'season_number': 1,
            'episode_number': 1,
            'episode': 'Episode 1',
            'season': 'Season 1',
            'episode_id': '1',
            'season_id': '1',
        },
        'params': {
            'skip_download': 'm3u8 download',
        },
    }]

    _PLAYER_DATA_PATTERN = r'player_data\s*=\s*(\{\s*(.*)})\s*;?\s*</script'

    def _real_extract(self, url):
        """Resolve the episode's player data into formats or an embedded URL."""
        video_id = self._match_id(url)
        # The id has the form "<series>-<season>-<episode>".
        id_parts = video_id.split('-')
        series_id, season_id, episode_id = id_parts[0], id_parts[1], id_parts[2]

        webpage_url = f'https://w.duboku.io/vodplay/{video_id}.html'
        webpage_html = self._download_webpage(webpage_url, video_id)

        # extract video url
        player_data = self._parse_json(
            self._search_regex(self._PLAYER_DATA_PATTERN, webpage_html, 'player_data'),
            video_id, js_to_json)

        # extract title: take the first "title" element whose link points
        # back to this series' page
        series_title = None
        title = None
        for element in get_elements_by_class('title', webpage_html):
            anchor = re.search(r'<a\s+.*>(.*)</a>', element)
            if not anchor:
                continue
            href = extract_attributes(anchor.group(0)).get('href')
            if not href:
                continue
            linked_id = re.search(r'/(\d+)\.html', href)
            if not linked_id or linked_id.group(1) != series_id:
                continue
            series_title = re.sub(r'[\s\r\n\t]+', ' ', clean_html(anchor.group(0)))
            title = re.sub(r'[\s\r\n\t]+', ' ', clean_html(element))
            break

        data_url = player_data.get('url')
        if not data_url:
            raise ExtractorError('Cannot find url in player_data')

        # 'encrypt' selects how the URL is obfuscated:
        # 1 = percent-encoded, 2 = base64 wrapping a percent-encoded string
        player_encrypt = player_data.get('encrypt')
        if player_encrypt == 1:
            data_url = urllib.parse.unquote(data_url)
        elif player_encrypt == 2:
            decoded = base64.b64decode(data_url).decode('ascii')
            data_url = urllib.parse.unquote(decoded)

        headers = {'Referer': webpage_url}
        metadata = {
            'id': video_id,
            'title': title,
            'series': series_title,
            'season_number': int_or_none(season_id),
            'season_id': season_id,
            'episode_number': int_or_none(episode_id),
            'episode_id': episode_id,
        }

        # if it is an embedded iframe, maybe it's an external source
        if player_data.get('from') == 'iframe':
            # use _type url_transparent to retain the meaningful details
            # of the video.
            return {
                '_type': 'url_transparent',
                'url': smuggle_url(data_url, {'referer': webpage_url}),
                **metadata,
            }

        formats = self._extract_m3u8_formats(data_url, video_id, 'mp4', headers=headers)
        return {
            **metadata,
            'formats': formats,
            'http_headers': headers,
        }
class DubokuPlaylistIE(InfoExtractor):
    """Extractor for a whole series (one of its episode lists) on duboku.io."""

    IE_NAME = 'duboku:list'
    IE_DESC = 'www.duboku.io entire series'

    _VALID_URL = r'(?:https?://[^/]+\.duboku\.io/voddetail/)(?P<id>[0-9]+)\.html.*'
    _TESTS = [{
        'url': 'https://w.duboku.io/voddetail/1575.html',
        'info_dict': {
            'id': 'startswith:1575',
            'title': '白色月光',
        },
        'playlist_count': 12,
    }, {
        'url': 'https://w.duboku.io/voddetail/1554.html',
        'info_dict': {
            'id': 'startswith:1554',
            'title': '以家人之名',
        },
        'playlist_mincount': 30,
    }]

    def _real_extract(self, url):
        """Return the episode list named by the URL fragment, else the first one."""
        mobj = self._match_valid_url(url)
        if mobj is None:
            raise ExtractorError(f'Invalid URL: {url}')
        series_id = mobj.group('id')
        fragment = urllib.parse.urlparse(url).fragment

        webpage_url = f'https://w.duboku.io/voddetail/{series_id}.html'
        webpage_html = self._download_webpage(webpage_url, series_id)

        # extract title: <h1 class="title">, then the keywords meta tag,
        # then the <title> element
        heading = _get_element_by_tag_and_attrib(webpage_html, 'h1', 'class', 'title')
        title = unescapeHTML(heading.group('content')) if heading else None
        title = title or self._html_search_meta('keywords', webpage_html)
        if not title:
            title_tag = _get_element_by_tag_and_attrib(webpage_html, 'title')
            title = unescapeHTML(title_tag.group('content')) if title_tag else None

        # extract playlists: every element whose id looks like "playlist<N>"
        # maps to the links found inside it
        playlists = {
            container.group('value'): [
                {
                    'href': unescapeHTML(link.group('value')),
                    'title': unescapeHTML(link.group('content')),
                }
                for link in _get_elements_by_tag_and_attrib(
                    container.group('content'), 'a', 'href', value='[^\'"]+?', escape_value=False)
            ]
            for container in _get_elements_by_tag_and_attrib(
                webpage_html, attribute='id', value='playlist\\d+', escape_value=False)
        }

        # select the specified playlist if url fragment exists
        if fragment:
            playlist_id, playlist = fragment, playlists.get(fragment)
        else:
            playlist_id, playlist = next(iter(playlists.items()), (None, None))

        if not playlist:
            raise ExtractorError(
                f'Cannot find {fragment}' if fragment else 'Cannot extract playlist')

        # return url results
        entries = [
            self.url_result(
                urllib.parse.urljoin('https://w.duboku.io', episode['href']),
                ie=DubokuIE.ie_key(), video_title=episode.get('title'))
            for episode in playlist]
        return self.playlist_result(entries, f'{series_id}#{playlist_id}', title)

View File

@ -1,158 +0,0 @@
import json
import random
from .common import InfoExtractor
from ..utils import (
ExtractorError,
)
class EightTracksIE(InfoExtractor):
    """Extractor for 8tracks.com mixes, returned as playlists of tracks."""

    IE_NAME = '8tracks'
    _VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
    _TEST = {
        'name': 'EightTracks',
        'url': 'http://8tracks.com/ytdl/youtube-dl-test-tracks-a',
        'info_dict': {
            'id': '1336550',
            'display_id': 'youtube-dl-test-tracks-a',
            'description': "test chars: \"'/\\ä↭",
            'title': "youtube-dl test tracks \"'/\\ä↭<>",
        },
        'playlist': [
            {
                'md5': '96ce57f24389fc8734ce47f4c1abcc55',
                'info_dict': {
                    'id': '11885610',
                    'ext': 'm4a',
                    'title': "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
                    'uploader_id': 'ytdl',
                },
            },
            {
                'md5': '4ab26f05c1f7291ea460a3920be8021f',
                'info_dict': {
                    'id': '11885608',
                    'ext': 'm4a',
                    'title': "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
                    'uploader_id': 'ytdl',
                },
            },
            {
                'md5': 'd30b5b5f74217410f4689605c35d1fd7',
                'info_dict': {
                    'id': '11885679',
                    'ext': 'm4a',
                    'title': "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
                    'uploader_id': 'ytdl',
                },
            },
            {
                'md5': '4eb0a669317cd725f6bbd336a29f923a',
                'info_dict': {
                    'id': '11885680',
                    'ext': 'm4a',
                    'title': "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
                    'uploader_id': 'ytdl',
                },
            },
            {
                'md5': '1893e872e263a2705558d1d319ad19e8',
                'info_dict': {
                    'id': '11885682',
                    'ext': 'm4a',
                    'title': "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
                    'uploader_id': 'ytdl',
                },
            },
            {
                'md5': 'b673c46f47a216ab1741ae8836af5899',
                'info_dict': {
                    'id': '11885683',
                    'ext': 'm4a',
                    'title': "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
                    'uploader_id': 'ytdl',
                },
            },
            {
                'md5': '1d74534e95df54986da7f5abf7d842b7',
                'info_dict': {
                    'id': '11885684',
                    'ext': 'm4a',
                    'title': "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
                    'uploader_id': 'ytdl',
                },
            },
            {
                'md5': 'f081f47af8f6ae782ed131d38b9cd1c0',
                'info_dict': {
                    'id': '11885685',
                    'ext': 'm4a',
                    'title': "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
                    'uploader_id': 'ytdl',
                },
            },
        ],
    }

    def _real_extract(self, url):
        """Walk the mix one track at a time via the play/next endpoints.

        Each track's JSON supplies the id used to request the following
        track, so the requests are made strictly in sequence.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        data = self._parse_json(
            self._search_regex(
                r'(?s)PAGE\.mix\s*=\s*({.+?});\n', webpage, 'trax information'),
            playlist_id)

        # Random session id sent with every play/next request.
        session = str(random.randint(0, 1000000000))
        mix_id = data['id']
        track_count = data['tracks_count']
        duration = data['duration']
        # An empty mix (track_count == 0) used to raise ZeroDivisionError
        # here; it now falls through to the predefined average below.
        avg_song_duration = float(duration) / track_count if track_count else 0
        # duration is sometimes negative, use predefined avg duration
        if avg_song_duration <= 0:
            avg_song_duration = 300

        first_url = f'http://8tracks.com/sets/{session}/play?player=sm&mix_id={mix_id}&format=jsonh'
        next_url = first_url
        entries = []

        for i in range(track_count):
            api_json = None
            download_tries = 0

            # Retry the same request, sleeping for the average song duration
            # between attempts; the error is re-raised once the retry
            # counter exceeds 3.
            while api_json is None:
                try:
                    api_json = self._download_webpage(
                        next_url, playlist_id,
                        note='Downloading song information %d/%d' % (i + 1, track_count),
                        errnote='Failed to download song information')
                except ExtractorError:
                    if download_tries > 3:
                        raise
                    else:
                        download_tries += 1
                        self._sleep(avg_song_duration, playlist_id)

            api_data = json.loads(api_json)
            track_data = api_data['set']['track']
            info = {
                'id': str(track_data['id']),
                'url': track_data['track_file_stream_url'],
                'title': track_data['performer'] + ' - ' + track_data['name'],
                'raw_title': track_data['name'],
                'uploader_id': data['user']['login'],
                'ext': 'm4a',
            }
            entries.append(info)

            next_url = 'http://8tracks.com/sets/{}/next?player=sm&mix_id={}&format=jsonh&track_id={}'.format(
                session, mix_id, track_data['id'])

        return {
            '_type': 'playlist',
            'entries': entries,
            'id': str(mix_id),
            'display_id': playlist_id,
            'title': data.get('name'),
            'description': data.get('description'),
        }

View File

@ -1,81 +0,0 @@
from .common import InfoExtractor
from ..networking import Request
from ..utils import (
float_or_none,
int_or_none,
join_nonempty,
parse_iso8601,
)
class EitbIE(InfoExtractor):
    """Extractor for eitb.tv videos."""

    IE_NAME = 'eitb.tv'
    _VALID_URL = r'https?://(?:www\.)?eitb\.tv/(?:eu/bideoa|es/video)/[^/]+/\d+/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/4104995148001/4090227752001/lasa-y-zabala-30-anos/',
        'md5': 'edf4436247185adee3ea18ce64c47998',
        'info_dict': {
            'id': '4090227752001',
            'ext': 'mp4',
            'title': '60 minutos (Lasa y Zabala, 30 años)',
            'description': 'Programa de reportajes de actualidad.',
            'duration': 3996.76,
            'timestamp': 1381789200,
            'upload_date': '20131014',
            'tags': list,
        },
    }

    def _real_extract(self, url):
        """Collect the HTTP renditions plus optional HLS and HDS formats."""
        video_id = self._match_id(url)

        video = self._download_json(
            f'http://mam.eitb.eus/mam/REST/ServiceMultiweb/Video/MULTIWEBTV/{video_id}/',
            video_id, 'Downloading video JSON')

        media = video['web_media'][0]

        # Progressive renditions; those without a URL are ignored.
        formats = []
        for rendition in media['RENDITIONS']:
            video_url = rendition.get('PMD_URL')
            if video_url:
                tbr = float_or_none(rendition.get('ENCODING_RATE'), 1000)
                formats.append({
                    'url': video_url,
                    'format_id': join_nonempty('http', int_or_none(tbr)),
                    'width': int_or_none(rendition.get('FRAME_WIDTH')),
                    'height': int_or_none(rendition.get('FRAME_HEIGHT')),
                    'tbr': tbr,
                })

        # The HLS manifest is only requested when an auth token was obtained.
        hls_url = media.get('HLS_SURL')
        if hls_url:
            token_data = self._download_json(
                Request(
                    'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/',
                    headers={'Referer': url}),
                video_id, 'Downloading auth token', fatal=False)
            token = token_data.get('token') if token_data else None
            if token:
                formats.extend(self._extract_m3u8_formats(
                    f'{hls_url}?hdnts={token}', video_id, m3u8_id='hls', fatal=False))

        hds_url = media.get('HDS_SURL')
        if hds_url:
            # The manifest is fetched with 'euskalsvod' rewritten to 'euskalvod'.
            manifest_base = hds_url.replace('euskalsvod', 'euskalvod')
            formats.extend(self._extract_f4m_formats(
                f'{manifest_base}?hdcore=3.7.0',
                video_id, f4m_id='hds', fatal=False))

        return {
            'id': video_id,
            'title': media.get('NAME_ES') or media.get('name') or media['NAME_EU'],
            'description': media.get('SHORT_DESC_ES') or video.get('desc_group') or media.get('SHORT_DESC_EU'),
            'thumbnail': media.get('STILL_URL') or media.get('THUMBNAIL_URL'),
            'duration': float_or_none(media.get('LENGTH'), 1000),
            'timestamp': parse_iso8601(media.get('BROADCST_DATE'), ' '),
            'tags': media.get('TAGS'),
            'formats': formats,
        }

View File

@ -1,61 +0,0 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
parse_duration,
xpath_text,
)
class EyedoTVIE(InfoExtractor):
    """Extractor for live and replay events on eyedo.tv."""

    _VALID_URL = r'https?://(?:www\.)?eyedo\.tv/[^/]+/(?:#!/)?Live/Detail/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://www.eyedo.tv/en-US/#!/Live/Detail/16301',
        'md5': 'ba14f17995cdfc20c36ba40e21bf73f7',
        'info_dict': {
            'id': '16301',
            'ext': 'mp4',
            'title': 'Journée du conseil scientifique de l\'Afnic 2015',
            'description': 'md5:4abe07293b2f73efc6e1c37028d58c98',
            'uploader': 'Afnic Live',
            'uploader_id': '8023',
        },
    }
    _ROOT_URL = 'http://live.eyedo.net:1935/'

    def _real_extract(self, url):
        """Fetch the event's XML description and build its HLS formats.

        Raises an expected ExtractorError when the event state is 'avenir'.
        """
        video_id = self._match_id(url)
        video_data = self._download_xml(f'http://eyedo.tv/api/live/GetLive/{video_id}', video_id)

        def _add_ns(path):
            return self._xpath_ns(path, 'http://schemas.datacontract.org/2004/07/EyeDo.Core.Implementation.Web.ViewModels.Api')

        title = xpath_text(video_data, _add_ns('Titre'), 'title', True)
        # Label this lookup with its own name (it was 'title', a copy-paste
        # slip) so that a failure message refers to the right element.
        state_live_code = xpath_text(video_data, _add_ns('StateLiveCode'), 'state live code', True)
        if state_live_code == 'avenir':
            raise ExtractorError(
                f'{self.IE_NAME} said: We\'re sorry, but this video is not yet available.',
                expected=True)

        is_live = state_live_code == 'live'
        # http://eyedo.tv/Content/Html5/Scripts/html5view.js
        if is_live:
            # NOTE(review): 'Cdn' is looked up without the namespace applied
            # to every other element -- confirm that this is intentional.
            if xpath_text(video_data, 'Cdn') == 'true':
                m3u8_url = f'http://rrr.sz.xlcdn.com/?account=eyedo&file=A{video_id}&type=live&service=wowza&protocol=http&output=playlist.m3u8'
            else:
                m3u8_url = self._ROOT_URL + f'w/{video_id}/eyedo_720p/playlist.m3u8'
        else:
            m3u8_url = self._ROOT_URL + f'replay-w/{video_id}/mp4:{video_id}.mp4/playlist.m3u8'

        return {
            'id': video_id,
            'title': title,
            'formats': self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native'),
            'description': xpath_text(video_data, _add_ns('Description')),
            'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))),
            'uploader': xpath_text(video_data, _add_ns('Createur')),
            'uploader_id': xpath_text(video_data, _add_ns('CreateurId')),
            'chapter': xpath_text(video_data, _add_ns('ChapitreTitre')),
            'chapter_id': xpath_text(video_data, _add_ns('ChapitreId')),
        }

View File

@ -12,14 +12,6 @@ class FootyRoomIE(InfoExtractor):
},
'playlist_count': 2,
'add_ie': [StreamableIE.ie_key()],
}, {
'url': 'http://footyroom.com/matches/75817984/georgia-vs-germany/review',
'info_dict': {
'id': '75817984',
'title': 'VIDEO Georgia 0 - 2 Germany',
},
'playlist_count': 1,
'add_ie': ['Playwire'],
}]
def _real_extract(self, url):
@ -38,13 +30,6 @@ class FootyRoomIE(InfoExtractor):
payload = video.get('payload')
if not payload:
continue
playwire_url = self._html_search_regex(
r'data-config="([^"]+)"', payload,
'playwire url', default=None)
if playwire_url:
entries.append(self.url_result(self._proto_relative_url(
playwire_url, 'http:'), 'Playwire'))
streamable_url = StreamableIE._extract_url(payload)
if streamable_url:
entries.append(self.url_result(

View File

@ -1,56 +0,0 @@
from .common import InfoExtractor
from ..utils import month_by_name
class FranceInterIE(InfoExtractor):
    """Extractor for audio programmes on franceinter.fr."""

    _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)'

    _TEST = {
        'url': 'https://www.franceinter.fr/emissions/affaires-sensibles/affaires-sensibles-07-septembre-2016',
        'md5': '9e54d7bdb6fdc02a841007f8a975c094',
        'info_dict': {
            'id': 'affaires-sensibles/affaires-sensibles-07-septembre-2016',
            'ext': 'mp3',
            'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
            'description': 'md5:401969c5d318c061f86bda1fa359292b',
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20160907',
        },
    }

    def _real_extract(self, url):
        """Scrape the programme page for its audio URL and metadata."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(
            r'(?s)<div[^>]+class=["\']page-diffusion["\'][^>]*>.*?<button[^>]+data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
            webpage, 'video url', group='url')

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)

        upload_date_str = self._search_regex(
            r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
            webpage, 'upload date', fatal=False)
        upload_date = None
        if upload_date_str:
            # The page gives "<day> <French month name> <year>"; rebuild it
            # as YYYYMMDD, with "00" for a month name that is not recognised.
            year, month_name, day = reversed(upload_date_str.split())
            month_number = month_by_name(month_name, lang='fr') or 0
            upload_date = f'{year}{month_number:02d}{int(day):02d}'

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'formats': [{
                'url': video_url,
                'vcodec': 'none',
            }],
        }

View File

@ -1,73 +0,0 @@
from .common import InfoExtractor
from ..networking import HEADRequest
class FujiTVFODPlus7IE(InfoExtractor):
    """Extractor for fod.fujitv.co.jp episodes."""

    _VALID_URL = r'https?://fod\.fujitv\.co\.jp/title/(?P<sid>[0-9a-z]{4})/(?P<id>[0-9a-z]+)'
    _BASE_URL = 'https://i.fod.fujitv.co.jp/'
    # Total bitrate -> (width, height) of the stream served at that bitrate.
    _BITRATE_MAP = {
        300: (320, 180),
        800: (640, 360),
        1200: (1280, 720),
        2000: (1280, 720),
        4000: (1920, 1080),
    }

    _TESTS = [{
        'url': 'https://fod.fujitv.co.jp/title/5d40/5d40110076',
        'info_dict': {
            'id': '5d40110076',
            'ext': 'ts',
            'title': '#1318 『まる子、まぼろしの洋館を見る』の巻',
            'series': 'ちびまる子ちゃん',
            'series_id': '5d40',
            'description': 'md5:b3f51dbfdda162ac4f789e0ff4d65750',
            'thumbnail': 'https://i.fod.fujitv.co.jp/img/program/5d40/episode/5d40110076_a.jpg',
        },
    }, {
        'url': 'https://fod.fujitv.co.jp/title/5d40/5d40810083',
        'info_dict': {
            'id': '5d40810083',
            'ext': 'ts',
            'title': '#1324 『まる子とオニの子』の巻『結成2月をムダにしない会』の巻',
            'description': 'md5:3972d900b896adc8ab1849e310507efa',
            'series': 'ちびまる子ちゃん',
            'series_id': '5d40',
            'thumbnail': 'https://i.fod.fujitv.co.jp/img/program/5d40/episode/5d40810083_a.jpg'},
        'skip': 'Video available only in one week',
    }]

    def _real_extract(self, url):
        """Extract HLS formats plus, when a token cookie exists, metadata."""
        series_id, video_id = self._match_valid_url(url).groups()
        # HEAD request to the page before the 'CT' token cookie is read.
        self._request_webpage(HEADRequest(url), video_id)
        json_info = {}
        token = self._get_cookies(url).get('CT')
        if token:
            # The request is non-fatal, so fall back to an empty dict when it
            # yields nothing usable; the `.get` lookups below need a mapping.
            json_info = self._download_json(
                f'https://fod-sp.fujitv.co.jp/apps/api/episode/detail/?ep_id={video_id}&is_premium=false',
                video_id, headers={'x-authorization': f'Bearer {token.value}'}, fatal=False) or {}
        else:
            self.report_warning(f'The token cookie is needed to extract video metadata. {self._login_hint("cookies")}')
        formats, subtitles = [], {}
        src_json = self._download_json(f'{self._BASE_URL}abrjson_v2/tv_android/{video_id}', video_id)
        for src in src_json['video_selector']:
            if not src.get('url'):
                continue
            fmt, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, 'ts')
            for f in fmt:
                # _BITRATE_MAP stores (width, height); the keys were zipped
                # in the opposite order before, swapping the dimensions.
                # Unknown bitrates map to () and leave the format untouched.
                f.update(dict(zip(('width', 'height'),
                                  self._BITRATE_MAP.get(f.get('tbr'), ()), strict=False)))
            formats.extend(fmt)
            subtitles = self._merge_subtitles(subtitles, subs)

        return {
            'id': video_id,
            'title': json_info.get('ep_title'),
            'series': json_info.get('lu_title'),
            'series_id': series_id,
            'description': json_info.get('ep_description'),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnail': f'{self._BASE_URL}img/program/{series_id}/episode/{video_id}_a.jpg',
            '_format_sort_fields': ('tbr', ),
        }

View File

@ -1,70 +1,13 @@
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
int_or_none,
parse_codecs,
parse_duration,
str_to_int,
unified_timestamp,
)
class GabTVIE(InfoExtractor):
    """Extractor for episodes on tv.gab.com."""

    _VALID_URL = r'https?://tv\.gab\.com/channel/[^/]+/view/(?P<id>[a-z0-9-]+)'
    _TESTS = [{
        'url': 'https://tv.gab.com/channel/wurzelroot/view/why-was-america-in-afghanistan-61217eacea5665de450d0488',
        'info_dict': {
            'id': '61217eacea5665de450d0488',
            'ext': 'mp4',
            'title': 'WHY WAS AMERICA IN AFGHANISTAN - AMERICA FIRST AGAINST AMERICAN OLIGARCHY',
            'uploader': 'Wurzelroot',
            'uploader_id': '608fb0a85738fd1974984f7d',
            'thumbnail': 'https://tv.gab.com/image/61217eacea5665de450d0488',
        },
    }]

    def _real_extract(self, url):
        """Build one format per resolution advertised on the episode page."""
        # The episode id is the last dash-separated part of the URL slug.
        video_id = self._match_id(url).split('-')[-1]
        webpage = self._download_webpage(url, video_id)

        channel_id = self._search_regex(r'data-channel-id=\"(?P<channel_id>[^\"]+)', webpage, 'channel_id')
        channel_name = self._search_regex(r'data-channel-name=\"(?P<channel_id>[^\"]+)', webpage, 'channel_name')
        title = self._search_regex(r'data-episode-title=\"(?P<channel_id>[^\"]+)', webpage, 'title')
        view_key = self._search_regex(r'data-view-key=\"(?P<channel_id>[^\"]+)', webpage, 'view_key')
        description = clean_html(
            self._html_search_regex(self._meta_regex('description'), webpage, 'description', group='content')) or None

        formats = []
        for resolution in re.findall(
                rf'<a\ data-episode-id=\"{video_id}\"\ data-resolution=\"(?P<resolution>[^\"]+)', webpage):
            if 'audio-' in resolution:
                # "audio-<bitrate>" entries are given a fixed height of 144
                # and a lowered quality.
                extra = {
                    'abr': str_to_int(resolution.replace('audio-', '')),
                    'height': 144,
                    'quality': -10,
                }
            else:
                # Everything else looks like "<height>p".
                extra = {'height': str_to_int(resolution.replace('p', ''))}
            formats.append({
                'url': f'https://tv.gab.com/media/{video_id}?viewKey={view_key}&r={resolution}',
                'format_id': resolution,
                'vcodec': 'h264',
                'acodec': 'aac',
                'ext': 'mp4',
                **extra,
            })

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'thumbnail': f'https://tv.gab.com/image/{video_id}',
        }
class GabIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?gab\.com/[^/]+/posts/(?P<id>\d+)'
_TESTS = [{

View File

@ -1,84 +0,0 @@
import json
from .common import InfoExtractor
from ..utils import (
clean_podcast_url,
int_or_none,
try_get,
urlencode_postdata,
)
class GooglePodcastsBaseIE(InfoExtractor):
    """Shared helpers for the podcasts.google.com extractors."""

    _VALID_URL_BASE = r'https?://podcasts\.google\.com/feed/'

    def _batch_execute(self, func_id, video_id, params):
        """Call a batchexecute function and return its decoded JSON payload.

        The response body is cut down to its outermost ``[...]`` span before
        being parsed; the payload is the JSON string found at ``[0][2]`` of
        that array.
        """
        request_body = urlencode_postdata({
            'f.req': json.dumps([[[func_id, json.dumps(params), None, '1']]]),
        })

        def extract_array(text):
            return self._search_regex(r'(?s)(\[.+\])', text, 'data')

        envelope = self._download_json(
            'https://podcasts.google.com/_/PodcastsUi/data/batchexecute',
            video_id, data=request_body, transform_source=extract_array)
        return json.loads(envelope[0][2])

    def _extract_episode(self, episode):
        """Map the positional `episode` array onto an info dict."""
        return {
            'id': episode[4][3],
            'title': episode[8],
            'url': clean_podcast_url(episode[13]),
            'thumbnail': episode[2],
            'description': episode[9],
            # Index 14 is optional, hence the guarded lookup.
            'creator': try_get(episode, lambda x: x[14]),
            'timestamp': int_or_none(episode[11]),
            'duration': int_or_none(episode[12]),
            'series': episode[1],
        }
class GooglePodcastsIE(GooglePodcastsBaseIE):
    """Extractor for a single Google Podcasts episode page."""

    IE_NAME = 'google:podcasts'
    _VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<feed_url>[^/]+)/episode/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA/episode/MzBlNWRlN2UtOWE4Yy00ODcwLTk2M2MtM2JlMmUyNmViOTRh',
        'md5': 'fa56b2ee8bd0703e27e42d4b104c4766',
        'info_dict': {
            'id': '30e5de7e-9a8c-4870-963c-3be2e26eb94a',
            'ext': 'mp3',
            'title': 'WWDTM New Year 2021',
            'description': 'We say goodbye to 2020 with Christine Baranksi, Doug Jones, Jonna Mendez, and Kellee Edwards.',
            'upload_date': '20210102',
            'timestamp': 1609606800,
            'duration': 2901,
            'series': "Wait Wait... Don't Tell Me!",
        },
    }

    def _real_extract(self, url):
        """Fetch the episode record identified by the two URL path segments."""
        # The URL carries the feed segment first and the episode segment second
        feed_segment, episode_segment = self._match_valid_url(url).groups()
        response = self._batch_execute(
            'oNjqVe', episode_segment, [feed_segment, episode_segment])
        # The episode record sits at index 1 of the decoded payload
        return self._extract_episode(response[1])
class GooglePodcastsFeedIE(GooglePodcastsBaseIE):
    """Extractor that turns a Google Podcasts feed page into a playlist."""

    IE_NAME = 'google:podcasts:feed'
    _VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<id>[^/?&#]+)/?(?:[?#&]|$)'
    _TEST = {
        'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA',
        'info_dict': {
            'title': "Wait Wait... Don't Tell Me!",
            'description': "NPR's weekly current events quiz. Have a laugh and test your news knowledge while figuring out what's real and what we've made up.",
        },
        'playlist_mincount': 20,
    }

    def _real_extract(self, url):
        """Return a playlist with one entry per episode listed in the feed payload."""
        feed_segment = self._match_id(url)
        payload = self._batch_execute('ncqJEe', feed_segment, [feed_segment])

        # Episodes are read from payload[1][0]; an absent list yields an empty playlist
        episodes = try_get(payload, lambda x: x[1][0]) or []
        entries = [self._extract_episode(item) for item in episodes]

        # Feed-level metadata is read from payload[3]: title at [0], description at [2]
        feed_meta = try_get(payload, lambda x: x[3]) or []
        feed_title = try_get(feed_meta, lambda x: x[0])
        feed_description = try_get(feed_meta, lambda x: x[2])
        return self.playlist_result(
            entries, playlist_title=feed_title,
            playlist_description=feed_description)

View File

@ -1,47 +0,0 @@
import urllib.parse
from .common import InfoExtractor
from ..utils import (
parse_duration,
)
class GoshgayIE(InfoExtractor):
    """Extractor for goshgay.com video pages (Flash-embed based)."""

    _VALID_URL = r'https?://(?:www\.)?goshgay\.com/video(?P<id>\d+?)($|/)'
    _TEST = {
        'url': 'http://www.goshgay.com/video299069/diesel_sfw_xxx_video',
        'md5': '4b6db9a0a333142eb9f15913142b0ed1',
        'info_dict': {
            'id': '299069',
            'ext': 'flv',
            'title': 'DIESEL SFW XXX Video',
            'thumbnail': r're:^http://.*\.jpg$',
            'duration': 80,
            'age_limit': 18,
        },
    }

    def _real_extract(self, url):
        """Scrape title, duration and the flashvars-embedded media URL from the page."""
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        title = self._html_search_regex(r'<h2>(.*?)<', page, 'title')

        # Duration is optional: a missing span yields None rather than an error
        duration_text = self._html_search_regex(
            r'<span class="duration">\s*-?\s*(.*?)</span>',
            page, 'duration', fatal=False)

        # The player parameters are a query string inside the <embed> tag
        raw_flashvars = self._html_search_regex(
            r'<embed.+?id="flash-player-embed".+?flashvars="([^"]+)"',
            page, 'flashvars')
        player_vars = urllib.parse.parse_qs(raw_flashvars)

        info = {
            'id': video_id,
            # The media URL is mandatory; a missing 'flv_url' raises KeyError
            'url': player_vars['flv_url'][0],
            'title': title,
            'thumbnail': player_vars.get('url_bigthumb', [None])[0],
            'duration': parse_duration(duration_text),
            'age_limit': 18,
        }
        return info

View File

@ -1,32 +0,0 @@
from .common import InfoExtractor
class GPUTechConfIE(InfoExtractor):
    """Extractor for GTC 2015 on-demand session pages; delegates to DigitallySpeaking."""

    _VALID_URL = r'https?://on-demand\.gputechconf\.com/gtc/2015/video/S(?P<id>\d+)\.html'
    _TEST = {
        'url': 'http://on-demand.gputechconf.com/gtc/2015/video/S5156.html',
        'md5': 'a8862a00a0fd65b8b43acc5b8e33f798',
        'info_dict': {
            'id': '5156',
            'ext': 'mp4',
            'title': 'Coordinating More Than 3 Million CUDA Threads for Social Network Analysis',
            'duration': 1219,
        },
    }

    def _real_extract(self, url):
        """Locate the session's XML descriptor and hand it to the DigitallySpeaking extractor."""
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # rootPath is optional in the page script; fall back to the known default
        base = self._search_regex(
            r'var\s+rootPath\s*=\s*"([^"]+)', page, 'root path',
            default='http://evt.dispeak.com/nvidia/events/gtc15/')
        descriptor_id = self._search_regex(
            r'var\s+xmlFileId\s*=\s*"([^"]+)', page, 'xml file id')

        descriptor_url = f'{base}xml/{descriptor_id}.xml'
        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': descriptor_url,
            'ie_key': 'DigitallySpeaking',
        }

View File

@ -1,183 +0,0 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
int_or_none,
merge_dicts,
parse_count,
str_or_none,
try_get,
unified_strdate,
urlencode_postdata,
urljoin,
)
class HKETVIE(InfoExtractor):
    """Extractor for Educational Television resources hosted on hkedcity.net."""

    IE_NAME = 'hketv'
    IE_DESC = '香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau'
    _GEO_BYPASS = False
    _GEO_COUNTRIES = ['HK']
    _VALID_URL = r'https?://(?:www\.)?hkedcity\.net/etv/resource/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.hkedcity.net/etv/resource/2932360618',
        'md5': 'f193712f5f7abb208ddef3c5ea6ed0b7',
        'info_dict': {
            'id': '2932360618',
            'ext': 'mp4',
            'title': '喜閱一生(共享閱讀樂) (中、英文字幕可供選擇)',
            'description': 'md5:d5286d05219ef50e0613311cbe96e560',
            'upload_date': '20181024',
            'duration': 900,
            'subtitles': 'count:2',
        },
        'skip': 'Geo restricted to HK',
    }, {
        'url': 'https://www.hkedcity.net/etv/resource/972641418',
        'md5': '1ed494c1c6cf7866a8290edad9b07dc9',
        'info_dict': {
            'id': '972641418',
            'ext': 'mp4',
            'title': '衣冠楚楚 (天使系列之一)',
            'description': 'md5:10bb3d659421e74f58e5db5691627b0f',
            'upload_date': '20070109',
            'duration': 907,
            'subtitles': {},
        },
        'skip': 'Geo restricted to HK',
    }]

    # Caption track label (as returned by the player handler) -> language code
    _CC_LANGS = {
        '中文(繁體中文)': 'zh-Hant',
        '中文(简体中文)': 'zh-Hans',
        'English': 'en',
        'Bahasa Indonesia': 'id',
        '\u0939\u093f\u0928\u094d\u0926\u0940': 'hi',
        '\u0928\u0947\u092a\u093e\u0932\u0940': 'ne',
        'Tagalog': 'tl',
        '\u0e44\u0e17\u0e22': 'th',
        '\u0627\u0631\u062f\u0648': 'ur',
    }
    # Source label -> frame height used for that rendition
    _FORMAT_HEIGHTS = {
        'SD': 360,
        'HD': 720,
    }
    _APPS_BASE_URL = 'https://apps.hkedcity.net'

    def _real_extract(self, url):
        """Extract formats, subtitles and metadata for one ETV resource.

        Raises a geo-restriction error when the handler reports that
        streaming is unavailable in the caller's country, and an expected
        ExtractorError for any other access denial.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = (
            self._html_search_meta(
                ('ed_title', 'search.ed_title'), webpage, default=None)
            # The requested group must exist in the pattern; asking for a
            # group the regex does not define raises IndexError on a match
            or self._search_regex(
                r'data-favorite_title_(?:eng|chi)=(["\'])(?P<title>(?:(?!\1).)+)\1',
                webpage, 'title', default=None, group='title')
            or self._html_search_regex(
                r'<h1>([^<]+)</h1>', webpage, 'title', default=None)
            or self._og_search_title(webpage)
        )

        file_id = self._search_regex(
            r'post_var\[["\']file_id["\']\s*\]\s*=\s*(.+?);',
            webpage, 'file ID')
        curr_url = self._search_regex(
            r'post_var\[["\']curr_url["\']\s*\]\s*=\s*"(.+?)";',
            webpage, 'curr URL')
        data = {
            'action': 'get_info',
            'curr_url': curr_url,
            'file_id': file_id,
            'video_url': file_id,
        }

        response = self._download_json(
            self._APPS_BASE_URL + '/media/play/handler.php', video_id,
            data=urlencode_postdata(data),
            headers=merge_dicts({
                'Content-Type': 'application/x-www-form-urlencoded'},
                self.geo_verification_headers()))

        if not response.get('success') or not response.get('access'):
            # The message may be absent; normalise to '' so the substring
            # test below cannot fail with TypeError on None
            error = clean_html(response.get('access_err_msg')) or ''
            if 'Video streaming is not available in your country' in error:
                self.raise_geo_restricted(
                    msg=error, countries=self._GEO_COUNTRIES)
            else:
                raise ExtractorError(
                    error or 'Access to this video was denied', expected=True)

        # Read the payload only after access has been confirmed, so that a
        # denied response without 'result' reports the real error above
        result = response['result']

        formats = []
        width = int_or_none(result.get('width'))
        height = int_or_none(result.get('height'))

        playlist0 = result['playlist'][0]
        for fmt in playlist0['sources']:
            file_url = urljoin(self._APPS_BASE_URL, fmt.get('file'))
            if not file_url:
                continue
            # If we ever wanted to provide the final resolved URL that
            # does not require cookies, albeit with a shorter lifespan:
            #     urlh = self._downloader.urlopen(file_url)
            #     resolved_url = urlh.url
            label = fmt.get('label')
            h = self._FORMAT_HEIGHTS.get(label)
            # Scale the width to the rendition height, keeping the reported aspect ratio
            w = h * width // height if h and width and height else None
            formats.append({
                'format_id': label,
                'ext': fmt.get('type'),
                'url': file_url,
                'width': w,
                'height': h,
            })

        subtitles = {}
        tracks = try_get(playlist0, lambda x: x['tracks'], list) or []
        for track in tracks:
            if not isinstance(track, dict):
                continue
            track_kind = str_or_none(track.get('kind'))
            if not track_kind or track_kind.lower() not in ('captions', 'subtitles'):
                continue
            track_url = urljoin(self._APPS_BASE_URL, track.get('file'))
            if not track_url:
                continue
            track_label = track.get('label')
            # Unknown labels are kept verbatim as the subtitle key
            subtitles.setdefault(self._CC_LANGS.get(
                track_label, track_label), []).append({
                    'url': self._proto_relative_url(track_url),
                    'ext': 'srt',
                })

        # Likes (best effort: a failed request leaves like_count as None)
        emotion = self._download_json(
            'https://emocounter.hkedcity.net/handler.php', video_id,
            data=urlencode_postdata({
                'action': 'get_emotion',
                'data[bucket_id]': 'etv',
                'data[identifier]': video_id,
            }),
            headers={'Content-Type': 'application/x-www-form-urlencoded'},
            fatal=False) or {}
        like_count = int_or_none(try_get(
            emotion, lambda x: x['data']['emotion_data'][0]['count']))

        return {
            'id': video_id,
            'title': title,
            'description': self._html_search_meta(
                'description', webpage, fatal=False),
            'upload_date': unified_strdate(self._html_search_meta(
                'ed_date', webpage, fatal=False), day_first=False),
            'duration': int_or_none(result.get('length')),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnail': urljoin(self._APPS_BASE_URL, result.get('image')),
            'view_count': parse_count(result.get('view_count')),
            'like_count': like_count,
        }

View File

@ -1,115 +0,0 @@
from .common import InfoExtractor
from ..utils import traverse_obj, try_call, url_or_none
class IdolPlusIE(InfoExtractor):
    """Extractor for idolplus.com content and concert pages, including member multicams."""

    _VALID_URL = r'https?://(?:www\.)?idolplus\.com/z[us]/(?:concert/|contents/?\?(?:[^#]+&)?albumId=)(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://idolplus.com/zs/contents?albumId=M012077298PPV00',
        'md5': '2ace3f4661c943a2f7e79f0b88cea1e7',
        'info_dict': {
            'id': 'M012077298PPV00',
            'ext': 'mp4',
            'title': '[MultiCam] Aegyo on Top of Aegyo (IZ*ONE EATING TRIP)',
            'release_date': '20200707',
            'formats': 'count:65',
        },
        'params': {'format': '532-KIM_MINJU'},
    }, {
        'url': 'https://idolplus.com/zs/contents?albumId=M01232H058PPV00&catId=E9TX5',
        'info_dict': {
            'id': 'M01232H058PPV00',
            'ext': 'mp4',
            'title': 'YENA (CIRCLE CHART MUSIC AWARDS 2022 RED CARPET)',
            'release_date': '20230218',
            'formats': 'count:5',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # live stream
        'url': 'https://idolplus.com/zu/contents?albumId=M012323174PPV00',
        'info_dict': {
            'id': 'M012323174PPV00',
            'ext': 'mp4',
            'title': 'Hanteo Music Awards 2022 DAY2',
            'release_date': '20230211',
            'formats': 'count:5',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://idolplus.com/zs/concert/M012323039PPV00',
        'info_dict': {
            'id': 'M012323039PPV00',
            'ext': 'mp4',
            'title': 'CIRCLE CHART MUSIC AWARDS 2022',
            'release_date': '20230218',
            'formats': 'count:5',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Find the player node in the page's view data and build formats from it."""
        video_id = self._match_id(url)

        view_data = self._download_json(
            'https://idolplus.com/api/zs/viewdata/ruleset/build', video_id,
            headers={'App_type': 'web', 'Country_Code': 'KR'}, query={
                'rulesetId': 'contents',
                'albumId': video_id,
                'distribute': 'PRD',
                'loggedIn': 'false',
                'region': 'zs',
                'countryGroup': '00010',
                'lang': 'en',
                'saId': '999999999998',
            })
        pending = traverse_obj(view_data, ('data', 'viewData', ...))

        # Walk the view-data nodes as a stack, descending into nested
        # 'dataList' children, and stop at the first node of type 'player'
        player_data = {}
        while pending:
            player_data = pending.pop()
            if traverse_obj(player_data, 'type') == 'player':
                break
            if traverse_obj(player_data, ('dataList', ...)):
                pending += player_data['dataList']

        main_m3u8_url = traverse_obj(player_data, (
            'vodPlayerList', 'vodProfile', 0, 'vodServer', 0, 'video_url', {url_or_none}))
        formats = self._extract_m3u8_formats(main_m3u8_url, video_id)

        subtitles = {}
        captions = traverse_obj(player_data, ('vodPlayerList', 'caption')) or []
        for caption in captions:
            lang = caption.get('lang') or 'und'
            subtitles.setdefault(lang, []).append({
                'url': caption.get('smi_url'),
                'ext': 'vtt',
            })

        # Add member multicams as alternative formats
        has_cuesheet = traverse_obj(player_data, ('detail', 'has_cuesheet')) == 'Y'
        if has_cuesheet and traverse_obj(player_data, ('detail', 'is_omni_member')) == 'Y':
            cuesheet_response = self._download_json(
                'https://idolplus.com/gapi/contents/v1.0/content/cuesheet', video_id,
                'Downloading JSON metadata for member multicams',
                headers={'App_type': 'web', 'Country_Code': 'KR'}, query={
                    'ALBUM_ID': video_id,
                    'COUNTRY_GRP': '00010',
                    'LANG': 'en',
                    'SA_ID': '999999999998',
                    'COUNTRY_CODE': 'KR',
                })
            cuesheet = traverse_obj(cuesheet_response, ('data', 'cuesheet_item', 0))

            for member in traverse_obj(cuesheet, ('members', ...)):
                # omni_view_index is 1-based; convert it to a list position
                index = try_call(lambda: int(member['omni_view_index']) - 1)
                member_video_url = traverse_obj(
                    cuesheet, ('omni_view', index, 'cdn_url', 0, 'url', {url_or_none}))
                if not member_video_url:
                    continue
                member_formats = self._extract_m3u8_formats(
                    member_video_url, video_id, note=f'Downloading m3u8 for multicam {member["name"]}')
                # Suffix each format id with the member name to keep ids unique
                for member_format in member_formats:
                    member_format['format_id'] = f'{member_format["format_id"]}-{member["name"].replace(" ", "_")}'
                formats.extend(member_formats)

        return {
            'id': video_id,
            'title': traverse_obj(player_data, ('detail', 'albumName')),
            'formats': formats,
            'subtitles': subtitles,
            'release_date': traverse_obj(player_data, ('detail', 'broadcastDate')),
        }

View File

@ -1,7 +1,7 @@
import base64
import urllib.parse
from .bokecc import BokeCCBaseIE
from .common import InfoExtractor
from ..utils import (
ExtractorError,
determine_ext,
@ -10,7 +10,7 @@ from ..utils import (
)
class InfoQIE(BokeCCBaseIE):
class InfoQIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?infoq\.com/(?:[^/]+/)+(?P<id>[^/]+)'
_TESTS = [{
@ -117,14 +117,10 @@ class InfoQIE(BokeCCBaseIE):
video_title = self._html_extract_title(webpage)
video_description = self._html_search_meta('description', webpage, 'description')
if '/cn/' in url:
# for China videos, HTTP video URL exists but always fails with 403
formats = self._extract_bokecc_formats(webpage, video_id)
else:
formats = (
self._extract_rtmp_video(webpage)
+ self._extract_http_video(webpage)
+ self._extract_http_audio(webpage, video_id))
formats = (
self._extract_rtmp_video(webpage)
+ self._extract_http_video(webpage)
+ self._extract_http_audio(webpage, video_id))
return {
'id': video_id,

View File

@ -1,58 +0,0 @@
import json
import re
from .common import InfoExtractor
from ..utils import parse_qs
class InternetVideoArchiveIE(InfoExtractor):
    """Extractor for Internet Video Archive player URLs carrying a publishedid."""

    _VALID_URL = r'https?://video\.internetvideoarchive\.net/(?:player|flash/players)/.*?\?.*?publishedid.*?'

    _TEST = {
        'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?customerid=69249&publishedid=194487&reporttag=vdbetatitle&playerid=641&autolist=0&domain=www.videodetective.com&maxrate=high&minrate=low&socialplayer=false',
        'info_dict': {
            'id': '194487',
            'ext': 'mp4',
            'title': 'Kick-Ass 2',
            'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    @staticmethod
    def _build_json_url(query):
        """Return the player configuration URL for an already-encoded query string."""
        return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query

    def _real_extract(self, url):
        """Fetch the player settings and derive HLS plus sibling manifest formats."""
        query = parse_qs(url)
        video_id = query['publishedid'][0]
        data = self._download_json(
            'https://video.internetvideoarchive.net/videojs7/videojs7.ivasettings.ashx',
            video_id, data=json.dumps({
                'customerid': query['customerid'][0],
                'publishedid': video_id,
            }).encode())
        title = data['Title']

        formats = self._extract_m3u8_formats(
            data['VideoUrl'], video_id, 'mp4',
            'm3u8_native', m3u8_id='hls', fatal=False)

        # The HLS request is non-fatal and may yield nothing, so only probe
        # the sibling manifests when there is a first format to derive from
        file_url = formats[0]['url'] if formats else None
        if file_url and '.ism/' in file_url:
            def replace_url(manifest_name):
                # Swap everything after '.ism/' (up to the query) for another manifest name
                return re.sub(r'\.ism/[^?]+', '.ism/' + manifest_name, file_url)

            formats.extend(self._extract_f4m_formats(
                replace_url('.f4m'), video_id, f4m_id='hds', fatal=False))
            formats.extend(self._extract_mpd_formats(
                replace_url('.mpd'), video_id, mpd_id='dash', fatal=False))
            formats.extend(self._extract_ism_formats(
                replace_url('Manifest'), video_id, ism_id='mss', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': data.get('PosterUrl'),
            'description': data.get('Description'),
        }

View File

@ -1,111 +0,0 @@
import urllib.parse
from .common import InfoExtractor
from ..utils import (
determine_ext,
float_or_none,
get_element_by_id,
int_or_none,
parse_iso8601,
str_to_int,
)
class IzleseneIE(InfoExtractor):
    """Extractor for izlesene.com video and embed-player pages."""

    _VALID_URL = r'''(?x)
        https?://(?:(?:www|m)\.)?izlesene\.com/
        (?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+)
        '''
    _TESTS = [
        {
            'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694',
            'md5': '4384f9f0ea65086734b881085ee05ac2',
            'info_dict': {
                'id': '7599694',
                'ext': 'mp4',
                'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
                'description': 'md5:253753e2655dde93f59f74b572454f6d',
                'thumbnail': r're:^https?://.*\.jpg',
                'uploader_id': 'pelikzzle',
                'timestamp': int,
                'upload_date': '20140702',
                'duration': 95.395,
                'age_limit': 0,
            },
        },
        {
            'url': 'http://www.izlesene.com/video/tarkan-dortmund-2006-konseri/17997',
            'md5': '97f09b6872bffa284cb7fa4f6910cb72',
            'info_dict': {
                'id': '17997',
                'ext': 'mp4',
                'title': 'Tarkan Dortmund 2006 Konseri',
                'thumbnail': r're:^https://.*\.jpg',
                'uploader_id': 'parlayankiz',
                'timestamp': int,
                'upload_date': '20061112',
                'duration': 253.666,
                'age_limit': 0,
            },
        },
    ]

    def _real_extract(self, url):
        """Parse the page's videoObj JSON and scrape the remaining metadata from HTML."""
        video_id = self._match_id(url)

        # Always fetch the canonical page, whichever URL form was given
        webpage = self._download_webpage(f'http://www.izlesene.com/video/{video_id}', video_id)

        video = self._parse_json(
            self._search_regex(
                r'videoObj\s*=\s*({.+?})\s*;\s*\n', webpage, 'streams'),
            video_id)

        title = video.get('videoTitle') or self._og_search_title(webpage)

        formats = []
        for stream in video['media']['level']:
            source_url = stream.get('source')
            if not source_url or not isinstance(source_url, str):
                continue
            # Stream URLs arrive percent-encoded; decode once and use the
            # decoded stream URL (not the page URL) to detect the extension
            stream_url = urllib.parse.unquote(source_url)
            ext = determine_ext(stream_url, 'mp4')
            quality = stream.get('value')
            height = int_or_none(quality)
            formats.append({
                'format_id': f'{quality}p' if quality else 'sd',
                'url': stream_url,
                'ext': ext,
                'height': height,
            })

        description = self._og_search_description(webpage, default=None)
        thumbnail = video.get('posterURL') or self._proto_relative_url(
            self._og_search_thumbnail(webpage), scheme='http:')

        uploader = self._html_search_regex(
            r"adduserUsername\s*=\s*'([^']+)';",
            webpage, 'uploader', fatal=False)
        timestamp = parse_iso8601(self._html_search_meta(
            'uploadDate', webpage, 'upload date'))

        # The duration value is divided by 1000 (scale) to obtain seconds
        duration = float_or_none(video.get('duration') or self._html_search_regex(
            r'videoduration["\']?\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
            webpage, 'duration', fatal=False, group='value'), scale=1000)

        view_count = str_to_int(get_element_by_id('videoViewCount', webpage))
        comment_count = self._html_search_regex(
            r'comment_count\s*=\s*\'([^\']+)\';',
            webpage, 'comment_count', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader_id': uploader,
            'timestamp': timestamp,
            'duration': duration,
            'view_count': int_or_none(view_count),
            'comment_count': int_or_none(comment_count),
            'age_limit': self._family_friendly_search(webpage),
            'formats': formats,
        }

View File

@ -1,206 +0,0 @@
import urllib.parse
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
strip_or_none,
try_get,
)
class KinjaEmbedIE(InfoExtractor):
    """Extractor for Kinja-network inset/embed iframes.

    Most embed types are redirected to the extractor of the embedded
    provider; 'kinjavideo' and the remaining types are resolved through
    JSON APIs.
    """

    IE_NAME = 'kinja:embed'
    _DOMAIN_REGEX = r'''(?:[^.]+\.)?
        (?:
            avclub|
            clickhole|
            deadspin|
            gizmodo|
            jalopnik|
            jezebel|
            kinja|
            kotaku|
            lifehacker|
            splinternews|
            the(?:inventory|onion|root|takeout)
        )\.com'''
    _COMMON_REGEX = r'''/
        (?:
            ajax/inset|
            embed/video
        )/iframe\?.*?\bid='''
    _VALID_URL = rf'''(?x)https?://{_DOMAIN_REGEX}{_COMMON_REGEX}
        (?P<type>
            fb|
            imgur|
            instagram|
            jwp(?:layer)?-video|
            kinjavideo|
            mcp|
            megaphone|
            soundcloud(?:-playlist)?|
            tumblr-post|
            twitch-stream|
            twitter|
            ustream-channel|
            vimeo|
            vine|
            youtube-(?:list|video)
        )-(?P<id>[^&]+)'''
    _EMBED_REGEX = [rf'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//{_DOMAIN_REGEX})?{_COMMON_REGEX}(?:(?!\1).)+)\1']
    _TESTS = [{
        'url': 'https://kinja.com/ajax/inset/iframe?id=fb-10103303356633621',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=kinjavideo-100313',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=megaphone-PPY1300931075',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-128574047',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-playlist-317413750',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=tumblr-post-160130699814-daydreams-at-midnight',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=twitch-stream-libratus_extra',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=twitter-1068875942473404422',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=ustream-channel-10414700',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=vimeo-120153502',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=vine-5BlvV5qqPrD',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-list-BCQ3KyrPjgA/PLE6509247C270A72E',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-video-00QyL0AgPAE',
        'only_matching': True,
    }]
    _WEBPAGE_TESTS = [{
        'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
        'info_dict': {
            'id': '106351',
            'ext': 'mp4',
            'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
        },
        'skip': 'Invalid URL',
    }]
    _JWPLATFORM_PROVIDER = ('cdn.jwplayer.com/v2/media/', 'JWPlatform')
    # Embed type -> (scheme-less URL prefix or %-template, extractor key)
    _PROVIDER_MAP = {
        'fb': ('facebook.com/video.php?v=', 'Facebook'),
        'imgur': ('imgur.com/', 'Imgur'),
        'instagram': ('instagram.com/p/', 'Instagram'),
        'jwplayer-video': _JWPLATFORM_PROVIDER,
        'jwp-video': _JWPLATFORM_PROVIDER,
        'megaphone': ('player.megaphone.fm/', 'Generic'),
        'soundcloud': ('api.soundcloud.com/tracks/', 'Soundcloud'),
        'soundcloud-playlist': ('api.soundcloud.com/playlists/', 'SoundcloudPlaylist'),
        'tumblr-post': ('%s.tumblr.com/post/%s', 'Tumblr'),
        'twitch-stream': ('twitch.tv/', 'TwitchStream'),
        'twitter': ('twitter.com/i/cards/tfw/v1/', 'TwitterCard'),
        'ustream-channel': ('ustream.tv/embed/', 'Ustream'),
        'vimeo': ('vimeo.com/', 'Vimeo'),
        'vine': ('vine.co/v/', 'Vine'),
        'youtube-list': ('youtube.com/embed/%s?list=%s', 'YoutubePlaylist'),
        'youtube-video': ('youtube.com/embed/', 'Youtube'),
    }

    def _real_extract(self, url):
        """Resolve an embed URL to a provider redirect or a full info dict."""
        video_type, video_id = self._match_valid_url(url).groups()

        # Case 1: a known third-party provider -> delegate to its extractor
        provider = self._PROVIDER_MAP.get(video_type)
        if provider:
            url_template, ie_key = provider[0], provider[1]
            video_id = urllib.parse.unquote(video_id)
            if video_type == 'tumblr-post':
                # The id has the form '<post id>-<blog name>'
                video_id, blog = video_id.split('-', 1)
                result_url = url_template % (blog, video_id)
            elif video_type == 'youtube-list':
                # The id has the form '<video id>/<playlist id>'
                video_id, playlist_id = video_id.split('/')
                result_url = url_template % (video_id, playlist_id)
            else:
                result_url = url_template + video_id
            return self.url_result('http://' + result_url, ie_key)

        # Case 2: video hosted by Kinja itself
        if video_type == 'kinjavideo':
            response = self._download_json(
                'https://kinja.com/api/core/video/views/videoById',
                video_id, query={'videoId': video_id})
            data = response['data']
            title = data['title']

            formats = []
            for url_key in ('signedPlaylistUrl', 'streamingUrl'):
                m3u8_url = data.get(url_key)
                if not m3u8_url:
                    continue
                formats.extend(self._extract_m3u8_formats(
                    m3u8_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))

            poster = data.get('poster') or {}
            poster_id = poster.get('id')
            thumbnail = None
            if poster_id:
                poster_format = poster.get('format') or 'jpg'
                thumbnail = f'https://i.kinja-img.com/gawker-media/image/upload/{poster_id}.{poster_format}'

            return {
                'id': video_id,
                'title': title,
                'description': strip_or_none(data.get('description')),
                'formats': formats,
                'tags': data.get('tags'),
                # publishTimeMillis is divided by 1000 to obtain seconds
                'timestamp': int_or_none(try_get(
                    data, lambda x: x['postInfo']['publishTimeMillis']), 1000),
                'thumbnail': thumbnail,
                'uploader': data.get('network'),
            }

        # Case 3: any remaining type is looked up via the Univision metadata API
        video_data = self._download_json(
            'https://api.vmh.univision.com/metadata/v1/content/' + video_id,
            video_id)['videoMetadata']
        iptc = video_data['photoVideoMetadataIPTC']
        title = iptc['title']['en']
        fmg = video_data.get('photoVideoMetadata_fmg') or {}
        tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com'
        token_data = self._download_json(
            tvss_domain + '/api/v3/video-auth/url-signature-tokens',
            video_id, query={'mcpids': video_id})['data'][0]

        formats = []
        rendition_url = token_data.get('renditionUrl')
        if rendition_url:
            formats = self._extract_m3u8_formats(
                rendition_url, video_id, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False)

        fallback_rendition_url = token_data.get('fallbackRenditionUrl')
        if fallback_rendition_url:
            # The bitrate, when present, is embedded in the file name as '_<n>.mp4'
            bitrate = self._search_regex(
                r'_(\d+)\.mp4', fallback_rendition_url,
                'bitrate', default=None)
            formats.append({
                'format_id': 'fallback',
                'tbr': int_or_none(bitrate),
                'url': fallback_rendition_url,
            })

        return {
            'id': video_id,
            'title': title,
            'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], str),
            'uploader': fmg.get('network'),
            'duration': int_or_none(iptc.get('fileDuration')),
            'formats': formats,
            'description': try_get(iptc, lambda x: x['description']['en'], str),
            'timestamp': parse_iso8601(iptc.get('dateReleased')),
        }

View File

@ -1,115 +0,0 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
try_get,
)
class KooIE(InfoExtractor):
    """Extractor for individual kooapp.com posts ("koos"); marked as not working."""

    _WORKING = False
    _VALID_URL = r'https?://(?:www\.)?kooapp\.com/koo/[^/]+/(?P<id>[^/&#$?]+)'
    _TESTS = [{  # Test for video in the comments
        'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/946c4189-bc2d-4524-b95b-43f641e2adde',
        'info_dict': {
            'id': '946c4189-bc2d-4524-b95b-43f641e2adde',
            'ext': 'mp4',
            'title': 'test for video in comment',
            'description': 'md5:daa77dc214add4da8b6ea7d2226776e7',
            'timestamp': 1632215195,
            'uploader_id': 'ytdlpTestAccount',
            'uploader': 'yt-dlpTestAccount',
            'duration': 7000,
            'upload_date': '20210921',
        },
        'params': {'skip_download': True},
    }, {  # Test for koo with long title
        'url': 'https://www.kooapp.com/koo/laxman_kumarDBFEC/33decbf7-5e1e-4bb8-bfd7-04744a064361',
        'info_dict': {
            'id': '33decbf7-5e1e-4bb8-bfd7-04744a064361',
            'ext': 'mp4',
            'title': 'md5:47a71c2337295330c5a19a8af1bbf450',
            'description': 'md5:06a6a84e9321499486dab541693d8425',
            'timestamp': 1632106884,
            'uploader_id': 'laxman_kumarDBFEC',
            'uploader': 'Laxman Kumar 🇮🇳',
            'duration': 46000,
            'upload_date': '20210920',
        },
        'params': {'skip_download': True},
    }, {  # Test for audio
        'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/a2a9c88e-ce4b-4d2d-952f-d06361c5b602',
        'info_dict': {
            'id': 'a2a9c88e-ce4b-4d2d-952f-d06361c5b602',
            'ext': 'mp4',
            'title': 'Test for audio',
            'description': 'md5:ecb9a2b6a5d34b736cecb53788cb11e8',
            'timestamp': 1632211634,
            'uploader_id': 'ytdlpTestAccount',
            'uploader': 'yt-dlpTestAccount',
            'duration': 214000,
            'upload_date': '20210921',
        },
        'params': {'skip_download': True},
    }, {  # Test for video
        'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/a3e56c53-c1ed-4ac9-ac02-ed1630e6b1d1',
        'info_dict': {
            'id': 'a3e56c53-c1ed-4ac9-ac02-ed1630e6b1d1',
            'ext': 'mp4',
            'title': 'Test for video',
            'description': 'md5:7afc4eb839074ddeb2beea5dd6fe9500',
            'timestamp': 1632211468,
            'uploader_id': 'ytdlpTestAccount',
            'uploader': 'yt-dlpTestAccount',
            'duration': 14000,
            'upload_date': '20210921',
        },
        'params': {'skip_download': True},
    }, {  # Test for link
        'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/01bf5b94-81a5-4d8e-a387-5f732022e15a',
        'skip': 'No video/audio found at the provided url.',
        'info_dict': {
            'id': '01bf5b94-81a5-4d8e-a387-5f732022e15a',
            'title': 'Test for link',
            'ext': 'none',
        },
    }, {  # Test for images
        'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/dc05d9cd-a61d-45fd-bb07-e8019d8ca8cb',
        'skip': 'No video/audio found at the provided url.',
        'info_dict': {
            'id': 'dc05d9cd-a61d-45fd-bb07-e8019d8ca8cb',
            'title': 'Test for images',
            'ext': 'none',
        },
    }]

    def _real_extract(self, url):
        """Look up the post in the API response and collect its MP4/HLS media."""
        video_id = self._match_id(url)
        api_response = self._download_json(
            f'https://www.kooapp.com/apiV1/ku/{video_id}?limit=20&offset=0&showSimilarKoos=true', video_id)
        contents = api_response['parentContent']

        # Pick the first content group whose leading item is the requested post
        item_json = next(
            group['items'][0] for group in contents
            if try_get(group, lambda x: x['items'][0]['id']) == video_id)
        media_json = item_json['mediaMap']

        mp4_url = media_json.get('videoMp4')
        video_m3u8_url = media_json.get('videoHls')

        formats = []
        if mp4_url:
            formats.append({
                'url': mp4_url,
                'ext': 'mp4',
            })
        if video_m3u8_url:
            formats.extend(self._extract_m3u8_formats(video_m3u8_url, video_id, fatal=False, ext='mp4'))
        if not formats:
            self.raise_no_formats('No video/audio found at the provided url.', expected=True)

        # The description is the title followed by its transliteration
        description = f'{clean_html(item_json.get("title"))}\n\n{clean_html(item_json.get("enTransliteration"))}'
        return {
            'id': video_id,
            'title': clean_html(item_json.get('title')),
            'description': description,
            'timestamp': item_json.get('createdAt'),
            'uploader_id': item_json.get('handle'),
            'uploader': item_json.get('name'),
            'duration': media_json.get('duration'),
            'formats': formats,
        }

View File

@ -1,9 +1,6 @@
import base64
import datetime as dt
import hashlib
import re
import time
import urllib.parse
from .common import InfoExtractor
from ..compat import compat_ord
@ -14,8 +11,6 @@ from ..utils import (
int_or_none,
orderedSet,
parse_iso8601,
str_or_none,
url_basename,
urshift,
)
@ -248,114 +243,3 @@ class LePlaylistIE(InfoExtractor):
return self.playlist_result(entries, playlist_id, playlist_title=title,
playlist_description=description)
class LetvCloudIE(InfoExtractor):
    """Extractor for Letv cloud player URLs (yuntv.letv.com)."""

    # Most of *.letv.com is changed to *.le.com on 2016/01/02
    # but yuntv.letv.com is kept, so also keep the extractor name
    IE_DESC = '乐视云'
    _VALID_URL = r'https?://yuntv\.letv\.com/bcloud.html\?.+'

    _TESTS = [{
        'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=467623dedf',
        'md5': '26450599afd64c513bc77030ad15db44',
        'info_dict': {
            'id': 'p7jnfw5hw9_467623dedf',
            'ext': 'mp4',
            'title': 'Video p7jnfw5hw9_467623dedf',
        },
    }, {
        'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=ec93197892&pu=2c7cd40209&auto_play=1&gpcflag=1&width=640&height=360',
        'md5': 'e03d9cc8d9c13191e1caf277e42dbd31',
        'info_dict': {
            'id': 'p7jnfw5hw9_ec93197892',
            'ext': 'mp4',
            'title': 'Video p7jnfw5hw9_ec93197892',
        },
    }, {
        'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=187060b6fd',
        'md5': 'cb988699a776b22d4a41b9d43acfb3ac',
        'info_dict': {
            'id': 'p7jnfw5hw9_187060b6fd',
            'ext': 'mp4',
            'title': 'Video p7jnfw5hw9_187060b6fd',
        },
    }]

    @staticmethod
    def sign_data(obj):
        """Add a 'sign' entry to obj in place.

        The signature is the MD5 hex digest of selected key/value pairs
        concatenated in a fixed order, followed by a salt; both the key
        list and the salt depend on obj['cf'] ('flash' or 'html5').
        """
        client = obj['cf']
        if client == 'flash':
            salt = '2f9d6924b33a165a6d8b5d3d42f4f987'
            items = ['cf', 'format', 'ran', 'uu', 'ver', 'vu']
        elif client == 'html5':
            salt = 'fbeh5player12c43eccf2bec3300344'
            items = ['cf', 'ran', 'uu', 'bver', 'vu']
        signed_text = ''.join(key + obj[key] for key in items) + salt
        obj['sign'] = hashlib.md5(signed_text.encode()).hexdigest()

    def _get_formats(self, cf, uu, vu, media_id):
        """Request the play JSON for one client type and return its formats.

        Raises ExtractorError when the API returns no data.
        """
        def fetch_play_json(client, timestamp):
            query = {
                'cf': client,
                'ver': '2.2',
                'bver': 'firefox44.0',
                'format': 'json',
                'uu': uu,
                'vu': vu,
                'ran': str(timestamp),
            }
            self.sign_data(query)
            return self._download_json(
                'http://api.letvcloud.com/gpc.php?' + urllib.parse.urlencode(query),
                media_id, f'Downloading playJson data for type {client}')

        play_json = fetch_play_json(cf, time.time())
        # The server time may be different from local time
        if play_json.get('code') == 10071:
            play_json = fetch_play_json(cf, play_json['timestamp'])

        if not play_json.get('data'):
            # Report the most specific error information the response offers
            message = play_json.get('message')
            if message:
                raise ExtractorError('Letv cloud said: {}'.format(play_json['message']), expected=True)
            if play_json.get('code'):
                raise ExtractorError('Letv cloud returned error %d' % play_json['code'], expected=True)
            raise ExtractorError('Letv cloud returned an unknown error')

        def b64decode(s):
            return base64.b64decode(s).decode('utf-8')

        def build_format(media):
            play_url = media['play_url']
            main_url = b64decode(play_url['main_url'])
            # The URL's basename is itself base64; decoding it exposes the
            # file name from which the extension is determined
            decoded_name = b64decode(url_basename(main_url))
            return {
                'url': main_url,
                'ext': determine_ext(decoded_name),
                'format_id': str_or_none(play_url.get('vtype')),
                'format_note': str_or_none(play_url.get('definition')),
                'width': int_or_none(play_url.get('vwidth')),
                'height': int_or_none(play_url.get('vheight')),
            }

        media_map = play_json['data']['video_info']['media']
        return [build_format(media) for media in media_map.values()]

    def _real_extract(self, url):
        """Read the uu/vu parameters from the URL and merge flash and html5 formats."""
        uu_mobj = re.search(r'uu=([\w]+)', url)
        vu_mobj = re.search(r'vu=([\w]+)', url)

        # Both parameters are required to identify the media
        if not (uu_mobj and vu_mobj):
            raise ExtractorError(f'Invalid URL: {url}', expected=True)

        uu, vu = uu_mobj.group(1), vu_mobj.group(1)
        media_id = uu + '_' + vu

        flash_formats = self._get_formats('flash', uu, vu, media_id)
        html5_formats = self._get_formats('html5', uu, vu, media_id)

        return {
            'id': media_id,
            'title': f'Video {media_id}',
            'formats': flash_formats + html5_formats,
        }

View File

@ -1,386 +0,0 @@
import itertools
import re
import urllib.parse
from .common import InfoExtractor
from ..utils import (
determine_ext,
find_xpath_attr,
float_or_none,
int_or_none,
orderedSet,
parse_iso8601,
traverse_obj,
update_url_query,
xpath_attr,
xpath_text,
xpath_with_ns,
)
class LivestreamIE(InfoExtractor):
    """Extractor for livestream.com account, event and video URLs."""

    IE_NAME = 'livestream'
    _VALID_URL = r'''(?x)
        https?://(?:new\.)?livestream\.com/
        (?:accounts/(?P<account_id>\d+)|(?P<account_name>[^/]+))
        (?:/events/(?P<event_id>\d+)|/(?P<event_name>[^/]+))?
        (?:/videos/(?P<id>\d+))?
    '''
    _EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"']

    _TESTS = [{
        'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
        'md5': '7876c5f5dc3e711b6b73acce4aac1527',
        'info_dict': {
            'id': '4719370',
            'ext': 'mp4',
            'title': 'Live from Webster Hall NYC',
            'timestamp': 1350008072,
            'upload_date': '20121012',
            'duration': 5968.0,
            'like_count': int,
            'view_count': int,
            'comment_count': int,
            'thumbnail': r're:^http://.*\.jpg$',
        },
    }, {
        'url': 'https://livestream.com/coheedandcambria/websterhall',
        'info_dict': {
            'id': '1585861',
            'title': 'Live From Webster Hall',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://livestream.com/dayananda/events/7954027',
        'info_dict': {
            'title': 'Live from Mevo',
            'id': '7954027',
        },
        'playlist_mincount': 4,
    }, {
        'url': 'https://livestream.com/accounts/82',
        'info_dict': {
            'id': '253978',
            'view_count': int,
            'title': 'trsr',
            'comment_count': int,
            'like_count': int,
            'upload_date': '20120306',
            'timestamp': 1331042383,
            'thumbnail': 'http://img.new.livestream.com/videos/0000000000000372/cacbeed6-fb68-4b5e-ad9c-e148124e68a9_640x427.jpg',
            'duration': 15.332,
            'ext': 'mp4',
        },
    }, {
        'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
        'only_matching': True,
    }, {
        'url': 'http://livestream.com/bsww/concacafbeachsoccercampeonato2015',
        'only_matching': True,
    }]
    _API_URL_TEMPLATE = 'http://livestream.com/api/accounts/%s/events/%s'

    def _parse_smil_formats_and_subtitles(
            self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
        """Build flv formats from the ``<video>`` nodes of a SMIL document.

        Only ``smil`` and ``namespace`` are used; the remaining parameters are
        accepted but ignored. Presumably this is the hook the base class calls
        from ``_extract_smil_formats`` -- confirm against InfoExtractor.

        :return: tuple ``(formats, subtitles)``; subtitles is always ``{}``.
        """
        base_meta = find_xpath_attr(
            smil, self._xpath_ns('.//meta', namespace), 'name', 'httpBase')
        if base_meta is not None:
            base_url = base_meta.get('content')
        else:
            base_url = 'http://livestreamvod-f.akamaihd.net/'

        formats = []
        for video_node in smil.findall(self._xpath_ns('.//video', namespace)):
            attrs = video_node.attrib
            # system-bitrate is divided by 1000 to obtain tbr
            tbr = int_or_none(attrs.get('system-bitrate'), 1000)
            format_url = update_url_query(urllib.parse.urljoin(base_url, attrs['src']), {
                'v': '3.0.3',
                'fp': 'WIN% 14,0,0,145',
            })
            if 'clipBegin' in attrs:
                format_url += '&ssek=' + attrs['clipBegin']
            formats.append({
                'url': format_url,
                'format_id': 'smil_%d' % tbr,
                'ext': 'flv',
                'tbr': tbr,
                'preference': -1000,  # Strictly inferior than all other formats?
            })
        return formats, {}

    def _extract_video_info(self, video_data):
        """Convert a video JSON object from the API into an info dict.

        Formats are gathered, in order, from the progressive URLs, the SMIL
        document, the HLS manifest and the HDS manifest.

        :raises KeyError: if ``id`` or ``caption`` is missing, or a comment
            has no ``text``.
        """
        video_id = str(video_data['id'])

        formats = []
        for format_id, key in (('sd', 'progressive_url'), ('hd', 'progressive_url_hd')):
            video_url = video_data.get(key)
            if not video_url:
                continue
            ext = determine_ext(video_url)
            if ext == 'm3u8':
                # HLS is taken from 'm3u8_url' below instead
                continue
            # The bitrate is read from the number preceding the file extension
            bitrate = int_or_none(self._search_regex(
                rf'(\d+)\.{ext}', video_url, 'bitrate', default=None))
            formats.append({
                'url': video_url,
                'format_id': format_id,
                'tbr': bitrate,
                'ext': ext,
            })

        smil_url = video_data.get('smil_url')
        if smil_url:
            formats.extend(self._extract_smil_formats(smil_url, video_id, fatal=False))

        m3u8_url = video_data.get('m3u8_url')
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))

        f4m_url = video_data.get('f4m_url')
        if f4m_url:
            formats.extend(self._extract_f4m_formats(
                f4m_url, video_id, f4m_id='hds', fatal=False))

        comments = []
        for comment in video_data.get('comments', {}).get('data', []):
            comments.append({
                'author_id': comment.get('author_id'),
                'author': comment.get('author', {}).get('full_name'),
                'id': comment.get('id'),
                'text': comment['text'],
                'timestamp': parse_iso8601(comment.get('created_at')),
            })

        return {
            'id': video_id,
            'formats': formats,
            'title': video_data['caption'],
            'description': video_data.get('description'),
            'thumbnail': video_data.get('thumbnail_url'),
            # 'duration' is divided by 1000
            'duration': float_or_none(video_data.get('duration'), 1000),
            'timestamp': parse_iso8601(video_data.get('publish_at')),
            'like_count': video_data.get('likes', {}).get('total'),
            'comment_count': video_data.get('comments', {}).get('total'),
            'view_count': video_data.get('views'),
            'comments': comments,
        }

    def _extract_stream_info(self, stream_info):
        """Convert an event's ``stream_info`` object into an info dict.

        :raises KeyError: if ``broadcast_id`` or ``stream_title`` is missing.
        """
        broadcast_id = str(stream_info['broadcast_id'])
        is_live = stream_info.get('is_live')

        formats = []
        smil_url = stream_info.get('play_url')
        if smil_url:
            formats.extend(self._extract_smil_formats(smil_url, broadcast_id))

        m3u8_url = stream_info.get('m3u8_url')
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, broadcast_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))

        rtsp_url = stream_info.get('rtsp_url')
        if rtsp_url:
            formats.append({
                'url': rtsp_url,
                'format_id': 'rtsp',
            })

        return {
            'id': broadcast_id,
            'formats': formats,
            'title': stream_info['stream_title'],
            'thumbnail': stream_info.get('thumbnail_url'),
            'is_live': is_live,
        }

    def _generate_event_playlist(self, event_data):
        """Yield the playlist entries of one event.

        If the event carries ``stream_info``, the stream's info dict is the
        only entry. Otherwise the event feed is paged through and one URL
        result is yielded per video until a page contains no videos.
        """
        event_id = str(event_data['id'])
        account_id = str(event_data['owner_account_id'])
        feed_root_url = self._API_URL_TEMPLATE % (account_id, event_id) + '/feed.json'

        stream_info = event_data.get('stream_info')
        if stream_info:
            # This function is a generator, so ``return <value>`` would
            # discard the stream info and produce an empty playlist.
            # Yield it as the single entry instead.
            yield self._extract_stream_info(stream_info)
            return

        last_video = None
        for i in itertools.count(1):
            if last_video is None:
                info_url = feed_root_url
            else:
                # Continue from the last video seen on the previous page
                info_url = f'{feed_root_url}?&id={last_video}&newer=-1&type=video'
            feed_items = self._download_json(
                info_url, event_id, f'Downloading page {i}')['data']
            videos_info = [item['data'] for item in feed_items if item['type'] == 'video']
            if not videos_info:
                break
            for v in videos_info:
                v_id = str(v['id'])
                yield self.url_result(
                    f'http://livestream.com/accounts/{account_id}/events/{event_id}/videos/{v_id}',
                    LivestreamIE, v_id, v.get('caption'))
            last_video = videos_info[-1]['id']

    def _real_extract(self, url):
        """Dispatch on the URL shape: single video, event playlist or account playlist."""
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        event = mobj.group('event_id') or mobj.group('event_name')
        account = mobj.group('account_id') or mobj.group('account_name')
        api_url = f'http://livestream.com/api/accounts/{account}'

        if video_id:
            video_data = self._download_json(
                f'{api_url}/events/{event}/videos/{video_id}', video_id)
            return self._extract_video_info(video_data)

        if event:
            event_data = self._download_json(f'{api_url}/events/{event}', None)
            return self.playlist_result(
                self._generate_event_playlist(event_data), str(event_data['id']), event_data['full_name'])

        # Account URL: chain the entries of all upcoming and past events
        account_data = self._download_json(api_url, None)
        items = traverse_obj(account_data, (('upcoming_events', 'past_events'), 'data', ...))
        return self.playlist_result(
            itertools.chain.from_iterable(map(self._generate_event_playlist, items)),
            account_data.get('id'), account_data.get('full_name'))
# The original version of Livestream uses a different system
class LivestreamOriginalIE(InfoExtractor):
    """Extractor for original.livestream.com channels, clips and folders."""

    IE_NAME = 'livestream:original'
    _VALID_URL = r'''(?x)https?://original\.livestream\.com/
        (?P<user>[^/\?#]+)(?:/(?P<type>video|folder)
        (?:(?:\?.*?Id=|/)(?P<id>.*?)(&|$))?)?
    '''
    _TESTS = [{
        'url': 'http://original.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
        'info_dict': {
            'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
            'ext': 'mp4',
            'title': 'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
            'duration': 771.301,
            'view_count': int,
        },
    }, {
        'url': 'https://original.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3',
        'info_dict': {
            'id': 'a07bf706-d0e4-4e75-a747-b021d84f2fd3',
        },
        'playlist_mincount': 4,
    }, {
        # live stream
        'url': 'http://original.livestream.com/znsbahamas',
        'only_matching': True,
    }]

    def _extract_video_info(self, user, video_id):
        """Fetch clip metadata from the ``clipdetails`` XML API.

        :return: dict with ``id``, ``title``, ``thumbnail``, ``duration`` and
            ``view_count`` read from the first ``channel/item`` element.
        """
        api_url = f'http://x{user}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={video_id}'
        clip_doc = self._download_xml(api_url, video_id)
        item = clip_doc.find('channel').find('item')

        media_ns = {'media': 'http://search.yahoo.com/mrss'}
        ls_ns = {'ls': 'http://api.channel.livestream.com/2.0'}

        title = xpath_text(item, 'title')
        thumbnail_url = xpath_attr(
            item, xpath_with_ns('media:thumbnail', media_ns), 'url')
        raw_duration = xpath_attr(
            item, xpath_with_ns('media:content', media_ns), 'duration')
        raw_views = xpath_text(item, xpath_with_ns('ls:viewsCount', ls_ns))

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail_url,
            'duration': float_or_none(raw_duration),
            'view_count': int_or_none(raw_views),
        }

    def _extract_video_formats(self, video_data, video_id):
        """Build formats from the progressive, HLS and RTSP URLs of ``video_data``."""
        formats = []

        progressive_url = video_data.get('progressiveUrl')
        if progressive_url:
            formats.append({
                'url': progressive_url,
                'format_id': 'http',
            })

        # 'httpUrl' is handled as an HLS manifest
        hls_url = video_data.get('httpUrl')
        if hls_url:
            formats.extend(self._extract_m3u8_formats(
                hls_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))

        rtsp_url = video_data.get('rtspUrl')
        if rtsp_url:
            formats.append({
                'url': rtsp_url,
                'format_id': 'rtsp',
            })

        return formats

    def _extract_folder(self, url, folder_id):
        """Return a playlist of the de-duplicated links found on a folder page."""
        webpage = self._download_webpage(url, folder_id)
        # Matches sub-folder links and links that point at livestre.am
        link_paths = orderedSet(re.findall(
            r'''(?x)(?:
                <li\s+class="folder">\s*<a\s+href="|
                <a\s+href="(?=https?://livestre\.am/)
            )([^"]+)"''', webpage))

        entries = []
        for path in link_paths:
            entries.append({
                '_type': 'url',
                'url': urllib.parse.urljoin(url, path),
            })

        return self.playlist_result(entries, folder_id)

    def _real_extract(self, url):
        """Extract a folder playlist, a single clip, or a channel's stream."""
        mobj = self._match_valid_url(url)
        user = mobj.group('user')
        url_type = mobj.group('type')
        content_id = mobj.group('id')

        if url_type == 'folder':
            return self._extract_folder(url, content_id)

        # this url is used on mobile devices
        stream_url = f'http://x{user}x.api.channel.livestream.com/3.0/getstream.json'
        if content_id:
            # A specific clip: metadata comes from the clipdetails API
            stream_url += f'?id={content_id}'
            info = self._extract_video_info(user, content_id)
        else:
            # The channel itself: metadata is scraped from the channel page
            content_id = user
            webpage = self._download_webpage(url, content_id)
            info = {
                'title': self._og_search_title(webpage),
                'description': self._og_search_description(webpage),
                'thumbnail': self._search_regex(r'channelLogo\.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None),
            }

        video_data = self._download_json(stream_url, content_id)
        is_live = video_data.get('isLive')
        stream_fields = {
            'id': content_id,
            'title': info['title'],
            'formats': self._extract_video_formats(video_data, content_id),
            'is_live': is_live,
        }
        info.update(stream_fields)
        return info
# The server doesn't support HEAD request, the generic extractor can't detect
# the redirection
class LivestreamShortenerIE(InfoExtractor):
    """Resolve livestre.am short links via the target page's og:url."""

    IE_NAME = 'livestream:shortener'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://livestre\.am/(?P<id>.+)'

    def _real_extract(self, url):
        """Download the short-link page and delegate to the URL in its og:url."""
        short_id = self._match_id(url)
        page = self._download_webpage(url, short_id)
        target_url = self._og_search_url(page)
        return self.url_result(target_url)

View File

@ -1,325 +0,0 @@
import itertools
import re
import urllib.parse
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
urlencode_postdata,
)
class LyndaBaseIE(InfoExtractor):
    """Shared login handling for the lynda.com extractors."""

    _SIGNIN_URL = 'https://www.lynda.com/signin/lynda'
    _PASSWORD_URL = 'https://www.lynda.com/signin/password'
    _USER_URL = 'https://www.lynda.com/signin/user'
    _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
    _NETRC_MACHINE = 'lynda'

    @staticmethod
    def _check_error(json_string, key_or_keys):
        """Raise if any of the given keys holds a truthy value in the response.

        :param json_string: response mapping (despite the name, it must
            support ``.get``).
        :param key_or_keys: a single key or an iterable of keys to inspect.
        :raises ExtractorError: with the first truthy value found, in the
            order the keys are given.
        """
        keys = [key_or_keys] if isinstance(key_or_keys, str) else key_or_keys
        for key in keys:
            error = json_string.get(key)
            if error:
                raise ExtractorError(f'Unable to login: {error}', expected=True)

    def _perform_login_step(self, form_html, fallback_action_url, extra_form_data, note, referrer_url):
        """Submit one form of the multi-step sign-in flow.

        :param form_html: HTML of the form to submit; its hidden inputs are sent.
        :param fallback_action_url: URL to post to when the form has no ``action``.
        :param extra_form_data: fields merged over the hidden inputs.
        :param note: progress message for the request.
        :param referrer_url: value of the ``Referer`` header.
        :return: tuple ``(response_json, action_url)``.
        :raises ExtractorError: if the response reports a login error.
        """
        action_url = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_html,
            'post url', default=fallback_action_url, group='url')

        # Relative form actions are resolved against the sign-in page
        if not action_url.startswith('http'):
            action_url = urllib.parse.urljoin(self._SIGNIN_URL, action_url)

        form_data = self._hidden_inputs(form_html)
        form_data.update(extra_form_data)

        response = self._download_json(
            action_url, None, note,
            data=urlencode_postdata(form_data),
            headers={
                'Referer': referrer_url,
                'X-Requested-With': 'XMLHttpRequest',
            }, expected_status=(418, 500))

        self._check_error(response, ('email', 'password', 'ErrorMessage'))

        return response, action_url

    def _perform_login(self, username, password):
        """Sign in with the given credentials unless a session is already active."""
        # Step 1: download signin page
        signin_page = self._download_webpage(
            self._SIGNIN_URL, None, 'Downloading signin page')

        # Already logged in
        logged_in_markers = (r'isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')
        if any(re.search(p, signin_page) for p in logged_in_markers):
            return

        # Step 2: submit email
        signin_form = self._search_regex(
            r'(?s)(<form[^>]+data-form-name=["\']signin["\'][^>]*>.+?</form>)',
            signin_page, 'signin form')
        # The helper is defined as _perform_login_step; calling the previous
        # name (_login_step) raised AttributeError.
        signin_page, signin_url = self._perform_login_step(
            signin_form, self._PASSWORD_URL, {'email': username},
            'Submitting email', self._SIGNIN_URL)

        # Step 3: submit password
        password_form = signin_page['body']
        self._perform_login_step(
            password_form, self._USER_URL, {'email': username, 'password': password},
            'Submitting password', signin_url)
class LyndaIE(LyndaBaseIE):
    """Extractor for single lynda.com videos."""

    IE_NAME = 'lynda'
    IE_DESC = 'lynda.com videos'
    _VALID_URL = r'''(?x)
        https?://
            (?:www\.)?(?:lynda\.com|educourse\.ga)/
            (?:
                (?:[^/]+/){2,3}(?P<course_id>\d+)|
                player/embed
            )/
            (?P<id>\d+)
    '''

    _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'

    _TESTS = [{
        'url': 'https://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
        # md5 is unstable
        'info_dict': {
            'id': '114408',
            'ext': 'mp4',
            'title': 'Using the exercise files',
            'duration': 68,
        },
    }, {
        'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0',
        'only_matching': True,
    }, {
        'url': 'https://educourse.ga/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
        'only_matching': True,
    }, {
        'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Willkommen-Grundlagen-guten-Gestaltung/393570/393572-4.html',
        'only_matching': True,
    }, {
        # Status="NotFound", Message="Transcript not found"
        'url': 'https://www.lynda.com/ASP-NET-tutorials/What-you-should-know/5034180/2811512-4.html',
        'only_matching': True,
    }]

    def _raise_unavailable(self, video_id):
        """Report that the video requires a member login."""
        self.raise_login_required(
            f'Video {video_id} is only available for members')

    def _real_extract(self, url):
        """Extract one video from the player API, or from the course play API.

        The course play API is used only when the player request yields
        nothing; that path takes its metadata from the conviva endpoint.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        course_id = mobj.group('course_id')

        query = {
            'videoId': video_id,
            'type': 'video',
        }

        video = self._download_json(
            'https://www.lynda.com/ajax/player', video_id,
            'Downloading video JSON', fatal=False, query=query)

        # Fallback scenario
        if not video:
            query['courseId'] = course_id

            play = self._download_json(
                f'https://www.lynda.com/ajax/course/{course_id}/{video_id}/play', video_id, 'Downloading play JSON')

            if not play:
                self._raise_unavailable(video_id)

            formats = []
            for cdn_entry in play:
                urls = cdn_entry.get('urls')
                if not isinstance(urls, dict):
                    continue
                cdn = cdn_entry.get('name')
                for format_id, format_url in urls.items():
                    if format_url:
                        formats.append({
                            'url': format_url,
                            'format_id': f'{cdn}-{format_id}' if cdn else format_id,
                            'height': int_or_none(format_id),
                        })

            conviva = self._download_json(
                'https://www.lynda.com/ajax/player/conviva', video_id,
                'Downloading conviva JSON', query=query)

            return {
                'id': video_id,
                'title': conviva['VideoTitle'],
                'description': conviva.get('VideoDescription'),
                'release_year': int_or_none(conviva.get('ReleaseYear')),
                'duration': int_or_none(conviva.get('Duration')),
                'creator': conviva.get('Author'),
                'formats': formats,
            }

        if 'Status' in video:
            raise ExtractorError(
                'lynda returned error: {}'.format(video['Message']), expected=True)

        if video.get('HasAccess') is False:
            self._raise_unavailable(video_id)

        # Prefer the ID reported by the API over the one taken from the URL
        video_id = str(video.get('ID') or video_id)
        duration = int_or_none(video.get('DurationInSeconds'))
        title = video['Title']

        formats = []

        for f in video.get('Formats') or []:
            if not f.get('Url'):
                continue
            formats.append({
                'url': f['Url'],
                'ext': f.get('Extension'),
                'width': int_or_none(f.get('Width')),
                'height': int_or_none(f.get('Height')),
                'filesize': int_or_none(f.get('FileSize')),
                'format_id': str(f.get('Resolution')) if f.get('Resolution') else None,
            })

        prioritized_streams = video.get('PrioritizedStreams') or {}
        for prioritized_stream_id, prioritized_stream in prioritized_streams.items():
            for format_id, video_url in prioritized_stream.items():
                formats.append({
                    'url': video_url,
                    'height': int_or_none(format_id),
                    'format_id': f'{prioritized_stream_id}-{format_id}',
                })

        self._check_formats(formats, video_id)

        subtitles = self.extract_subtitles(video_id)

        return {
            'id': video_id,
            'title': title,
            'duration': duration,
            'subtitles': subtitles,
            'formats': formats,
        }

    def _fix_subtitles(self, subs):
        """Turn the transcript entries into SRT text.

        Each cue runs from its own timecode to the next entry's timecode, so
        the last entry only supplies an end time. Entries whose timecode (or
        whose successor's timecode) does not match ``_TIMECODE_REGEX`` and
        entries with a blank caption are skipped.

        :return: the SRT text, or ``None`` when no cue was produced.
        """
        cues = []
        seq_counter = 0
        for entry, following in itertools.pairwise(subs):
            m_current = re.match(self._TIMECODE_REGEX, entry['Timecode'])
            if m_current is None:
                continue
            m_next = re.match(self._TIMECODE_REGEX, following['Timecode'])
            if m_next is None:
                continue
            appear_time = m_current.group('timecode')
            disappear_time = m_next.group('timecode')
            text = entry['Caption'].strip()
            if not text:
                continue
            seq_counter += 1
            cues.append(f'{seq_counter}\r\n{appear_time} --> {disappear_time}\r\n{text}\r\n\r\n')
        return ''.join(cues) or None

    def _get_subtitles(self, video_id):
        """Download the transcript and return it as English SRT subtitles.

        :return: ``{'en': [...]}``, or ``{}`` when no usable transcript exists.
        """
        url = f'https://www.lynda.com/ajax/player?videoId={video_id}&type=transcript'
        raw_subs = self._download_webpage(
            url, video_id, 'Downloading subtitles JSON', fatal=False)
        if not raw_subs or 'Status="NotFound"' in raw_subs:
            return {}

        parsed_subs = self._parse_json(raw_subs, video_id, fatal=False)
        if not parsed_subs:
            return {}

        fixed_subs = self._fix_subtitles(parsed_subs)
        if not fixed_subs:
            return {}
        return {'en': [{'ext': 'srt', 'data': fixed_subs}]}
class LyndaCourseIE(LyndaBaseIE):
    """Extractor for lynda.com courses, returned as playlists of videos."""

    IE_NAME = 'lynda:course'
    IE_DESC = 'lynda.com online courses'

    # Course link equals to welcome/introduction video link of same course
    # We will recognize it as course link
    _VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P<coursepath>(?:[^/]+/){2,3}(?P<courseid>\d+))-2\.html'

    _TESTS = [{
        'url': 'https://www.lynda.com/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html',
        'only_matching': True,
    }, {
        'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the course playlist from the course API, or from the web page.

        The web page is scraped for ``data-video-id`` attributes only when
        the course API request yields nothing.

        :raises ExtractorError: if the API reports the course as not found.
        """
        mobj = self._match_valid_url(url)
        course_path = mobj.group('coursepath')
        course_id = mobj.group('courseid')

        # Video page URL for this course; '%s' receives the video ID
        item_template = f'https://www.lynda.com/{course_path}/%s-4.html'

        course = self._download_json(
            f'https://www.lynda.com/ajax/player?courseId={course_id}&type=course',
            course_id, 'Downloading course JSON', fatal=False)

        if not course:
            webpage = self._download_webpage(url, course_id)
            scraped_ids = re.findall(r'data-video-id=["\'](\d+)', webpage)
            entries = []
            for video_id in scraped_ids:
                entries.append(self.url_result(
                    item_template % video_id, ie=LyndaIE.ie_key(),
                    video_id=video_id))
            return self.playlist_result(
                entries, course_id,
                self._og_search_title(webpage, fatal=False),
                self._og_search_description(webpage))

        if course.get('Status') == 'NotFound':
            raise ExtractorError(
                f'Course {course_id} does not exist', expected=True)

        unaccessible_videos = 0
        entries = []

        # Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
        # by single video API anymore

        for chapter in course['Chapters']:
            for video in chapter.get('Videos', []):
                if video.get('HasAccess') is False:
                    unaccessible_videos += 1
                    continue
                video_id = video.get('ID')
                if not video_id:
                    continue
                entries.append({
                    '_type': 'url_transparent',
                    'url': item_template % video_id,
                    'ie_key': LyndaIE.ie_key(),
                    'chapter': chapter.get('Title'),
                    'chapter_number': int_or_none(chapter.get('ChapterIndex')),
                    'chapter_id': str(chapter.get('ID')),
                })

        if unaccessible_videos > 0:
            self.report_warning(
                f'{unaccessible_videos} videos are only available for members (or paid members) '
                f'and will not be downloaded. {self._ACCOUNT_CREDENTIALS_HINT}')

        return self.playlist_result(
            entries, course_id, course.get('Title'), course.get('Description'))

View File

@ -1,121 +0,0 @@
import base64
from .common import InfoExtractor
from ..utils import (
merge_dicts,
parse_duration,
parse_iso8601,
parse_resolution,
try_get,
url_basename,
)
class MicrosoftStreamIE(InfoExtractor):
    """Extractor for videos hosted on microsoftstream.com."""

    IE_NAME = 'microsoftstream'
    IE_DESC = 'Microsoft Stream'
    _VALID_URL = r'https?://(?:web|www|msit)\.microsoftstream\.com/video/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'

    _TESTS = [{
        'url': 'https://web.microsoftstream.com/video/6e51d928-4f46-4f1c-b141-369925e37b62?list=user&userId=f5491e02-e8fe-4e34-b67c-ec2e79a6ecc0',
        'only_matching': True,
    }, {
        'url': 'https://msit.microsoftstream.com/video/b60f5987-aabd-4e1c-a42f-c559d138f2ca',
        'only_matching': True,
    }]

    def _get_all_subtitles(self, api_url, video_id, headers):
        """Download the text tracks and split them into manual and automatic.

        :return: dict with ``subtitles`` and ``automatic_captions`` mappings
            from language to a list of vtt entries. Both are empty when the
            (non-fatal) request yields nothing.
        """
        subtitles = {}
        automatic_captions = {}
        response = self._download_json(
            f'{api_url}/videos/{video_id}/texttracks', video_id,
            note='Downloading subtitles JSON', fatal=False, headers=headers,
            query={'api-version': '1.4-private'})
        # The request is non-fatal, so the result may be falsy rather than a
        # dict; guard before calling .get() so a failed optional request does
        # not abort the extraction.
        text_tracks = (response or {}).get('value') or []
        for track in text_tracks:
            if not track.get('language') or not track.get('url'):
                continue
            sub_dict = automatic_captions if track.get('autoGenerated') else subtitles
            sub_dict.setdefault(track['language'], []).append({
                'ext': 'vtt',
                'url': track.get('url'),
            })
        return {
            'subtitles': subtitles,
            'automatic_captions': automatic_captions,
        }

    def extract_all_subtitles(self, *args, **kwargs):
        """Fetch subtitles only when a subtitle-related option is enabled.

        :return: result of ``_get_all_subtitles``, or ``{}`` when none of
            ``writesubtitles``, ``writeautomaticsub`` and ``listsubtitles``
            is set.
        """
        subtitles_requested = (
            self.get_param('writesubtitles', False)
            or self.get_param('writeautomaticsub', False)
            or self.get_param('listsubtitles'))
        if subtitles_requested:
            return self._get_all_subtitles(*args, **kwargs)
        return {}

    def _real_extract(self, url):
        """Extract a video using the access token embedded in its web page.

        A login requirement is raised when the page title is not
        ``Microsoft Stream``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        if '<title>Microsoft Stream</title>' not in webpage:
            self.raise_login_required(method='cookies')

        access_token = self._html_search_regex(r'"AccessToken":"(.+?)"', webpage, 'access token')
        api_url = self._html_search_regex(r'"ApiGatewayUri":"(.+?)"', webpage, 'api url')

        headers = {'Authorization': f'Bearer {access_token}'}

        video_data = self._download_json(
            f'{api_url}/videos/{video_id}', video_id,
            headers=headers, query={
                '$expand': 'creator,tokens,status,liveEvent,extensions',
                'api-version': '1.4-private',
            })
        # Prefer the ID reported by the API over the one taken from the URL
        video_id = video_data.get('id') or video_id
        language = video_data.get('language')

        thumbnails = []
        for thumbnail_id in ('extraSmall', 'small', 'medium', 'large'):
            thumbnail_url = try_get(video_data, lambda x: x['posterImage'][thumbnail_id]['url'], str)
            if not thumbnail_url:
                continue
            thumb = {
                'id': thumbnail_id,
                'url': thumbnail_url,
            }
            # The URL basename is base64 (padding restored here); its decoded
            # text is searched for a resolution. str() on bytes yields the
            # b'...' repr, which is what parse_resolution receives.
            thumb_name = url_basename(thumbnail_url)
            thumb_name = str(base64.b64decode(thumb_name + '=' * (-len(thumb_name) % 4)))
            thumb.update(parse_resolution(thumb_name))
            thumbnails.append(thumb)

        formats = []
        for playlist in video_data['playbackUrls']:
            mime_type = playlist['mimeType']
            if mime_type == 'application/vnd.apple.mpegurl':
                formats.extend(self._extract_m3u8_formats(
                    playlist['playbackUrl'], video_id,
                    ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls',
                    fatal=False, headers=headers))
            elif mime_type == 'application/dash+xml':
                formats.extend(self._extract_mpd_formats(
                    playlist['playbackUrl'], video_id, mpd_id='dash',
                    fatal=False, headers=headers))
            elif mime_type == 'application/vnd.ms-sstr+xml':
                formats.extend(self._extract_ism_formats(
                    playlist['playbackUrl'], video_id, ism_id='mss',
                    fatal=False, headers=headers))
        # Tag every format with the video's language
        formats = [merge_dicts(f, {'language': language}) for f in formats]

        return {
            'id': video_id,
            'title': video_data['name'],
            'description': video_data.get('description'),
            'uploader': try_get(video_data, lambda x: x['creator']['name'], str),
            'uploader_id': try_get(video_data, (lambda x: x['creator']['mail'],
                                                lambda x: x['creator']['id']), str),
            'thumbnails': thumbnails,
            **self.extract_all_subtitles(api_url, video_id, headers),
            'timestamp': parse_iso8601(video_data.get('created')),
            'duration': parse_duration(try_get(video_data, lambda x: x['media']['duration'])),
            'webpage_url': f'https://web.microsoftstream.com/video/{video_id}',
            'view_count': try_get(video_data, lambda x: x['metrics']['views'], int),
            'like_count': try_get(video_data, lambda x: x['metrics']['likes'], int),
            'comment_count': try_get(video_data, lambda x: x['metrics']['comments'], int),
            'formats': formats,
        }

View File

@ -1,45 +0,0 @@
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_codecs,
)
class MinotoIE(InfoExtractor):
    """Extractor for minoto-video.com players and ``minoto:`` pseudo-URLs."""

    _VALID_URL = r'(?:minoto:|https?://(?:play|iframe|embed)\.minoto-video\.com/(?P<player_id>[0-9]+)/)(?P<id>[a-zA-Z0-9]+)'

    def _real_extract(self, url):
        """Download the player JSON and build formats from its ``video-files``."""
        mobj = self._match_valid_url(url)
        # 'minoto:' URLs carry no player ID; '1' is used in that case
        player_id = mobj.group('player_id') or '1'
        video_id = mobj.group('id')

        video_data = self._download_json(f'http://play.minoto-video.com/{player_id}/{video_id}.js', video_id)
        video_metadata = video_data['video-metadata']

        formats = []
        for video_file in video_data['video-files']:
            file_url = video_file.get('url')
            if not file_url:
                continue
            container = video_file.get('container')
            if container == 'hls':
                formats.extend(self._extract_m3u8_formats(file_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
                continue
            profile = video_file.get('profile') or {}
            formats.append({
                'format_id': profile.get('name-short'),
                'format_note': profile.get('name'),
                'url': file_url,
                'container': container,
                'tbr': int_or_none(video_file.get('bitrate')),
                'filesize': int_or_none(video_file.get('filesize')),
                'width': int_or_none(video_file.get('width')),
                'height': int_or_none(video_file.get('height')),
                **parse_codecs(video_file.get('codecs')),
            })

        return {
            'id': video_id,
            'title': video_metadata['title'],
            'description': video_metadata.get('description'),
            'thumbnail': video_metadata.get('video-poster', {}).get('url'),
            'formats': formats,
        }

View File

@ -1,52 +0,0 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
parse_duration,
)
class MojvideoIE(InfoExtractor):
    """Extractor for mojvideo.com video pages."""

    _VALID_URL = r'https?://(?:www\.)?mojvideo\.com/video-(?P<display_id>[^/]+)/(?P<id>[a-f0-9]+)'
    _TEST = {
        'url': 'http://www.mojvideo.com/video-v-avtu-pred-mano-rdecelaska-alfi-nipic/3d1ed4497707730b2906',
        'md5': 'f7fd662cc8ce2be107b0d4f2c0483ae7',
        'info_dict': {
            'id': '3d1ed4497707730b2906',
            'display_id': 'v-avtu-pred-mano-rdecelaska-alfi-nipic',
            'ext': 'mp4',
            'title': 'V avtu pred mano rdečelaska - Alfi Nipič',
            'thumbnail': r're:^http://.*\.jpg$',
            'duration': 242,
        },
    }

    def _real_extract(self, url):
        """Read the player API response and pull the fields out with regexes.

        :raises ExtractorError: if the response contains
            ``<error>true</error>``.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        # XML is malformed, so it is downloaded as text and searched with
        # regular expressions
        playerapi = self._download_webpage(
            f'http://www.mojvideo.com/playerapi.php?v={video_id}&t=1', display_id)

        if '<error>true</error>' in playerapi:
            error_desc = self._html_search_regex(
                r'<errordesc>([^<]*)</errordesc>', playerapi, 'error description', fatal=False)
            raise ExtractorError(f'{self.IE_NAME} said: {error_desc}', expected=True)

        title = self._html_extract_title(playerapi)
        video_url = self._html_search_regex(
            r'<file>([^<]+)</file>', playerapi, 'video URL')
        thumbnail = self._html_search_regex(
            r'<preview>([^<]+)</preview>', playerapi, 'thumbnail', fatal=False)
        raw_duration = self._html_search_regex(
            r'<duration>([^<]+)</duration>', playerapi, 'duration', fatal=False)

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail,
            'duration': parse_duration(raw_duration),
        }

View File

@ -1,289 +0,0 @@
import datetime as dt
import re
import urllib.parse
from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
remove_end,
str_to_int,
unified_strdate,
)
class MotherlessIE(InfoExtractor):
    """Extractor for single motherless.com video pages."""

    _VALID_URL = r'https?://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/|G[VIG]?[A-F0-9]+/)?(?P<id>[A-F0-9]+)'
    _TESTS = [{
        'url': 'http://motherless.com/EE97006',
        'md5': 'cb5e7438f7a3c4e886b7bccc1292a3bc',
        'info_dict': {
            'id': 'EE97006',
            'ext': 'mp4',
            'title': 'Dogging blond Brit getting glazed (comp)',
            'categories': ['UK', 'slag', 'whore', 'dogging', 'cunt', 'cumhound', 'big tits', 'Pearl Necklace'],
            'upload_date': '20230519',
            'uploader_id': 'deathbird',
            'thumbnail': r're:https?://.*\.jpg',
            'age_limit': 18,
            'comment_count': int,
            'view_count': int,
            'like_count': int,
        },
        'params': {
            # Incomplete cert chains
            'nocheckcertificate': True,
        },
    }, {
        'url': 'http://motherless.com/532291B',
        'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
        'info_dict': {
            'id': '532291B',
            'ext': 'mp4',
            'title': 'Amazing girl playing the omegle game, PERFECT!',
            'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen',
                           'game', 'hairy'],
            'upload_date': '20140622',
            'uploader_id': 'Sulivana7x',
            'thumbnail': r're:https?://.*\.jpg',
            'age_limit': 18,
        },
        'skip': '404',
    }, {
        'url': 'http://motherless.com/g/cosplay/633979F',
        'expected_exception': 'ExtractorError',
    }, {
        'url': 'http://motherless.com/8B4BBC1',
        'info_dict': {
            'id': '8B4BBC1',
            'ext': 'mp4',
            'title': 'VIDEO00441.mp4',
            'categories': [],
            'upload_date': '20160214',
            'uploader_id': 'NMWildGirl',
            'thumbnail': r're:https?://.*\.jpg',
            'age_limit': 18,
            'like_count': int,
            'comment_count': int,
            'view_count': int,
        },
        'params': {
            'nocheckcertificate': True,
        },
    }, {
        # see https://motherless.com/videos/recent for recent videos with
        # uploaded date in "ago" format
        'url': 'https://motherless.com/3C3E2CF',
        'info_dict': {
            'id': '3C3E2CF',
            'ext': 'mp4',
            'title': 'a/ Hot Teens',
            'categories': list,
            'upload_date': '20210104',
            'uploader_id': 'anonymous',
            'thumbnail': r're:https?://.*\.jpg',
            'age_limit': 18,
            'like_count': int,
            'comment_count': int,
            'view_count': int,
        },
        'params': {
            'nocheckcertificate': True,
        },
    }]

    def _real_extract(self, url):
        """Scrape the video page for the media URL and metadata.

        :raises ExtractorError: if the page indicates the video does not
            exist or is restricted to friends.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        not_found_markers = (
            '<title>404 - MOTHERLESS.COM<',
            ">The page you're looking for cannot be found.<",
            '<div class="error-page',
        )
        if any(marker in webpage for marker in not_found_markers):
            raise ExtractorError(f'Video {video_id} does not exist', expected=True)

        if '>The content you are trying to view is for friends only.' in webpage:
            raise ExtractorError(f'Video {video_id} is for friends only', expected=True)

        title = self._html_search_regex(
            (r'(?s)<div[^>]+\bclass=["\']media-meta-title[^>]+>(.+?)</div>',
             r'id="view-upload-title">\s+([^<]+)<'), webpage, 'title')

        # Fall back to a URL built from the video ID when the page does not
        # expose the file URL
        scraped_url = self._html_search_regex(
            (r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
             r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
            webpage, 'video URL', default=None, group='url')
        video_url = (
            scraped_url
            or f'http://cdn4.videos.motherlessmedia.com/videos/{video_id}.mp4?fs=opencloud')

        age_limit = self._rta_search(webpage)

        view_count = str_to_int(self._html_search_regex(
            (r'>([\d,.]+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'),
            webpage, 'view count', fatal=False))
        like_count = str_to_int(self._html_search_regex(
            (r'>([\d,.]+)\s+Favorites<',
             r'<strong>Favorited</strong>\s+([^<]+)<'),
            webpage, 'like count', fatal=False))

        upload_date = unified_strdate(self._search_regex(
            r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<', webpage,
            'upload date', default=None))
        if not upload_date:
            # Recent uploads show a relative "<N>h ago" / "<N>d ago" instead
            # of a date; subtract that offset from the current UTC time
            uploaded_ago = self._search_regex(
                r'>\s*(\d+[hd])\s+[aA]go\b', webpage, 'uploaded ago',
                default=None)
            if uploaded_ago:
                amount = int(uploaded_ago[:-1])
                unit_name = {
                    'h': 'hours',
                    'd': 'days',
                }.get(uploaded_ago[-1])
                offset = dt.timedelta(**{unit_name: amount})
                upload_date = (dt.datetime.now(dt.timezone.utc) - offset).strftime('%Y%m%d')

        # Comments are counted by their markup rather than extracted
        comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage))
        uploader_id = self._html_search_regex(
            (r'''<span\b[^>]+\bclass\s*=\s*["']username\b[^>]*>([^<]+)</span>''',
             r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)'''),
            webpage, 'uploader_id', fatal=False)

        # Categories come from the comma-separated keywords meta tag
        keywords = self._html_search_meta('keywords', webpage, default='')
        categories = [keyword.strip() for keyword in keywords.split(',') if keyword.strip()]

        return {
            'id': video_id,
            'title': title,
            'upload_date': upload_date,
            'uploader_id': uploader_id,
            'thumbnail': self._og_search_thumbnail(webpage),
            'categories': categories,
            'view_count': view_count,
            'like_count': like_count,
            'comment_count': comment_count,
            'age_limit': age_limit,
            'url': video_url,
        }
class MotherlessPaginatedIE(InfoExtractor):
    """Common pagination logic for Motherless listing pages.

    Subclasses provide ``_correct_path`` (and may override ``_correct_title``
    and ``_EXTRA_QUERY``). Listing pages are fetched on demand and every
    matching link is emitted as a ``MotherlessIE`` URL result.
    """
    # Extra query parameters sent together with ``page`` for pages after the first
    _EXTRA_QUERY = {}
    # Page size handed to OnDemandPagedList
    _PAGE_SIZE = 60

    def _correct_path(self, url, item_id):
        """Return the canonical listing URL for ``item_id``; subclasses must override."""
        raise NotImplementedError('This method must be implemented by subclasses')

    def _correct_title(self, title, /):
        """Strip everything from ' - Videos' onwards; return None for a falsy title."""
        if not title:
            return None
        return title.partition(' - Videos')[0]

    def _extract_entries(self, webpage, base):
        """Yield a URL result for each link in ``webpage`` that resolves to a video ID."""
        link_pattern = r'href="[^"]*(?P<href>/[A-F0-9]+)"\s+title="(?P<title>[^"]+)'
        for match in re.finditer(link_pattern, webpage):
            entry_url = urllib.parse.urljoin(base, match.group('href'))
            entry_id = MotherlessIE.get_temp_id(entry_url)
            if not entry_id:
                continue
            yield self.url_result(entry_url, MotherlessIE, entry_id, match.group('title'))

    def _real_extract(self, url):
        item_id = self._match_id(url)
        real_url = self._correct_path(url, item_id)
        first_page = self._download_webpage(real_url, item_id, 'Downloading page 1')

        def fetch_page(index):
            # Index 0 reuses the page that was already downloaded above
            if index:
                number = index + 1
                html = self._download_webpage(
                    real_url, item_id, note=f'Downloading page {number}',
                    query={'page': number, **self._EXTRA_QUERY})
            else:
                html = first_page
            yield from self._extract_entries(html, real_url)

        playlist_title = self._correct_title(self._html_extract_title(first_page))
        entries = OnDemandPagedList(fetch_page, self._PAGE_SIZE)
        return self.playlist_result(entries, item_id, playlist_title)
class MotherlessGroupIE(MotherlessPaginatedIE):
    """Playlist extractor for Motherless group pages (``/g``, ``/gv``, ``/gi``, ...)."""
    _VALID_URL = r'https?://(?:www\.)?motherless\.com/g[vifm]?/(?P<id>[a-z0-9_]+)/?(?:$|[#?])'
    _TESTS = [{
        'url': 'http://motherless.com/gv/movie_scenes',
        'info_dict': {
            'id': 'movie_scenes',
            'title': 'Movie Scenes',
        },
        'playlist_mincount': 540,
    }, {
        'url': 'http://motherless.com/g/sex_must_be_funny',
        'info_dict': {
            'id': 'sex_must_be_funny',
            'title': 'Sex must be funny',
        },
        'playlist_count': 0,
    }, {
        'url': 'https://motherless.com/gv/beautiful_cock',
        'info_dict': {
            'id': 'beautiful_cock',
            'title': 'Beautiful Cock',
        },
        'playlist_mincount': 371,
    }]

    def _correct_path(self, url, item_id):
        """Always point at the group's ``/gv/`` listing, whatever variant was given."""
        listing_path = f'/gv/{item_id}'
        return urllib.parse.urljoin(url, listing_path)
class MotherlessGalleryIE(MotherlessPaginatedIE):
    """Playlist extractor for Motherless gallery pages (``/G<hex id>`` variants)."""
    _VALID_URL = r'https?://(?:www\.)?motherless\.com/G[VIG]?(?P<id>[A-F0-9]+)/?(?:$|[#?])'
    _TESTS = [{
        'url': 'https://motherless.com/GV338999F',
        'info_dict': {
            'id': '338999F',
            'title': 'Random',
        },
        'playlist_mincount': 100,
    }, {
        'url': 'https://motherless.com/GVABD6213',
        'info_dict': {
            'id': 'ABD6213',
            'title': 'Cuties',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://motherless.com/GVBCF7622',
        'info_dict': {
            'id': 'BCF7622',
            'title': 'Vintage',
        },
        'playlist_count': 0,
    }, {
        'url': 'https://motherless.com/G035DE2F',
        'info_dict': {
            'id': '035DE2F',
            'title': 'General',
        },
        'playlist_mincount': 234,
    }]

    def _correct_title(self, title, /):
        """Drop the site suffix from the page title."""
        site_suffix = ' | MOTHERLESS.COM ™'
        return remove_end(title, site_suffix)

    def _correct_path(self, url, item_id):
        """Always point at the gallery's ``/GV`` listing, whatever variant was given."""
        listing_path = f'/GV{item_id}'
        return urllib.parse.urljoin(url, listing_path)
class MotherlessUploaderIE(MotherlessPaginatedIE):
    """Playlist extractor for a Motherless user's uploads (``/u/<name>``)."""
    _VALID_URL = r'https?://(?:www\.)?motherless\.com/u/(?P<id>\w+)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://motherless.com/u/Mrgo4hrs2023',
        'info_dict': {
            'id': 'Mrgo4hrs2023',
            'title': "Mrgo4hrs2023's Uploads",
        },
        'playlist_mincount': 32,
    }, {
        'url': 'https://motherless.com/u/Happy_couple?t=v',
        'info_dict': {
            'id': 'Happy_couple',
            'title': "Happy_couple's Uploads",
        },
        'playlist_mincount': 8,
    }]
    # Sent with every follow-up page request, matching the ``?t=v`` of the first page URL
    _EXTRA_QUERY = {'t': 'v'}

    def _correct_path(self, url, item_id):
        """Return the uploader listing URL with ``t=v`` appended."""
        listing_path = f'/u/{item_id}?t=v'
        return urllib.parse.urljoin(url, listing_path)

View File

@ -1,43 +0,0 @@
from .jixie import JixieBaseIE
class MoviewPlayIE(JixieBaseIE):
    """Extractor for moview.id play pages; delegates to the shared Jixie helper."""
    _VALID_URL = r'https?://www\.moview\.id/play/\d+/(?P<id>[\w-]+)'
    _TESTS = [
        {
            # drm hls, only use direct link
            'url': 'https://www.moview.id/play/174/Candy-Monster',
            'info_dict': {
                'id': '146182',
                'ext': 'mp4',
                'display_id': 'Candy-Monster',
                'uploader_id': 'Mo165qXUUf',
                'duration': 528.2,
                'title': 'Candy Monster',
                'description': 'Mengapa Candy Monster ingin mengambil permen Chloe?',
                'thumbnail': 'https://video.jixie.media/1034/146182/146182_1280x720.jpg',
            },
        }, {
            # non-drm hls
            'url': 'https://www.moview.id/play/75/Paris-Van-Java-Episode-16',
            'info_dict': {
                'id': '28210',
                'ext': 'mp4',
                'duration': 2595.666667,
                'display_id': 'Paris-Van-Java-Episode-16',
                'uploader_id': 'Mo165qXUUf',
                'thumbnail': 'https://video.jixie.media/1003/28210/28210_1280x720.jpg',
                'description': 'md5:2a5e18d98eef9b39d7895029cac96c63',
                'title': 'Paris Van Java Episode 16',
            },
        },
    ]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)

        # The Jixie video ID is embedded in the page as a ``video_id = "..."`` assignment
        jixie_id = self._search_regex(
            r'video_id\s*=\s*"(?P<video_id>[^"]+)', page, 'video_id')

        return self._extract_data_from_jixie_id(display_id, jixie_id, page)

View File

@ -1,38 +0,0 @@
from .common import InfoExtractor
class MoviezineIE(InfoExtractor):
    """Extractor for moviezine.se video pages, backed by the site's player.js endpoint."""
    _VALID_URL = r'https?://(?:www\.)?moviezine\.se/video/(?P<id>[^?#]+)'

    _TEST = {
        'url': 'http://www.moviezine.se/video/205866',
        'info_dict': {
            'id': '205866',
            'ext': 'mp4',
            'title': 'Oculus - Trailer 1',
            'description': 'md5:40cc6790fc81d931850ca9249b40e8a4',
            'thumbnail': r're:http://.*\.jpg',
        },
    }

    def _real_extract(self, url):
        video_id = self._match_valid_url(url).group('id')

        page = self._download_webpage(url, video_id)
        player_js = self._download_webpage(
            f'http://www.moviezine.se/api/player.js?video={video_id}',
            video_id, 'Downloading js api player')

        # The file URL, title and image are all read from the player script
        media_url = self._html_search_regex(r'file: "(.+?)",', player_js, 'file')
        title = self._search_regex(r'title: "(.+?)",', player_js, 'title')
        thumbnail = self._search_regex(r'image: "(.+?)",', player_js, 'image')

        # The description is the only field taken from the HTML page itself
        description = self._og_search_description(page)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': [{
                'format_id': 'sd',
                'url': media_url,
                'quality': 0,
                'ext': 'mp4',
            }],
            'description': description,
        }

View File

@ -1,174 +0,0 @@
from .common import InfoExtractor
from ..utils import (
date_from_str,
format_field,
try_get,
unified_strdate,
)
class MusicdexBaseIE(InfoExtractor):
    """Shared helper for the Musicdex extractors."""

    def _return_info(self, track_json, album_json, video_id):
        """Build a track info dict from a track JSON object and its album JSON object.

        The result is always attributed to ``MusicdexSongIE`` through the
        ``extractor_key`` / ``extractor`` fields.
        """
        site_url_template = 'https://www.musicdex.org/%s'

        def names_of(items):
            # ``items`` may be missing or None in the API response
            return [item.get('name') for item in items or []]

        track_name = track_json.get('name')
        return {
            'id': str(video_id),
            'title': track_name,
            'track': track_name,
            'description': track_json.get('description'),
            'track_number': track_json.get('number'),
            'url': format_field(track_json, 'url', site_url_template),
            'duration': track_json.get('duration'),
            'genres': names_of(track_json.get('genres')),
            'like_count': track_json.get('likes_count'),
            'view_count': track_json.get('plays'),
            'artists': names_of(track_json.get('artists')),
            'album_artists': names_of(album_json.get('artists')),
            'thumbnail': format_field(album_json, 'image', site_url_template),
            'album': album_json.get('name'),
            'release_year': try_get(album_json, lambda x: date_from_str(unified_strdate(x['release_date'])).year),
            'extractor_key': MusicdexSongIE.ie_key(),
            'extractor': 'MusicdexSong',
        }
class MusicdexSongIE(MusicdexBaseIE):
    """Extractor for a single Musicdex track page."""
    _VALID_URL = r'https?://(?:www\.)?musicdex\.org/track/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.musicdex.org/track/306/dual-existence',
        'info_dict': {
            'id': '306',
            'ext': 'mp3',
            'title': 'dual existence',
            'description': '#NIPPONSEI @ IRC.RIZON.NET',
            'track': 'dual existence',
            'track_number': 1,
            'duration': 266000,
            'genres': ['Anime'],
            'like_count': int,
            'view_count': int,
            'artists': ['fripSide'],
            'album_artists': ['fripSide'],
            'thumbnail': 'https://www.musicdex.org/storage/album/9iDIam1DHTVqUG4UclFIEq1WAFGXfPW4y0TtZa91.png',
            'album': 'To Aru Kagaku no Railgun T OP2 Single - dual existence',
            'release_year': 2020,
        },
        'params': {'skip_download': True},
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        api_url = f'https://www.musicdex.org/secure/tracks/{video_id}?defaultRelations=true'
        track = self._download_json(api_url, video_id)['track']
        # A track without album data still yields a result, with album fields empty
        album = track.get('album') or {}
        return self._return_info(track, album, video_id)
class MusicdexAlbumIE(MusicdexBaseIE):
    """Playlist extractor for a Musicdex album page."""
    _VALID_URL = r'https?://(?:www\.)?musicdex\.org/album/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.musicdex.org/album/56/tenmon-and-eiichiro-yanagi-minori/ef-a-tale-of-memories-original-soundtrack-2-fortissimo',
        'playlist_mincount': 28,
        'info_dict': {
            'id': '56',
            'genres': ['OST'],
            'view_count': int,
            'artists': ['TENMON & Eiichiro Yanagi / minori'],
            'title': 'ef - a tale of memories Original Soundtrack 2 ~fortissimo~',
            'release_year': 2008,
            'thumbnail': 'https://www.musicdex.org/storage/album/2rSHkyYBYfB7sbvElpEyTMcUn6toY7AohOgJuDlE.jpg',
        },
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        api_url = f'https://www.musicdex.org/secure/albums/{playlist_id}?defaultRelations=true'
        album = self._download_json(api_url, playlist_id)['album']

        # Tracks without an ID are skipped
        entries = []
        for track in album.get('tracks') or []:
            if track.get('id'):
                entries.append(self._return_info(track, album, track['id']))

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': album.get('name'),
            'description': album.get('description'),
            'genres': [genre.get('name') for genre in album.get('genres') or []],
            'view_count': album.get('plays'),
            'artists': [artist.get('name') for artist in album.get('artists') or []],
            'thumbnail': format_field(album, 'image', 'https://www.musicdex.org/%s'),
            'release_year': try_get(album, lambda x: date_from_str(unified_strdate(x['release_date'])).year),
            'entries': entries,
        }
class MusicdexPageIE(MusicdexBaseIE):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
    """Base for Musicdex extractors that walk a paginated API; subclasses set ``_API_URL``."""

    def _entries(self, playlist_id):
        """Yield every item of every API page, following ``next_page_url`` until it is empty."""
        page_url = self._API_URL % playlist_id
        while page_url:
            pagination = self._download_json(page_url, playlist_id)['pagination']
            yield from pagination.get('data') or []
            page_url = pagination.get('next_page_url')
class MusicdexArtistIE(MusicdexPageIE):
    """Playlist extractor for a Musicdex artist page (all tracks of all albums)."""
    _VALID_URL = r'https?://(?:www\.)?musicdex\.org/artist/(?P<id>\d+)'
    _API_URL = 'https://www.musicdex.org/secure/artists/%s/albums?page=1'

    _TESTS = [{
        'url': 'https://www.musicdex.org/artist/11/fripside',
        'playlist_mincount': 28,
        'info_dict': {
            'id': '11',
            'view_count': int,
            'title': 'fripSide',
            'thumbnail': 'https://www.musicdex.org/storage/artist/ZmOz0lN2vsweegB660em3xWffCjLPmTQHqJls5Xx.jpg',
        },
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        artist = self._download_json(
            f'https://www.musicdex.org/secure/artists/{playlist_id}', playlist_id)['artist']

        # Flatten album -> tracks, skipping tracks that carry no ID
        entries = [
            self._return_info(track, album, track['id'])
            for album in self._entries(playlist_id)
            for track in album.get('tracks') or []
            if track.get('id')
        ]

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': artist.get('name'),
            'view_count': artist.get('plays'),
            'thumbnail': format_field(artist, 'image_small', 'https://www.musicdex.org/%s'),
            'entries': entries,
        }
class MusicdexPlaylistIE(MusicdexPageIE):
    """Playlist extractor for a Musicdex user playlist page."""
    _VALID_URL = r'https?://(?:www\.)?musicdex\.org/playlist/(?P<id>\d+)'
    _API_URL = 'https://www.musicdex.org/secure/playlists/%s/tracks?perPage=10000&page=1'

    _TESTS = [{
        'url': 'https://www.musicdex.org/playlist/9/test',
        'playlist_mincount': 73,
        'info_dict': {
            'id': '9',
            'view_count': int,
            'title': 'Test',
            'thumbnail': 'https://www.musicdex.org/storage/album/jXATI79f0IbQ2sgsKYOYRCW3zRwF3XsfHhzITCuJ.jpg',
            'description': 'Test 123 123 21312 32121321321321312',
        },
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        playlist = self._download_json(
            f'https://www.musicdex.org/secure/playlists/{playlist_id}', playlist_id)['playlist']

        # Each track carries its own album object; tracks without an ID are skipped
        entries = []
        for track in self._entries(playlist_id):
            if not track.get('id'):
                continue
            entries.append(self._return_info(track, track.get('album') or {}, track['id']))

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist.get('name'),
            'description': playlist.get('description'),
            'view_count': playlist.get('plays'),
            'thumbnail': format_field(playlist, 'image', 'https://www.musicdex.org/%s'),
            'entries': entries,
        }

View File

@ -1,64 +1,4 @@
from .common import InfoExtractor
from .fox import FOXIE
from ..utils import (
smuggle_url,
url_basename,
)
class NationalGeographicVideoIE(InfoExtractor):
    """Extractor for video.nationalgeographic.com pages; hands off to ThePlatform."""
    IE_NAME = 'natgeo:video'
    _VALID_URL = r'https?://video\.nationalgeographic\.com/.*?'

    _TESTS = [
        {
            'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
            'md5': '730855d559abbad6b42c2be1fa584917',
            'info_dict': {
                'id': '0000014b-70a1-dd8c-af7f-f7b559330001',
                'ext': 'mp4',
                'title': 'Mating Crabs Busted by Sharks',
                'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
                'timestamp': 1423523799,
                'upload_date': '20150209',
                'uploader': 'NAGS',
            },
            'add_ie': ['ThePlatform'],
            'skip': 'Redirects to main page',
        },
        {
            'url': 'http://video.nationalgeographic.com/wild/when-sharks-attack/the-real-jaws',
            'md5': '6a3105eb448c070503b3105fb9b320b5',
            'info_dict': {
                'id': 'ngc-I0IauNSWznb_UV008GxSbwY35BZvgi2e',
                'ext': 'mp4',
                'title': 'The Real Jaws',
                'description': 'md5:8d3e09d9d53a85cd397b4b21b2c77be6',
                'timestamp': 1433772632,
                'upload_date': '20150608',
                'uploader': 'NAGS',
            },
            'add_ie': ['ThePlatform'],
            'skip': 'Redirects to main page',
        },
    ]

    def _real_extract(self, url):
        # The URL pattern has no ID group, so the last path component names the download
        page_name = url_basename(url)
        page = self._download_webpage(url, page_name)

        guid = self._search_regex(
            r'id="(?:videoPlayer|player-container)"[^>]+data-guid="([^"]+)"',
            page, 'guid')

        platform_url = smuggle_url(
            f'http://link.theplatform.com/s/ngs/media/guid/2423130747/{guid}?mbr=true',
            {'force_smil_url': True})

        return {
            '_type': 'url_transparent',
            'ie_key': 'ThePlatform',
            'url': platform_url,
            'id': guid,
        }
class NationalGeographicTVIE(FOXIE): # XXX: Do not subclass from concrete IE

View File

@ -13,11 +13,9 @@ from ..utils import (
dict_get,
int_or_none,
join_nonempty,
merge_dicts,
parse_iso8601,
traverse_obj,
try_get,
unified_timestamp,
update_url_query,
url_or_none,
)
@ -284,142 +282,3 @@ class NaverLiveIE(NaverBaseIE):
}), get_all=False),
'is_live': True,
}
class NaverNowIE(NaverBaseIE):
    """Extractor for now.naver.com shows: single replays, single highlights, or the whole show."""
    IE_NAME = 'navernow'
    _VALID_URL = r'https?://now\.naver\.com/s/now\.(?P<id>\w+)'
    _API_URL = 'https://apis.naver.com/now_web/oldnow_web/v4'
    _TESTS = [{
        'url': 'https://now.naver.com/s/now.4759?shareReplayId=26331132#replay=',
        'md5': 'e05854162c21c221481de16b2944a0bc',
        'info_dict': {
            'id': '4759-26331132',
            'title': '아이키X노제\r\n💖꽁냥꽁냥💖(1)',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1650369600,
            'upload_date': '20220419',
            'uploader_id': 'now',
            'view_count': int,
            'uploader_url': 'https://now.naver.com/show/4759',
            'uploader': '아이키의 떰즈업',
        },
        'params': {
            'noplaylist': True,
        },
    }, {
        'url': 'https://now.naver.com/s/now.4759?shareHightlight=26601461#highlight=',
        'md5': '9f6118e398aa0f22b2152f554ea7851b',
        'info_dict': {
            'id': '4759-26601461',
            'title': '아이키: 나 리정한테 흔들렸어,,, 질투 폭발하는 노제 여보😾 [아이키의 떰즈업]ㅣ네이버 NOW.',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20220504',
            'timestamp': 1651648311,
            'uploader_id': 'now',
            'view_count': int,
            'uploader_url': 'https://now.naver.com/show/4759',
            'uploader': '아이키의 떰즈업',
        },
        'params': {
            'noplaylist': True,
        },
    }, {
        'url': 'https://now.naver.com/s/now.4759',
        'info_dict': {
            'id': '4759',
            'title': '아이키의 떰즈업',
        },
        'playlist_mincount': 101,
    }, {
        'url': 'https://now.naver.com/s/now.4759?shareReplayId=26331132#replay',
        'info_dict': {
            'id': '4759',
            'title': '아이키의 떰즈업',
        },
        'playlist_mincount': 101,
    }, {
        'url': 'https://now.naver.com/s/now.4759?shareHightlight=26601461#highlight=',
        'info_dict': {
            'id': '4759',
            'title': '아이키의 떰즈업',
        },
        'playlist_mincount': 101,
    }, {
        'url': 'https://now.naver.com/s/now.kihyunplay?shareReplayId=30573291#replay',
        'only_matching': True,
    }]

    def _extract_replay(self, show_id, replay_id):
        """Return the info dict for one replay (VOD) of a show."""
        replay_api = f'{self._API_URL}/shows/now.{show_id}/vod/{replay_id}'
        vod_info = self._download_json(replay_api, replay_id)
        # A separate request supplies the key needed by the shared video-info helper
        in_key = self._download_json(f'{replay_api}/inkey', replay_id)['inKey']

        metadata = {
            'id': f'{show_id}-{replay_id}',
            'title': traverse_obj(vod_info, ('episode', 'title')),
            'timestamp': unified_timestamp(traverse_obj(vod_info, ('episode', 'start_time'))),
            'thumbnail': vod_info.get('thumbnail_image_url'),
        }
        return merge_dicts(metadata, self._extract_video_info(replay_id, vod_info['video_id'], in_key))

    def _extract_show_replays(self, show_id):
        """Yield info dicts for all replays of a show, paging until a short page is seen."""
        page_size = 15
        for page in itertools.count(1):
            result = self._download_json(
                f'{self._API_URL}/vod-shows/now.{show_id}', show_id,
                query={'page': page, 'page_size': page_size},
                note=f'Downloading JSON vod list for show {show_id} - page {page}',
            )['response']['result']
            vods = result.get('vod_list') or []

            for vod in vods:
                yield self._extract_replay(show_id, vod['id'])

            # Fewer items than requested means this was the last page
            if len(vods) < page_size:
                return

    def _extract_show_highlights(self, show_id, highlight_id=None):
        """Yield info dicts for a show's highlights, optionally only the one matching ``highlight_id``."""
        page_size = 10
        for page in itertools.count(1):
            response = self._download_json(
                f'{self._API_URL}/shows/now.{show_id}/highlights/videos/', show_id,
                query={'page': page, 'page_size': page_size},
                note=f'Downloading JSON highlights for show {show_id} - page {page}')
            clips = response.get('results') or []

            for clip in clips:
                wanted = not highlight_id or clip.get('clip_no') == int(highlight_id)
                if not wanted:
                    continue

                metadata = {
                    'id': f'{show_id}-{clip["clip_no"]}',
                    'title': clip.get('title'),
                    'timestamp': unified_timestamp(clip.get('regdate')),
                    'thumbnail': clip.get('thumbnail_url'),
                }
                yield merge_dicts(metadata, self._extract_video_info(
                    clip['clip_no'], clip['video_id'], clip['video_inkey']))

            # Fewer items than requested means this was the last page
            if len(clips) < page_size:
                return

    def _extract_highlight(self, show_id, highlight_id):
        """Return the first highlight matching ``highlight_id`` or raise ExtractorError."""
        try:
            return next(self._extract_show_highlights(show_id, highlight_id))
        except StopIteration:
            raise ExtractorError(f'Unable to find highlight {highlight_id} for show {show_id}')

    def _real_extract(self, url):
        show_id = self._match_id(url)
        params = urllib.parse.parse_qs(urllib.parse.urlparse(url).query)

        # A shared highlight is checked before a shared replay; either returns a single video
        if not self._yes_playlist(show_id, params.get('shareHightlight')):
            return self._extract_highlight(show_id, params['shareHightlight'][0])
        if not self._yes_playlist(show_id, params.get('shareReplayId')):
            return self._extract_replay(show_id, params['shareReplayId'][0])

        show_info = self._download_json(
            f'{self._API_URL}/shows/now.{show_id}/', show_id,
            note=f'Downloading JSON vod list for show {show_id}')

        # The full playlist is every replay followed by every highlight
        all_entries = itertools.chain(
            self._extract_show_replays(show_id), self._extract_show_highlights(show_id))
        return self.playlist_result(all_entries, show_id, show_info.get('title'))

View File

@ -1,38 +0,0 @@
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import parse_iso8601, url_or_none
from ..utils.traversal import traverse_obj
class NerdCubedFeedIE(InfoExtractor):
    """Playlist extractor for the nerdcubed.co.uk front page, built from its videos JSON feed."""
    _VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/?(?:$|[#?])'
    _TEST = {
        'url': 'http://www.nerdcubed.co.uk/',
        'info_dict': {
            'id': 'nerdcubed-feed',
            'title': 'nerdcubed.co.uk feed',
        },
        'playlist_mincount': 5500,
    }

    def _extract_video(self, feed_entry):
        """Turn one feed entry into a url_transparent YouTube result carrying the feed metadata."""
        metadata = traverse_obj(feed_entry, {
            'id': ('id', {str}),
            'title': ('title', {str}),
            'description': ('description', {str}),
            'timestamp': ('publishedAt', {parse_iso8601}),
            'channel': ('source', 'name', {str}),
            'channel_id': ('source', 'id', {str}),
            'channel_url': ('source', 'url', {str}),
            'thumbnail': ('thumbnail', 'source', {url_or_none}),
        })
        watch_url = f'https://www.youtube.com/watch?v={feed_entry["id"]}'
        return self.url_result(watch_url, YoutubeIE, **metadata, url_transparent=True)

    def _real_extract(self, url):
        video_id = 'nerdcubed-feed'
        feed = self._download_json('https://www.nerdcubed.co.uk/_/cdn/videos.json', video_id)

        # Only feed items with a truthy 'id' are kept
        feed_entries = traverse_obj(feed, ('videos', lambda _, v: v['id']))
        return self.playlist_result(
            (self._extract_video(entry) for entry in feed_entries),
            video_id, 'nerdcubed.co.uk feed')

View File

@ -1,281 +0,0 @@
import itertools
from .common import InfoExtractor, SearchInfoExtractor
from .dailymotion import DailymotionIE
from ..utils import smuggle_url, traverse_obj
class NetverseBaseIE(InfoExtractor):
    """Shared API and comment helpers for the Netverse extractors."""
    # Maps the URL/page type to the API endpoint name
    _ENDPOINTS = {
        'watch': 'watchvideo',
        'video': 'watchvideo',
        'webseries': 'webseries',
        'season': 'webseason_videos',
    }

    def _call_api(self, slug, endpoint, query=None, season_id='', display_id=None):
        """Download JSON from the Netverse media API.

        @param slug         Item slug placed in the API path
        @param endpoint     Key of ``_ENDPOINTS`` (raises KeyError if unknown)
        @param query        Optional dict of query parameters
        @param season_id    Optional season ID appended to the API path
        @param display_id   ID shown in log output; defaults to ``slug``
        """
        # ``query`` defaults to None instead of a shared mutable ``{}``
        return self._download_json(
            f'https://api.netverse.id/medias/api/v2/{self._ENDPOINTS[endpoint]}/{slug}/{season_id}',
            display_id or slug, query=query or {})

    def _get_comments(self, video_id):
        """Yield comment dicts page by page until the reported last page is reached."""
        last_page_number = None
        for i in itertools.count(1):
            # A failed download is non-fatal; it is treated as an empty page
            comment_data = self._download_json(
                f'https://api.netverse.id/mediadetails/api/v3/videos/comments/{video_id}',
                video_id, data=b'', fatal=False, query={'page': i},
                note=f'Downloading JSON comment metadata page {i}') or {}
            yield from traverse_obj(comment_data, ('response', 'comments', 'data', ..., {
                'id': '_id',
                'text': 'comment',
                'author_id': 'customer_id',
                'author': ('customer', 'name'),
                'author_thumbnail': ('customer', 'profile_picture'),
            }))

            # Remember the first page count the API reports
            if not last_page_number:
                last_page_number = traverse_obj(comment_data, ('response', 'comments', 'last_page'))
            # An unknown page count counts as 0, so the loop stops after this page
            if i >= (last_page_number or 0):
                break
class NetverseIE(NetverseBaseIE):
    """Extractor for netverse.id ``/watch`` and ``/video`` pages; playback is delegated to Dailymotion."""
    _VALID_URL = r'https?://(?:\w+\.)?netverse\.id/(?P<type>watch|video)/(?P<display_id>[^/?#&]+)'
    _TESTS = [{
        # Watch video
        'url': 'https://www.netverse.id/watch/waktu-indonesia-bercanda-edisi-spesial-lebaran-2016',
        'info_dict': {
            'id': 'k4yhqUwINAGtmHx3NkL',
            'title': 'Waktu Indonesia Bercanda - Edisi Spesial Lebaran 2016',
            'ext': 'mp4',
            'season': 'Season 2016',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
            'episode_number': 22,
            'episode': 'Episode 22',
            'uploader_id': 'x2ir3vq',
            'age_limit': 0,
            'tags': [],
            'view_count': int,
            'display_id': 'waktu-indonesia-bercanda-edisi-spesial-lebaran-2016',
            'duration': 2990,
            'upload_date': '20210722',
            'timestamp': 1626919804,
            'like_count': int,
            'uploader': 'Net Prime',
        },
    }, {
        # series
        'url': 'https://www.netverse.id/watch/jadoo-seorang-model',
        'info_dict': {
            'id': 'x88izwc',
            'title': 'Jadoo Seorang Model',
            'ext': 'mp4',
            'season': 'Season 2',
            'description': 'md5:8a74f70812cca267e19ee0635f0af835',
            'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
            'episode_number': 2,
            'episode': 'Episode 2',
            'view_count': int,
            'like_count': int,
            'display_id': 'jadoo-seorang-model',
            'uploader_id': 'x2ir3vq',
            'duration': 635,
            'timestamp': 1646372927,
            'tags': ['PG069497-hellojadooseason2eps2'],
            'upload_date': '20220304',
            'uploader': 'Net Prime',
            'age_limit': 0,
        },
        'skip': 'video get Geo-blocked for some country',
    }, {
        # non www host
        'url': 'https://netverse.id/watch/tetangga-baru',
        'info_dict': {
            'id': 'k4CNGz7V0HJ7vfwZbXy',
            'ext': 'mp4',
            'title': 'Tetangga Baru',
            'season': 'Season 1',
            'description': 'md5:23fcf70e97d461d3029d25d59b2ccfb9',
            'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
            'episode_number': 1,
            'episode': 'Episode 1',
            'timestamp': 1624538169,
            'view_count': int,
            'upload_date': '20210624',
            'age_limit': 0,
            'uploader_id': 'x2ir3vq',
            'like_count': int,
            'uploader': 'Net Prime',
            'tags': ['PG008534', 'tetangga', 'Baru'],
            'display_id': 'tetangga-baru',
            'duration': 1406,
        },
    }, {
        # /video url
        'url': 'https://www.netverse.id/video/pg067482-hellojadoo-season1',
        'info_dict': {
            'id': 'x887jzz',
            'ext': 'mp4',
            'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
            'season': 'Season 1',
            'episode_number': 1,
            'description': 'md5:d4f627b3e7a3f9acdc55f6cdd5ea41d5',
            'title': 'Namaku Choi Jadoo',
            'episode': 'Episode 1',
            'age_limit': 0,
            'like_count': int,
            'view_count': int,
            'tags': ['PG067482', 'PG067482-HelloJadoo-season1'],
            'duration': 780,
            'display_id': 'pg067482-hellojadoo-season1',
            'uploader_id': 'x2ir3vq',
            'uploader': 'Net Prime',
            'timestamp': 1645764984,
            'upload_date': '20220225',
        },
        'skip': 'This video get Geo-blocked for some country',
    }, {
        # video with comments
        'url': 'https://netverse.id/video/episode-1-season-2016-ok-food',
        'info_dict': {
            'id': 'k6hetBPiQMljSxxvAy7',
            'ext': 'mp4',
            'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
            'display_id': 'episode-1-season-2016-ok-food',
            'like_count': int,
            'description': '',
            'duration': 1471,
            'age_limit': 0,
            'timestamp': 1642405848,
            'episode_number': 1,
            'season': 'Season 2016',
            'uploader_id': 'x2ir3vq',
            'title': 'Episode 1 - Season 2016 - Ok Food',
            'upload_date': '20220117',
            'tags': [],
            'view_count': int,
            'episode': 'Episode 1',
            'uploader': 'Net Prime',
            'comment_count': int,
        },
        'params': {
            'getcomments': True,
        },
    }, {
        # video with multiple page comment
        'url': 'https://netverse.id/video/match-island-eps-1-fix',
        'info_dict': {
            'id': 'x8aznjc',
            'ext': 'mp4',
            'like_count': int,
            'tags': ['Match-Island', 'Pd00111'],
            'display_id': 'match-island-eps-1-fix',
            'view_count': int,
            'episode': 'Episode 1',
            'uploader': 'Net Prime',
            'duration': 4070,
            'timestamp': 1653068165,
            'description': 'md5:e9cf3b480ad18e9c33b999e3494f223f',
            'age_limit': 0,
            'title': 'Welcome To Match Island',
            'upload_date': '20220520',
            'episode_number': 1,
            'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
            'uploader_id': 'x2ir3vq',
            'season': 'Season 1',
            'comment_count': int,
        },
        'params': {
            'getcomments': True,
        },
    }]

    def _real_extract(self, url):
        """Return a url_transparent result pointing at the Dailymotion URL given by the API."""
        display_id, sites_type = self._match_valid_url(url).group('display_id', 'type')
        program_json = self._call_api(display_id, sites_type)
        videos = program_json['response']['videos']

        return {
            '_type': 'url_transparent',
            'ie_key': DailymotionIE.ie_key(),
            # The embedder query is smuggled along with the Dailymotion URL
            'url': smuggle_url(videos['dailymotion_url'], {'query': {'embedder': 'https://www.netverse.id'}}),
            'display_id': display_id,
            'title': videos.get('title'),
            'season': videos.get('season_name'),
            'thumbnail': traverse_obj(videos, ('program_detail', 'thumbnail_image')),
            'description': traverse_obj(videos, ('program_detail', 'description')),
            'episode_number': videos.get('episode_order'),
            '__post_extractor': self.extract_comments(display_id),
        }
class NetversePlaylistIE(NetverseBaseIE):
    """Playlist extractor for netverse.id ``/webseries`` pages (all seasons, all pages)."""
    _VALID_URL = r'https?://(?:\w+\.)?netverse\.id/(?P<type>webseries)/(?P<display_id>[^/?#&]+)'
    _TESTS = [{
        # multiple season
        'url': 'https://netverse.id/webseries/tetangga-masa-gitu',
        'info_dict': {
            'id': 'tetangga-masa-gitu',
            'title': 'Tetangga Masa Gitu',
        },
        'playlist_count': 519,
    }, {
        # single season
        'url': 'https://netverse.id/webseries/kelas-internasional',
        'info_dict': {
            'id': 'kelas-internasional',
            'title': 'Kelas Internasional',
        },
        'playlist_count': 203,
    }]

    def parse_playlist(self, json_data, playlist_id):
        """Yield a ``NetverseIE`` URL result for every episode of every season.

        @param json_data    The ``response`` object of the webseries API call
        @param playlist_id  ID shown in log output for the season requests
        """
        # The season endpoint is addressed through the slug of any related video.
        # get_all=False yields the first slug or None, so a series without related
        # data no longer fails with a bare IndexError from ``[...][0]``.
        slug_sample = traverse_obj(json_data, ('related', 'data', ..., 'slug'), get_all=False)
        if slug_sample is None:
            self.report_warning('Unable to find a video slug to request the season lists with')
            return

        for season in traverse_obj(json_data, ('seasons', ..., 'id')):
            # The first request is only used to learn how many pages the season has
            # NOTE(review): page 1 is then requested again below; presumably the
            # un-paged response equals page 1 and could be reused - confirm
            playlist_json = self._call_api(
                slug_sample, 'season', display_id=playlist_id, season_id=season)

            for current_page in range(playlist_json['response']['season_list']['last_page']):
                playlist_json = self._call_api(slug_sample, 'season', query={'page': current_page + 1},
                                               season_id=season, display_id=playlist_id)
                for slug in traverse_obj(playlist_json, ('response', ..., 'data', ..., 'slug')):
                    yield self.url_result(f'https://www.netverse.id/video/{slug}', NetverseIE)

    def _real_extract(self, url):
        playlist_id, sites_type = self._match_valid_url(url).group('display_id', 'type')
        playlist_data = self._call_api(playlist_id, sites_type)

        return self.playlist_result(
            self.parse_playlist(playlist_data['response'], playlist_id),
            traverse_obj(playlist_data, ('response', 'webseries_info', 'slug')),
            traverse_obj(playlist_data, ('response', 'webseries_info', 'title')))
class NetverseSearchIE(SearchInfoExtractor):
    """Search extractor for netverse.id, reachable via the ``netsearch`` prefix."""
    _SEARCH_KEY = 'netsearch'

    _TESTS = [{
        'url': 'netsearch10:tetangga',
        'info_dict': {
            'id': 'tetangga',
            'title': 'tetangga',
        },
        'playlist_count': 10,
    }]

    def _search_results(self, query):
        """Yield ``NetverseIE`` URL results page by page until the last page or an empty page."""
        final_page = None
        for page_number in itertools.count(1):
            response = self._download_json(
                'https://api.netverse.id/search/elastic/search', query,
                query={'q': query, 'page': page_number}, note=f'Downloading page {page_number}')

            hits = traverse_obj(response, ('response', 'data', ...))
            yield from (
                self.url_result(f'https://netverse.id/video/{hit["slug"]}', NetverseIE)
                for hit in hits)

            # Keep the first page count the API reports; unknown counts as 0
            final_page = final_page or traverse_obj(response, ('response', 'lastpage'))
            if not hits or page_number >= (final_page or 0):
                break

View File

@ -1,201 +0,0 @@
import functools
from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
UserNotLive,
filter_dict,
int_or_none,
parse_iso8601,
str_or_none,
url_or_none,
)
from ..utils.traversal import traverse_obj
class NuumBaseIE(InfoExtractor):
    """Shared API and metadata helpers for the nuum.ru extractors."""

    def _call_api(self, path, video_id, description, query=None):
        """Download JSON from the nuum.ru v2 API and return its ``result`` member.

        @param path         API path appended to ``https://nuum.ru/api/v2/``
        @param video_id     ID shown in log output
        @param description  Human-readable name used in the download/error notes
        @param query        Optional dict of query parameters
        @raises ExtractorError if the response carries a truthy ``error`` member
        """
        # ``query`` defaults to None instead of a shared mutable ``{}``
        response = self._download_json(
            f'https://nuum.ru/api/v2/{path}', video_id, query=query or {},
            note=f'Downloading {description} metadata',
            errnote=f'Unable to download {description} metadata')
        if error := response.get('error'):
            raise ExtractorError(f'API returned error: {error!r}')
        return response['result']

    def _get_channel_info(self, channel_name):
        """Return the public broadcast info for a channel."""
        return self._call_api(
            'broadcasts/public', video_id=channel_name, description='channel',
            query={
                'with_extra': 'true',
                'channel_name': channel_name,
                'with_deleted': 'true',
            })

    def _parse_video_data(self, container, extract_formats=True):
        """Convert a media container object into an info dict.

        @param container        Media container JSON; must contain ``media_container_id``
        @param extract_formats  When false, no m3u8 is downloaded and the
                                formats/subtitles entries are left out of the result
        """
        # Only the first stream and its first media entry are considered
        stream = traverse_obj(container, ('media_container_streams', 0, {dict})) or {}
        media = traverse_obj(stream, ('stream_media', 0, {dict})) or {}
        # The archive URL is preferred over the plain media URL
        media_url = traverse_obj(media, (
            'media_meta', ('media_archive_url', 'media_url'), {url_or_none}), get_all=False)

        video_id = str(container['media_container_id'])
        is_live = media.get('media_status') == 'RUNNING'

        formats, subtitles = None, None
        headers = {'Referer': 'https://nuum.ru/'}
        if extract_formats:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                media_url, video_id, 'mp4', live=is_live, headers=headers)

        # NOTE(review): filter_dict presumably drops the None-valued entries
        # (e.g. formats/subtitles when extract_formats is false) - confirm
        return filter_dict({
            'id': video_id,
            'is_live': is_live,
            'formats': formats,
            'subtitles': subtitles,
            'http_headers': headers,
            **traverse_obj(container, {
                'title': ('media_container_name', {str}),
                'description': ('media_container_description', {str}),
                'timestamp': ('created_at', {parse_iso8601}),
                'channel': ('media_container_channel', 'channel_name', {str}),
                'channel_id': ('media_container_channel', 'channel_id', {str_or_none}),
            }),
            **traverse_obj(stream, {
                'view_count': ('stream_total_viewers', {int_or_none}),
                'concurrent_view_count': ('stream_current_viewers', {int_or_none}),
            }),
            **traverse_obj(media, {
                'duration': ('media_duration', {int_or_none}),
                'thumbnail': ('media_meta', ('media_preview_archive_url', 'media_preview_url'), {url_or_none}),
            }, get_all=False),
        })
class NuumMediaIE(NuumBaseIE):
    """Extractor for individual nuum.ru streams, videos and clips."""
    IE_NAME = 'nuum:media'
    _VALID_URL = r'https?://nuum\.ru/(?:streams|videos|clips)/(?P<id>[\d]+)'
    _TESTS = [{
        'url': 'https://nuum.ru/streams/1592713-7-days-to-die',
        'only_matching': True,
    }, {
        'url': 'https://nuum.ru/videos/1567547-toxi-hurtz',
        'md5': 'ce28837a5bbffe6952d7bfd3d39811b0',
        'info_dict': {
            'id': '1567547',
            'ext': 'mp4',
            'title': 'Toxi$ - Hurtz',
            'description': '',
            'timestamp': 1702631651,
            'upload_date': '20231215',
            'thumbnail': r're:^https?://.+\.jpg',
            'view_count': int,
            'concurrent_view_count': int,
            'channel_id': '6911',
            'channel': 'toxis',
            'duration': 116,
        },
    }, {
        'url': 'https://nuum.ru/clips/1552564-pro-misu',
        'md5': 'b248ae1565b1e55433188f11beeb0ca1',
        'info_dict': {
            'id': '1552564',
            'ext': 'mp4',
            'title': 'Про Мису 🙃',
            'timestamp': 1701971828,
            'upload_date': '20231207',
            'thumbnail': r're:^https?://.+\.jpg',
            'view_count': int,
            'concurrent_view_count': int,
            'channel_id': '3320',
            'channel': 'Misalelik',
            'duration': 41,
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # The media container for this ID carries everything needed for the info dict
        container = self._call_api(f'media-containers/{video_id}', video_id, 'media')
        return self._parse_video_data(container)
class NuumLiveIE(NuumBaseIE):
    """Extractor for the current live stream of a nuum.ru channel."""
    IE_NAME = 'nuum:live'
    _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/?(?:$|[#?])'
    _TESTS = [{
        'url': 'https://nuum.ru/channel/mts_live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        channel = self._match_id(url)
        channel_info = self._get_channel_info(channel)

        # Only an explicit False counts as "not live"; a missing flag falls through
        live_flag = traverse_obj(channel_info, ('channel', 'channel_is_live'))
        if live_flag is False:
            raise UserNotLive(video_id=channel)

        info = self._parse_video_data(channel_info['media_container'])

        # Attribute the result to the media extractor; keys from ``info`` take precedence
        result = {
            'webpage_url': f'https://nuum.ru/streams/{info["id"]}',
            'extractor_key': NuumMediaIE.ie_key(),
            'extractor': NuumMediaIE.IE_NAME,
        }
        result.update(info)
        return result
class NuumTabIE(NuumBaseIE):
    """Playlist extractor for the streams/videos/clips tabs of a nuum.ru channel."""
    IE_NAME = 'nuum:tab'
    _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/(?P<type>streams|videos|clips)'
    _TESTS = [{
        'url': 'https://nuum.ru/channel/dankon_/clips',
        'info_dict': {
            'id': 'dankon__clips',
            'title': 'Dankon_',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'https://nuum.ru/channel/dankon_/videos',
        'info_dict': {
            'id': 'dankon__videos',
            'title': 'Dankon_',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://nuum.ru/channel/dankon_/streams',
        'info_dict': {
            'id': 'dankon__streams',
            'title': 'Dankon_',
        },
        'playlist_mincount': 1,
    }]

    # Number of containers requested per API page
    _PAGE_SIZE = 50

    def _fetch_page(self, channel_id, tab_type, tab_id, page):
        """Yield URL results for one zero-based ``page`` of the given channel tab."""
        # Container types requested from the API for each tab
        CONTAINER_TYPES = {
            'clips': ['SHORT_VIDEO', 'REVIEW_VIDEO'],
            'videos': ['LONG_VIDEO'],
            'streams': ['SINGLE'],
        }

        api_query = {
            'limit': self._PAGE_SIZE,
            'offset': page * self._PAGE_SIZE,
            'channel_id': channel_id,
            'media_container_status': 'STOPPED',
            'media_container_type': CONTAINER_TYPES[tab_type],
        }
        containers = self._call_api(
            'media-containers', video_id=tab_id, description=f'{tab_type} tab page {page + 1}',
            query=api_query)

        for item in traverse_obj(containers, (..., {dict})):
            # Formats are not extracted here; only metadata is attached to the URL result
            info = self._parse_video_data(item, extract_formats=False)
            yield self.url_result(f'https://nuum.ru/videos/{info["id"]}', NuumMediaIE, **info)

    def _real_extract(self, url):
        channel_name, tab_type = self._match_valid_url(url).group('id', 'type')
        tab_id = f'{channel_name}_{tab_type}'
        channel_data = self._get_channel_info(channel_name)['channel']

        page_fetcher = functools.partial(
            self._fetch_page, channel_data['channel_id'], tab_type, tab_id)
        return self.playlist_result(
            OnDemandPagedList(page_fetcher, self._PAGE_SIZE),
            playlist_id=tab_id, playlist_title=channel_data.get('channel_name'))

View File

@ -1,41 +0,0 @@
from .common import InfoExtractor
from ..utils import js_to_json
class OnionStudiosIE(InfoExtractor):
    """Extractor that resolves onionstudios.com video IDs to Kinja embed URLs."""

    _VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:video(?:s/[^/]+-|/)|embed\?.*\bid=)(?P<id>\d+)(?!-)'
    _EMBED_REGEX = [r'(?s)<(?:iframe|bulbs-video)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/(?:embed.+?|video/\d+\.json))\1']

    _TESTS = [{
        'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
        'md5': '5a118d466d62b5cd03647cf2c593977f',
        'info_dict': {
            'id': '3459881',
            'ext': 'mp4',
            'title': 'Hannibal charges forward, stops for a cocktail',
            'description': 'md5:545299bda6abf87e5ec666548c6a9448',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'a.v. club',
            'upload_date': '20150619',
            'timestamp': 1434728546,
        },
    }, {
        'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
        'only_matching': True,
    }, {
        'url': 'http://www.onionstudios.com/video/6139.json',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Map the numeric video ID to its MCP ID and delegate to the KinjaEmbed extractor."""
        video_id = self._match_id(url)
        # This script carries the ``window.mcpMapping`` table of video ID -> MCP ID
        mapping_js = self._download_webpage(
            'http://onionstudios.com/embed/dc94dc2899fe644c0e7241fa04c1b732.js',
            video_id)
        raw_mapping = self._search_regex(
            r'window\.mcpMapping\s*=\s*({.+?});', mapping_js, 'MCP Mapping')
        mcp_mapping = self._parse_json(raw_mapping, video_id, js_to_json)
        mcp_id = str(mcp_mapping[video_id]['mcp_id'])
        return self.url_result(
            f'http://kinja.com/ajax/inset/iframe?id=mcp-{mcp_id}',
            'KinjaEmbed', mcp_id)

View File

@ -1,72 +0,0 @@
import re
import urllib.parse
from .common import InfoExtractor
from ..utils import (
get_element_by_attribute,
qualities,
unescapeHTML,
)
class OraTVIE(InfoExtractor):
    """Extractor for videos on ora.tv and unsafespeech.com pages."""

    _VALID_URL = r'https?://(?:www\.)?(?:ora\.tv|unsafespeech\.com)/([^/]+/)*(?P<id>[^/\?#]+)'
    _TESTS = [{
        'url': 'https://www.ora.tv/larrykingnow/2015/12/16/vine-youtube-stars-zach-king-king-bach-on-their-viral-videos-0_36jupg6090pq',
        'md5': 'fa33717591c631ec93b04b0e330df786',
        'info_dict': {
            'id': '50178',
            'ext': 'mp4',
            'title': 'Vine & YouTube Stars Zach King & King Bach On Their Viral Videos!',
            'description': 'md5:ebbc5b1424dd5dba7be7538148287ac1',
        },
    }, {
        'url': 'http://www.unsafespeech.com/video/2016/5/10/student-self-censorship-and-the-thought-police-on-university-campuses-0_6622bnkppw4d',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract HLS and progressive formats, or hand over to Youtube when no HLS stream is listed."""
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)

        video_json = self._search_regex(
            r'"(?:video|current)"\s*:\s*({[^}]+?})', page, 'current video')
        hls_url = self._search_regex(
            r'hls_stream"?\s*:\s*"([^"]+)', video_json, 'm3u8 url', None)

        if not hls_url:
            # No HLS stream in the page data: the video is referenced by its YouTube ID
            youtube_id = self._search_regex(
                r'"youtube_id"\s*:\s*"([^"]+)', page, 'youtube id')
            return self.url_result(youtube_id, 'Youtube')

        formats = self._extract_m3u8_formats(
            hls_url, display_id, 'mp4', 'm3u8_native',
            m3u8_id='hls', fatal=False)

        # similar to GameSpotIE
        hls_path = urllib.parse.urlparse(hls_url).path
        qualities_re = r'((,[a-z]+\d+)+,?)'
        quality_ids = self._search_regex(
            qualities_re, hls_path, 'qualities').strip(',').split(',')
        # Drop the first path segment, then turn the quality list into a %s placeholder
        relative_path = hls_path[1:].split('/', 1)[1]
        template = re.sub(qualities_re, r'%s', relative_path).replace('.csmil/master.m3u8', '')
        template = urllib.parse.urljoin('http://videocdn-pmd.ora.tv/', template)
        rank = qualities(
            ['mobile400', 'basic400', 'basic600', 'sd900', 'sd1200', 'sd1500', 'hd720', 'hd1080'])
        formats.extend({
            'url': template % quality_id,
            'format_id': quality_id,
            'quality': rank(quality_id),
        } for quality_id in quality_ids)

        return {
            'id': self._search_regex(
                r'"id"\s*:\s*(\d+)', video_json, 'video id', default=display_id),
            'display_id': display_id,
            'title': unescapeHTML(self._og_search_title(page)),
            'description': get_element_by_attribute(
                'class', 'video_txt_decription', page),
            'thumbnail': self._proto_relative_url(self._search_regex(
                r'"thumb"\s*:\s*"([^"]+)', video_json, 'thumbnail', None)),
            'formats': formats,
        }

View File

@ -1,99 +0,0 @@
from .common import InfoExtractor
from ..utils import parse_iso8601, smuggle_url, unsmuggle_url, url_or_none
from ..utils.traversal import traverse_obj
class PiramideTVIE(InfoExtractor):
    """Extractor for piramide.tv videos, optionally following the chain of "next" videos."""

    _VALID_URL = r'https?://piramide\.tv/video/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://piramide.tv/video/wWtBAORdJUTh',
        'info_dict': {
            'id': 'wWtBAORdJUTh',
            'ext': 'mp4',
            'title': 'md5:79f9c8183ea6a35c836923142cf0abcc',
            'description': '',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/W86PgQDn/thumbnails/B9gpIxkH.jpg',
            'channel': 'León Picarón',
            'channel_id': 'leonpicaron',
            'timestamp': 1696460362,
            'upload_date': '20231004',
        },
    }, {
        'url': 'https://piramide.tv/video/wcYn6li79NgN',
        'info_dict': {
            'id': 'wcYn6li79NgN',
            'ext': 'mp4',
            'title': 'ACEPTO TENER UN BEBE CON MI NOVIA\u2026? | Parte 1',
            'description': '',
            'channel': 'ARTA GAME',
            'channel_id': 'arta_game',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/cnEdGp5X/thumbnails/rHAaWfP7.jpg',
            'timestamp': 1703434976,
            'upload_date': '20231224',
        },
    }]

    def _extract_video(self, video_id):
        """Return ``(next_video_id, info_dict)`` for a single video.

        Both the metadata and the manifest request are non-fatal.
        """
        metadata = self._download_json(
            f'https://hermes.piramide.tv/video/data/{video_id}', video_id, fatal=False)
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            f'https://cdn.piramide.tv/video/{video_id}/manifest.m3u8', video_id, fatal=False)
        following_id = traverse_obj(metadata, ('video', 'next_video', 'id', {str}))

        info = {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
        }
        # Metadata fields override the defaults above whenever they are present
        info.update(traverse_obj(metadata, ('video', {
            'id': ('id', {str}),
            'title': ('title', {str}),
            'description': ('description', {str}),
            'thumbnail': ('media', 'thumbnail', {url_or_none}),
            'channel': ('channel', 'name', {str}),
            'channel_id': ('channel', 'id', {str}),
            'timestamp': ('date', {parse_iso8601}),
        })))
        return following_id, info

    def _entries(self, video_id):
        """Yield info dicts along the next-video chain, stopping at the end or on a repeat."""
        seen = set()
        pending_id = video_id
        while True:
            seen.add(pending_id)
            pending_id, info = self._extract_video(pending_id)
            yield info
            if not pending_id or pending_id in seen:
                return

    def _real_extract(self, url):
        """Return either a playlist following the next-video chain or just the single video."""
        url, smuggled_data = unsmuggle_url(url, {})
        video_id = self._match_id(url)
        if not self._yes_playlist(video_id, video_id, smuggled_data):
            _, info = self._extract_video(video_id)
            return info
        return self.playlist_result(self._entries(video_id), video_id)
class PiramideTVChannelIE(InfoExtractor):
    """Extractor for the video list of a piramide.tv channel."""

    _VALID_URL = r'https?://piramide\.tv/channel/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://piramide.tv/channel/thekalo',
        'playlist_mincount': 10,
        'info_dict': {
            'id': 'thekalo',
        },
    }]

    def _entries(self, channel_name):
        """Yield a url result for every listed video that has a truthy ``id``."""
        listing = self._download_json(
            f'https://hermes.piramide.tv/channel/list/{channel_name}/date/100000', channel_name)
        for item in traverse_obj(listing, ('videos', lambda _, v: v['id'])):
            # Smuggle force_noplaylist along with each video URL
            video_url = smuggle_url(
                f'https://piramide.tv/video/{item["id"]}', {'force_noplaylist': True})
            summary = traverse_obj(item, {
                'id': ('id', {str}),
                'title': ('title', {str}),
                'description': ('description', {str}),
            })
            yield self.url_result(video_url, **summary)

    def _real_extract(self, url):
        """Return the channel's videos as a playlist keyed by the channel name."""
        channel_name = self._match_id(url)
        return self.playlist_result(self._entries(channel_name), channel_name)

View File

@ -1,72 +0,0 @@
from .common import InfoExtractor
from ..utils import (
try_get,
unified_strdate,
)
class PlanetMarathiIE(InfoExtractor):
    """Extractor for planetmarathi.com titles; each asset of a title becomes a playlist entry."""

    _VALID_URL = r'https?://(?:www\.)?planetmarathi\.com/titles/(?P<id>[^/#&?$]+)'
    _TESTS = [{
        'url': 'https://www.planetmarathi.com/titles/ek-unad-divas',
        'playlist_mincount': 2,
        'info_dict': {
            'id': 'ek-unad-divas',
        },
        'playlist': [{
            'info_dict': {
                'id': 'ASSETS-MOVIE-ASSET-01_ek-unad-divas',
                'ext': 'mp4',
                'title': 'ek unad divas',
                'alt_title': 'चित्रपट',
                'description': 'md5:41c7ed6b041c2fea9820a3f3125bd881',
                'episode_number': 1,
                'duration': 5539,
                'upload_date': '20210829',
            },
        }],  # Trailer skipped
    }, {
        'url': 'https://www.planetmarathi.com/titles/baap-beep-baap-season-1',
        'playlist_mincount': 10,
        'info_dict': {
            'id': 'baap-beep-baap-season-1',
        },
        'playlist': [{
            'info_dict': {
                'id': 'ASSETS-CHARACTER-PROFILE-SEASON-01-ASSET-01_baap-beep-baap-season-1',
                'ext': 'mp4',
                'title': 'Manohar Kanhere',
                'alt_title': 'मनोहर कान्हेरे',
                'description': 'md5:285ed45d5c0ab5522cac9a043354ebc6',
                'season_number': 1,
                'episode_number': 1,
                'duration': 29,
                'upload_date': '20210829',
            },
        }],  # Trailers, Episodes, other Character profiles skipped
    }]

    def _real_extract(self, url):
        """Download the title's asset list and return one entry per asset as a playlist."""
        playlist_id = self._match_id(url)
        assets = self._download_json(
            f'https://www.planetmarathi.com/api/v1/titles/{playlist_id}/assets', playlist_id)['assets']

        entries = []
        for item in assets:
            english_name = item['mediaAssetName']['en']
            # An asset named 'Movie' takes its title from the URL slug instead
            title = playlist_id.replace('-', ' ') if english_name == 'Movie' else english_name
            entry_id = f'{item["sk"]}_{playlist_id}'.replace('#', '-')
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(item['mediaAssetURL'], entry_id)
            entry = {
                'id': entry_id,
                'title': title,
                'alt_title': try_get(item, lambda x: x['mediaAssetName']['mr']),
                'description': try_get(item, lambda x: x['mediaAssetDescription']['en']),
                'season_number': item.get('mediaAssetSeason'),
                'episode_number': item.get('mediaAssetIndexForAssetType'),
                'duration': item.get('mediaAssetDurationInSeconds'),
                'upload_date': unified_strdate(item.get('created')),
                'formats': formats,
                'subtitles': subtitles,
            }
            entries.append(entry)
        return self.playlist_result(entries, playlist_id=playlist_id)

View File

@ -1,100 +0,0 @@
import json
from .common import InfoExtractor
from ..networking import PUTRequest
from ..networking.exceptions import HTTPError
from ..utils import ExtractorError, clean_html, int_or_none
class PlayPlusTVIE(InfoExtractor):
    """Extractor for playplus.tv / playplus.com VOD pages (requires account credentials)."""

    _VALID_URL = r'https?://(?:www\.)?playplus\.(?:com|tv)/VOD/(?P<project_id>[0-9]+)/(?P<id>[0-9a-f]{32})'
    _TEST = {
        'url': 'https://www.playplus.tv/VOD/7572/db8d274a5163424e967f35a30ddafb8e',
        'md5': 'd078cb89d7ab6b9df37ce23c647aef72',
        'info_dict': {
            'id': 'db8d274a5163424e967f35a30ddafb8e',
            'ext': 'mp4',
            'title': 'Capítulo 179 - Final',
            'description': 'md5:01085d62d8033a1e34121d3c3cabc838',
            'timestamp': 1529992740,
            'upload_date': '20180626',
        },
        'skip': 'Requires account credential',
    }
    _NETRC_MACHINE = 'playplustv'
    _GEO_COUNTRIES = ['BR']
    # Bearer token obtained by _perform_login
    _token = None
    # ID of the first profile of the logged-in account, set by _perform_login
    _profile_id = None

    def _call_api(self, resource, video_id=None, query=None):
        """Download JSON from the media API endpoint ``get<resource>`` using the bearer token."""
        return self._download_json('https://api.playplus.tv/api/media/v2/get' + resource, video_id, headers={
            'Authorization': 'Bearer ' + self._token,
        }, query=query)

    def _perform_login(self, username, password):
        """Log in with e-mail and password, storing the token and the first profile's ID.

        Raises an expected ExtractorError carrying the server's error message
        when the login request is answered with HTTP 401.
        """
        req = PUTRequest(
            'https://api.playplus.tv/api/web/login', json.dumps({
                'email': username,
                'password': password,
            }).encode(), {
                'Content-Type': 'application/json; charset=utf-8',
            })

        try:
            self._token = self._download_json(req, None)['token']
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                raise ExtractorError(self._parse_json(
                    e.cause.response.read(), None)['errorMessage'], expected=True)
            raise

        # Store on the declared ``_profile_id`` attribute (previously an
        # undeclared ``_profile`` attribute was used, leaving ``_profile_id`` dead)
        self._profile_id = self._call_api('Profiles')['list'][0]['_id']

    def _real_initialize(self):
        """Require a successful login before any extraction."""
        if not self._token:
            self.raise_login_required(method='password')

    def _real_extract(self, url):
        """Fetch the media object for the URL and build its info dict."""
        project_id, media_id = self._match_valid_url(url).groups()
        media = self._call_api(
            'Media', media_id, {
                'profileId': self._profile_id,
                'projectId': project_id,
                'mediaId': media_id,
            })['obj']
        title = media['title']

        formats = []
        # ``or []`` also covers an explicit null value, not just a missing key
        for f in media.get('files') or []:
            f_url = f.get('url')
            if not f_url:
                continue
            file_info = f.get('fileInfo') or {}
            formats.append({
                'url': f_url,
                'width': int_or_none(file_info.get('width')),
                'height': int_or_none(file_info.get('height')),
            })

        thumbnails = []
        for thumb in media.get('thumbs') or []:
            thumb_url = thumb.get('url')
            if not thumb_url:
                continue
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

        return {
            'id': media_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': clean_html(media.get('description')) or media.get('shortDescription'),
            'timestamp': int_or_none(media.get('publishDate'), 1000),
            'view_count': int_or_none(media.get('numberOfViews')),
            'comment_count': int_or_none(media.get('numberOfComments')),
            'tags': media.get('tags'),
        }

View File

@ -1,79 +0,0 @@
from .common import InfoExtractor
from ..utils import (
dict_get,
float_or_none,
)
class PlaywireIE(InfoExtractor):
    """Extractor for Playwire player configs and embeds."""

    _VALID_URL = r'https?://(?:config|cdn)\.playwire\.com(?:/v2)?/(?P<publisher_id>\d+)/(?:videos/v2|embed|config)/(?P<id>\d+)'
    _EMBED_REGEX = [r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1']

    _TESTS = [{
        'url': 'http://config.playwire.com/14907/videos/v2/3353705/player.json',
        'md5': 'e6398701e3595888125729eaa2329ed9',
        'info_dict': {
            'id': '3353705',
            'ext': 'mp4',
            'title': 'S04_RM_UCL_Rus',
            'thumbnail': r're:^https?://.*\.png$',
            'duration': 145.94,
        },
        'skip': 'Invalid URL',
    }, {
        # m3u8 in f4m
        'url': 'http://config.playwire.com/21772/videos/v2/4840492/zeus.json',
        'info_dict': {
            'id': '4840492',
            'ext': 'mp4',
            'title': 'ITV EL SHOW FULL',
        },
        'skip': 'Invalid URL',
    }, {
        # Multiple resolutions while bitrates missing
        'url': 'http://cdn.playwire.com/11625/embed/85228.html',
        'only_matching': True,
    }, {
        'url': 'http://config.playwire.com/12421/videos/v2/3389892/zeus.json',
        'only_matching': True,
    }, {
        'url': 'http://cdn.playwire.com/v2/12342/config/1532636.json',
        'only_matching': True,
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
        'info_dict': {
            'id': '3519514',
            'ext': 'mp4',
            'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
        },
        'skip': 'Site no longer embeds Playwire',
    }]

    def _real_extract(self, url):
        """Download the zeus.json player config and extract formats from its f4m manifest."""
        publisher_id, video_id = self._match_valid_url(url).group('publisher_id', 'id')

        config = self._download_json(
            f'http://config.playwire.com/{publisher_id}/videos/v2/{video_id}/zeus.json',
            video_id)

        title = config['settings']['title']
        duration = float_or_none(config.get('duration'), 1000)

        content = config['content']
        thumbnail = content.get('poster')
        manifest_url = content['media']['f4m']

        formats = self._extract_f4m_formats(manifest_url, video_id, m3u8_id='hls')
        for fmt in formats:
            if dict_get(fmt, ['tbr', 'width', 'height']):
                continue
            # Without bitrate or resolution info, rank formats by the '-hd.' marker in the URL
            fmt['quality'] = int('-hd.' in fmt['url'])

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }

View File

@ -1,130 +0,0 @@
from .common import InfoExtractor
from ..utils import (
float_or_none,
int_or_none,
parse_iso8601,
parse_resolution,
url_or_none,
)
from ..utils.traversal import traverse_obj
class PlVideoIE(InfoExtractor):
    """Extractor for plvideo.ru watch pages and shorts."""

    IE_DESC = 'Платформа'
    _VALID_URL = r'https?://(?:www\.)?plvideo\.ru/(?:watch\?(?:[^#]+&)?v=|shorts/)(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://plvideo.ru/watch?v=Y5JzUzkcQTMK',
        'md5': 'fe8e18aca892b3b31f3bf492169f8a26',
        'info_dict': {
            'id': 'Y5JzUzkcQTMK',
            'ext': 'mp4',
            'thumbnail': 'https://img.plvideo.ru/images/fp-2024-images/v/cover/37/dd/37dd00a4c96c77436ab737e85947abd7/original663a4a3bb713e5.33151959.jpg',
            'title': 'Presidente de Cuba llega a Moscú en una visita de trabajo',
            'channel': 'RT en Español',
            'channel_id': 'ZH4EKqunVDvo',
            'media_type': 'video',
            'comment_count': int,
            'tags': ['rusia', 'cuba', 'russia', 'miguel díaz-canel'],
            'description': 'md5:a1a395d900d77a86542a91ee0826c115',
            'release_timestamp': 1715096124,
            'channel_is_verified': True,
            'like_count': int,
            'timestamp': 1715095911,
            'duration': 44320,
            'view_count': int,
            'dislike_count': int,
            'upload_date': '20240507',
            'modified_date': '20240701',
            'channel_follower_count': int,
            'modified_timestamp': 1719824073,
        },
    }, {
        'url': 'https://plvideo.ru/shorts/S3Uo9c-VLwFX',
        'md5': '7d8fa2279406c69d2fd2a6fc548a9805',
        'info_dict': {
            'id': 'S3Uo9c-VLwFX',
            'ext': 'mp4',
            'channel': 'Romaatom',
            'tags': 'count:22',
            'dislike_count': int,
            'upload_date': '20241130',
            'description': 'md5:452e6de219bf2f32bb95806c51c3b364',
            'duration': 58433,
            'modified_date': '20241130',
            'thumbnail': 'https://img.plvideo.ru/images/fp-2024-11-cover/S3Uo9c-VLwFX/f9318999-a941-482b-b700-2102a7049366.jpg',
            'media_type': 'shorts',
            'like_count': int,
            'modified_timestamp': 1732961458,
            'channel_is_verified': True,
            'channel_id': 'erJyyTIbmUd1',
            'timestamp': 1732961355,
            'comment_count': int,
            'title': 'Белоусов отменил приказы о кадровом резерве на гражданской службе',
            'channel_follower_count': int,
            'view_count': int,
            'release_timestamp': 1732961458,
        },
    }]

    def _real_extract(self, url):
        """Query the video API and assemble formats, subtitles and metadata."""
        video_id = self._match_id(url)
        api_response = self._download_json(
            f'https://api.g1.plvideo.ru/v1/videos/{video_id}?Aud=18', video_id)

        # One HLS format per named profile that carries a valid 'hls' URL
        formats = []
        hls_profiles = traverse_obj(
            api_response, ('item', 'profiles', {dict.items}, lambda _, v: url_or_none(v[1]['hls'])))
        for profile_name, profile in hls_profiles:
            formats.append({
                'format_id': profile_name,
                'ext': 'mp4',
                'protocol': 'm3u8_native',
                **traverse_obj(profile, {
                    'url': 'hls',
                    'fps': ('fps', {float_or_none}),
                    'aspect_ratio': ('aspectRatio', {float_or_none}),
                }),
                **parse_resolution(profile_name),
            })

        # A livestream URL marks the item as live and contributes its own formats
        livestream_url = traverse_obj(api_response, ('item', 'livestream', 'url', {url_or_none}))
        is_live = bool(livestream_url)
        if is_live:
            formats.extend(self._extract_m3u8_formats(livestream_url, video_id, 'mp4', live=True))

        # Languages ending in '-auto' are filed as automatic captions under the bare language code
        subtitles = {}
        automatic_captions = {}
        subtitle_items = traverse_obj(
            api_response, ('item', 'subtitles', {dict.items}, lambda _, v: url_or_none(v[1])))
        for lang_code, sub_url in subtitle_items:
            if lang_code.endswith('-auto'):
                target, key = automatic_captions, lang_code[:-5]
            else:
                target, key = subtitles, lang_code
            target.setdefault(key, []).append({
                'url': sub_url,
            })

        info = {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'automatic_captions': automatic_captions,
            'is_live': is_live,
        }
        info.update(traverse_obj(api_response, ('item', {
            'id': ('id', {str}),
            'title': ('title', {str}),
            'description': ('description', {str}),
            'thumbnail': ('cover', 'paths', 'original', 'src', {url_or_none}),
            'duration': ('uploadFile', 'videoDuration', {int_or_none}),
            'channel': ('channel', 'name', {str}),
            'channel_id': ('channel', 'id', {str}),
            'channel_follower_count': ('channel', 'stats', 'subscribers', {int_or_none}),
            'channel_is_verified': ('channel', 'verified', {bool}),
            'tags': ('tags', ..., {str}),
            'timestamp': ('createdAt', {parse_iso8601}),
            'release_timestamp': ('publishedAt', {parse_iso8601}),
            'modified_timestamp': ('updatedAt', {parse_iso8601}),
            'view_count': ('stats', 'viewTotalCount', {int_or_none}),
            'like_count': ('stats', 'likeCount', {int_or_none}),
            'dislike_count': ('stats', 'dislikeCount', {int_or_none}),
            'comment_count': ('stats', 'commentCount', {int_or_none}),
            'media_type': ('type', {str}),
        })))
        return info

View File

@ -1,496 +0,0 @@
import hashlib
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
determine_ext,
float_or_none,
int_or_none,
join_nonempty,
merge_dicts,
unified_strdate,
)
# Shared format-extraction logic for ProSiebenSat.1 extractors.
# This class reads self._TOKEN, self._SALT and self._CLIENT_NAME (and, when
# _ACCESS_ID is set, self._ENCRYPTION_KEY and self._IV) but does not define
# them itself; subclasses are expected to provide them.
class ProSiebenSat1BaseIE(InfoExtractor):
_GEO_BYPASS = False
# When left as None, the v4 API branch in _extract_video_info is skipped
_ACCESS_ID = None
# Sent as the 'protocols' query parameter and mixed into the v4 'urls' client token
_SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear'
# Endpoint names ('protocols', 'urls') are appended directly to this prefix
_V4_BASE_URL = 'https://vas-v4.p7s1video.net/4.0/get'
# Returns a dict with 'duration' and 'formats' for the given clip.
# `url` is only used as the 'client_location' value sent to the API and
# mixed into the legacy client_id hashes.
def _extract_video_info(self, url, clip_id):
client_location = url
# Clip metadata from the legacy (v2) API; the first list element is used
video = self._download_json(
'http://vas.sim-technik.de/vas/live/v2/videos',
clip_id, 'Downloading videos JSON', query={
'access_token': self._TOKEN,
'client_location': client_location,
'client_name': self._CLIENT_NAME,
'ids': clip_id,
})[0]
# Report DRM for protected clips unless unplayable formats were explicitly allowed
if not self.get_param('allow_unplayable_formats') and video.get('is_protected') is True:
self.report_drm(clip_id)
formats = []
# First attempt: v4 API, only when an access ID is configured
if self._ACCESS_ID:
# Base string for the SHA-1 client tokens; the concatenation order matters
raw_ct = self._ENCRYPTION_KEY + clip_id + self._IV + self._ACCESS_ID
# Non-fatal request; HTTP 403 is accepted so the error body can be inspected below
protocols = self._download_json(
self._V4_BASE_URL + 'protocols', clip_id,
'Downloading protocols JSON',
headers=self.geo_verification_headers(), query={
'access_id': self._ACCESS_ID,
'client_token': hashlib.sha1((raw_ct).encode()).hexdigest(),
'video_id': clip_id,
}, fatal=False, expected_status=(403,)) or {}
error = protocols.get('error') or {}
if error.get('title') == 'Geo check failed':
self.raise_geo_restricted(countries=['AT', 'CH', 'DE'])
server_token = protocols.get('server_token')
if server_token:
# Second v4 request: the client token additionally covers the server token and protocol list
urls = (self._download_json(
self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={
'access_id': self._ACCESS_ID,
'client_token': hashlib.sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(),
'protocols': self._SUPPORTED_PROTOCOLS,
'server_token': server_token,
'video_id': clip_id,
}, fatal=False) or {}).get('urls') or {}
# Only the 'clear' variant of each protocol is used
for protocol, variant in urls.items():
source_url = variant.get('clear', {}).get('url')
if not source_url:
continue
if protocol == 'dash':
formats.extend(self._extract_mpd_formats(
source_url, clip_id, mpd_id=protocol, fatal=False))
elif protocol == 'hls':
formats.extend(self._extract_m3u8_formats(
source_url, clip_id, 'mp4', 'm3u8_native',
m3u8_id=protocol, fatal=False))
else:
# Any other protocol is treated as a direct URL
formats.append({
'url': source_url,
'format_id': protocol,
})
# Fallback: legacy v2 sources API, used when the v4 branch produced no formats
if not formats:
source_ids = [str(source['id']) for source in video['sources']]
# client_id is the first two salt characters followed by a SHA-1 over the joined fields
client_id = self._SALT[:2] + hashlib.sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode()).hexdigest()
sources = self._download_json(
f'http://vas.sim-technik.de/vas/live/v2/videos/{clip_id}/sources',
clip_id, 'Downloading sources JSON', query={
'access_token': self._TOKEN,
'client_id': client_id,
'client_location': client_location,
'client_name': self._CLIENT_NAME,
})
server_id = sources['server_id']
# Returns None for a missing/zero bitrate; exact multiples of 1000 are divided by 1000,
# other values are returned unchanged
def fix_bitrate(bitrate):
bitrate = int_or_none(bitrate)
if not bitrate:
return None
return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
# One URL request per source ID, each with its own client_id hash
for source_id in source_ids:
client_id = self._SALT[:2] + hashlib.sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode()).hexdigest()
urls = self._download_json(
f'http://vas.sim-technik.de/vas/live/v2/videos/{clip_id}/sources/url',
clip_id, 'Downloading urls JSON', fatal=False, query={
'access_token': self._TOKEN,
'client_id': client_id,
'client_location': client_location,
'client_name': self._CLIENT_NAME,
'server_id': server_id,
'source_ids': source_id,
})
# A failed (non-fatal) download just skips this source
if not urls:
continue
if urls.get('status_code') != 0:
raise ExtractorError('This video is unavailable', expected=True)
# 'sources' may be a dict or a list; for a dict only the values are iterated
urls_sources = urls['sources']
if isinstance(urls_sources, dict):
urls_sources = urls_sources.values()
for source in urls_sources:
source_url = source.get('url')
if not source_url:
continue
protocol = source.get('protocol')
mimetype = source.get('mimetype')
# Dispatch on mimetype/URL: HDS (f4m), HLS, DASH, otherwise RTMP or plain HTTP
if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
formats.extend(self._extract_f4m_formats(
source_url, clip_id, f4m_id='hds', fatal=False))
elif mimetype == 'application/x-mpegURL':
formats.extend(self._extract_m3u8_formats(
source_url, clip_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
elif mimetype == 'application/dash+xml':
formats.extend(self._extract_mpd_formats(
source_url, clip_id, mpd_id='dash', fatal=False))
else:
tbr = fix_bitrate(source['bitrate'])
if protocol in ('rtmp', 'rtmpe'):
# Split the RTMP URL into server part and path
mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
if not mobj:
continue
path = mobj.group('path')
# Everything before the last 'mp4:' is the app, the rest is the play path.
# NOTE(review): str.rfind returns -1 when 'mp4:' is absent, which would make the
# slices below split off only the final character - confirm this cannot occur
mp4colon_index = path.rfind('mp4:')
app = path[:mp4colon_index]
play_path = path[mp4colon_index:]
formats.append({
'url': '{}/{}'.format(mobj.group('url'), app),
'app': app,
'play_path': play_path,
'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
'page_url': 'http://www.prosieben.de',
'tbr': tbr,
'ext': 'flv',
'format_id': join_nonempty('rtmp', tbr),
})
else:
formats.append({
'url': source_url,
'tbr': tbr,
'format_id': join_nonempty('http', tbr),
})
return {
'duration': float_or_none(video.get('duration')),
'formats': formats,
}
class ProSiebenSat1IE(ProSiebenSat1BaseIE):
IE_NAME = 'prosiebensat1'
IE_DESC = 'ProSiebenSat.1 Digital'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?
(?:
(?:beta\.)?
(?:
prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|advopedia
)\.(?:de|at|ch)|
ran\.de|fem\.com|advopedia\.de|galileo\.tv/video
)
/(?P<id>.+)
'''
_TESTS = [
{
# Tests changes introduced in https://github.com/ytdl-org/youtube-dl/pull/6242
# in response to fixing https://github.com/ytdl-org/youtube-dl/issues/6215:
# - malformed f4m manifest support
# - proper handling of URLs starting with `https?://` in 2.0 manifests
# - recursive child f4m manifests extraction
'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
'info_dict': {
'id': '2104602',
'ext': 'mp4',
'title': 'CIRCUS HALLIGALLI - Episode 18 - Staffel 2',
'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
'upload_date': '20131231',
'duration': 5845.04,
'series': 'CIRCUS HALLIGALLI',
'season_number': 2,
'episode': 'Episode 18 - Staffel 2',
'episode_number': 18,
},
},
{
'url': 'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html',
'info_dict': {
'id': '2570327',
'ext': 'mp4',
'title': 'Lady-Umstyling für Audrina',
'description': 'md5:4c16d0c17a3461a0d43ea4084e96319d',
'upload_date': '20131014',
'duration': 606.76,
},
'params': {
# rtmp download
'skip_download': True,
},
'skip': 'Seems to be broken',
},
{
'url': 'http://www.prosiebenmaxx.de/tv/experience/video/144-countdown-fuer-die-autowerkstatt-ganze-folge',
'info_dict': {
'id': '2429369',
'ext': 'mp4',
'title': 'Countdown für die Autowerkstatt',
'description': 'md5:809fc051a457b5d8666013bc40698817',
'upload_date': '20140223',
'duration': 2595.04,
},
'params': {
# rtmp download
'skip_download': True,
},
'skip': 'This video is unavailable',
},
{
'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip',
'info_dict': {
'id': '2904997',
'ext': 'mp4',
'title': 'Sexy laufen in Ugg Boots',
'description': 'md5:edf42b8bd5bc4e5da4db4222c5acb7d6',
'upload_date': '20140122',
'duration': 245.32,
},
'params': {
# rtmp download
'skip_download': True,
},
'skip': 'This video is unavailable',
},
{
'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip',
'info_dict': {
'id': '2906572',
'ext': 'mp4',
'title': 'Im Interview: Kai Wiesinger',
'description': 'md5:e4e5370652ec63b95023e914190b4eb9',
'upload_date': '20140203',
'duration': 522.56,
},
'params': {
# rtmp download
'skip_download': True,
},
'skip': 'This video is unavailable',
},
{
'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge',
'info_dict': {
'id': '2992323',
'ext': 'mp4',
'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',
'description': 'md5:2669cde3febe9bce13904f701e774eb6',
'upload_date': '20141014',
'duration': 2410.44,
},
'params': {
# rtmp download
'skip_download': True,
},
'skip': 'This video is unavailable',
},
{
'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge',
'info_dict': {
'id': '3004256',
'ext': 'mp4',
'title': 'Schalke: Tönnies möchte Raul zurück',
'description': 'md5:4b5b271d9bcde223b54390754c8ece3f',
'upload_date': '20140226',
'duration': 228.96,
},
'params': {
# rtmp download
'skip_download': True,
},
'skip': 'This video is unavailable',
},
{
'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
'info_dict': {
'id': '2572814',
'ext': 'mp4',
'title': 'The Voice of Germany - Andreas Kümmert: Rocket Man',
'description': 'md5:6ddb02b0781c6adf778afea606652e38',
'timestamp': 1382041620,
'upload_date': '20131017',
'duration': 469.88,
},
'params': {
'skip_download': True,
},
},
{
'url': 'http://www.fem.com/videos/beauty-lifestyle/kurztrips-zum-valentinstag',
'info_dict': {
'id': '2156342',
'ext': 'mp4',
'title': 'Kurztrips zum Valentinstag',
'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.',
'duration': 307.24,
},
'params': {
'skip_download': True,
},
},
{
'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist',
'info_dict': {
'id': '439664',
'title': 'Episode 8 - Ganze Folge - Playlist',
'description': 'md5:63b8963e71f481782aeea877658dec84',
},
'playlist_count': 2,
'skip': 'This video is unavailable',
},
{
# title in <h2 class="subtitle">
'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip',
'info_dict': {
'id': '4895826',
'ext': 'mp4',
'title': 'Jetzt erst enthüllt: Das Geheimnis von Emma Stones Oscar-Robe',
'description': 'md5:e5ace2bc43fadf7b63adc6187e9450b9',
'upload_date': '20170302',
},
'params': {
'skip_download': True,
},
'skip': 'geo restricted to Germany',
},
{
# geo restricted to Germany
'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge',
'only_matching': True,
},
{
# geo restricted to Germany
'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge',
'only_matching': True,
},
{
# geo restricted to Germany
'url': 'https://www.galileo.tv/video/diese-emojis-werden-oft-missverstanden',
'only_matching': True,
},
{
'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel',
'only_matching': True,
},
{
'url': 'http://www.advopedia.de/videos/lenssen-klaert-auf/lenssen-klaert-auf-folge-8-staffel-3-feiertage-und-freie-tage',
'only_matching': True,
},
]
_TOKEN = 'prosieben'
_SALT = '01!8d8F_)r9]4s[qeuXfP%'
_CLIENT_NAME = 'kolibri-2.0.19-splec4'
_ACCESS_ID = 'x_prosiebenmaxx-de'
_ENCRYPTION_KEY = 'Eeyeey9oquahthainoofashoyoikosag'
_IV = 'Aeluchoc6aevechuipiexeeboowedaok'
_CLIPID_REGEXES = [
r'"clip_id"\s*:\s+"(\d+)"',
r'clipid: "(\d+)"',
r'clip[iI]d=(\d+)',
r'clip[iI][dD]\s*=\s*["\'](\d+)',
r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
r'proMamsId&quot;\s*:\s*&quot;(\d+)',
r'proMamsId"\s*:\s*"(\d+)',
]
_TITLE_REGEXES = [
r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
r'<header class="clearfix">\s*<h3>(.+?)</h3>',
r'<!-- start video -->\s*<h1>(.+?)</h1>',
r'<h1 class="att-name">\s*(.+?)</h1>',
r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>',
r'<h2 class="video-title" itemprop="name">\s*(.+?)</h2>',
r'<div[^>]+id="veeseoTitle"[^>]*>(.+?)</div>',
r'<h2[^>]+class="subtitle"[^>]*>([^<]+)</h2>',
]
_DESCRIPTION_REGEXES = [
r'<p itemprop="description">\s*(.+?)</p>',
r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>',
r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>',
r'<p class="att-description">\s*(.+?)\s*</p>',
r'<p class="video-description" itemprop="description">\s*(.+?)</p>',
r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>',
]
_UPLOAD_DATE_REGEXES = [
r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"',
r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr',
r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',
]
_PAGE_TYPE_REGEXES = [
r'<meta name="page_type" content="([^"]+)">',
r"'itemType'\s*:\s*'([^']*)'",
]
_PLAYLIST_ID_REGEXES = [
r'content[iI]d=(\d+)',
r"'itemId'\s*:\s*'([^']*)'",
]
_PLAYLIST_CLIP_REGEXES = [
r'(?s)data-qvt=.+?<a href="([^"]+)"',
]
def _extract_clip(self, url, webpage):
    """Build the info dict for a single-clip page.

    Page-scraped metadata is merged on top of the backend info returned by
    _extract_video_info(); JSON-LD data only fills in whatever is still
    missing after that.
    """
    clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')

    title = self._html_search_regex(
        self._TITLE_REGEXES, webpage, 'title', default=None)
    if not title:
        title = self._og_search_title(webpage)

    video_info = self._extract_video_info(url, clip_id)

    description = self._html_search_regex(
        self._DESCRIPTION_REGEXES, webpage, 'description', default=None)
    if description is None:
        description = self._og_search_description(webpage)

    thumbnail = self._og_search_thumbnail(webpage)

    # Prefer the machine-readable meta tag; scrape the visible date otherwise.
    raw_date = self._html_search_meta(
        'og:published_time', webpage, 'upload date', default=None)
    if not raw_date:
        raw_date = self._html_search_regex(
            self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None)
    upload_date = unified_strdate(raw_date)

    json_ld = self._search_json_ld(webpage, clip_id, default={})

    page_metadata = {
        'id': clip_id,
        'title': title,
        'description': description,
        'thumbnail': thumbnail,
        'upload_date': upload_date,
    }
    return merge_dicts(video_info, page_metadata, json_ld)
def _extract_playlist(self, url, webpage):
    """Build a playlist result from the page's ``contentResources`` JS array.

    Items that carry neither an ``id`` nor a ``upc`` are skipped.
    """
    playlist_id = self._html_search_regex(
        self._PLAYLIST_ID_REGEXES, webpage, 'playlist id')
    resources_json = self._search_regex(
        r'var\s+contentResources\s*=\s*(\[.+?\]);\s*</script',
        webpage, 'playlist')
    resources = self._parse_json(resources_json, playlist_id)

    entries = []
    for resource in resources:
        clip_id = resource.get('id') or resource.get('upc')
        if clip_id:
            entry = self._extract_video_info(url, clip_id)
            teaser = resource.get('teaser', {})
            entry.update({
                'id': clip_id,
                'title': resource.get('title') or teaser.get('headline'),
                'description': teaser.get('description'),
                'thumbnail': resource.get('poster'),
                'duration': float_or_none(resource.get('duration')),
                'series': resource.get('tvShowTitle'),
                'uploader': resource.get('broadcastPublisher'),
            })
            entries.append(entry)

    return self.playlist_result(entries, playlist_id)
def _real_extract(self, url):
    """Download the page and dispatch on its declared page type.

    Raises ExtractorError (expected) for any page type other than
    'clip' or 'playlist'.
    """
    video_id = self._match_id(url)
    webpage = self._download_webpage(url, video_id)

    # Pages that do not declare a type are treated as single clips.
    page_type = self._search_regex(
        self._PAGE_TYPE_REGEXES, webpage, 'page type', default='clip').lower()

    handlers = {
        'clip': self._extract_clip,
        'playlist': self._extract_playlist,
    }
    handler = handlers.get(page_type)
    if handler is None:
        raise ExtractorError(
            f'Unsupported page type {page_type}', expected=True)
    return handler(url, webpage)

View File

@ -1,50 +0,0 @@
from .prosiebensat1 import ProSiebenSat1BaseIE
from ..utils import parse_duration, unified_strdate
class Puls4IE(ProSiebenSat1BaseIE):
    """Extractor for puls4.com pages, built on the ProSiebenSat.1 base class."""

    _VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>[^?#&]+)'
    _TESTS = [
        {
            'url': 'http://www.puls4.com/2-minuten-2-millionen/staffel-3/videos/2min2miotalk/Tobias-Homberger-von-myclubs-im-2min2miotalk-118118',
            'md5': 'fd3c6b0903ac72c9d004f04bc6bb3e03',
            'info_dict': {
                'id': '118118',
                'ext': 'flv',
                'title': 'Tobias Homberger von myclubs im #2min2miotalk',
                'description': 'md5:f9def7c5e8745d6026d8885487d91955',
                'upload_date': '20160830',
                'uploader': 'PULS_4',
            },
        },
        {
            'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident.-Norbert-Hofer',
            'only_matching': True,
        },
        {
            'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident-Analyse-des-Interviews-mit-Norbert-Hofer-416598',
            'only_matching': True,
        },
    ]
    _TOKEN = 'puls4'
    _SALT = '01!kaNgaiNgah1Ie4AeSha'
    _CLIENT_NAME = ''

    def _real_extract(self, url):
        """Resolve the page path to its media JSON and merge it with backend info."""
        page_path = self._match_id(url)

        # Step 1: the page API points at the content document for this path.
        page = self._download_json(
            'http://www.puls4.com/api/json-fe/page/' + page_path, page_path)
        content_path = page['content'][0]['url']

        # Step 2: the content document holds the currently selected media item.
        content = self._download_json(
            'http://www.puls4.com' + content_path, content_path)
        media = content['mediaCurrent']
        player = media['playerContent']

        info = self._extract_video_info(url, player['id'])
        metadata = {
            'id': str(media['objectId']),
            'title': player['title'],
            'description': media.get('description'),
            'thumbnail': media.get('previewLink'),
            'upload_date': unified_strdate(media.get('date')),
            'duration': parse_duration(player.get('duration')),
            'episode': player.get('episodePartName'),
            'show': media.get('channel'),
            'season_id': player.get('seasonId'),
            'uploader': player.get('sourceCompany'),
        }
        info.update(metadata)
        return info

View File

@ -1,154 +0,0 @@
import itertools
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
extract_attributes,
get_element_by_class,
get_element_html_by_class,
get_element_text_and_html_by_tag,
get_elements_html_by_class,
int_or_none,
join_nonempty,
try_call,
unified_strdate,
update_url,
urljoin,
)
from ..utils.traversal import traverse_obj
class RadioComercialIE(InfoExtractor):
    """Extractor for single podcast episodes on radiocomercial.pt."""

    _VALID_URL = r'https?://(?:www\.)?radiocomercial\.pt/podcasts/[^/?#]+/t?(?P<season>\d+)/(?P<id>[\w-]+)'
    _TESTS = [
        {
            'url': 'https://radiocomercial.pt/podcasts/o-homem-que-mordeu-o-cao/t6/taylor-swift-entranhando-se-que-nem-uma-espada-no-ventre-dos-fas#page-content-wrapper',
            'md5': '5f4fe8e485b29d2e8fd495605bc2c7e4',
            'info_dict': {
                'id': 'taylor-swift-entranhando-se-que-nem-uma-espada-no-ventre-dos-fas',
                'ext': 'mp3',
                'title': 'Taylor Swift entranhando-se que nem uma espada no ventre dos fãs.',
                'release_date': '20231025',
                'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
                'season': 'Season 6',
                'season_number': 6,
            },
        },
        {
            'url': 'https://radiocomercial.pt/podcasts/convenca-me-num-minuto/t3/convenca-me-num-minuto-que-os-lobisomens-existem',
            'md5': '47e96c273aef96a8eb160cd6cf46d782',
            'info_dict': {
                'id': 'convenca-me-num-minuto-que-os-lobisomens-existem',
                'ext': 'mp3',
                'title': 'Convença-me num minuto que os lobisomens existem',
                'release_date': '20231026',
                'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
                'season': 'Season 3',
                'season_number': 3,
            },
        },
        {
            'url': 'https://radiocomercial.pt/podcasts/inacreditavel-by-ines-castel-branco/t2/o-desastre-de-aviao',
            'md5': '69be64255420fec23b7259955d771e54',
            'info_dict': {
                'id': 'o-desastre-de-aviao',
                'ext': 'mp3',
                'title': 'O desastre de avião',
                'description': 'md5:8a82beeb372641614772baab7246245f',
                'release_date': '20231101',
                'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
                'season': 'Season 2',
                'season_number': 2,
            },
            'params': {
                # inconsistent md5
                'skip_download': True,
            },
        },
        {
            'url': 'https://radiocomercial.pt/podcasts/tnt-todos-no-top/2023/t-n-t-29-de-outubro',
            'md5': '91d32d4d4b1407272068b102730fc9fa',
            'info_dict': {
                'id': 't-n-t-29-de-outubro',
                'ext': 'mp3',
                'title': 'T.N.T 29 de outubro',
                'release_date': '20231029',
                'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
                'season': 'Season 2023',
                'season_number': 2023,
            },
        },
    ]

    def _real_extract(self, url):
        """Scrape the episode page for its metadata and direct audio link."""
        video_id, season = self._match_valid_url(url).group('id', 'season')
        webpage = self._download_webpage(url, video_id)

        title = self._html_extract_title(webpage)
        description = self._og_search_description(webpage, default=None)

        # The date is looked up inside the element with class "descriptions";
        # an empty string stands in when that element is absent.
        descriptions_html = get_element_html_by_class('descriptions', webpage) or ''
        release_date = unified_strdate(get_element_by_class('date', descriptions_html))

        thumbnail = self._og_search_thumbnail(webpage)

        # The audio URL is the href of the element with class "audiofile".
        audio_html = get_element_html_by_class('audiofile', webpage) or ''
        audio_url = extract_attributes(audio_html).get('href')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'release_date': release_date,
            'thumbnail': thumbnail,
            'season_number': int_or_none(season),
            'url': audio_url,
        }
class RadioComercialPlaylistIE(InfoExtractor):
    """Extractor for podcast (and podcast season) listings on radiocomercial.pt."""

    _VALID_URL = r'https?://(?:www\.)?radiocomercial\.pt/podcasts/(?P<id>[\w-]+)(?:/t?(?P<season>\d+))?/?(?:$|[?#])'
    _TESTS = [
        {
            'url': 'https://radiocomercial.pt/podcasts/convenca-me-num-minuto/t3',
            'info_dict': {
                'id': 'convenca-me-num-minuto_t3',
                'title': 'Convença-me num Minuto - Temporada 3',
            },
            'playlist_mincount': 32,
        },
        {
            'url': 'https://radiocomercial.pt/podcasts/o-homem-que-mordeu-o-cao',
            'info_dict': {
                'id': 'o-homem-que-mordeu-o-cao',
                'title': 'O Homem Que Mordeu o Cão',
            },
            'playlist_mincount': 19,
        },
        {
            'url': 'https://radiocomercial.pt/podcasts/as-minhas-coisas-favoritas',
            'info_dict': {
                'id': 'as-minhas-coisas-favoritas',
                'title': 'As Minhas Coisas Favoritas',
            },
            'playlist_mincount': 131,
        },
        {
            'url': 'https://radiocomercial.pt/podcasts/tnt-todos-no-top/t2023',
            'info_dict': {
                'id': 'tnt-todos-no-top_t2023',
                'title': 'TNT - Todos No Top - Temporada 2023',
            },
            'playlist_mincount': 39,
        },
    ]

    def _entries(self, url, playlist_id):
        """Yield episode URLs page by page.

        Pagination stops at the first page that answers HTTP 404 or that
        lists no episodes; any other download error is propagated.
        """
        for page_num in itertools.count(1):
            try:
                webpage = self._download_webpage(
                    f'{url}/{page_num}', playlist_id, f'Downloading page {page_num}')
            except ExtractorError as error:
                page_missing = isinstance(error.cause, HTTPError) and error.cause.status == 404
                if not page_missing:
                    raise
                return

            episodes = get_elements_html_by_class('tm-ouvir-podcast', webpage)
            if not episodes:
                return

            hrefs = traverse_obj(episodes, (..., {extract_attributes}, 'href'))
            for href in hrefs:
                candidate = urljoin(url, href)
                # Only hand over links the episode extractor can handle.
                if RadioComercialIE.suitable(candidate):
                    yield candidate

    def _real_extract(self, url):
        """Return a playlist of all episodes of the podcast (season)."""
        podcast, season = self._match_valid_url(url).group('id', 'season')
        playlist_id = join_nonempty(podcast, season, delim='_t')

        # Strip query and fragment so page numbers can be appended to the URL.
        url = update_url(url, query=None, fragment=None)
        webpage = self._download_webpage(url, playlist_id)

        name = try_call(lambda: get_element_text_and_html_by_tag('h1', webpage)[0])
        if name == season:
            title = name
        else:
            title = join_nonempty(name, season, delim=' - Temporada ')

        return self.playlist_from_matches(
            self._entries(url, playlist_id), playlist_id, title, ie=RadioComercialIE)

View File

@ -1,134 +0,0 @@
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import (
float_or_none,
int_or_none,
join_nonempty,
parse_qs,
update_url_query,
)
from ..utils.traversal import traverse_obj
class RedCDNLivxIE(InfoExtractor):
    """Extractor for RedCDN ``.livx`` transmissions.

    Known methods (first in url path):
    - `livedash` - DASH MPD
    - `livehls` - HTTP Live Streaming
    - `livess` - IIS Smooth Streaming
    - `nvr` - CCTV mode, directly returns a file, typically flv, avc1, aac
    - `sc` - shoutcast/icecast (audio streams, like radio)
    """

    _VALID_URL = r'https?://[^.]+\.(?:dcs\.redcdn|atmcdn)\.pl/(?:live(?:dash|hls|ss)|nvr)/o2/(?P<tenant>[^/?#]+)/(?P<id>[^?#]+)\.livx'
    IE_NAME = 'redcdnlivx'

    _TESTS = [
        {
            'url': 'https://r.dcs.redcdn.pl/livedash/o2/senat/ENC02/channel.livx?indexMode=true&startTime=638272860000&stopTime=638292544000',
            'info_dict': {
                'id': 'ENC02-638272860000-638292544000',
                'ext': 'mp4',
                'title': 'ENC02',
                'duration': 19683.982,
                'live_status': 'was_live',
            },
        },
        {
            'url': 'https://r.dcs.redcdn.pl/livedash/o2/sejm/ENC18/live.livx?indexMode=true&startTime=722333096000&stopTime=722335562000',
            'info_dict': {
                'id': 'ENC18-722333096000-722335562000',
                'ext': 'mp4',
                'title': 'ENC18',
                'duration': 2463.995,
                'live_status': 'was_live',
            },
        },
        {
            'url': 'https://r.dcs.redcdn.pl/livehls/o2/sportevolution/live/triathlon2018/warsaw.livx/playlist.m3u8?startTime=550305000000&stopTime=550327620000',
            'info_dict': {
                'id': 'triathlon2018-warsaw-550305000000-550327620000',
                'ext': 'mp4',
                'title': 'triathlon2018/warsaw',
                'duration': 22619.98,
                'live_status': 'was_live',
            },
        },
        {
            'url': 'https://n-25-12.dcs.redcdn.pl/nvr/o2/sejm/Migacz-ENC01/1.livx?startTime=722347200000&stopTime=722367345000',
            'only_matching': True,
        },
        {
            'url': 'https://redir.atmcdn.pl/nvr/o2/sejm/ENC08/1.livx?startTime=503831270000&stopTime=503840040000',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Probe every known delivery method for the transmission and merge the formats."""
        tenant, path = self._match_valid_url(url).group('tenant', 'id')
        query = parse_qs(url)
        start_time = traverse_obj(query, ('startTime', 0, {int_or_none}))
        stop_time = traverse_obj(query, ('stopTime', 0, {int_or_none}))

        manifest_suffixes = {
            'livess': '/manifest',
            'livehls': '/playlist.m3u8',
        }

        def build_url(mode):
            # Same tenant/path/time window, addressed through another method.
            params = {}
            if start_time:
                params['startTime'] = start_time
            if stop_time:
                params['stopTime'] = stop_time
            if mode == 'nvr':
                params['nolimit'] = 1
            elif mode != 'sc':
                params['indexMode'] = 'true'
            suffix = manifest_suffixes.get(mode, '')
            return update_url_query(
                f'https://r.dcs.redcdn.pl/{mode}/o2/{tenant}/{path}.livx{suffix}', params)

        # no id or title for a transmission. making ones up.
        title = path
        for noise in ('/live', 'live/', '/channel', 'channel/'):
            title = title.replace(noise, '')
        title = title.strip('/')
        video_id = join_nonempty(title.replace('/', '-'), start_time, stop_time)

        formats = []

        # The ISM manifest is downloaded by hand (rather than through
        # _extract_ism_formats) because its root attributes are needed below.
        ism_doc = None
        ism_result = self._download_xml_handle(
            build_url('livess'), video_id,
            note='Downloading ISM manifest',
            errnote='Failed to download ISM manifest',
            fatal=False)
        if ism_result is not False:
            ism_doc, ism_handle = ism_result
            formats, _ = self._parse_ism_formats_and_subtitles(ism_doc, ism_handle.url, 'ss')

        nvr_handle = self._request_webpage(
            HEADRequest(build_url('nvr')), video_id, 'Follow flv file redirect', fatal=False,
            expected_status=lambda _: True)
        if nvr_handle and nvr_handle.status == 200:
            direct_format = {
                'url': nvr_handle.url,
                'ext': 'flv',
                'format_id': 'direct-0',
                'preference': -1,  # might be slow
            }
            formats.append(direct_format)

        dash_formats = self._extract_mpd_formats(
            build_url('livedash'), video_id, mpd_id='dash', fatal=False)
        formats.extend(dash_formats)
        hls_formats = self._extract_m3u8_formats(
            build_url('livehls'), video_id, m3u8_id='hls', ext='mp4', fatal=False)
        formats.extend(hls_formats)

        time_scale = traverse_obj(ism_doc, ('@TimeScale', {int_or_none})) or 10000000
        duration = traverse_obj(
            ism_doc, ('@Duration', {float_or_none(scale=time_scale)})) or None

        if traverse_obj(ism_doc, '@IsLive') == 'TRUE':
            live_status = 'is_live'
        elif duration:
            live_status = 'was_live'
        else:
            live_status = None

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'duration': duration,
            'live_status': live_status,
        }

View File

@ -1,94 +0,0 @@
from .common import InfoExtractor
from ..utils import extract_attributes, merge_dicts, remove_end
class RheinMainTVIE(InfoExtractor):
    """Extractor for programme pages on rheinmaintv.de."""

    _VALID_URL = r'https?://(?:www\.)?rheinmaintv\.de/sendungen/(?:[\w-]+/)*(?P<video_id>(?P<display_id>[\w-]+)/vom-\d{2}\.\d{2}\.\d{4}(?:/\d+)?)'
    _TESTS = [
        {
            'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/auf-dem-weg-zur-deutschen-meisterschaft/vom-07.11.2022/',
            'info_dict': {
                'id': 'auf-dem-weg-zur-deutschen-meisterschaft-vom-07.11.2022',
                'ext': 'ismv',  # ismv+isma will be merged into mp4
                'alt_title': 'Auf dem Weg zur Deutschen Meisterschaft',
                'title': 'Auf dem Weg zur Deutschen Meisterschaft',
                'upload_date': '20221108',
                'view_count': int,
                'display_id': 'auf-dem-weg-zur-deutschen-meisterschaft',
                'thumbnail': r're:^https://.+\.jpg',
                'description': 'md5:48c59b74192bc819a9b34af1d5ed1eb9',
                'timestamp': 1667933057,
                'duration': 243.0,
            },
            'params': {'skip_download': 'ism'},
        },
        {
            'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften/vom-14.11.2022/',
            'info_dict': {
                'id': 'formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften-vom-14.11.2022',
                'ext': 'ismv',
                'title': 'Formationsgemeinschaft Rhein-Main bei den Deutschen Meisterschaften',
                'timestamp': 1668526214,
                'display_id': 'formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften',
                'alt_title': 'Formationsgemeinschaft Rhein-Main bei den Deutschen Meisterschaften',
                'view_count': int,
                'thumbnail': r're:^https://.+\.jpg',
                'duration': 345.0,
                'description': 'md5:9370ba29526984006c2cba1372e5c5a0',
                'upload_date': '20221115',
            },
            'params': {'skip_download': 'ism'},
        },
        {
            'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/casino-mainz-bei-den-deutschen-meisterschaften/vom-14.11.2022/',
            'info_dict': {
                'id': 'casino-mainz-bei-den-deutschen-meisterschaften-vom-14.11.2022',
                'ext': 'ismv',
                'title': 'Casino Mainz bei den Deutschen Meisterschaften',
                'view_count': int,
                'timestamp': 1668527402,
                'alt_title': 'Casino Mainz bei den Deutschen Meisterschaften',
                'upload_date': '20221115',
                'display_id': 'casino-mainz-bei-den-deutschen-meisterschaften',
                'duration': 348.0,
                'thumbnail': r're:^https://.+\.jpg',
                'description': 'md5:70fc1660eeba96da17199e5bdff4c0aa',
            },
            'params': {'skip_download': 'ism'},
        },
        {
            'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/bricks4kids/vom-22.06.2022/',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Combine the page's <source>/<img> tags with its JSON-LD into one info dict."""
        match = self._match_valid_url(url)
        display_id = match.group('display_id')
        video_id = match.group('video_id').replace('/', '-')
        webpage = self._download_webpage(url, video_id)

        source_tag, img_tag = self._search_regex(
            r'(?s)(?P<source><source[^>]*>)(?P<img><img[^>]*>)',
            webpage, 'video', group=('source', 'img'))
        source_attrs = extract_attributes(source_tag)
        img_attrs = extract_attributes(img_tag)

        raw_json_ld = list(self._yield_json_ld(webpage, video_id))
        json_ld = self._json_ld(raw_json_ld, video_id)
        # Drop the JSON-LD 'url' so it is not merged into the result below.
        json_ld.pop('url', None)

        # Prefer the <source> tag; otherwise use the first VideoObject's embedUrl.
        manifest_url = source_attrs.get('src')
        if not manifest_url:
            manifest_url = next(
                entry.get('embedUrl') for entry in raw_json_ld
                if entry.get('@type') == 'VideoObject')
        formats, subtitles = self._extract_ism_formats_and_subtitles(manifest_url, video_id)

        headline = self._html_search_regex(
            r'<h1><span class="title">([^<]*)</span>', webpage, 'headline', default=None)
        title = (
            headline
            or img_attrs.get('title')
            or json_ld.get('title')
            or self._og_search_title(webpage)
            or remove_end(self._html_extract_title(webpage), ' -'))

        description = json_ld.get('description') or self._og_search_description(webpage)

        if 'src' in img_attrs:
            thumbnails = [{'url': img_attrs['src']}]
        else:
            thumbnails = json_ld.get('thumbnails')

        scraped = {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'alt_title': img_attrs.get('alt'),
            'description': description,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
        }
        return merge_dicts(scraped, json_ld)

View File

@ -1,69 +0,0 @@
import urllib.parse
from .brightcove import BrightcoveLegacyIE
from .common import InfoExtractor
from ..utils import smuggle_url
class RMCDecouverteIE(InfoExtractor):
    """Extractor for rmcdecouverte.bfmtv.com; delegates playback to Brightcove."""

    _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/(?:[^?#]*_(?P<id>\d+)|mediaplayer-direct)/?(?:[#?]|$)'
    _TESTS = [
        {
            'url': 'https://rmcdecouverte.bfmtv.com/vestiges-de-guerre_22240/les-bunkers-secrets-domaha-beach_25303/',
            'info_dict': {
                'id': '6250879771001',
                'ext': 'mp4',
                'title': 'LES BUNKERS SECRETS D´OMAHA BEACH',
                'uploader_id': '1969646226001',
                'description': 'md5:aed573ca24abde62a148e0eba909657d',
                'timestamp': 1619622984,
                'upload_date': '20210428',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'https://rmcdecouverte.bfmtv.com/wheeler-dealers-occasions-a-saisir/program_2566/',
            'info_dict': {
                'id': '5983675500001',
                'ext': 'mp4',
                'title': 'CORVETTE',
                'description': 'md5:c1e8295521e45ffebf635d6a7658f506',
                'uploader_id': '1969646226001',
                'upload_date': '20181226',
                'timestamp': 1545861635,
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'only available for a week',
        },
        {
            'url': 'https://rmcdecouverte.bfmtv.com/avions-furtifs-la-technologie-de-lextreme_10598',
            'only_matching': True,
        },
        {
            # The website accepts any URL as long as it has _\d+ at the end
            'url': 'https://rmcdecouverte.bfmtv.com/any/thing/can/go/here/_10598',
            'only_matching': True,
        },
        {
            # live, geo restricted, bypassable
            'url': 'https://rmcdecouverte.bfmtv.com/mediaplayer-direct/',
            'only_matching': True,
        },
    ]
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Find the Brightcove video id in the page and hand over to BrightcoveNew."""
        # The live page URL carries no numeric id, so 'direct' stands in for it.
        display_id = self._match_valid_url(url).group('id') or 'direct'
        webpage = self._download_webpage(url, display_id)

        legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
        if not legacy_url:
            brightcove_id = self._search_regex(
                r'data-video-id=["\'](\d+)', webpage, 'brightcove id')
        else:
            legacy_query = urllib.parse.urlparse(legacy_url).query
            brightcove_id = urllib.parse.parse_qs(legacy_query)['@videoPlayer'][0]

        player_url = smuggle_url(
            self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
            {'geo_countries': ['FR']})
        return self.url_result(player_url, 'BrightcoveNew', brightcove_id)

View File

@ -1,7 +1,6 @@
import datetime as dt
from .common import InfoExtractor
from .redge import RedCDNLivxIE
from ..utils import (
clean_html,
join_nonempty,
@ -27,6 +26,7 @@ def rfc3339_to_atende(date):
class SejmIE(InfoExtractor):
_WORKING = False
_VALID_URL = (
r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp(?:\?[^#]*)?#(?P<id>[\dA-F]+)',
r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp\?(?:[^#]+&)?unid=(?P<id>[\dA-F]+)',
@ -185,7 +185,7 @@ class SejmIE(InfoExtractor):
entries.append({
**common_info,
'_type': 'url_transparent',
'ie_key': RedCDNLivxIE.ie_key(),
'ie_key': 'redcdnlivx',
'id': stream_id,
'title': join_nonempty(title, stream_id, delim=' - '),
})

View File

@ -1,105 +0,0 @@
import re
from .common import InfoExtractor
from ..utils import (
determine_protocol,
float_or_none,
int_or_none,
parse_iso8601,
unescapeHTML,
update_url_query,
)
class SendtoNewsIE(InfoExtractor):
    """Extractor for embed.sendtonews.com playlist players (marked as not working)."""

    _WORKING = False
    _VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P<id>[0-9A-Za-z-]+)'

    _TEST = {
        # From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/
        'url': 'http://embed.sendtonews.com/player2/embedplayer.php?SC=GxfCe0Zo7D-175909-5588&type=single&autoplay=on&sound=YES',
        'info_dict': {
            'id': 'GxfCe0Zo7D-175909-5588',
        },
        'playlist_count': 8,
        # test the first video only to prevent lengthy tests
        'playlist': [{
            'info_dict': {
                'id': '240385',
                'ext': 'mp4',
                'title': 'Indians introduce Encarnacion',
                'description': 'Indians president of baseball operations Chris Antonetti and Edwin Encarnacion discuss the slugger\'s three-year contract with Cleveland',
                'duration': 137.898,
                'thumbnail': r're:https?://.*\.jpg$',
                'upload_date': '20170105',
                'timestamp': 1483649762,
            },
        }],
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    _URL_TEMPLATE = '//embed.sendtonews.com/player2/embedplayer.php?SC=%s'

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield the player URL for the first responsive-embed script tag, if any."""
        script_match = re.search(r'''(?x)<script[^>]+src=([\'"])
            (?:https?:)?//embed\.sendtonews\.com/player/responsiveembed\.php\?
            .*\bSC=(?P<SC>[0-9a-zA-Z-]+).*
            \1>''', webpage)
        if not script_match:
            return
        yield cls._URL_TEMPLATE % script_match.group('SC')

    def _real_extract(self, url):
        """Load the player's initial playlist data and build one entry per video."""
        playlist_id = self._match_id(url)

        data_url = update_url_query(
            url.replace('embedplayer.php', 'data_read.php'),
            {'cmd': 'loadInitial'})
        playlist_data = self._download_json(data_url, playlist_id)

        entries = []
        for video in playlist_data['playlistData'][0]:
            entry = self._parse_jwplayer_data(
                video['jwconfiguration'],
                require_title=False, m3u8_id='hls', rtmp_params={'no_resume': True})

            # Formats without a bitrate get one from a "/<n>k/" URL component.
            for fmt in entry['formats']:
                if not fmt.get('tbr'):
                    bitrate = int_or_none(self._search_regex(
                        r'/(\d+)k/', fmt['url'], 'bitrate', default=None))
                    if bitrate:
                        fmt.update({
                            'format_id': f'{determine_protocol(fmt)}-{bitrate}',
                            'tbr': bitrate,
                        })

            thumbnails = []
            for thumb_id, key in (('normal', 'thumbnailUrl'), ('small', 'smThumbnailUrl')):
                if video.get(key):
                    thumbnails.append({
                        'id': thumb_id,
                        'url': video[key],
                    })

            entry.update({
                'title': video['S_headLine'].strip(),
                'description': unescapeHTML(video.get('S_fullStory')),
                'thumbnails': thumbnails,
                'duration': float_or_none(video.get('SM_length')),
                'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '),
                # 'tbr' was explicitly set to be preferred over 'height' originally,
                # So this is being kept unless someone can confirm this is unnecessary
                '_format_sort_fields': ('tbr', 'res'),
            })
            entries.append(entry)

        return self.playlist_result(entries, playlist_id)

View File

@ -1,6 +0,0 @@
from .common import InfoExtractor
class ShareVideosEmbedIE(InfoExtractor):
    """Embed-only extractor: matches no URL itself, only share-videos.se iframes."""

    _VALID_URL = False
    _EMBED_REGEX = [
        # Adjacent raw literals are joined at compile time into a single pattern.
        r'<iframe[^>]+?\bsrc\s*=\s*(["\'])'
        r'(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)'
        r'\1',
    ]

View File

@ -1,68 +0,0 @@
from .common import InfoExtractor
from ..utils import (
parse_duration,
parse_filesize,
str_to_int,
)
class SnotrIE(InfoExtractor):
    """Extractor for snotr.com video pages."""

    # The scheme used to be written ``http?://``, which makes the final "p"
    # optional (matching "htt://" or "http://") and never matches "https://".
    # ``https?://`` accepts both http and https URLs.
    _VALID_URL = r'https?://(?:www\.)?snotr\.com/video/(?P<id>\d+)/([\w]+)'
    _TESTS = [{
        'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks',
        'info_dict': {
            'id': '13708',
            'ext': 'mp4',
            'title': 'Drone flying through fireworks!',
            'duration': 248,
            'filesize_approx': 40700000,
            'description': 'A drone flying through Fourth of July Fireworks',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'expected_warnings': ['description'],
    }, {
        'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10',
        'info_dict': {
            'id': '530',
            'ext': 'mp4',
            'title': 'David Letteman - George W. Bush Top 10',
            'duration': 126,
            'filesize_approx': 8500000,
            'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for a snotr.com video page.

        Formats come from the first HTML5 media entry found in the page;
        title and description come from OpenGraph tags, while view count,
        duration and approximate file size are scraped from the labelled
        "Views:", "Length:" and "Filesize:" paragraphs (each optional).
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)

        description = self._og_search_description(webpage)
        # Only the first media entry of the page is used.
        info_dict = self._parse_html5_media_entries(
            url, webpage, video_id, m3u8_entry_protocol='m3u8_native')[0]

        view_count = str_to_int(self._html_search_regex(
            r'<p[^>]*>\s*<strong[^>]*>Views:</strong>\s*<span[^>]*>([\d,\.]+)',
            webpage, 'view count', fatal=False))

        duration = parse_duration(self._html_search_regex(
            r'<p[^>]*>\s*<strong[^>]*>Length:</strong>\s*<span[^>]*>([\d:]+)',
            webpage, 'duration', fatal=False))

        filesize_approx = parse_filesize(self._html_search_regex(
            r'<p[^>]*>\s*<strong[^>]*>Filesize:</strong>\s*<span[^>]*>([^<]+)',
            webpage, 'filesize', fatal=False))

        info_dict.update({
            'id': video_id,
            'description': description,
            'title': title,
            'view_count': view_count,
            'duration': duration,
            'filesize_approx': filesize_approx,
        })

        return info_dict

View File

@ -1,122 +0,0 @@
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
unescapeHTML,
unified_timestamp,
xpath_attr,
xpath_element,
xpath_text,
)
class SpringboardPlatformIE(InfoExtractor):
    """Extractor for cms.springboardplatform.com previews, embeds and RSS feeds."""

    _VALID_URL = r'''(?x)
                    https?://
                        cms\.springboardplatform\.com/
                        (?:
                            (?:previews|embed_iframe)/(?P<index>\d+)/video/(?P<id>\d+)|
                            xml_feeds_advanced/index/(?P<index_2>\d+)/rss3/(?P<id_2>\d+)
                        )
                    '''
    _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cms\.springboardplatform\.com/embed_iframe/\d+/video/\d+.*?)\1']
    _TESTS = [
        {
            'url': 'http://cms.springboardplatform.com/previews/159/video/981017/0/0/1',
            'md5': '5c3cb7b5c55740d482561099e920f192',
            'info_dict': {
                'id': '981017',
                'ext': 'mp4',
                'title': 'Redman "BUD like YOU" "Usher Good Kisser" REMIX',
                'description': 'Redman "BUD like YOU" "Usher Good Kisser" REMIX',
                'thumbnail': r're:https?://.+\.jpg',
                'timestamp': 1409132328,
                'upload_date': '20140827',
                'duration': 193,
            },
            'skip': 'Invalid URL',
        },
        {
            'url': 'http://cms.springboardplatform.com/embed_iframe/159/video/981017/rab007/rapbasement.com/1/1',
            'only_matching': True,
        },
        {
            'url': 'http://cms.springboardplatform.com/embed_iframe/20/video/1731611/ki055/kidzworld.com/10',
            'only_matching': True,
        },
        {
            'url': 'http://cms.springboardplatform.com/xml_feeds_advanced/index/159/rss3/981017/0/0/1/',
            'only_matching': True,
        },
    ]
    _WEBPAGE_TESTS = [
        {
            'url': 'https://www.kidzworld.com/article/30935-trolls-the-beat-goes-on-interview-skylar-astin-and-amanda-leighton',
            'info_dict': {
                'id': '1731611',
                'ext': 'mp4',
                'title': 'Official Trailer | TROLLS: THE BEAT GOES ON!',
            },
            'skip': 'Invalid URL',
        },
    ]

    def _real_extract(self, url):
        """Read the video's RSS feed and expose a direct HTTP format plus an HLS variant.

        Raises ExtractorError (expected) when the feed points at the
        placeholder error video.
        """
        match = self._match_valid_url(url)
        # Either the page-style or the feed-style alternative supplied the groups.
        video_id = match.group('id') or match.group('id_2')
        index = match.group('index') or match.group('index_2')

        feed = self._download_xml(
            f'http://cms.springboardplatform.com/xml_feeds_advanced/index/{index}/rss3/{video_id}', video_id)

        item = xpath_element(feed, './/item', 'item', fatal=True)
        content = xpath_element(
            item, './{http://search.yahoo.com/mrss/}content', 'content', fatal=True)
        title = unescapeHTML(xpath_text(item, './title', 'title', fatal=True))

        video_url = content.attrib['url']
        if 'error_video.mp4' in video_url:
            raise ExtractorError(
                f'Video {video_id} no longer exists', expected=True)

        http_format = {
            'url': video_url,
            'format_id': 'http',
            'tbr': int_or_none(content.get('bitrate')),
            'filesize': int_or_none(content.get('fileSize')),
            'width': int_or_none(content.get('width')),
            'height': int_or_none(content.get('height')),
        }
        # The HLS variant reuses the HTTP format's metadata; only the host
        # prefix ("cdn." -> "hls.") and the ".m3u8" suffix differ.
        hls_format = {
            **http_format,
            'url': re.sub(r'(https?://)cdn\.', r'\1hls.', video_url) + '.m3u8',
            'ext': 'mp4',
            'format_id': 'hls',
            'protocol': 'm3u8_native',
        }

        return {
            'id': video_id,
            'title': title,
            'description': unescapeHTML(xpath_text(item, './description', 'description')),
            'thumbnail': xpath_attr(
                item, './{http://search.yahoo.com/mrss/}thumbnail', 'url', 'thumbnail'),
            'timestamp': unified_timestamp(xpath_text(
                item, './{http://cms.springboardplatform.com/namespaces.html}created',
                'timestamp')),
            'duration': int_or_none(content.get('duration')),
            'formats': [http_format, hls_format],
        }

View File

@ -1,89 +0,0 @@
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
orderedSet,
unescapeHTML,
)
class StanfordOpenClassroomIE(InfoExtractor):
    """Extractor for Stanford OpenClassroom: single videos, courses and the root page."""

    IE_NAME = 'stanfordoc'
    IE_DESC = 'Stanford Open ClassRoom'
    _VALID_URL = r'https?://openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
    _TEST = {
        'url': 'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
        'md5': '544a9468546059d4e80d76265b0443b8',
        'info_dict': {
            'id': 'PracticalUnix_intro-environment',
            'ext': 'mp4',
            'title': 'Intro Environment',
        },
    }

    def _linked_pages(self, link_pattern, page):
        """Return url_result entries for each unique relative link matching *link_pattern*."""
        return [
            self.url_result(
                f'http://openclassroom.stanford.edu/MainFolder/{unescapeHTML(href)}')
            for href in orderedSet(re.findall(link_pattern, page))
        ]

    def _extract_video(self, course, video):
        """Build the info dict of one video from its metadata XML file."""
        info = {
            'id': course + '_' + video,
            'uploader': None,
            'upload_date': None,
        }
        base_url = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
        metadata = self._download_xml(base_url + video + '.xml', info['id'])
        try:
            info['title'] = metadata.findall('./title')[0].text
            info['url'] = base_url + metadata.findall('./videoFile')[0].text
        except IndexError:
            raise ExtractorError('Invalid metadata XML file')
        return info

    def _extract_course(self, url, course):
        """Build a playlist of every video linked from a course page."""
        coursepage = self._download_webpage(
            url, course,
            note='Downloading course info page',
            errnote='Unable to download course info page')
        title = self._html_search_regex(
            r'<h1>([^<]+)</h1>', coursepage, 'title', default=course)
        description = self._html_search_regex(
            r'(?s)<description>([^<]+)</description>',
            coursepage, 'description', fatal=False)
        return {
            'id': course,
            '_type': 'playlist',
            'uploader': None,
            'upload_date': None,
            'title': title,
            'description': description,
            'entries': self._linked_pages(
                r'<a href="(VideoPage\.php\?[^"]+)">', coursepage),
        }

    def _extract_root(self):
        """Build a playlist of every course linked from the home page."""
        root_id = 'Stanford OpenClassroom'
        rootpage = self._download_webpage(
            'http://openclassroom.stanford.edu/MainFolder/HomePage.php', root_id,
            errnote='Unable to download course info page')
        return {
            'id': root_id,
            '_type': 'playlist',
            'uploader': None,
            'upload_date': None,
            'title': root_id,
            'entries': self._linked_pages(
                r'<a href="(CoursePage\.php\?[^"]+)">', rootpage),
        }

    def _real_extract(self, url):
        """Dispatch on which of the ``course`` / ``video`` URL parameters are present."""
        match = self._match_valid_url(url)
        course = match.group('course')
        video = match.group('video')

        if course and video:  # A specific video
            return self._extract_video(course, video)
        if course:  # A course page
            return self._extract_course(url, course)
        return self._extract_root()  # Root page

View File

@ -1,141 +0,0 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
clean_podcast_url,
int_or_none,
str_or_none,
try_get,
url_or_none,
)
class StitcherBaseIE(InfoExtractor):
    """Shared API access and metadata helpers for the Stitcher extractors."""

    _VALID_URL_BASE = r'https?://(?:www\.)?stitcher\.com/(?:podcast|show)/'

    def _call_api(self, path, video_id, query):
        """Query the Stitcher API and return the response's ``data`` member.

        Raises ExtractorError (expected) carrying the first error message
        when the response reports one.
        """
        response = self._download_json(
            'https://api.prod.stitcher.com/' + path, video_id, query=query)
        error_message = try_get(response, lambda x: x['errors'][0]['message'])
        if error_message:
            raise ExtractorError(error_message, expected=True)
        return response['data']

    def _extract_description(self, data):
        """Return the cleaned description, preferring the HTML variant."""
        raw_description = data.get('html_description') or data.get('description')
        return clean_html(raw_description)

    def _extract_audio_url(self, episode):
        """Return the episode's audio URL, using ``guid`` when ``audio_url`` is unset."""
        candidate = episode.get('audio_url') or episode.get('guid')
        return url_or_none(candidate)

    def _extract_show_info(self, show):
        """Return the show-level fields shared by all of the show's episodes."""
        return {
            'thumbnail': show.get('image_base_url'),
            'series': show.get('title'),
        }

    def _extract_episode(self, episode, audio_url, show_info):
        """Build an episode info dict; *show_info* entries override episode fields."""
        episode_info = {
            'id': str(episode['id']),
            'display_id': episode.get('slug'),
            'title': episode['title'].strip(),
            'description': self._extract_description(episode),
            'duration': int_or_none(episode.get('duration')),
            'url': clean_podcast_url(audio_url),
            'vcodec': 'none',
            'timestamp': int_or_none(episode.get('date_published')),
            'season_number': int_or_none(episode.get('season')),
            'season_id': str_or_none(episode.get('season_id')),
        }
        episode_info.update(show_info)
        return episode_info
class StitcherIE(StitcherBaseIE):
    """Extractor for a single Stitcher podcast episode."""

    _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?:[^/]+/)+e(?:pisode)?/(?:[^/#?&]+-)?(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true',
        'md5': 'e9635098e0da10b21a0e2b85585530f6',
        'info_dict': {
            'id': '40789481',
            'ext': 'mp3',
            'title': 'Machine Learning Mastery and Cancer Clusters',
            'description': 'md5:547adb4081864be114ae3831b4c2b42f',
            'duration': 1604,
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20151008',
            'timestamp': 1444285800,
            'series': 'Talking Machines',
        },
    }, {
        'url': 'http://www.stitcher.com/podcast/panoply/vulture-tv/e/the-rare-hourlong-comedy-plus-40846275?autoplay=true',
        'info_dict': {
            'id': '40846275',
            'display_id': 'the-rare-hourlong-comedy-plus',
            'ext': 'mp3',
            'title': "The CW's 'Crazy Ex-Girlfriend'",
            'description': 'md5:04f1e2f98eb3f5cbb094cea0f9e19b17',
            'duration': 2235,
            'thumbnail': r're:^https?://.*\.jpg',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Page Not Found',
    }, {
        # escaped title
        'url': 'http://www.stitcher.com/podcast/marketplace-on-stitcher/e/40910226?autoplay=true',
        'only_matching': True,
    }, {
        'url': 'http://www.stitcher.com/podcast/panoply/getting-in/e/episode-2a-how-many-extracurriculars-should-i-have-40876278?autoplay=true',
        'only_matching': True,
    }, {
        'url': 'https://www.stitcher.com/show/threedom/episode/circles-on-a-stick-200212584',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Look the episode up by its numeric ID and return its info dict."""
        episode_id = self._match_id(url)
        api_data = self._call_api(
            'shows/episodes', episode_id, {'episode_ids': episode_id})

        episode = api_data['episodes'][0]
        audio_url = self._extract_audio_url(episode)
        if not audio_url:
            # No usable audio URL in the API reply: ask the user to log in
            self.raise_login_required()

        show = try_get(api_data, lambda x: x['shows'][0], dict) or {}
        show_info = self._extract_show_info(show)
        return self._extract_episode(episode, audio_url, show_info)
class StitcherShowIE(StitcherBaseIE):
    """Extractor that turns a Stitcher show page into a playlist of its episodes."""

    _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?P<id>[^/#?&]+)/?(?:[?#&]|$)'
    _TESTS = [{
        'url': 'http://www.stitcher.com/podcast/the-talking-machines',
        'info_dict': {
            'id': 'the-talking-machines',
            'title': 'Talking Machines',
            'description': 'md5:831f0995e40f26c10231af39cf1ebf0b',
        },
        'playlist_mincount': 106,
    }, {
        'url': 'https://www.stitcher.com/show/the-talking-machines',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch all episodes of the show and return them as a playlist."""
        show_slug = self._match_id(url)
        api_data = self._call_api(
            f'search/show/{show_slug}/allEpisodes', show_slug, {'count': 10000})

        show = try_get(api_data, lambda x: x['shows'][0], dict) or {}
        show_info = self._extract_show_info(show)

        # Pair every episode with its audio URL; episodes without one are dropped
        episodes_with_audio = (
            (episode, self._extract_audio_url(episode))
            for episode in (api_data.get('episodes') or []))
        entries = [
            self._extract_episode(episode, audio_url, show_info)
            for episode, audio_url in episodes_with_audio
            if audio_url
        ]

        playlist_title = show.get('title')
        playlist_description = self._extract_description(show)
        return self.playlist_result(
            entries, show_slug, playlist_title, playlist_description)

View File

@ -1,35 +0,0 @@
from .common import InfoExtractor
class StretchInternetIE(InfoExtractor):
    """Extractor for events on portal.stretchinternet.com."""

    _VALID_URL = r'https?://portal\.stretchinternet\.com/[^/]+/(?:portal|full)\.htm\?.*?\beventId=(?P<id>\d+)'
    _TEST = {
        'url': 'https://portal.stretchinternet.com/umary/portal.htm?eventId=573272&streamType=video',
        'info_dict': {
            'id': '573272',
            'ext': 'mp4',
            'title': 'UNIVERSITY OF MARY WRESTLING VS UPPER IOWA',
            # 'timestamp': 1575668361,
            # 'upload_date': '20191206',
            'uploader_id': '99997',
        },
    }

    def _real_extract(self, url):
        """Resolve the media URL and event metadata for the event ID in ``url``."""
        video_id = self._match_id(url)

        # First request: the media list; its URL value carries no scheme
        media_listing = self._download_json(
            f'https://core.stretchlive.com/trinity/event/tcg/{video_id}', video_id)
        media_url = media_listing[0]['media'][0]['url']

        # Second request: event metadata (title, owner)
        event_reply = self._download_json(
            'https://neo-client.stretchinternet.com/portal-ws/getEvent.json',
            video_id, query={'eventID': video_id, 'token': 'asdf'})
        event = event_reply['event']

        return {
            'id': video_id,
            'title': event['title'],
            # TODO: parse US timezone abbreviations
            # 'timestamp': event.get('dateTimeString'),
            'url': 'https://' + media_url,
            'uploader_id': event.get('ownerID'),
        }

View File

@ -1,45 +0,0 @@
from .vidyard import VidyardBaseIE
from ..utils import ExtractorError, int_or_none, make_archive_id
class SwearnetEpisodeIE(VidyardBaseIE):
    """Extractor for swearnet.com episodes, which are served through Vidyard."""

    _VALID_URL = r'https?://www\.swearnet\.com/shows/(?P<id>[\w-]+)/seasons/(?P<season_num>\d+)/episodes/(?P<episode_num>\d+)'
    _TESTS = [{
        'url': 'https://www.swearnet.com/shows/gettin-learnt-with-ricky/seasons/1/episodes/1',
        'info_dict': {
            'id': 'wicK2EOzjOdxkUXGDIgcPw',
            'display_id': '232819',
            'ext': 'mp4',
            'episode_number': 1,
            'episode': 'Episode 1',
            'duration': 719,
            'description': r're:Are you drunk and high and craving a grilled cheese sandwich.+',
            'season': 'Season 1',
            'title': 'Episode 1 - Grilled Cheese Sammich',
            'season_number': 1,
            'thumbnail': 'https://cdn.vidyard.com/thumbnails/custom/0dd74f9b-388a-452e-b570-b407fb64435b_small.jpg',
            'tags': ['Getting Learnt with Ricky', 'drunk', 'grilled cheese', 'high'],
            '_old_archive_ids': ['swearnetepisode 232819'],
        },
    }]

    def _real_extract(self, url):
        """Find the Vidyard external ID on the episode page and extract that video."""
        slug, season_number, episode_number = self._match_valid_url(url).group(
            'id', 'season_num', 'episode_num')
        webpage = self._download_webpage(url, slug)

        try:
            external_id = self._search_regex(
                r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
        except ExtractorError:
            # A page showing 'Upgrade Now' without an external ID needs a login
            if 'Upgrade Now' in webpage:
                self.raise_login_required()
            raise

        video_json = self._fetch_video_json(external_id)
        info = self._process_video_json(video_json['chapters'][0], external_id)

        display_id = info.get('display_id')
        if display_id:
            info['_old_archive_ids'] = [make_archive_id(self, display_id)]

        return {
            **info,
            'season_number': int_or_none(season_number),
            'episode_number': int_or_none(episode_number),
        }

View File

@ -1,33 +0,0 @@
from .common import InfoExtractor
from ..utils import traverse_obj
class SYVDKIE(InfoExtractor):
    """Extractor for 24syv.dk podcast episodes."""

    _VALID_URL = r'https?://(?:www\.)?24syv\.dk/episode/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://24syv.dk/episode/isabella-arendt-stiller-op-for-de-konservative-2',
        'md5': '429ce5a423dd4b1e1d0bf3a569558089',
        'info_dict': {
            'id': '12215',
            'display_id': 'isabella-arendt-stiller-op-for-de-konservative-2',
            'ext': 'mp3',
            'title': 'Isabella Arendt stiller op for De Konservative',
            'description': 'md5:f5fa6a431813bf37284f3412ad7c6c06',
        },
    }]

    def _real_extract(self, url):
        """Read the episode details out of the page's Next.js data blob."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        nextjs_data = self._search_nextjs_data(webpage, video_id)
        episode = nextjs_data['props']['pageProps']['episodeDetails'][0]

        return {
            'id': str(episode['id']),
            'vcodec': 'none',
            'ext': 'mp3',
            'url': episode['details']['enclosure'],
            'display_id': video_id,
            'title': traverse_obj(episode, ('title', 'rendered')),
            'description': traverse_obj(episode, ('details', 'post_title')),
        }

View File

@ -1,114 +0,0 @@
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
traverse_obj,
try_call,
)
class IVXPlayerIE(InfoExtractor):
    """Extractor for iVideoSmart (IVX) player embeds, addressed as ``ivxplayer:<video_id>:<player_key>``."""

    _VALID_URL = r'ivxplayer:(?P<video_id>\d+):(?P<player_key>\w+)'
    _TESTS = [{
        'url': 'ivxplayer:2366065:4a89dfe6bc8f002596b1dfbd600730b1',
        'info_dict': {
            'id': '2366065',
            'ext': 'mp4',
            'duration': 112,
            'upload_date': '20221204',
            'title': 'Film Indonesia di Disney Content Showcase Asia Pacific 2022',
            'timestamp': 1670151746,
            'thumbnail': 'https://ivx-image.ivideosmart.com/serve/image/video/2366065?width=300',
        },
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.cantika.com/video/31737/film-indonesia-di-disney-content-showcase-asia-pacific-2022',
        'info_dict': {
            'id': '2374200',
            'ext': 'mp4',
            'duration': 110,
            'title': 'Serial Indonesia di Disney Content Showcase Asia Pacific 2022',
            'timestamp': 1670639416,
            'upload_date': '20221210',
            'thumbnail': 'https://ivx-image.ivideosmart.com/serve/image/video/2374200?width=300',
        },
    }, {
        'url': 'https://www.gooto.com/video/11437/wuling-suv-ramai-dikunjungi-di-giias-2018',
        'info_dict': {
            'id': '892109',
            'ext': 'mp4',
            'title': 'Wuling SUV Ramai Dikunjungi di GIIAS 2018',
            'upload_date': '20180811',
            'description': 'md5:6d901483d0aacc664aecb4489719aafa',
            'duration': 75,
            'timestamp': 1534011263,
            'thumbnail': 'https://ivx-image.ivideosmart.com/serve/image/video/892109?width=300',
        },
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield the ``ivxplayer:`` URL of the first ``<ivs-player>`` tag, then stop extraction."""
        # more info at https://player.ivideosmart.com/ivsplayer/v4/dist/js/loader.js
        embed = re.search(
            r'<ivs-player\s*[^>]+data-ivs-key\s*=\s*"(?P<player_key>[\w]+)\s*[^>]+\bdata-ivs-vid="(?P<video_id>[\w-]+)',
            webpage)
        if embed is None:
            return
        video_id, player_key = embed.group('video_id', 'player_key')
        yield f'ivxplayer:{video_id}:{player_key}'
        raise cls.StopExtraction

    def _real_extract(self, url):
        """Download the player JSON for the video/key pair and build the info dict."""
        video_id, player_key = self._match_valid_url(url).group('video_id', 'player_key')
        api_url = f'https://ivxplayer.ivideosmart.com/prod/video/{video_id}?key={player_key}'
        json_data = self._download_json(api_url, video_id)

        m3u8_url = json_data['player']['video_url']
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id)

        # 'id' is mandatory (plain indexing); the other metadata fields are optional
        ivx_id = str(json_data['ivx']['id'])
        title = traverse_obj(json_data, ('ivx', 'name'))
        description = traverse_obj(json_data, ('ivx', 'description'))
        duration = int_or_none(traverse_obj(json_data, ('ivx', 'duration')))
        timestamp = parse_iso8601(traverse_obj(json_data, ('ivx', 'published_at')))
        thumbnail = traverse_obj(json_data, ('ivx', 'thumbnail_url'))

        return {
            'id': ivx_id,
            'title': title,
            'description': description,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnail': thumbnail,
        }
class TempoIE(InfoExtractor):
    """Extractor for video.tempo.co pages, which embed an IVXPlayer video."""

    _VALID_URL = r'https?://video\.tempo\.co/\w+/\d+/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://video.tempo.co/read/30058/anies-baswedan-ajukan-banding-putusan-ptun-batalkan-ump-dki',
        'info_dict': {
            'id': '2144275',
            'display_id': 'anies-baswedan-ajukan-banding-putusan-ptun-batalkan-ump-dki',
            'ext': 'mp4',
            'title': 'Anies Baswedan Ajukan Banding Putusan PTUN Batalkan UMP DKI',
            'duration': 85,
            'description': 'md5:a6822b7c4c874fa7e5bd63e96a387b66',
            'thumbnail': 'https://statik.tempo.co/data/2022/07/27/id_1128287/1128287_720.jpg',
            'timestamp': 1658907970,
            'upload_date': '20220727',
            'tags': ['Anies Baswedan', ' PTUN', ' PTUN | Pengadilan Tata Usaha Negara', ' PTUN Batalkan UMP DKI', ' UMP DKI'],
        },
    }]

    def _real_extract(self, url):
        """Delegate to IVXPlayerIE, overlaying page-level metadata on its result.

        Raises ExtractorError when the page contains no IVXPlayer embed.
        """
        # Local import: this module's top-level imports do not include ExtractorError
        from ..utils import ExtractorError

        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The generator yields an 'ivxplayer:<video_id>:<player_key>' URL, or nothing
        # when no <ivs-player> tag matches. Supplying a default avoids leaking a bare
        # StopIteration out of the extractor in that case.
        embed_url = next(IVXPlayerIE._extract_embed_urls(url, webpage), None)
        if not embed_url:
            raise ExtractorError('Unable to find IVXPlayer embed', video_id=display_id)

        json_ld_data = self._search_json_ld(webpage, display_id)

        return self.url_result(
            embed_url, display_id=display_id,
            thumbnail=self._html_search_meta('twitter:image:src', webpage) or self._og_search_thumbnail(webpage),
            # try_call swallows the failure when the 'keywords' meta tag is absent
            tags=try_call(lambda: self._html_search_meta('keywords', webpage).split(',')),
            description=(json_ld_data.get('description')
                         or self._html_search_meta(('description', 'twitter:description'), webpage)
                         or self._og_search_description(webpage)),
            url_transparent=True)

View File

@ -1,35 +0,0 @@
from .common import InfoExtractor
from ..utils import extract_attributes, remove_end
class TheHoleTvIE(InfoExtractor):
    """Extractor for the-hole.tv episode pages."""

    _VALID_URL = r'https?://(?:www\.)?the-hole\.tv/episodes/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://the-hole.tv/episodes/gromkii-vopros-sergey-orlov',
        'md5': 'fea6682f47786f3ae5a6cbd635ec4bf9',
        'info_dict': {
            'id': 'gromkii-vopros-sergey-orlov',
            'ext': 'mp4',
            'title': 'Сергей Орлов — Громкий вопрос',
            'thumbnail': 'https://assets-cdn.the-hole.tv/images/t8gan4n6zn627e7wni11b2uemqts',
            'description': 'md5:45741a9202331f995d9fb76996759379',
        },
    }]

    def _real_extract(self, url):
        """Read the HLS source and poster from the player ``<div>`` attributes."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        player_tag = self._search_regex(
            r'(<div[^>]*\bdata-controller="player"[^>]*>)', webpage, 'video player')
        player_attrs = extract_attributes(player_tag)

        manifest_url = player_attrs['data-player-source-value']
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            manifest_url, video_id, 'mp4')

        # Strip the site suffix from the HTML <title>
        page_title = self._html_extract_title(webpage)
        title = remove_end(page_title, ' — The Hole')

        return {
            'id': video_id,
            'title': title,
            'description': self._og_search_description(webpage),
            'thumbnail': player_attrs.get('data-player-poster-value'),
            'formats': formats,
            'subtitles': subtitles,
        }

View File

@ -1,61 +0,0 @@
import re
from .common import InfoExtractor
class TrailerAddictIE(InfoExtractor):
    """Extractor for traileraddict.com trailers and clips (marked as not working)."""

    _WORKING = False
    _VALID_URL = r'(?:https?://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
    _TEST = {
        'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
        'md5': '41365557f3c8c397d091da510e73ceb4',
        'info_dict': {
            'id': '76184',
            'ext': 'mp4',
            'title': 'Prince Avalanche Trailer',
            'description': 'Trailer for Prince Avalanche.\n\nTwo highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind.',
        },
    }

    def _real_extract(self, url):
        """Scrape the trailer page, then the ``fvar``/``fvarhd`` info page for the media URL."""
        mobj = self._match_valid_url(url)
        name = mobj.group('movie') + '/' + mobj.group('trailer_name')
        webpage = self._download_webpage(url, name)

        # The title is required and `.replace()` is called on the result, so make a
        # missing <title> a regular extraction error rather than an AttributeError.
        title = self._html_extract_title(
            webpage, 'video title', fatal=True).replace(' - Trailer Addict', '')

        view_count_str = self._search_regex(
            r'<span class="views_n">([0-9,.]+)</span>',
            webpage, 'view count', fatal=False)
        # The pattern above admits both ',' and '.' as digit-group separators, so
        # strip every non-digit before converting; a match with no digits at all
        # (e.g. a lone separator) yields None rather than raising ValueError.
        view_count_digits = re.sub(r'\D', '', view_count_str or '')
        view_count = int(view_count_digits) if view_count_digits else None

        video_id = self._search_regex(
            r'<param\s+name="movie"\s+value="/emb/([0-9]+)"\s*/>',
            webpage, 'video id')

        # Presence of (no)watchplus function indicates HD quality is available
        # NOTE(review): the trailing `()` in this pattern is an empty regex group, not
        # literal parentheses, so it matches any text starting with
        # 'function watchplus' / 'function nowatchplus'. Left as-is to preserve matching.
        if re.search(r'function (no)?watchplus()', webpage):
            fvar = 'fvarhd'
        else:
            fvar = 'fvar'

        info_url = f'http://www.traileraddict.com/{fvar}.php?tid={video_id!s}'
        info_webpage = self._download_webpage(info_url, video_id, 'Downloading the info webpage')

        # The info page is a query-string-like blob; the file URL has its '?' encoded
        final_url = self._search_regex(
            r'&fileurl=(.+)',
            info_webpage, 'Download url').replace('%3F', '?')
        thumbnail_url = self._search_regex(
            r'&image=(.+?)&',
            info_webpage, 'thumbnail url')

        description = self._html_search_regex(
            r'(?s)<div class="synopsis">.*?<div class="movie_label_info"[^>]*>(.*?)</div>',
            webpage, 'description', fatal=False)

        return {
            'id': video_id,
            'url': final_url,
            'title': title,
            'thumbnail': thumbnail_url,
            'description': description,
            'view_count': view_count,
        }

View File

@ -1,329 +0,0 @@
import itertools
import json
import re
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import (
ExtractorError,
UnsupportedError,
determine_ext,
int_or_none,
parse_resolution,
str_or_none,
traverse_obj,
unified_timestamp,
url_basename,
url_or_none,
urljoin,
)
class TrillerBaseIE(InfoExtractor):
    """Shared login, comment and video-parsing logic for the Triller extractors."""

    _NETRC_MACHINE = 'triller'
    _API_BASE_URL = 'https://social.triller.co/v1.5'
    # Class-level dict: _perform_login stores the 'Authorization' bearer token in it
    _API_HEADERS = {'Origin': 'https://triller.co'}

    def _perform_login(self, username, password):
        """Log in with username/password and store the bearer token in ``_API_HEADERS``.

        Does nothing if an Authorization header is already present.
        Raises ExtractorError on an unknown username, wrong password or any
        other login failure.
        """
        if self._API_HEADERS.get('Authorization'):
            return

        headers = {**self._API_HEADERS, 'Content-Type': 'application/json'}
        user_check = traverse_obj(self._download_json(
            f'{self._API_BASE_URL}/api/user/is-valid-username', None, note='Checking username',
            fatal=False, expected_status=400, headers=headers,
            data=json.dumps({'username': username}, separators=(',', ':')).encode()), 'status')

        if user_check:  # endpoint returns `"status":false` if username exists
            raise ExtractorError('Unable to login: Invalid username', expected=True)

        # Non-fatal download: a failed request leaves `login` as an empty dict
        login = self._download_json(
            f'{self._API_BASE_URL}/user/auth', None, note='Logging in', fatal=False,
            expected_status=400, headers=headers, data=json.dumps({
                'username': username,
                'password': password,
            }, separators=(',', ':')).encode()) or {}

        if not login.get('auth_token'):
            # Error code 1008 is reported to the user as an incorrect password
            if login.get('error') == 1008:
                raise ExtractorError('Unable to login: Incorrect password', expected=True)
            raise ExtractorError('Unable to login')

        self._API_HEADERS['Authorization'] = f'Bearer {login["auth_token"]}'

    def _get_comments(self, video_id, limit=15):
        """Yield comment dicts for ``video_id``; yields nothing if none are returned."""
        comment_info = self._download_json(
            f'{self._API_BASE_URL}/api/videos/{video_id}/comments_v2',
            video_id, fatal=False, note='Downloading comments API JSON',
            headers=self._API_HEADERS, query={'limit': limit}) or {}
        if not comment_info.get('comments'):
            return
        yield from traverse_obj(comment_info, ('comments', ..., {
            'id': ('id', {str_or_none}),
            'text': 'body',
            'author': ('author', 'username'),
            'author_id': ('author', 'user_id'),
            'timestamp': ('timestamp', {unified_timestamp}),
        }))

    def _parse_video_info(self, video_info, username, user_id, display_id=None):
        """Convert one API video object into an info dict.

        ``display_id`` falls back to the object's ``video_uuid``; ``user_id``
        falls back to the ID found under the object's ``user`` key.
        """
        video_id = str(video_info['id'])
        display_id = display_id or video_info.get('video_uuid')

        # Any media URL containing '/copyright/' marks the video as removed
        if traverse_obj(video_info, (
                None, ('transcoded_url', 'video_url', 'stream_url', 'audio_url'),
                {lambda x: re.search(r'/copyright/', x)}), get_all=False):
            self.raise_no_formats('This video has been removed due to licensing restrictions', expected=True)

        def format_info(url):
            # Common format fields derived from the URL; format_id is the
            # basename up to its first '.'
            return {
                'url': url,
                'ext': determine_ext(url),
                'format_id': url_basename(url).split('.')[0],
            }

        formats = []

        # HLS manifest, only when transcoded_url has an m3u8 extension
        if determine_ext(video_info.get('transcoded_url')) == 'm3u8':
            formats.extend(self._extract_m3u8_formats(
                video_info['transcoded_url'], video_id, 'mp4', m3u8_id='hls', fatal=False))

        # Entries of video_set that carry a valid URL
        for video in traverse_obj(video_info, ('video_set', lambda _, v: url_or_none(v['url']))):
            formats.append({
                **format_info(video['url']),
                **parse_resolution(video.get('resolution')),
                'vcodec': video.get('codec'),
                'vbr': int_or_none(video.get('bitrate'), 1000),
            })

        # Direct video URL: video_url is tried first, then stream_url
        video_url = traverse_obj(video_info, 'video_url', 'stream_url', expected_type=url_or_none)
        if video_url:
            formats.append({
                **format_info(video_url),
                'vcodec': 'h264',
                **traverse_obj(video_info, {
                    'width': 'width',
                    'height': 'height',
                    'filesize': 'filesize',
                }, expected_type=int_or_none),
            })

        audio_url = url_or_none(video_info.get('audio_url'))
        if audio_url:
            formats.append(format_info(audio_url))

        comment_count = traverse_obj(video_info, ('comment_count', {int_or_none}))

        return {
            'id': video_id,
            'display_id': display_id,
            'uploader': username,
            'uploader_id': user_id or traverse_obj(video_info, ('user', 'user_id', {str_or_none})),
            'webpage_url': urljoin(f'https://triller.co/@{username}/video/', display_id),
            'uploader_url': f'https://triller.co/@{username}',
            # Results are always attributed to TrillerIE, whichever subclass produced them
            'extractor_key': TrillerIE.ie_key(),
            'extractor': TrillerIE.IE_NAME,
            'formats': formats,
            'comment_count': comment_count,
            '__post_extractor': self.extract_comments(video_id, comment_count),
            **traverse_obj(video_info, {
                # Title is the description with CRLF line breaks replaced by spaces
                'title': ('description', {lambda x: x.replace('\r\n', ' ')}),
                'description': 'description',
                # Name from 'user', or from the 'users' entry whose user_id matches
                'creator': ((('user'), ('users', lambda _, v: str(v['user_id']) == user_id)), 'name'),
                'thumbnail': ('thumbnail_url', {url_or_none}),
                'timestamp': ('timestamp', {unified_timestamp}),
                'duration': ('duration', {int_or_none}),
                'view_count': ('play_count', {int_or_none}),
                'like_count': ('likes_count', {int_or_none}),
                'artist': 'song_artist',
                'track': 'song_title',
            }, get_all=False),
        }
class TrillerIE(TrillerBaseIE):
    """Extractor for a single Triller video addressed by username and UUID."""

    _VALID_URL = r'''(?x)
            https?://(?:www\.)?triller\.co/
            @(?P<username>[\w.]+)/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})
        '''
    _TESTS = [{
        'url': 'https://triller.co/@theestallion/video/2358fcd7-3df2-4c77-84c8-1d091610a6cf',
        'md5': '228662d783923b60d78395fedddc0a20',
        'info_dict': {
            'id': '71595734',
            'ext': 'mp4',
            'title': 'md5:9a2bf9435c5c4292678996a464669416',
            'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
            'description': 'md5:9a2bf9435c5c4292678996a464669416',
            'uploader': 'theestallion',
            'uploader_id': '18992236',
            'creator': 'Megan Thee Stallion',
            'timestamp': 1660598222,
            'upload_date': '20220815',
            'duration': 47,
            'view_count': int,
            'like_count': int,
            'artist': 'Megan Thee Stallion',
            'track': 'Her',
            'uploader_url': 'https://triller.co/@theestallion',
            'comment_count': int,
        },
        'skip': 'This video has been removed due to licensing restrictions',
    }, {
        'url': 'https://triller.co/@charlidamelio/video/46c6fcfa-aa9e-4503-a50c-68444f44cddc',
        'md5': '874055f462af5b0699b9dbb527a505a0',
        'info_dict': {
            'id': '71621339',
            'ext': 'mp4',
            'title': 'md5:4c91ea82760fe0fffb71b8c3aa7295fc',
            'display_id': '46c6fcfa-aa9e-4503-a50c-68444f44cddc',
            'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
            'description': 'md5:4c91ea82760fe0fffb71b8c3aa7295fc',
            'uploader': 'charlidamelio',
            'uploader_id': '1875551',
            'creator': 'charli damelio',
            'timestamp': 1660773354,
            'upload_date': '20220817',
            'duration': 16,
            'view_count': int,
            'like_count': int,
            'artist': 'Dixie',
            'track': 'Someone to Blame',
            'uploader_url': 'https://triller.co/@charlidamelio',
            'comment_count': int,
        },
    }, {
        'url': 'https://triller.co/@theestallion/video/07f35f38-1f51-48e2-8c5f-f7a8e829988f',
        'md5': 'af7b3553e4b8bfca507636471ee2eb41',
        'info_dict': {
            'id': '71837829',
            'ext': 'mp4',
            'title': 'UNGRATEFUL VIDEO OUT NOW 👏🏾👏🏾👏🏾 💙💙 link my bio #womeninhiphop',
            'display_id': '07f35f38-1f51-48e2-8c5f-f7a8e829988f',
            'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
            'description': 'UNGRATEFUL VIDEO OUT NOW 👏🏾👏🏾👏🏾 💙💙 link my bio\r\n #womeninhiphop',
            'uploader': 'theestallion',
            'uploader_id': '18992236',
            'creator': 'Megan Thee Stallion',
            'timestamp': 1662486178,
            'upload_date': '20220906',
            'duration': 30,
            'view_count': int,
            'like_count': int,
            'artist': 'Unknown',
            'track': 'Unknown',
            'uploader_url': 'https://triller.co/@theestallion',
            'comment_count': int,
        },
    }]

    def _real_extract(self, url):
        """Fetch the video object for the UUID in ``url`` and parse it."""
        username, display_id = self._match_valid_url(url).group('username', 'id')
        api_url = f'{self._API_BASE_URL}/api/videos/{display_id}'
        response = self._download_json(api_url, display_id, headers=self._API_HEADERS)
        # The endpoint wraps the result in a list; only the first entry is used
        video_info = response['videos'][0]
        return self._parse_video_info(video_info, username, None, display_id)
class TrillerUserIE(TrillerBaseIE):
    """Extractor that lists all videos of a Triller user profile as a playlist."""

    _VALID_URL = r'https?://(?:www\.)?triller\.co/@(?P<id>[\w.]+)/?(?:$|[#?])'
    _TESTS = [{
        'url': 'https://triller.co/@theestallion',
        'playlist_mincount': 12,
        'info_dict': {
            'id': '18992236',
            'title': 'theestallion',
            'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
        },
    }, {
        'url': 'https://triller.co/@charlidamelio',
        'playlist_mincount': 150,
        'info_dict': {
            'id': '1875551',
            'title': 'charlidamelio',
            'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
        },
    }]

    def _real_initialize(self):
        """Create a guest session when no Authorization header has been stored yet."""
        if self._API_HEADERS.get('Authorization'):
            return

        guest = self._download_json(
            f'{self._API_BASE_URL}/user/create_guest', None,
            note='Creating guest session', data=b'', headers=self._API_HEADERS,
            query={'platform': 'Web', 'app_version': ''})
        auth_token = guest.get('auth_token')
        if not auth_token:
            raise ExtractorError('Unable to fetch required auth token for user extraction')
        self._API_HEADERS['Authorization'] = f'Bearer {auth_token}'

    def _entries(self, username, user_id, limit=6):
        """Yield parsed videos page by page, paging backwards via ``before_time``."""
        query = {'limit': limit}
        page = 0
        while True:
            page += 1
            videos = self._download_json(
                f'{self._API_BASE_URL}/api/users/{user_id}/videos',
                username, note=f'Downloading user video list page {page}',
                headers=self._API_HEADERS, query=query)
            yield from (
                self._parse_video_info(video, username, user_id)
                for video in traverse_obj(videos, ('videos', ...)))

            # The timestamp of the last video on this page is the next page's cursor
            last_timestamp = traverse_obj(videos, ('videos', -1, 'timestamp'))
            query['before_time'] = last_timestamp
            if not last_timestamp:
                return

    def _real_extract(self, url):
        """Resolve the username to a user ID and return the user's videos as a playlist."""
        username = self._match_id(url)
        response = self._download_json(
            f'{self._API_BASE_URL}/api/users/by_username/{username}',
            username, note='Downloading user info', headers=self._API_HEADERS)
        user_info = traverse_obj(response, ('user', {dict})) or {}

        is_followed = user_info.get('followed_by_me') in (True, 'true')
        if user_info.get('private') and not is_followed:
            raise ExtractorError('This user profile is private', expected=True)
        if traverse_obj(user_info, (('blocked_by_user', 'blocking_user'), {bool}), get_all=False):
            raise ExtractorError('The author of the video is blocked', expected=True)

        user_id = str_or_none(user_info.get('user_id'))
        if not user_id:
            raise ExtractorError('Unable to extract user ID')

        return self.playlist_result(
            self._entries(username, user_id), user_id, username,
            thumbnail=user_info.get('avatar_url'))
class TrillerShortIE(InfoExtractor):
    """Extractor that resolves v.triller.co short links to their target URL."""

    _VALID_URL = r'https?://v\.triller\.co/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://v.triller.co/WWZNWk',
        'md5': '5eb8dc2c971bd8cd794ec9e8d5e9d101',
        'info_dict': {
            'id': '66210052',
            'ext': 'mp4',
            'title': 'md5:2dfc89d154cd91a4a18cd9582ba03e16',
            'display_id': 'f4480e1f-fb4e-45b9-a44c-9e6c679ce7eb',
            'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
            'description': 'md5:2dfc89d154cd91a4a18cd9582ba03e16',
            'uploader': 'statefairent',
            'uploader_id': '487545193',
            'creator': 'Official Summer Fair of LA',
            'timestamp': 1629655457,
            'upload_date': '20210822',
            'duration': 19,
            'view_count': int,
            'like_count': int,
            'artist': 'Unknown',
            'track': 'Unknown',
            'uploader_url': 'https://triller.co/@statefairent',
            'comment_count': int,
        },
    }]

    def _real_extract(self, url):
        """Follow the short link with a HEAD request and hand off the final URL."""
        head_request = HEADRequest(url)
        short_id = self._match_id(url)
        real_url = self._request_webpage(head_request, short_id).url

        # Prevent infinite loop in case redirect fails
        if not self.suitable(real_url):
            return self.url_result(real_url)
        raise UnsupportedError(real_url)

View File

@ -1,79 +0,0 @@
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
extract_attributes,
try_get,
urlencode_postdata,
)
class TVPlayerIE(InfoExtractor):
    """Extractor for live channels on tvplayer.com."""

    _VALID_URL = r'https?://(?:www\.)?tvplayer\.com/watch/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://tvplayer.com/watch/bbcone',
        'info_dict': {
            'id': '89',
            'ext': 'mp4',
            'title': r're:^BBC One [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Resolve the current channel, obtain a validation context and fetch the live stream."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Channel name and numeric ID are attributes of the 'current-channel' <div>
        channel_tag = self._search_regex(
            r'(<div[^>]+class="[^"]*current-channel[^"]*"[^>]*>)',
            webpage, 'channel element')
        current_channel = extract_attributes(channel_tag)
        title = current_channel['data-name']
        resource_id = current_channel['data-id']

        token = self._search_regex(
            r'data-token=(["\'])(?P<token>(?!\1).+)\1', webpage,
            'token', group='token')

        context = self._download_json(
            'https://tvplayer.com/watch/context', display_id,
            'Downloading JSON context',
            query={'resource': resource_id, 'gen': token})

        validate = context['validate']
        platform = try_get(context, lambda x: x['platform']['key'], str) or 'firefox'

        stream_form = urlencode_postdata({
            'id': resource_id,
            'service': 1,
            'platform': platform,
            'validate': validate,
        })
        try:
            stream_json = self._download_json(
                'http://api.tvplayer.com/api/v2/stream/live',
                display_id, 'Downloading JSON stream',
                headers={'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'},
                data=stream_form)
        except ExtractorError as e:
            if not isinstance(e.cause, HTTPError):
                raise
            # HTTP error bodies carry the service's own error message; surface it
            error_response = self._parse_json(
                e.cause.response.read().decode(), resource_id)['tvplayer']['response']
            raise ExtractorError(
                f'{self.IE_NAME} said: {error_response["error"]}', expected=True)
        response = stream_json['tvplayer']['response']

        formats = self._extract_m3u8_formats(response['stream'], display_id, 'mp4')

        return {
            'id': resource_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'is_live': True,
        }

View File

@ -5,9 +5,3 @@ class UFCTVIE(ImgGamingBaseIE):
_VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:(?:app|www)\.)?(?:ufc\.tv|(?:ufc)?fightpass\.com)|ufcfightpass\.img(?:dge|gaming)\.com'
_NETRC_MACHINE = 'ufctv'
_REALM = 'ufc'
class UFCArabiaIE(ImgGamingBaseIE):
    # Fills ImgGamingBaseIE's URL template with the ufcarabia.ae / ufcarabia.com
    # hosts, optionally prefixed with 'app.' or 'www.'
    _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:(?:app|www)\.)?ufcarabia\.(?:ae|com)'
    # Machine name used to look up credentials in .netrc
    _NETRC_MACHINE = 'ufcarabia'
    # NOTE(review): presumably the realm identifier ImgGamingBaseIE sends to the backend - confirm in the base class
    _REALM = 'admufc'
_REALM = 'admufc'

View File

@ -1,36 +0,0 @@
from .common import InfoExtractor
class UKTVPlayIE(InfoExtractor):
    """Extractor for uktvplay.co.uk, which hands the video ID to the Brightcove player."""

    _VALID_URL = r'https?://uktvplay\.(?:uktv\.)?co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://uktvplay.uktv.co.uk/shows/world-at-war/c/200/watch-online/?video=2117008346001',
        'info_dict': {
            'id': '2117008346001',
            'ext': 'mp4',
            'title': 'Pincers',
            'description': 'Pincers',
            'uploader_id': '1242911124001',
            'upload_date': '20130124',
            'timestamp': 1359049267,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'expected_warnings': ['Failed to download MPD manifest'],
    }, {
        'url': 'https://uktvplay.uktv.co.uk/shows/africa/watch-online/5983349675001',
        'only_matching': True,
    }, {
        'url': 'https://uktvplay.co.uk/shows/hornby-a-model-world/series-1/episode-1/6276739790001?autoplaying=true',
        'only_matching': True,
    }]
    # BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/1242911124001/OrCyvJ2gyL_default/index.html?videoId=%s'
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911124001/H1xnMOqP_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Build the Brightcove player URL for the video ID and delegate to BrightcoveNew."""
        video_id = self._match_id(url)
        brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % video_id
        return self.url_result(brightcove_url, 'BrightcoveNew', video_id)

View File

@ -71,6 +71,8 @@ class KnownDRMIE(UnsupportedInfoExtractor):
r'watch\.telusoriginals\.com',
r'video\.unext\.jp',
r'www\.web\.nhk',
r'fod\.fujitv\.co\.jp',
r'zee5\.com',
)
_TESTS = [{
@ -252,6 +254,14 @@ class KnownDRMIE(UnsupportedInfoExtractor):
# https://github.com/yt-dlp/yt-dlp/issues/14620
'url': 'https://www.web.nhk/tv/an/72hours/pl/series-tep-W3W8WRN8M3/ep/QW8ZY6146V',
'only_matching': True,
}, {
# https://github.com/yt-dlp/yt-dlp/issues/7064
# https://github.com/yt-dlp/yt-dlp/issues/10264
'url': 'https://fod.fujitv.co.jp/title/709f/709f130001/',
'only_matching': True,
}, {
'url': 'https://www.zee5.com/',
'only_matching': True,
}]
def _real_extract(self, url):
@ -291,6 +301,8 @@ class KnownPiracyIE(UnsupportedInfoExtractor):
r'einthusan\.(?:tv|com|ca)',
r'yourupload\.com',
r'xanimu\.com',
r'musicdex\.org',
r'duboku\.io',
)
_TESTS = [{

View File

@ -1,98 +0,0 @@
from .common import InfoExtractor
from ..utils import (
dict_get,
int_or_none,
str_or_none,
try_get,
unified_strdate,
url_or_none,
)
class UtreonIE(InfoExtractor):
    """Extractor for utreon.com / playeur.com videos, backed by the playeur.com API."""

    IE_NAME = 'playeur'
    _VALID_URL = r'https?://(?:www\.)?(?:utreon|playeur)\.com/v/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://utreon.com/v/z_I7ikQbuDw',
        'info_dict': {
            'id': 'z_I7ikQbuDw',
            'ext': 'mp4',
            'title': 'Freedom Friday meditation - Rising in the wind',
            'description': 'md5:a9bf15a42434a062fe313b938343ad1b',
            'uploader': 'Heather Dawn Elemental Health',
            'thumbnail': r're:^https?://.+\.jpg',
            'release_date': '20210723',
            'duration': 586,
        },
    }, {
        'url': 'https://utreon.com/v/jerJw5EOOVU',
        'info_dict': {
            'id': 'jerJw5EOOVU',
            'ext': 'mp4',
            'title': 'When I\'m alone, I love to reflect in peace, to make my dreams come true... [Quotes and Poems]',
            'description': 'md5:4026aa3a2c10169c3649926ac8ef62b6',
            'uploader': 'Frases e Poemas Quotes and Poems',
            'thumbnail': r're:^https?://.+\.jpg',
            'release_date': '20210723',
            'duration': 60,
        },
    }, {
        'url': 'https://utreon.com/v/C4ZxXhYBBmE',
        'info_dict': {
            'id': 'C4ZxXhYBBmE',
            'ext': 'mp4',
            'title': 'Bidens Capital Gains Tax Rate to Test Worlds Highest',
            'description': 'md5:995aa9ad0733c0e5863ebdeff954f40e',
            'uploader': 'Nomad Capitalist',
            'thumbnail': r're:^https?://.+\.jpg',
            'release_date': '20210723',
            'duration': 884,
        },
    }, {
        'url': 'https://utreon.com/v/Y-stEH-FBm8',
        'info_dict': {
            'id': 'Y-stEH-FBm8',
            'ext': 'mp4',
            'title': 'Creeper-Chan Pranks Steve! 💚 [MINECRAFT ANIME]',
            'description': 'md5:7a48450b0d761b96dec194be0c5ecb5f',
            'uploader': 'Merryweather Comics',
            'thumbnail': r're:^https?://.+\.jpg',
            'release_date': '20210718',
            'duration': 151,
        },
    }, {
        'url': 'https://playeur.com/v/Wzqp-UrxSeu',
        'info_dict': {
            'id': 'Wzqp-UrxSeu',
            'ext': 'mp4',
            'title': 'Update: Clockwork Basilisk Books on the Way!',
            'description': 'md5:d9756b0b1884c904655b0e170d17cea5',
            'uploader': 'Forgotten Weapons',
            'release_date': '20240208',
            'thumbnail': r're:^https?://.+\.jpg',
            'duration': 262,
        },
    }]

    def _real_extract(self, url):
        """Fetch the video JSON from the API and build formats from its ``videos`` mapping."""
        video_id = self._match_id(url)
        json_data = self._download_json(
            'https://api.playeur.com/v1/videos/' + video_id,
            video_id)

        # A missing or null 'videos' object yields no formats instead of a KeyError
        videos_json = json_data.get('videos') or {}

        formats = []
        for format_key, format_url in videos_json.items():
            if not url_or_none(format_url):
                continue
            # The second '_'-separated component of the key is the format ID, and it
            # is parsed as '<height>' plus one trailing character.
            # NOTE(review): presumably keys look like 'url_720p' - confirm against the API.
            # A key that does not fit must not abort the whole extraction: fall back
            # to the full key as the ID and leave the height unset.
            key_parts = format_key.split('_')
            format_id = key_parts[1] if len(key_parts) > 1 else format_key
            formats.append({
                'url': format_url,
                'format_id': format_id,
                'height': int_or_none(format_id[:-1]),
            })

        thumbnail = url_or_none(dict_get(json_data, ('cover_image_url', 'preview_image_url')))

        return {
            'id': video_id,
            'title': json_data['title'],
            'formats': formats,
            'description': str_or_none(json_data.get('description')),
            'duration': int_or_none(json_data.get('duration')),
            'uploader': str_or_none(try_get(json_data, lambda x: x['channel']['title'])),
            'thumbnail': thumbnail,
            'release_date': unified_strdate(json_data.get('published_datetime')),
        }

View File

@ -1,7 +1,6 @@
import re
from .common import InfoExtractor
from .xstream import XstreamIE
from ..utils import (
ExtractorError,
float_or_none,
@ -9,7 +8,8 @@ from ..utils import (
)
class VGTVIE(XstreamIE): # XXX: Do not subclass from concrete IE
class VGTVIE(InfoExtractor):
_WORKING = False
IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet'
_GEO_BYPASS = False

View File

@ -1,27 +0,0 @@
from .common import InfoExtractor
from .internetvideoarchive import InternetVideoArchiveIE
class VideoDetectiveIE(InfoExtractor):
    """Extractor for videodetective.com pages; delegates to InternetVideoArchiveIE."""

    _VALID_URL = r'https?://(?:www\.)?videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.videodetective.com/movies/kick-ass-2/194487',
        'info_dict': {
            'id': '194487',
            'ext': 'mp4',
            'title': 'Kick-Ass 2',
            'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Pass the numeric page ID to InternetVideoArchiveIE as ``publishedid``."""
        published_id = self._match_id(url)
        json_url = InternetVideoArchiveIE._build_json_url(
            'customerid=69249&publishedid=' + published_id)
        return self.url_result(json_url, ie=InternetVideoArchiveIE.ie_key())

View File

@ -1,51 +0,0 @@
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
)
class VideofyMeIE(InfoExtractor):
    """Extractor for videofy.me video pages (marked as not working)."""

    _WORKING = False
    _VALID_URL = r'https?://(?:www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)'
    IE_NAME = 'videofy.me'
    _TEST = {
        'url': 'http://www.videofy.me/thisisvideofyme/1100701',
        'md5': 'c77d700bdc16ae2e9f3c26019bd96143',
        'info_dict': {
            'id': '1100701',
            'ext': 'mp4',
            'title': 'This is VideofyMe',
            'description': '',
            'upload_date': '20130326',
            'timestamp': 1364288959,
            'uploader': 'VideofyMe',
            'uploader_id': 'thisisvideofyme',
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
    }

    def _real_extract(self, url):
        """Download the player-info JSON for the video and map it to an info dict.

        ``videoinfo.video.title`` and ``videoinfo.video.sources.source.url`` are
        required; all other fields are optional.
        """
        video_id = self._match_id(url)
        config = self._download_json(f'http://vf-player-info-loader.herokuapp.com/{video_id}.json', video_id)['videoinfo']

        video = config.get('video')
        # `or {}` also covers an explicit null, which .get('blog', {}) would return as None
        blog = config.get('blog') or {}

        # The view count is searched for as the first run of digits in the field.
        # re.search raises TypeError on None/non-str input even with fatal=False,
        # so only run the regex when a value exists and coerce it to str first.
        views = video.get('views')
        view_count = None
        if views is not None:
            view_count = int_or_none(self._search_regex(
                r'([0-9]+)', str(views), 'view count', fatal=False))

        return {
            'id': video_id,
            'title': video['title'],
            'url': video['sources']['source']['url'],
            'thumbnail': video.get('thumb'),
            'description': video.get('description'),
            'timestamp': parse_iso8601(video.get('date')),
            'uploader': blog.get('name'),
            'uploader_id': blog.get('identifier'),
            'view_count': view_count,
            'like_count': int_or_none(video.get('likes')),
            'comment_count': int_or_none(video.get('nrOfComments')),
        }

View File

@ -1,190 +1,15 @@
import base64
import functools
import math
import re
import time
import urllib.parse
from .common import InfoExtractor
from .slideslive import SlidesLiveIE
from ..utils import (
ExtractorError,
InAdvancePagedList,
int_or_none,
remove_start,
traverse_obj,
update_url_query,
url_or_none,
)
class VideoKenBaseIE(InfoExtractor):
    """Shared helpers for the VideoKen-hosted video portals."""

    # Portal hostname -> organization slug used in the VideoKen API paths
    _ORGANIZATIONS = {
        'videos.icts.res.in': 'icts',
        'videos.cncf.io': 'cncf',
        'videos.neurips.cc': 'neurips',
    }
    _BASE_URL_RE = rf'https?://(?P<host>{"|".join(map(re.escape, _ORGANIZATIONS))})/'
    _PAGE_SIZE = 12

    def _get_org_id_and_api_key(self, org, video_id):
        """Return the ``(id, apikey)`` pair from the organization details endpoint."""
        org_details = self._download_json(
            f'https://analytics.videoken.com/api/videolake/{org}/details', video_id,
            headers={'Accept': 'application/json'},
            note='Downloading organization ID and API key')
        org_id = org_details['id']
        return org_id, org_details['apikey']

    def _create_slideslive_url(self, video_url, video_id, referer):
        """Build a SlidesLive embed URL, adding embed-parent parameters when a referer is given.

        Returns None when neither ``video_url`` nor ``video_id`` is provided.
        """
        if not (video_url or video_id):
            return None

        # A missing URL or a sign-in redirect is replaced by an embed URL built from the ID
        if not video_url or 'embed/sign-in' in video_url:
            slideslive_id = remove_start(video_id, "slideslive-")
            video_url = f'https://slideslive.com/embed/{slideslive_id}'

        if not url_or_none(referer):
            return video_url

        origin = f'https://{urllib.parse.urlparse(referer).hostname}'
        return update_url_query(video_url, {
            'embed_parent_url': referer,
            'embed_container_origin': origin,
        })

    def _extract_videos(self, videos, url):
        """Yield a url_result for each usable entry under ``videos`` or ``results``."""
        for entry in traverse_obj(videos, (('videos', 'results'), ...)):
            entry_id = traverse_obj(entry, 'youtube_id', 'videoid')
            if not entry_id:
                continue

            if traverse_obj(entry, 'type', 'source') == 'youtube':
                # For YouTube entries the ID itself is handed to the Youtube extractor
                target, ie = entry_id, 'Youtube'
            else:
                ie = None
                target = traverse_obj(entry, 'embed_url', 'embeddableurl', expected_type=url_or_none)
                if not target:
                    continue
                if urllib.parse.urlparse(target).hostname == 'slideslive.com':
                    ie = SlidesLiveIE
                    target = self._create_slideslive_url(target, entry_id, url)

            yield self.url_result(target, ie, entry_id)
class VideoKenIE(VideoKenBaseIE):
    """Extractor for single-video pages on the VideoKen-hosted portals."""

    _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'(?:(?:topic|category)/[^/#?]+/)?video/(?P<id>[\w-]+)'
    _TESTS = [{
        # neurips -> videoken -> slideslive
        'url': 'https://videos.neurips.cc/video/slideslive-38922815',
        'info_dict': {
            'id': '38922815',
            'ext': 'mp4',
            'title': 'Efficient Processing of Deep Neural Network: from Algorithms to Hardware Architectures',
            'timestamp': 1630939331,
            'upload_date': '20210906',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'thumbnails': 'count:330',
            'chapters': 'count:329',
        },
        'params': {
            'skip_download': 'm3u8',
        },
        'expected_warnings': ['Failed to download VideoKen API JSON'],
    }, {
        # neurips -> videoken -> slideslive -> youtube
        'url': 'https://videos.neurips.cc/topic/machine%20learning/video/slideslive-38923348',
        'info_dict': {
            'id': '2Xa_dt78rJE',
            'ext': 'mp4',
            'display_id': '38923348',
            'title': 'Machine Education',
            'description': 'Watch full version of this video at https://slideslive.com/38923348.',
            'channel': 'SlidesLive Videos - G2',
            'channel_id': 'UCOExahQQ588Da8Nft_Ltb9w',
            'channel_url': 'https://www.youtube.com/channel/UCOExahQQ588Da8Nft_Ltb9w',
            'uploader': 'SlidesLive Videos - G2',
            'uploader_id': 'UCOExahQQ588Da8Nft_Ltb9w',
            'uploader_url': 'http://www.youtube.com/channel/UCOExahQQ588Da8Nft_Ltb9w',
            'duration': 2504,
            'timestamp': 1618922125,
            'upload_date': '20200131',
            'age_limit': 0,
            'channel_follower_count': int,
            'view_count': int,
            'availability': 'unlisted',
            'live_status': 'not_live',
            'playable_in_embed': True,
            'categories': ['People & Blogs'],
            'tags': [],
            'thumbnail': r're:^https?://.*\.(?:jpg|webp)',
            'thumbnails': 'count:78',
            'chapters': 'count:77',
        },
        'params': {
            'skip_download': 'm3u8',
        },
        'expected_warnings': ['Failed to download VideoKen API JSON'],
    }, {
        # icts -> videoken -> youtube
        'url': 'https://videos.icts.res.in/topic/random%20variable/video/zysIsojYdvc',
        'info_dict': {
            'id': 'zysIsojYdvc',
            'ext': 'mp4',
            'title': 'Small-worlds, complex networks and random graphs (Lecture 3) by Remco van der Hofstad',
            'description': 'md5:87433069d79719eeadc1962cc2ace00b',
            'channel': 'International Centre for Theoretical Sciences',
            'channel_id': 'UCO3xnVTHzB7l-nc8mABUJIQ',
            'channel_url': 'https://www.youtube.com/channel/UCO3xnVTHzB7l-nc8mABUJIQ',
            'uploader': 'International Centre for Theoretical Sciences',
            'uploader_id': 'ICTStalks',
            'uploader_url': 'http://www.youtube.com/user/ICTStalks',
            'duration': 3372,
            'upload_date': '20191004',
            'age_limit': 0,
            'live_status': 'not_live',
            'availability': 'public',
            'playable_in_embed': True,
            'channel_follower_count': int,
            'like_count': int,
            'view_count': int,
            'categories': ['Science & Technology'],
            'tags': [],
            'thumbnail': r're:^https?://.*\.(?:jpg|webp)',
            'thumbnails': 'count:42',
            'chapters': 'count:20',
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        'url': 'https://videos.cncf.io/category/478/video/IL4nxbmUIX8',
        'only_matching': True,
    }, {
        'url': 'https://videos.cncf.io/topic/kubernetes/video/YAM2d7yTrrI',
        'only_matching': True,
    }, {
        'url': 'https://videos.icts.res.in/video/d7HuP_abpKU',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a portal video page to the extractor that hosts the video.

        The VideoKen API is queried non-fatally. When it returns a usable
        entry, that entry is returned; otherwise the target is derived from
        the video ID alone (``slideslive-`` prefix or an 11-character ID that
        is handed to the Youtube extractor).

        Raises:
            ExtractorError: if neither the API response nor the ID yields a target.
        """
        hostname, video_id = self._match_valid_url(url).group('host', 'id')
        org_id, _ = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], video_id)
        details = self._download_json(
            'https://analytics.videoken.com/api/videoinfo_private', video_id, query={
                'videoid': video_id,
                'org_id': org_id,
            }, headers={'Accept': 'application/json'}, note='Downloading VideoKen API JSON',
            errnote='Failed to download VideoKen API JSON', fatal=False)

        if details:
            # _extract_videos skips entries without an ID or URL, so the generator
            # may be empty. Use a default so that case reaches the ID-based
            # fallbacks below instead of leaking StopIteration.
            entry = next(self._extract_videos({'videos': [details]}, url), None)
            if entry:
                return entry

        # fallback for API error 400 response
        if video_id.startswith('slideslive-'):
            return self.url_result(
                self._create_slideslive_url(None, video_id, url), SlidesLiveIE, video_id)
        if re.match(r'^[\w-]{11}$', video_id):
            return self.url_result(video_id, 'Youtube', video_id)
        raise ExtractorError('Unable to extract without VideoKen API response')
class VideoKenPlayerIE(VideoKenBaseIE):
class VideoKenPlayerIE(InfoExtractor):
_VALID_URL = r'https?://player\.videoken\.com/embed/slideslive-(?P<id>\d+)'
_TESTS = [{
'url': 'https://player.videoken.com/embed/slideslive-38968434',
@ -203,135 +28,19 @@ class VideoKenPlayerIE(VideoKenBaseIE):
},
}]
def _create_slideslive_url(self, video_url, video_id, referer):
if not video_url and not video_id:
return
elif not video_url or 'embed/sign-in' in video_url:
video_url = f'https://slideslive.com/embed/{remove_start(video_id, "slideslive-")}'
if url_or_none(referer):
return update_url_query(video_url, {
'embed_parent_url': referer,
'embed_container_origin': f'https://{urllib.parse.urlparse(referer).hostname}',
})
return video_url
def _real_extract(self, url):
    """Redirect a VideoKen player embed to the corresponding SlidesLive embed."""
    video_id = self._match_id(url)
    # No source URL is known here, so the embed URL is built from the ID alone
    slideslive_url = self._create_slideslive_url(None, video_id, url)
    return self.url_result(slideslive_url, SlidesLiveIE, video_id)
class VideoKenPlaylistIE(VideoKenBaseIE):
    """Extractor for playlist pages on the VideoKen-hosted portals."""

    _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'(?:category/\d+/)?playlist/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://videos.icts.res.in/category/1822/playlist/381',
        'playlist_mincount': 117,
        'info_dict': {
            'id': '381',
            'title': 'Cosmology - The Next Decade',
        },
    }]

    def _real_extract(self, url):
        """Download the playlist items and return them as a playlist result."""
        mobj = self._match_valid_url(url)
        hostname, playlist_id = mobj.group('host', 'id')
        org_slug = self._ORGANIZATIONS[hostname]
        # Only the organization ID is needed; the API key is discarded
        org_id, _api_key = self._get_org_id_and_api_key(org_slug, playlist_id)

        playlist = self._download_json(
            f'https://analytics.videoken.com/api/{org_id}/playlistitems/{playlist_id}/',
            playlist_id, headers={'Accept': 'application/json'}, note='Downloading API JSON')

        entries = self._extract_videos(playlist, url)
        return self.playlist_result(entries, playlist_id, playlist.get('title'))
class VideoKenCategoryIE(VideoKenBaseIE):
    """Extractor for category listings on the VideoKen-hosted portals."""

    _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'category/(?P<id>\d+)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://videos.icts.res.in/category/1822/',
        'playlist_mincount': 500,
        'info_dict': {
            'id': '1822',
            'title': 'Programs',
        },
    }, {
        'url': 'https://videos.neurips.cc/category/350/',
        'playlist_mincount': 34,
        'info_dict': {
            'id': '350',
            'title': 'NeurIPS 2018',
        },
    }, {
        'url': 'https://videos.cncf.io/category/479/',
        'playlist_mincount': 328,
        'info_dict': {
            'id': '479',
            'title': 'KubeCon + CloudNativeCon Europe\'19',
        },
    }]

    def _get_category_page(self, category_id, org_id, page=1, note=None):
        """Download one page of category videos; returns ``{}`` when the request fails."""
        progress_note = note or f'Downloading category page {page}'
        page_query = {
            'category_id': category_id,
            'page_number': page,
            'length': self._PAGE_SIZE,
        }
        response = self._download_json(
            f'https://analytics.videoken.com/api/videolake/{org_id}/category_videos', category_id,
            fatal=False, note=progress_note,
            query=page_query, headers={'Accept': 'application/json'})
        return response or {}

    def _entries(self, category_id, org_id, url, page):
        """Yield the entries of the zero-based ``page`` (the API counts pages from 1)."""
        page_data = self._get_category_page(category_id, org_id, page + 1)
        yield from self._extract_videos(page_data, url)

    def _real_extract(self, url):
        """Read the category name and size, then return a paged playlist."""
        hostname, category_id = self._match_valid_url(url).group('host', 'id')
        org_id, _api_key = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], category_id)

        first_page = self._get_category_page(category_id, org_id, note='Downloading category info')
        category_name = first_page['category_name']
        record_count = int(first_page['recordsTotal'])
        page_count = math.ceil(record_count / self._PAGE_SIZE)

        page_func = functools.partial(self._entries, category_id, org_id, url)
        paged_entries = InAdvancePagedList(page_func, page_count, self._PAGE_SIZE)
        return self.playlist_result(paged_entries, category_id, category_name)
class VideoKenTopicIE(VideoKenBaseIE):
    """Extractor for topic listings on the VideoKen-hosted portals."""

    _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'topic/(?P<id>[^/#?]+)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://videos.neurips.cc/topic/machine%20learning/',
        'playlist_mincount': 500,
        'info_dict': {
            'id': 'machine_learning',
            'title': 'machine learning',
        },
    }, {
        'url': 'https://videos.icts.res.in/topic/gravitational%20waves/',
        'playlist_mincount': 77,
        'info_dict': {
            'id': 'gravitational_waves',
            'title': 'gravitational waves',
        },
    }, {
        'url': 'https://videos.cncf.io/topic/prometheus/',
        'playlist_mincount': 134,
        'info_dict': {
            'id': 'prometheus',
            'title': 'prometheus',
        },
    }]

    def _get_topic_page(self, topic, org_id, search_id, api_key, page=1, note=None):
        """Download one page of topic search results; returns ``{}`` when the request fails."""
        search_query = {
            'orgid': org_id,
            'size': self._PAGE_SIZE,
            'query': topic,
            'page': page,
            'sort': 'upload_desc',
            'filter': 'all',
            'token': api_key,
            'is_topic': 'true',
            'category': '',
            'searchid': search_id,
        }
        response = self._download_json(
            'https://es.videoken.com/api/v1.0/get_results', topic, fatal=False,
            query=search_query, headers={'Accept': 'application/json'},
            note=note or f'Downloading topic page {page}')
        return response or {}

    def _entries(self, topic, org_id, search_id, api_key, url, page):
        """Yield the entries of the zero-based ``page`` (the API counts pages from 1)."""
        page_data = self._get_topic_page(topic, org_id, search_id, api_key, page + 1)
        yield from self._extract_videos(page_data, url)

    def _real_extract(self, url):
        """Look up the number of result pages for the topic and return a paged playlist."""
        hostname, raw_topic = self._match_valid_url(url).group('host', 'id')
        topic = urllib.parse.unquote(raw_topic)
        # The playlist ID is the decoded topic with spaces replaced by underscores
        topic_id = topic.replace(' ', '_')
        org_id, api_key = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], topic)

        # The search ID embeds the topic and the current UNIX time, base64-encoded
        raw_search_id = f':{topic}:{int(time.time())}:transient'
        search_id = base64.b64encode(raw_search_id.encode()).decode()

        topic_info = self._get_topic_page(
            topic, org_id, search_id, api_key, note='Downloading topic info')
        page_count = int_or_none(topic_info['total_no_of_pages'])

        page_func = functools.partial(self._entries, topic, org_id, search_id, api_key, url)
        paged_entries = InAdvancePagedList(page_func, page_count, self._PAGE_SIZE)
        return self.playlist_result(paged_entries, topic_id, topic)

View File

@ -1,304 +0,0 @@
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_qs,
)
class VideomoreBaseIE(InfoExtractor):
    """Shared API helpers for the videomore.ru / more.tv page extractors."""

    _API_BASE_URL = 'https://more.tv/api/v3/web/'
    _VALID_URL_BASE = r'https?://(?:videomore\.ru|more\.tv)/'

    def _download_page_data(self, display_id):
        """Return ``attributes.response.data`` of the PageData API for the given path."""
        page = self._download_json(
            self._API_BASE_URL + 'PageData', display_id,
            query={'url': '/' + display_id})
        return page['attributes']['response']['data']

    def _track_url_result(self, track):
        """Turn a track object into a url_result handled by VideomoreIE."""
        vod = track['trackVod']
        # Prefer the player link; the plain link is mandatory when it is absent
        player_url = vod.get('playerLink') or vod['link']
        return self.url_result(player_url, VideomoreIE.ie_key(), vod.get('hubId'))
class VideomoreIE(InfoExtractor):
    """Extractor for single videomore.ru / more.tv player tracks."""

    IE_NAME = 'videomore'
    # NOTE: every literal dot in a hostname is escaped; an unescaped `.` would
    # match any character (e.g. "odysseus.moreXtv").
    _VALID_URL = r'''(?x)
                    videomore:(?P<sid>\d+)$|
                    https?://
                        (?:
                            videomore\.ru/
                            (?:
                                embed|
                                [^/]+/[^/]+
                            )/|
                            (?:
                                (?:player\.)?videomore\.ru|
                                siren\.more\.tv/player
                            )/[^/]*\?.*?\btrack_id=|
                            odysseus\.more\.tv/player/(?P<partner_id>\d+)/
                        )
                        (?P<id>\d+)
                        (?:[/?#&]|\.(?:xml|json)|$)
                    '''
    _EMBED_REGEX = [r'''(?x)
        (?:
            <iframe[^>]+src=([\'"])|
            <object[^>]+data=(["\'])https?://videomore\.ru/player\.swf\?.*config=
        )(?P<url>https?://videomore\.ru/[^?#"']+/\d+(?:\.xml)?)
        ''']
    _TESTS = [{
        'url': 'http://videomore.ru/kino_v_detalayah/5_sezon/367617',
        'md5': '44455a346edc0d509ac5b5a5b531dc35',
        'info_dict': {
            'id': '367617',
            'ext': 'flv',
            'title': 'Кино в деталях 5 сезон В гостях Алексей Чумаков и Юлия Ковальчук',
            'series': 'Кино в деталях',
            'episode': 'В гостях Алексей Чумаков и Юлия Ковальчук',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 2910,
            'view_count': int,
            'comment_count': int,
            'age_limit': 16,
        },
        'skip': 'The video is not available for viewing.',
    }, {
        'url': 'http://videomore.ru/embed/259974',
        'info_dict': {
            'id': '259974',
            'ext': 'mp4',
            'title': 'Молодежка 2 сезон 40 серия',
            'series': 'Молодежка',
            'season': '2 сезон',
            'episode': '40 серия',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 2789,
            'view_count': int,
            'age_limit': 16,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://videomore.ru/molodezhka/sezon_promo/341073',
        'info_dict': {
            'id': '341073',
            'ext': 'flv',
            'title': 'Промо Команда проиграла из-за Бакина?',
            'episode': 'Команда проиграла из-за Бакина?',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 29,
            'age_limit': 16,
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'The video is not available for viewing.',
    }, {
        'url': 'http://videomore.ru/elki_3?track_id=364623',
        'only_matching': True,
    }, {
        'url': 'http://videomore.ru/embed/364623',
        'only_matching': True,
    }, {
        'url': 'http://videomore.ru/video/tracks/364623.xml',
        'only_matching': True,
    }, {
        'url': 'http://videomore.ru/video/tracks/364623.json',
        'only_matching': True,
    }, {
        'url': 'http://videomore.ru/video/tracks/158031/quotes/33248',
        'only_matching': True,
    }, {
        'url': 'videomore:367617',
        'only_matching': True,
    }, {
        'url': 'https://player.videomore.ru/?partner_id=97&track_id=736234&autoplay=0&userToken=',
        'only_matching': True,
    }, {
        'url': 'https://odysseus.more.tv/player/1788/352317',
        'only_matching': True,
    }, {
        'url': 'https://siren.more.tv/player/config?track_id=352317&partner_id=1788&user_token=',
        'only_matching': True,
    }]
    _GEO_BYPASS = False

    def _real_extract(self, url):
        """Download the player config for a track and build its info dict.

        The partner ID comes from the URL path, then from the ``partner_id``
        query parameter, and defaults to ``'97'``. When no formats are found
        and the config item carries an error message, the error is reported as
        a geo restriction (for the two known messages) or a no-formats error.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('sid') or mobj.group('id')
        partner_id = mobj.group('partner_id') or parse_qs(url).get('partner_id', [None])[0] or '97'

        item = self._download_json(
            'https://siren.more.tv/player/config', video_id, query={
                'partner_id': partner_id,
                'track_id': video_id,
            })['data']['playlist']['items'][0]

        title = item.get('title')
        series = item.get('project_name')
        season = item.get('season_name')
        episode = item.get('episode_name')
        if not title:
            # Compose a title from whichever of series/season/episode are present
            title = ' '.join(filter(None, (series, season, episode)))

        streams = item.get('streams') or []
        # Top-level dash_url / hls_url are treated as additional streams
        for protocol in ('DASH', 'HLS'):
            stream_url = item.get(protocol.lower() + '_url')
            if stream_url:
                streams.append({'protocol': protocol, 'url': stream_url})

        formats = []
        for stream in streams:
            stream_url = stream.get('url')
            if not stream_url:
                continue
            protocol = stream.get('protocol')
            if protocol == 'DASH':
                formats.extend(self._extract_mpd_formats(
                    stream_url, video_id, mpd_id='dash', fatal=False))
            elif protocol == 'HLS':
                formats.extend(self._extract_m3u8_formats(
                    stream_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            elif protocol == 'MSS':
                formats.extend(self._extract_ism_formats(
                    stream_url, video_id, ism_id='mss', fatal=False))

        if not formats:
            error = item.get('error')
            if error:
                if error in ('Данное видео недоступно для просмотра на территории этой страны', 'Данное видео доступно для просмотра только на территории России'):
                    self.raise_geo_restricted(countries=['RU'], metadata_available=True)
                self.raise_no_formats(error, expected=True)

        return {
            'id': video_id,
            'title': title,
            'series': series,
            'season': season,
            'episode': episode,
            'thumbnail': item.get('thumbnail_url'),
            'duration': int_or_none(item.get('duration')),
            'view_count': int_or_none(item.get('views')),
            'age_limit': int_or_none(item.get('min_age')),
            'formats': formats,
        }
class VideomoreVideoIE(VideomoreBaseIE):
    """Extractor for videomore.ru / more.tv video pages addressed by path."""

    IE_NAME = 'videomore:video'
    _VALID_URL = VideomoreBaseIE._VALID_URL_BASE + r'(?P<id>(?:(?:[^/]+/){2})?[^/?#&]+)(?:/*|[?#&].*?)$'
    _TESTS = [{
        # single video with og:video:iframe
        'url': 'http://videomore.ru/elki_3',
        'info_dict': {
            'id': '364623',
            'ext': 'flv',
            'title': 'Ёлки 3',
            'description': '',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 5579,
            'age_limit': 6,
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Requires logging in',
    }, {
        # season single series with og:video:iframe
        'url': 'http://videomore.ru/poslednii_ment/1_sezon/14_seriya',
        'info_dict': {
            'id': '352317',
            'ext': 'mp4',
            'title': 'Последний мент 1 сезон 14 серия',
            'series': 'Последний мент',
            'season': '1 сезон',
            'episode': '14 серия',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 2464,
            'age_limit': 16,
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://videomore.ru/sejchas_v_seti/serii_221-240/226_vypusk',
        'only_matching': True,
    }, {
        # single video without og:video:iframe
        'url': 'http://videomore.ru/marin_i_ego_druzya',
        'info_dict': {
            'id': '359073',
            'ext': 'flv',
            'title': '1 серия. Здравствуй, Аквавилль!',
            'description': 'md5:c6003179538b5d353e7bcd5b1372b2d7',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 754,
            'age_limit': 6,
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'redirects to https://more.tv/',
    }, {
        'url': 'https://videomore.ru/molodezhka/6_sezon/29_seriya?utm_so',
        'only_matching': True,
    }, {
        'url': 'https://more.tv/poslednii_ment/1_sezon/14_seriya',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject any URL that VideomoreIE already handles."""
        if VideomoreIE.suitable(url):
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Resolve the page path to its track and delegate to VideomoreIE."""
        page_path = self._match_id(url)
        page_data = self._download_page_data(page_path)
        return self._track_url_result(page_data)
class VideomoreSeasonIE(VideomoreBaseIE):
    """Extractor for videomore.ru / more.tv season pages (playlists of tracks)."""

    IE_NAME = 'videomore:season'
    _VALID_URL = VideomoreBaseIE._VALID_URL_BASE + r'(?!embed)(?P<id>[^/]+/[^/?#&]+)(?:/*|[?#&].*?)$'
    _TESTS = [{
        'url': 'http://videomore.ru/molodezhka/film_o_filme',
        'info_dict': {
            'id': 'molodezhka/film_o_filme',
            'title': 'Фильм о фильме',
        },
        'playlist_mincount': 3,
    }, {
        'url': 'http://videomore.ru/molodezhka/sezon_promo?utm_so',
        'only_matching': True,
    }, {
        'url': 'https://more.tv/molodezhka/film_o_filme',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs already handled by VideomoreIE or VideomoreVideoIE."""
        if VideomoreIE.suitable(url) or VideomoreVideoIE.suitable(url):
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Download the season's track list and return it as a playlist."""
        display_id = self._match_id(url)
        season = self._download_page_data(display_id)
        season_id = str(season['id'])
        track_list = self._download_json(
            self._API_BASE_URL + f'seasons/{season_id}/tracks',
            season_id)['data']
        entries = [self._track_url_result(track) for track in track_list]
        return self.playlist_result(entries, display_id, season.get('title'))

View File

@ -1,66 +0,0 @@
from .common import InfoExtractor
class VimmIE(InfoExtractor):
    """Extractor for live channel streams on vimm.tv."""

    IE_NAME = 'Vimm:stream'
    _VALID_URL = r'https?://(?:www\.)?vimm\.tv/(?:c/)?(?P<id>[0-9a-z-]+)$'
    _TESTS = [{
        'url': 'https://www.vimm.tv/c/calimeatwagon',
        'info_dict': {
            'id': 'calimeatwagon',
            'ext': 'mp4',
            'title': 're:^calimeatwagon [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'live_status': 'is_live',
        },
        'skip': 'Live',
    }, {
        'url': 'https://www.vimm.tv/octaafradio',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract the channel's live HLS stream; the channel ID doubles as the title."""
        channel_id = self._match_id(url)
        playlist_url = f'https://www.vimm.tv/hls/{channel_id}.m3u8'
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            playlist_url, channel_id, 'mp4', m3u8_id='hls', live=True)

        info = {'id': channel_id, 'title': channel_id, 'is_live': True}
        info['formats'] = formats
        info['subtitles'] = subtitles
        return info
class VimmRecordingIE(InfoExtractor):
    """Extractor for recorded streams on vimm.tv."""

    IE_NAME = 'Vimm:recording'
    _VALID_URL = r'https?://(?:www\.)?vimm\.tv/c/(?P<channel_id>[0-9a-z-]+)\?v=(?P<video_id>[0-9A-Za-z]+)'
    _TESTS = [{
        'url': 'https://www.vimm.tv/c/kaldewei?v=2JZsrPTFxsSz',
        'md5': '15122ee95baa32a548e4a3e120b598f1',
        'info_dict': {
            'id': '2JZsrPTFxsSz',
            'ext': 'mp4',
            'title': 'VIMM - [DE/GER] Kaldewei Live - In Farbe und Bunt',
            'uploader_id': 'kaldewei',
        },
    }]

    def _real_extract(self, url):
        """Take the title from the page's OpenGraph data and the formats from the recording's HLS playlist."""
        mobj = self._match_valid_url(url)
        channel_id, video_id = mobj.group('channel_id', 'video_id')

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)

        playlist_url = f'https://d211qfrkztakg3.cloudfront.net/{channel_id}/{video_id}/index.m3u8'
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            playlist_url, video_id, 'mp4', m3u8_id='hls', live=False)

        return {
            'id': video_id,
            'title': title,
            'is_live': False,
            'uploader_id': channel_id,
            'formats': formats,
            'subtitles': subtitles,
        }

View File

@ -1,29 +0,0 @@
from .onet import OnetBaseIE
class VODPlIE(OnetBaseIE):
    """Extractor for vod.pl pages, built on the Onet base helpers."""

    _VALID_URL = r'https?://vod\.pl/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)'
    _TESTS = [{
        'url': 'https://vod.pl/filmy/chlopaki-nie-placza/3ep3jns',
        'md5': 'a7dc3b2f7faa2421aefb0ecaabf7ec74',
        'info_dict': {
            'id': '3ep3jns',
            'ext': 'mp4',
            'title': 'Chłopaki nie płaczą',
            'description': 'md5:f5f03b84712e55f5ac9f0a3f94445224',
            'timestamp': 1463415154,
            'duration': 5765,
            'upload_date': '20160516',
        },
    }, {
        'url': 'https://vod.pl/seriale/belfer-na-planie-praca-kamery-online/2c10heh',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract via the MVP ID found in the page, then report the URL's own ID."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        mvp_id = self._search_mvp_id(webpage)
        info = self._extract_from_id(mvp_id, webpage)
        # Override whatever ID the base helper set with the one from the URL
        info['id'] = video_id
        return info

View File

@ -1,66 +0,0 @@
import re
import urllib.parse
from .common import InfoExtractor
from ..utils import (
ExtractorError,
parse_duration,
remove_end,
)
class VuClipIE(InfoExtractor):
    """Extractor for vuclip.com mobile video pages."""

    _VALID_URL = r'https?://(?:m\.)?vuclip\.com/w\?.*?cid=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://m.vuclip.com/w?cid=1129900602&bu=8589892792&frm=w&z=34801&op=0&oc=843169247&section=recommend',
        'info_dict': {
            'id': '1129900602',
            'ext': '3gp',
            'title': 'Top 10 TV Convicts',
            'duration': 733,
        },
    }

    def _real_extract(self, url):
        """Skip the interstitial ad page if present, then scrape URL, title and duration.

        Raises:
            ExtractorError: when the page shows an error message paragraph.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # An ad page offers a "No..." button whose onClick holds the ad-free path
        ad_button = re.search(
            r'''value="No.*?" onClick="location.href='([^"']+)'"''', webpage)
        if ad_button:
            parsed_url = urllib.parse.urlparse(url)
            adfree_url = f'{parsed_url.scheme}://{parsed_url.netloc}{ad_button.group(1)}'
            webpage = self._download_webpage(
                adfree_url, video_id, note='Download post-ad page')

        site_error = self._html_search_regex(
            r'<p class="message">(.*?)</p>', webpage, 'error message',
            default=None)
        if site_error:
            raise ExtractorError(
                f'{self.IE_NAME} said: {site_error}', expected=True)

        # The site serves two page layouts: a play-button link or an HTML5 media element
        direct_url = self._search_regex(
            r'<a[^>]+href="([^"]+)"[^>]*><img[^>]+src="[^"]*/play\.gif',
            webpage, 'video URL', default=None)
        if not direct_url:
            formats = self._parse_html5_media_entries(url, webpage, video_id)[0]['formats']
        else:
            formats = [{'url': direct_url}]

        raw_title = self._html_search_regex(
            r'<title>(.*?)-\s*Vuclip</title>', webpage, 'title')
        title = remove_end(raw_title.strip(), ' - Video')

        raw_duration = self._html_search_regex(
            r'[(>]([0-9]+:[0-9]+)(?:<span|\))', webpage, 'duration', fatal=False)

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'duration': parse_duration(raw_duration),
        }

View File

@ -1,336 +0,0 @@
import functools
import re
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
ExtractorError,
int_or_none,
str_or_none,
)
class VVVVIDIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.)?vvvvid\.it/(?:#!)?(?:show|anime|film|series)/'
_VALID_URL = rf'{_VALID_URL_BASE}(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)'
_TESTS = [{
# video_type == 'video/vvvvid'
'url': 'https://www.vvvvid.it/show/498/the-power-of-computing/518/505692/playstation-vr-cambiera-il-nostro-modo-di-giocare',
'info_dict': {
'id': '505692',
'ext': 'mp4',
'title': 'Playstation VR cambierà il nostro modo di giocare',
'duration': 93,
'series': 'The Power of Computing',
'season_id': '518',
'episode': 'Playstation VR cambierà il nostro modo di giocare',
'episode_id': '4747',
'view_count': int,
'like_count': int,
'repost_count': int,
'thumbnail': 'https://static.vvvvid.it/img/zoomin/28CA2409-E663-34F0-2B02E72356556EA3_500k.jpg',
},
'params': {
'skip_download': True,
},
}, {
# video_type == 'video/rcs'
'url': 'https://www.vvvvid.it/#!show/376/death-note-live-action/377/482493/episodio-01',
'info_dict': {
'id': '482493',
'ext': 'mp4',
'title': 'Episodio 01',
},
'params': {
'skip_download': True,
},
'skip': 'Every video/rcs is not working even in real website',
}, {
# video_type == 'video/youtube'
'url': 'https://www.vvvvid.it/show/404/one-punch-man/406/486683/trailer',
'md5': '33e0edfba720ad73a8782157fdebc648',
'info_dict': {
'id': 'RzmFKUDOUgw',
'ext': 'mp4',
'title': 'Trailer',
'upload_date': '20150906',
'description': 'md5:a5e802558d35247fee285875328c0b80',
'uploader_id': '@EMOTIONLabelChannel',
'uploader': 'EMOTION Label Channel',
'episode_id': '3115',
'view_count': int,
'like_count': int,
'repost_count': int,
'availability': str,
'categories': list,
'age_limit': 0,
'channel': 'EMOTION Label Channel',
'channel_follower_count': int,
'channel_id': 'UCQ5URCSs1f5Cz9rh-cDGxNQ',
'channel_url': 'https://www.youtube.com/channel/UCQ5URCSs1f5Cz9rh-cDGxNQ',
'comment_count': int,
'duration': 133,
'episode': 'Trailer',
'heatmap': list,
'live_status': 'not_live',
'playable_in_embed': True,
'season_id': '406',
'series': 'One-Punch Man',
'tags': list,
'uploader_url': 'https://www.youtube.com/@EMOTIONLabelChannel',
'thumbnail': 'https://i.ytimg.com/vi/RzmFKUDOUgw/maxresdefault.jpg',
},
'params': {
'skip_download': True,
},
}, {
# video_type == 'video/dash'
'url': 'https://www.vvvvid.it/show/844/le-bizzarre-avventure-di-jojo-vento-aureo/938/527551/golden-wind',
'info_dict': {
'id': '527551',
'ext': 'mp4',
'title': 'Golden Wind',
'duration': 1430,
'series': 'Le bizzarre avventure di Jojo - Vento Aureo',
'season_id': '938',
'episode': 'Golden Wind',
'episode_number': 1,
'episode_id': '9089',
'view_count': int,
'like_count': int,
'repost_count': int,
'thumbnail': 'https://static.vvvvid.it/img/thumbs/Dynit/Jojo/Jojo_S05Ep01-t.jpg',
'season': 'Season 5',
'season_number': 5,
},
'params': {
'skip_download': True,
'format': 'mp4',
},
}, {
'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
'only_matching': True,
}]
_conn_id = None
@functools.cached_property
def _headers(self):
return {
**self.geo_verification_headers(),
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.50 Safari/537.37',
}
def _real_initialize(self):
self._conn_id = self._download_json(
'https://www.vvvvid.it/user/login',
None, headers=self._headers)['data']['conn_id']
def _download_info(self, show_id, path, video_id, fatal=True, query=None):
q = {
'conn_id': self._conn_id,
}
if query:
q.update(query)
response = self._download_json(
f'https://www.vvvvid.it/vvvvid/ondemand/{show_id}/{path}',
video_id, headers=self._headers, query=q, fatal=fatal)
if not (response or fatal):
return
if response.get('result') == 'error':
raise ExtractorError('{} said: {}'.format(
self.IE_NAME, response['message']), expected=True)
return response['data']
def _extract_common_video_info(self, video_data):
return {
'thumbnail': video_data.get('thumbnail'),
'episode_id': str_or_none(video_data.get('id')),
}
def _real_extract(self, url):
    """Extract a single episode.

    Downloads the season listing, picks the entry whose ``video_id``
    matches the URL, decodes its obfuscated ``embed_info`` values and turns
    them into formats (or into a ``url_transparent`` result for YouTube
    embeds).

    Raises ``ExtractorError`` when the episode is not part of the season
    listing.
    """
    show_id, season_id, video_id = self._match_valid_url(url).groups()

    response = self._download_info(
        show_id, f'season/{season_id}',
        video_id, query={'video_id': video_id})

    vid = int(video_id)
    # Use a default so that a missing episode surfaces as an extractor
    # error instead of a bare StopIteration
    video_data = next(
        (episode for episode in response if episode.get('video_id') == vid),
        None)
    if video_data is None:
        raise ExtractorError(
            f'Unable to find video {video_id} in season {season_id}')
    title = video_data['title']
    formats = []

    # vvvvid embed_info decryption algorithm is reverse engineered from function $ds(h) at vvvvid.js
    # (single-letter names are kept so the code can be compared with the JS)
    def ds(h):
        g = 'MNOPIJKL89+/4567UVWXQRSTEFGHABCDcdefYZabstuvopqr0123wxyzklmnghij'

        def f(m):
            # Regroup 6-bit values into 8-bit values, base64 style:
            # every run of up to four inputs yields up to three outputs
            l = []
            o = 0
            b = False
            m_len = len(m)
            while (not b) and o < m_len:
                n = m[o] << 2
                o += 1
                k = -1
                j = -1
                if o < m_len:
                    n += m[o] >> 4
                    o += 1
                    if o < m_len:
                        k = (m[o - 1] << 4) & 255
                        k += m[o] >> 2
                        o += 1
                        if o < m_len:
                            j = (m[o - 1] << 6) & 255
                            j += m[o]
                            o += 1
                        else:
                            b = True
                    else:
                        b = True
                else:
                    b = True
                l.append(n)
                if k != -1:
                    l.append(k)
                if j != -1:
                    l.append(j)
            return l

        # Map every character to its position in the custom alphabet
        c = [g.index(e) for e in h]

        # Two backwards passes XOR-ing each value with its (cyclic) successor
        c_len = len(c)
        for e in range(c_len * 2 - 1, -1, -1):
            c[e % c_len] = c[e % c_len] ^ c[(e + 1) % c_len]

        return ''.join(map(chr, f(c)))

    info = {}

    def metadata_from_url(r_url):
        # Only fills in episode/season numbers while `info` is still empty
        if not info and r_url:
            mobj = re.search(r'_(?:S(\d+))?Ep(\d+)', r_url)
            if mobj:
                info['episode_number'] = int(mobj.group(2))
                season_number = mobj.group(1)
                if season_number:
                    info['season_number'] = int(season_number)

    video_type = video_data.get('video_type')
    is_youtube = False
    for quality in ('', '_sd'):
        embed_code = video_data.get('embed_info' + quality)
        if not embed_code:
            continue
        embed_code = ds(embed_code)
        if video_type == 'video/kenc':
            embed_code = re.sub(r'https?(://[^/]+)/z/', r'https\1/i/', embed_code).replace('/manifest.f4m', '/master.m3u8')
            kenc = self._download_json(
                'https://www.vvvvid.it/kenc', video_id, query={
                    'action': 'kt',
                    'conn_id': self._conn_id,
                    'url': embed_code,
                }, fatal=False) or {}
            kenc_message = kenc.get('message')
            if kenc_message:
                # The decoded message is appended as the query string
                embed_code += '?' + ds(kenc_message)
            formats.extend(self._extract_m3u8_formats(
                embed_code, video_id, 'mp4', m3u8_id='hls', fatal=False))
        elif video_type == 'video/rcs':
            formats.extend(self._extract_akamai_formats(embed_code, video_id))
        elif video_type == 'video/youtube':
            info.update({
                '_type': 'url_transparent',
                'ie_key': YoutubeIE.ie_key(),
                'url': embed_code,
            })
            is_youtube = True
            break
        elif video_type == 'video/dash':
            formats.extend(self._extract_m3u8_formats(
                embed_code, video_id, 'mp4', m3u8_id='hls', fatal=False))
        else:
            formats.extend(self._extract_wowza_formats(
                f'http://sb.top-ix.org/videomg/_definst_/mp4:{embed_code}/playlist.m3u8', video_id, skip_protocols=['f4m']))
        metadata_from_url(embed_code)

    if not is_youtube:
        info['formats'] = formats

    metadata_from_url(video_data.get('thumbnail'))
    info.update(self._extract_common_video_info(video_data))
    info.update({
        'id': video_id,
        'title': title,
        'duration': int_or_none(video_data.get('length')),
        'series': video_data.get('show_title'),
        'season_id': season_id,
        'episode': title,
        'view_count': int_or_none(video_data.get('views')),
        'like_count': int_or_none(video_data.get('video_likes')),
        'repost_count': int_or_none(video_data.get('video_shares')),
    })
    return info
class VVVVIDShowIE(VVVVIDIE):  # XXX: Do not subclass from concrete IE
    """Playlist extractor listing every playable episode of a show."""

    _VALID_URL = rf'(?P<base_url>{VVVVIDIE._VALID_URL_BASE}(?P<id>\d+)(?:/(?P<show_title>[^/?&#]+))?)/?(?:[?#&]|$)'
    _TESTS = [{
        'url': 'https://www.vvvvid.it/show/156/psyco-pass',
        'info_dict': {
            'id': '156',
            'title': 'Psycho-Pass',
            'description': 'md5:94d572c0bd85894b193b8aebc9a3a806',
        },
        'playlist_count': 46,
    }, {
        'url': 'https://www.vvvvid.it/show/156',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist of ``url_transparent`` entries, one per playable episode."""
        base_url, show_id, show_title = self._match_valid_url(url).groups()

        seasons = self._download_info(
            show_id, 'seasons/', show_title)

        # The show info is optional (fatal=False), in which case the helper
        # may hand back None; fall back to an empty dict so the `.get()`
        # lookups below cannot fail
        show_info = self._download_info(
            show_id, 'info/', show_title, fatal=False) or {}

        # Episode URLs need a title segment; use a placeholder if the
        # matched URL had none
        if not show_title:
            base_url += '/title'

        entries = []
        for season in (seasons or []):
            episodes = season.get('episodes') or []
            playlist_title = season.get('name') or show_info.get('title')
            for episode in episodes:
                if episode.get('playable') is False:
                    continue
                season_id = str_or_none(episode.get('season_id'))
                video_id = str_or_none(episode.get('video_id'))
                if not (season_id and video_id):
                    continue
                info = self._extract_common_video_info(episode)
                info.update({
                    '_type': 'url_transparent',
                    'ie_key': VVVVIDIE.ie_key(),
                    'url': '/'.join([base_url, season_id, video_id]),
                    'title': episode.get('title'),
                    'description': episode.get('description'),
                    'season_id': season_id,
                    'playlist_title': playlist_title,
                })
                entries.append(info)

        return self.playlist_result(
            entries, show_id, show_info.get('title'), show_info.get('description'))

View File

@ -347,35 +347,3 @@ class WDRElefantIE(InfoExtractor):
raise ExtractorError(
f'{display_id} is not a video', expected=True)
return self.url_result(zmdb_url_element.text, ie=WDRIE.ie_key())
class WDRMobileIE(InfoExtractor):
    """Extractor that derives all metadata from the media URL itself."""

    IE_NAME = 'wdr:mobile'
    _WORKING = False  # no such domain
    _VALID_URL = r'''(?x)
        https?://mobile-ondemand\.wdr\.de/
        .*?/fsk(?P<age_limit>[0-9]+)
        /[0-9]+/[0-9]+/
        (?P<id>[0-9]+)_(?P<title>[0-9]+)'''
    _TEST = {
        'url': 'http://mobile-ondemand.wdr.de/CMS2010/mdb/ondemand/weltweit/fsk0/42/421735/421735_4283021.mp4',
        'info_dict': {
            'title': '4283021',
            'id': '421735',
            'ext': 'mp4',
            'age_limit': 0,
        },
        'skip': 'Problems with loading data.',
    }

    def _real_extract(self, url):
        """Return an info dict built from the URL's named groups; no download is made."""
        video_id, title, age_limit = self._match_valid_url(url).group(
            'id', 'title', 'age_limit')
        request_headers = {'User-Agent': 'mobile'}
        return {
            'id': video_id,
            'title': title,
            'age_limit': int(age_limit),
            'url': url,
            'http_headers': request_headers,
        }

View File

@ -1,86 +0,0 @@
from .common import InfoExtractor
from ..utils import (
float_or_none,
int_or_none,
parse_age_limit,
traverse_obj,
unified_timestamp,
url_or_none,
)
class WeyyakIE(InfoExtractor):
    """Extractor for weyyak.com episode and movie pages."""

    _VALID_URL = r'https?://weyyak\.com/(?P<lang>\w+)/(?:player/)?(?P<type>episode|movie)/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://weyyak.com/en/player/episode/1341952/Ribat-Al-Hob-Episode49',
            'md5': '0caf55c1a615531c8fe60f146ae46849',
            'info_dict': {
                'id': '1341952',
                'ext': 'mp4',
                'title': 'Ribat Al Hob',
                'duration': 2771,
                'alt_title': 'رباط الحب',
                'season': 'Season 1',
                'season_number': 1,
                'episode': 'Episode 49',
                'episode_number': 49,
                'timestamp': 1485907200,
                'upload_date': '20170201',
                'thumbnail': r're:^https://content\.weyyak\.com/.+/poster-image',
                'categories': ['Drama', 'Thrillers', 'Romance'],
                'tags': 'count:8',
            },
        },
        {
            'url': 'https://weyyak.com/en/movie/233255/8-Seconds',
            'md5': 'fe740ae0f63e4d1c8a7fc147a410c564',
            'info_dict': {
                'id': '233255',
                'ext': 'mp4',
                'title': '8 Seconds',
                'duration': 6490,
                'alt_title': '8 ثواني',
                'description': 'md5:45b83a155c30b49950624c7e99600b9d',
                'age_limit': 15,
                'release_year': 2015,
                'timestamp': 1683106031,
                'upload_date': '20230503',
                'thumbnail': r're:^https://content\.weyyak\.com/.+/poster-image',
                'categories': ['Drama', 'Social'],
                'cast': ['Ceylin Adiyaman', 'Esra Inal'],
            },
        },
    ]

    def _real_extract(self, url):
        """Fetch the content metadata, resolve its HLS manifest and return the info dict."""
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        lang = mobj.group('lang')

        # Episodes and movies are served from different API paths
        if mobj.group('type') == 'episode':
            path = 'episode/'
        else:
            path = 'contents/moviedetails?contentkey='

        data = self._download_json(
            f'https://msapifo-prod-me.weyyak.z5.com/v1/{lang}/{path}{video_id}', video_id)['data']

        # The metadata names the stream whose details hold the manifest URL
        stream_details = self._download_json(
            f'https://api-weyyak.akamaized.net/get_info/{data["video_id"]}',
            video_id, 'Extracting video details')
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            stream_details['url_video'], video_id)

        metadata = traverse_obj(data, {
            'title': ('title', {str}),
            'alt_title': ('translated_title', {str}),
            'description': ('synopsis', {str}),
            'duration': ('length', {float_or_none}),
            'age_limit': ('age_rating', {parse_age_limit}),
            'season_number': ('season_number', {int_or_none}),
            'episode_number': ('episode_number', {int_or_none}),
            'thumbnail': ('imagery', 'thumbnail', {url_or_none}),
            'categories': ('genres', ..., {str}),
            'tags': ('tags', ..., {str}),
            'cast': (('main_actor', 'main_actress'), {str}),
            'timestamp': ('insertedAt', {unified_timestamp}),
            'release_year': ('production_year', {int_or_none}),
        })

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **metadata,
        }

View File

@ -1,114 +0,0 @@
import re
from .common import InfoExtractor
from ..utils import (
find_xpath_attr,
int_or_none,
parse_iso8601,
xpath_text,
xpath_with_ns,
)
class XstreamIE(InfoExtractor):
    """Extractor for Xstream Atom video feeds (``xstream:<partner>:<id>`` or frontend URLs)."""

    _VALID_URL = r'''(?x)
        (?:
            xstream:|
            https?://frontend\.xstream\.(?:dk|net)/
        )
        (?P<partner_id>[^/]+)
        (?:
            :|
            /feed/video/\?.*?\bid=
        )
        (?P<id>\d+)
        '''
    _TESTS = [{
        'url': 'http://frontend.xstream.dk/btno/feed/video/?platform=web&id=86588',
        'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
        'info_dict': {
            'id': '86588',
            'ext': 'mov',
            'title': 'Otto Wollertsen',
            'description': 'Vestlendingen Otto Fredrik Wollertsen',
            'timestamp': 1430473209,
            'upload_date': '20150501',
        },
    }, {
        'url': 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=21039',
        'only_matching': True,
    }]

    def _extract_video_info(self, partner_id, video_id):
        """Download the partner's feed entry for ``video_id`` and build the info dict.

        Formats come from the ``media:content`` elements (RTMP URLs are split
        into url/app/play path) plus the optional ``rel="original"`` link;
        thumbnails come from the ``xt:splash`` elements.
        """
        data = self._download_xml(
            f'http://frontend.xstream.dk/{partner_id}/feed/video/?platform=web&id={video_id}',
            video_id)

        NS_MAP = {
            'atom': 'http://www.w3.org/2005/Atom',
            'xt': 'http://xstream.dk/',
            'media': 'http://search.yahoo.com/mrss/',
        }

        entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))

        title = xpath_text(
            entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
        description = xpath_text(
            entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
        timestamp = parse_iso8601(xpath_text(
            entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))

        media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))

        def group_children(tag):
            # Compare against None explicitly: an element without children
            # is falsy, and a missing media:group must not raise
            if media_group is None:
                return []
            return media_group.findall(xpath_with_ns(tag, NS_MAP))

        formats = []
        for media_content in group_children('./media:content'):
            media_url = media_content.get('url')
            if not media_url:
                continue
            tbr = int_or_none(media_content.get('bitrate'))
            mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
            if mobj:
                formats.append({
                    'url': mobj.group('url'),
                    'play_path': 'mp4:{}'.format(mobj.group('playpath')),
                    'app': mobj.group('app'),
                    'ext': 'flv',
                    'tbr': tbr,
                    # tbr is None when the bitrate attribute is absent or
                    # not numeric; '%d' formatting would raise on it
                    'format_id': f'rtmp-{tbr}' if tbr is not None else 'rtmp',
                })
            else:
                formats.append({
                    'url': media_url,
                    'tbr': tbr,
                })

        link = find_xpath_attr(
            entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
        if link is not None:
            formats.append({
                'url': link.get('href'),
                'format_id': link.get('rel'),
                'quality': 1,
            })

        thumbnails = [{
            'url': splash.get('url'),
            'width': int_or_none(splash.get('width')),
            'height': int_or_none(splash.get('height')),
        } for splash in group_children('./xt:splash')]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'formats': formats,
            'thumbnails': thumbnails,
        }

    def _real_extract(self, url):
        """Split the URL into partner and video id and delegate to ``_extract_video_info``."""
        mobj = self._match_valid_url(url)
        partner_id = mobj.group('partner_id')
        video_id = mobj.group('id')

        return self._extract_video_info(partner_id, video_id)

View File

@ -1,269 +0,0 @@
import json
import time
import uuid
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
jwt_decode_hs256,
parse_age_limit,
str_or_none,
try_call,
try_get,
unified_strdate,
unified_timestamp,
url_or_none,
)
class Zee5IE(InfoExtractor):
    """Extractor for single zee5.com videos (``zee5:<id>`` or content page URLs)."""

    _VALID_URL = r'''(?x)
        (?:
            zee5:|
            https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
            (?:
                (?:tv-shows|kids|web-series|zee5originals)(?:/[^#/?]+){3}
                |(?:movies|kids|videos|news|music-videos)/(?!kids-shows)[^#/?]+
            )/(?P<display_id>[^#/?]+)/
        )
        (?P<id>[^#/?]+)/?(?:$|[?#])
        '''
    _TESTS = [{
        'url': 'https://www.zee5.com/movies/details/adavari-matalaku-ardhale-verule/0-0-movie_1143162669',
        'info_dict': {
            'id': '0-0-movie_1143162669',
            'ext': 'mp4',
            'display_id': 'adavari-matalaku-ardhale-verule',
            'title': 'Adavari Matalaku Ardhale Verule',
            'duration': 9360,
            'description': str,
            'alt_title': 'Adavari Matalaku Ardhale Verule',
            'uploader': 'Zee Entertainment Enterprises Ltd',
            'release_date': '20070427',
            'upload_date': '20070427',
            'timestamp': 1177632000,
            'thumbnail': r're:^https?://.*\.jpg$',
            'episode_number': 0,
            'episode': 'Episode 0',
            'tags': list,
        },
        'params': {
            'format': 'bv',
        },
    }, {
        'url': 'https://www.zee5.com/kids/kids-shows/bandbudh-aur-budbak/0-6-1899/yoga-se-hoga-bandbudh-aur-budbak/0-1-239839',
        'info_dict': {
            'id': '0-1-239839',
            'ext': 'mp4',
            'display_id': 'yoga-se-hoga-bandbudh-aur-budbak',
            'title': 'Yoga Se Hoga-Bandbudh aur Budbak',
            'duration': 659,
            'description': str,
            'alt_title': 'Yoga Se Hoga-Bandbudh aur Budbak',
            'uploader': 'Zee Entertainment Enterprises Ltd',
            'release_date': '20150101',
            'upload_date': '20150101',
            'timestamp': 1420070400,
            'thumbnail': r're:^https?://.*\.jpg$',
            'series': 'Bandbudh Aur Budbak',
            'season_number': 1,
            'episode_number': 1,
            'episode': 'Episode 1',
            'season': 'Season 1',
            'tags': list,
        },
        'params': {
            'format': 'bv',
        },
    }, {
        'url': 'https://www.zee5.com/hi/tv-shows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730?country=IN',
        'only_matching': True,
    }, {
        'url': 'https://www.zee5.com/global/hi/tv-shows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730',
        'only_matching': True,
    }, {
        'url': 'https://www.zee5.com/web-series/details/mithya/0-6-4z587408/maine-dekhi-hai-uski-mrityu/0-1-6z587412',
        'only_matching': True,
    }, {
        'url': 'https://www.zee5.com/kids/kids-movies/maya-bommalu/0-0-movie_1040370005',
        'only_matching': True,
    }, {
        'url': 'https://www.zee5.com/news/details/jana-sena-chief-pawan-kalyan-shows-slippers-to-ysrcp-leaders/0-0-newsauto_6ettj4242oo0',
        'only_matching': True,
    }, {
        'url': 'https://www.zee5.com/music-videos/details/adhento-gaani-vunnapaatuga-jersey-nani-shraddha-srinath/0-0-56973',
        'only_matching': True,
    }]
    # Random id generated once at class creation; sent as device id and guest token
    _DEVICE_ID = str(uuid.uuid4())
    _USER_TOKEN = None
    _LOGIN_HINT = 'Use "--username <mobile_number>" to login using otp or "--username token" and "--password <user_token>" to login using user token.'
    _NETRC_MACHINE = 'zee5'
    _GEO_COUNTRIES = ['IN']
    _USER_COUNTRY = None

    def _perform_login(self, username, password):
        """Obtain a user token via OTP (10-digit mobile number) or accept one passed as password.

        Raises ``ExtractorError`` (expected) when the OTP cannot be sent or
        verified, when the credentials match neither login mode, or when
        the token has expired.
        """
        if len(username) == 10 and username.isdigit() and self._USER_TOKEN is None:
            self.report_login()
            otp_request_json = self._download_json(
                f'https://b2bapi.zee5.com/device/sendotp_v1.php?phoneno=91{username}',
                None, note='Sending OTP')
            if otp_request_json['code'] != 0:
                raise ExtractorError(otp_request_json['message'], expected=True)
            self.to_screen(otp_request_json['message'])

            otp_code = self._get_tfa_info('OTP')
            otp_verify_json = self._download_json(
                f'https://b2bapi.zee5.com/device/verifyotp_v1.php?phoneno=91{username}&otp={otp_code}&guest_token={self._DEVICE_ID}&platform=web',
                None, note='Verifying OTP', fatal=False)
            if not otp_verify_json:
                raise ExtractorError('Unable to verify OTP.', expected=True)
            self._USER_TOKEN = otp_verify_json.get('token')
            if not self._USER_TOKEN:
                # Report what the verification step said, not the message
                # of the earlier (successful) "send OTP" request
                raise ExtractorError(
                    otp_verify_json.get('message') or 'Unable to verify OTP.',
                    expected=True)
        elif username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)):
            self._USER_TOKEN = password
        else:
            raise ExtractorError(self._LOGIN_HINT, expected=True)

        token = jwt_decode_hs256(self._USER_TOKEN)
        if token.get('exp', 0) <= int(time.time()):
            raise ExtractorError('User token has expired', expected=True)
        self._USER_COUNTRY = token.get('current_country')

    def _real_extract(self, url):
        """Fetch playback details for the video and build its info dict.

        Raises ``ExtractorError`` (expected) for premium content. When no
        HLS URL is offered, a login is requested through
        ``raise_login_required`` and no formats are extracted.
        """
        video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
        access_token_request = self._download_json(
            'https://launchapi.zee5.com/launch?platform_name=web_app',
            video_id, note='Downloading access token')['platform_token']
        data = {
            'x-access-token': access_token_request['token'],
        }
        if self._USER_TOKEN:
            data['Authorization'] = f'bearer {self._USER_TOKEN}'
        else:
            data['X-Z5-Guest-Token'] = self._DEVICE_ID

        json_data = self._download_json(
            'https://spapi.zee5.com/singlePlayback/getDetails/secure', video_id, query={
                'content_id': video_id,
                'device_id': self._DEVICE_ID,
                'platform_name': 'desktop_web',
                'country': self._USER_COUNTRY or self.get_param('geo_bypass_country') or 'IN',
                'check_parental_control': False,
            }, headers={'content-type': 'application/json'}, data=json.dumps(data).encode())
        asset_data = json_data['assetDetails']
        show_data = json_data.get('showDetails', {})
        if 'premium' in (asset_data.get('business_type') or ''):
            raise ExtractorError('Premium content is DRM protected.', expected=True)

        # Only touch the manifest when one is offered; the login request
        # is made with metadata_available=True, so extraction must be able
        # to continue with metadata only
        hls_url = asset_data.get('hls_url')
        formats, m3u8_subs = [], {}
        if hls_url:
            formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                hls_url, video_id, 'mp4', fatal=False)
        else:
            self.raise_login_required(self._LOGIN_HINT, metadata_available=True, method=None)

        subtitles = {}
        for sub in asset_data.get('subtitle_url') or []:
            sub_url = sub.get('url')
            if not sub_url:
                continue
            subtitles.setdefault(sub.get('language', 'en'), []).append({
                'url': self._proto_relative_url(sub_url),
            })
        subtitles = self._merge_subtitles(subtitles, m3u8_subs)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': asset_data['title'],
            'formats': formats,
            'subtitles': subtitles,
            'duration': int_or_none(asset_data.get('duration')),
            'description': str_or_none(asset_data.get('description')),
            'alt_title': str_or_none(asset_data.get('original_title')),
            'uploader': str_or_none(asset_data.get('content_owner')),
            'age_limit': parse_age_limit(asset_data.get('age_rating')),
            'release_date': unified_strdate(asset_data.get('release_date')),
            'timestamp': unified_timestamp(asset_data.get('release_date')),
            'thumbnail': url_or_none(asset_data.get('image_url')),
            'series': str_or_none(asset_data.get('tvshow_name')),
            # Index the first season, consistent with the season_number lookup below
            'season': try_get(show_data, lambda x: x['seasons'][0]['title'], str),
            'season_number': int_or_none(try_get(show_data, lambda x: x['seasons'][0]['orderid'])),
            'episode_number': int_or_none(try_get(asset_data, lambda x: x['orderid'])),
            'tags': try_get(asset_data, lambda x: x['tags'], list),
        }
class Zee5SeriesIE(InfoExtractor):
    """Playlist extractor for zee5.com shows (``zee5:series:<id>`` or show page URLs)."""

    IE_NAME = 'zee5:series'
    _VALID_URL = r'''(?x)
        (?:
            zee5:series:|
            https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
            (?:tv-shows|web-series|kids|zee5originals)/(?!kids-movies)(?:[^#/?]+/){2}
        )
        (?P<id>[^#/?]+)(?:/episodes)?/?(?:$|[?#])
        '''
    _TESTS = [{
        'url': 'https://www.zee5.com/kids/kids-shows/bandbudh-aur-budbak/0-6-1899',
        'playlist_mincount': 156,
        'info_dict': {
            'id': '0-6-1899',
        },
    }, {
        'url': 'https://www.zee5.com/tv-shows/details/bhabi-ji-ghar-par-hai/0-6-199',
        'playlist_mincount': 1500,
        'info_dict': {
            'id': '0-6-199',
        },
    }, {
        'url': 'https://www.zee5.com/tv-shows/details/agent-raghav-crime-branch/0-6-965',
        'playlist_mincount': 24,
        'info_dict': {
            'id': '0-6-965',
        },
    }, {
        'url': 'https://www.zee5.com/ta/tv-shows/details/nagabhairavi/0-6-3201',
        'playlist_mincount': 3,
        'info_dict': {
            'id': '0-6-3201',
        },
    }, {
        'url': 'https://www.zee5.com/global/hi/tv-shows/details/khwaabon-ki-zamin-par/0-6-270',
        'playlist_mincount': 150,
        'info_dict': {
            'id': '0-6-270',
        },
    }, {
        'url': 'https://www.zee5.com/tv-shows/details/chala-hawa-yeu-dya-ladies-zindabaad/0-6-2943/episodes',
        'only_matching': True,
    }, {
        'url': 'https://www.zee5.com/web-series/details/mithya/0-6-4z587408',
        'only_matching': True,
    }]

    def _entries(self, show_id):
        """Yield a ``zee5:<id>`` URL result for every episode of every season.

        Each season's episode list is paged by following the reply's
        ``next_episode_api`` URL until it is missing or not a valid URL.
        Seasons and episodes without an id are skipped.
        """
        access_token_request = self._download_json(
            'https://launchapi.zee5.com/launch?platform_name=web_app',
            show_id, note='Downloading access token')['platform_token']
        headers = {
            'X-Access-Token': access_token_request['token'],
            'Referer': 'https://www.zee5.com/',
        }
        show_url = f'https://gwapi.zee5.com/content/tvshow/{show_id}?translation=en&country=IN'

        # Counts pages across all seasons; only used in the progress note
        page_num = 0
        show_json = self._download_json(show_url, video_id=show_id, headers=headers)
        for season in show_json.get('seasons') or []:
            season_id = try_get(season, lambda x: x['id'], str)
            if not season_id:
                # Without an id the request URL would contain "season_id=None"
                continue
            next_url = f'https://gwapi.zee5.com/content/tvshow/?season_id={season_id}&type=episode&translation=en&country=IN&on_air=false&asset_subtype=tvshow&page=1&limit=100'
            while next_url:
                page_num += 1
                episodes_json = self._download_json(
                    next_url, video_id=show_id, headers=headers,
                    note=f'Downloading JSON metadata page {page_num}')
                for episode in try_get(episodes_json, lambda x: x['episode'], list) or []:
                    video_id = episode.get('id')
                    if not video_id:
                        # Avoid emitting an unusable "zee5:None" entry
                        continue
                    yield self.url_result(
                        f'zee5:{video_id}',
                        ie=Zee5IE.ie_key(), video_id=video_id)
                next_url = url_or_none(episodes_json.get('next_episode_api'))

    def _real_extract(self, url):
        """Return a playlist, keyed by the show id, of the show's episodes."""
        show_id = self._match_id(url)
        return self.playlist_result(self._entries(show_id), playlist_id=show_id)