mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-06-15 07:14:59 +00:00
[cleanup] Remove dead extractors (#16137)
Closes #2623 Closes #2679 Closes #2821 Closes #3416 Closes #4828 Closes #4939 Closes #5421 Closes #7064 Closes #7264 Closes #7654 Closes #8075 Closes #8798 Closes #9313 Closes #9617 Closes #10162 Closes #10252 Closes #10264 Closes #15640 Authored by: doe1080, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
This commit is contained in:
parent
e85da3b985
commit
3ba1534fa3
@ -16,7 +16,6 @@ from yt_dlp.extractor import (
|
||||
CeskaTelevizeIE,
|
||||
DailymotionIE,
|
||||
DemocracynowIE,
|
||||
LyndaIE,
|
||||
RaiPlayIE,
|
||||
RTVEALaCartaIE,
|
||||
TedTalkIE,
|
||||
@ -250,20 +249,6 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
@is_download_test
@unittest.skip('IE broken')
class TestLyndaSubtitles(BaseTestSubtitles):
    """Subtitle download test for a lynda.com lesson (skipped: the IE is marked broken)."""

    url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
    IE = LyndaIE

    def test_allsubtitles(self):
        # Request every available subtitle track and have it written out.
        for option in ('writesubtitles', 'allsubtitles'):
            self.DL.params[option] = True

        found = self.getSubtitles()

        # Only an English track is expected, with a known content checksum.
        self.assertEqual(set(found.keys()), {'en'})
        self.assertEqual(md5(found['en']), '09bbe67222259bed60deaa26997d73a7')
|
||||
|
||||
|
||||
@is_download_test
|
||||
@unittest.skip('IE broken')
|
||||
class TestNPOSubtitles(BaseTestSubtitles):
|
||||
|
||||
@ -54,7 +54,6 @@ from .agora import (
|
||||
WyborczaPodcastIE,
|
||||
WyborczaVideoIE,
|
||||
)
|
||||
from .airtv import AirTVIE
|
||||
from .aitube import AitubeKZVideoIE
|
||||
from .alibaba import AlibabaIE
|
||||
from .aliexpress import AliExpressLiveIE
|
||||
@ -65,10 +64,6 @@ from .allstar import (
|
||||
AllstarProfileIE,
|
||||
)
|
||||
from .alphaporno import AlphaPornoIE
|
||||
from .alsace20tv import (
|
||||
Alsace20TVEmbedIE,
|
||||
Alsace20TVIE,
|
||||
)
|
||||
from .altcensored import (
|
||||
AltCensoredChannelIE,
|
||||
AltCensoredIE,
|
||||
@ -93,7 +88,6 @@ from .americastestkitchen import (
|
||||
AmericasTestKitchenIE,
|
||||
AmericasTestKitchenSeasonIE,
|
||||
)
|
||||
from .anchorfm import AnchorFMEpisodeIE
|
||||
from .angel import AngelIE
|
||||
from .antenna import (
|
||||
Ant1NewsGrArticleIE,
|
||||
@ -106,10 +100,6 @@ from .apa import APAIE
|
||||
from .aparat import AparatIE
|
||||
from .appleconnect import AppleConnectIE
|
||||
from .applepodcasts import ApplePodcastsIE
|
||||
from .appletrailers import (
|
||||
AppleTrailersIE,
|
||||
AppleTrailersSectionIE,
|
||||
)
|
||||
from .archiveorg import (
|
||||
ArchiveOrgIE,
|
||||
YoutubeWebArchiveIE,
|
||||
@ -140,7 +130,6 @@ from .asobichannel import (
|
||||
from .asobistage import AsobiStageIE
|
||||
from .atresplayer import AtresPlayerIE
|
||||
from .atscaleconf import AtScaleConfEventIE
|
||||
from .atvat import ATVAtIE
|
||||
from .audimedia import AudiMediaIE
|
||||
from .audioboom import AudioBoomIE
|
||||
from .audiodraft import (
|
||||
@ -157,13 +146,6 @@ from .audius import (
|
||||
AudiusProfileIE,
|
||||
AudiusTrackIE,
|
||||
)
|
||||
from .awaan import (
|
||||
AWAANIE,
|
||||
AWAANLiveIE,
|
||||
AWAANSeasonIE,
|
||||
AWAANVideoIE,
|
||||
)
|
||||
from .axs import AxsIE
|
||||
from .azmedien import AZMedienIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .banbye import (
|
||||
@ -190,10 +172,6 @@ from .bbc import (
|
||||
BBCCoUkPlaylistIE,
|
||||
)
|
||||
from .beacon import BeaconTvIE
|
||||
from .beatbump import (
|
||||
BeatBumpPlaylistIE,
|
||||
BeatBumpVideoIE,
|
||||
)
|
||||
from .beatport import BeatportIE
|
||||
from .beeg import BeegIE
|
||||
from .behindkink import BehindKinkIE
|
||||
@ -210,7 +188,6 @@ from .bibeltv import (
|
||||
BibelTVSeriesIE,
|
||||
BibelTVVideoIE,
|
||||
)
|
||||
from .bigflix import BigflixIE
|
||||
from .bigo import BigoIE
|
||||
from .bild import BildIE
|
||||
from .bilibili import (
|
||||
@ -255,7 +232,6 @@ from .blerp import BlerpIE
|
||||
from .blogger import BloggerIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .bluesky import BlueskyIE
|
||||
from .bokecc import BokeCCIE
|
||||
from .bongacams import BongaCamsIE
|
||||
from .boosty import BoostyIE
|
||||
from .bostonglobe import BostonGlobeIE
|
||||
@ -288,14 +264,8 @@ from .businessinsider import BusinessInsiderIE
|
||||
from .buzzfeed import BuzzFeedIE
|
||||
from .byutv import BYUtvIE
|
||||
from .c56 import C56IE
|
||||
from .caffeinetv import CaffeineTVIE
|
||||
from .callin import CallinIE
|
||||
from .caltrans import CaltransIE
|
||||
from .cam4 import CAM4IE
|
||||
from .camdemy import (
|
||||
CamdemyFolderIE,
|
||||
CamdemyIE,
|
||||
)
|
||||
from .camfm import (
|
||||
CamFMEpisodeIE,
|
||||
CamFMShowIE,
|
||||
@ -371,7 +341,6 @@ from .ciscolive import (
|
||||
from .ciscowebex import CiscoWebexIE
|
||||
from .cjsw import CJSWIE
|
||||
from .clipchamp import ClipchampIE
|
||||
from .clippit import ClippitIE
|
||||
from .cliprs import ClipRsIE
|
||||
from .closertotruth import CloserToTruthIE
|
||||
from .cloudflarestream import CloudflareStreamIE
|
||||
@ -395,7 +364,6 @@ from .commonprotocols import (
|
||||
ViewSourceIE,
|
||||
)
|
||||
from .condenast import CondeNastIE
|
||||
from .contv import CONtvIE
|
||||
from .corus import CorusIE
|
||||
from .coub import CoubIE
|
||||
from .cozytv import CozyTVIE
|
||||
@ -510,7 +478,6 @@ from .dplay import (
|
||||
)
|
||||
from .drbonanza import DRBonanzaIE
|
||||
from .dreisat import DreiSatIE
|
||||
from .drooble import DroobleIE
|
||||
from .dropbox import DropboxIE
|
||||
from .dropout import (
|
||||
DropoutIE,
|
||||
@ -525,10 +492,6 @@ from .drtv import (
|
||||
DRTVSeriesIE,
|
||||
)
|
||||
from .dtube import DTubeIE
|
||||
from .duboku import (
|
||||
DubokuIE,
|
||||
DubokuPlaylistIE,
|
||||
)
|
||||
from .dumpert import DumpertIE
|
||||
from .duoplay import DuoplayIE
|
||||
from .dvtv import DVTVIE
|
||||
@ -546,8 +509,6 @@ from .eggs import (
|
||||
EggsArtistIE,
|
||||
EggsIE,
|
||||
)
|
||||
from .eighttracks import EightTracksIE
|
||||
from .eitb import EitbIE
|
||||
from .elementorembed import ElementorEmbedIE
|
||||
from .elonet import ElonetIE
|
||||
from .elpais import ElPaisIE
|
||||
@ -591,7 +552,6 @@ from .europeantour import EuropeanTourIE
|
||||
from .eurosport import EurosportIE
|
||||
from .euscreen import EUScreenIE
|
||||
from .expressen import ExpressenIE
|
||||
from .eyedotv import EyedoTVIE
|
||||
from .facebook import (
|
||||
FacebookAdsIE,
|
||||
FacebookIE,
|
||||
@ -655,7 +615,6 @@ from .foxnews import (
|
||||
from .foxsports import FoxSportsIE
|
||||
from .fptplay import FptplayIE
|
||||
from .francaisfacile import FrancaisFacileIE
|
||||
from .franceinter import FranceInterIE
|
||||
from .francetv import (
|
||||
FranceTVIE,
|
||||
FranceTVInfoIE,
|
||||
@ -672,14 +631,10 @@ from .frontendmasters import (
|
||||
FrontendMastersIE,
|
||||
FrontendMastersLessonIE,
|
||||
)
|
||||
from .fujitv import FujiTVFODPlus7IE
|
||||
from .funk import FunkIE
|
||||
from .funker530 import Funker530IE
|
||||
from .fuyintv import FuyinTVIE
|
||||
from .gab import (
|
||||
GabIE,
|
||||
GabTVIE,
|
||||
)
|
||||
from .gab import GabIE
|
||||
from .gaia import GaiaIE
|
||||
from .gamedevtv import GameDevTVDashboardIE
|
||||
from .gamejolt import (
|
||||
@ -743,16 +698,10 @@ from .googledrive import (
|
||||
GoogleDriveFolderIE,
|
||||
GoogleDriveIE,
|
||||
)
|
||||
from .googlepodcasts import (
|
||||
GooglePodcastsFeedIE,
|
||||
GooglePodcastsIE,
|
||||
)
|
||||
from .googlesearch import GoogleSearchIE
|
||||
from .goplay import GoPlayIE
|
||||
from .gopro import GoProIE
|
||||
from .goshgay import GoshgayIE
|
||||
from .gotostage import GoToStageIE
|
||||
from .gputechconf import GPUTechConfIE
|
||||
from .graspop import GraspopIE
|
||||
from .gronkh import (
|
||||
GronkhFeedIE,
|
||||
@ -769,7 +718,6 @@ from .hgtv import HGTVComShowIE
|
||||
from .hidive import HiDiveIE
|
||||
from .historicfilms import HistoricFilmsIE
|
||||
from .hitrecord import HitRecordIE
|
||||
from .hketv import HKETVIE
|
||||
from .hollywoodreporter import (
|
||||
HollywoodReporterIE,
|
||||
HollywoodReporterPlaylistIE,
|
||||
@ -818,7 +766,6 @@ from .idagio import (
|
||||
IdagioRecordingIE,
|
||||
IdagioTrackIE,
|
||||
)
|
||||
from .idolplus import IdolPlusIE
|
||||
from .ign import (
|
||||
IGNIE,
|
||||
IGNArticleIE,
|
||||
@ -851,7 +798,6 @@ from .instagram import (
|
||||
InstagramUserIE,
|
||||
)
|
||||
from .internazionale import InternazionaleIE
|
||||
from .internetvideoarchive import InternetVideoArchiveIE
|
||||
from .iprima import (
|
||||
IPrimaCNNIE,
|
||||
IPrimaIE,
|
||||
@ -886,7 +832,6 @@ from .iwara import (
|
||||
IwaraUserIE,
|
||||
)
|
||||
from .ixigua import IxiguaIE
|
||||
from .izlesene import IzleseneIE
|
||||
from .jamendo import (
|
||||
JamendoAlbumIE,
|
||||
JamendoIE,
|
||||
@ -939,11 +884,9 @@ from .kika import (
|
||||
KikaIE,
|
||||
KikaPlaylistIE,
|
||||
)
|
||||
from .kinja import KinjaEmbedIE
|
||||
from .kinopoisk import KinoPoiskIE
|
||||
from .kommunetv import KommunetvIE
|
||||
from .kompas import KompasVideoIE
|
||||
from .koo import KooIE
|
||||
from .krasview import KrasViewIE
|
||||
from .kth import KTHIE
|
||||
from .ku6 import Ku6IE
|
||||
@ -991,7 +934,6 @@ from .lecturio import (
|
||||
from .leeco import (
|
||||
LeIE,
|
||||
LePlaylistIE,
|
||||
LetvCloudIE,
|
||||
)
|
||||
from .lefigaro import (
|
||||
LeFigaroVideoEmbedIE,
|
||||
@ -1020,11 +962,6 @@ from .liputan6 import Liputan6IE
|
||||
from .listennotes import ListenNotesIE
|
||||
from .litv import LiTVIE
|
||||
from .livejournal import LiveJournalIE
|
||||
from .livestream import (
|
||||
LivestreamIE,
|
||||
LivestreamOriginalIE,
|
||||
LivestreamShortenerIE,
|
||||
)
|
||||
from .livestreamfails import LivestreamfailsIE
|
||||
from .lnk import LnkIE
|
||||
from .locipo import (
|
||||
@ -1048,10 +985,6 @@ from .lsm import (
|
||||
LSMReplayIE,
|
||||
)
|
||||
from .lumni import LumniIE
|
||||
from .lynda import (
|
||||
LyndaCourseIE,
|
||||
LyndaIE,
|
||||
)
|
||||
from .maariv import MaarivIE
|
||||
from .magellantv import MagellanTVIE
|
||||
from .magentamusik import MagentaMusikIE
|
||||
@ -1117,13 +1050,11 @@ from .microsoftembed import (
|
||||
MicrosoftLearnSessionIE,
|
||||
MicrosoftMediusIE,
|
||||
)
|
||||
from .microsoftstream import MicrosoftStreamIE
|
||||
from .minds import (
|
||||
MindsChannelIE,
|
||||
MindsGroupIE,
|
||||
MindsIE,
|
||||
)
|
||||
from .minoto import MinotoIE
|
||||
from .mir24tv import Mir24TvIE
|
||||
from .mirrativ import (
|
||||
MirrativIE,
|
||||
@ -1157,18 +1088,9 @@ from .mlb import (
|
||||
from .mlssoccer import MLSSoccerIE
|
||||
from .mocha import MochaVideoIE
|
||||
from .mojevideo import MojevideoIE
|
||||
from .mojvideo import MojvideoIE
|
||||
from .monstercat import MonstercatIE
|
||||
from .motherless import (
|
||||
MotherlessGalleryIE,
|
||||
MotherlessGroupIE,
|
||||
MotherlessIE,
|
||||
MotherlessUploaderIE,
|
||||
)
|
||||
from .motorsport import MotorsportIE
|
||||
from .moviepilot import MoviepilotIE
|
||||
from .moview import MoviewPlayIE
|
||||
from .moviezine import MoviezineIE
|
||||
from .movingimage import MovingImageIE
|
||||
from .msn import MSNIE
|
||||
from .mtv import MTVIE
|
||||
@ -1179,12 +1101,6 @@ from .murrtube import (
|
||||
)
|
||||
from .museai import MuseAIIE
|
||||
from .musescore import MuseScoreIE
|
||||
from .musicdex import (
|
||||
MusicdexAlbumIE,
|
||||
MusicdexArtistIE,
|
||||
MusicdexPlaylistIE,
|
||||
MusicdexSongIE,
|
||||
)
|
||||
from .mux import MuxIE
|
||||
from .mx3 import (
|
||||
Mx3IE,
|
||||
@ -1212,14 +1128,10 @@ from .nate import (
|
||||
NateIE,
|
||||
NateProgramIE,
|
||||
)
|
||||
from .nationalgeographic import (
|
||||
NationalGeographicTVIE,
|
||||
NationalGeographicVideoIE,
|
||||
)
|
||||
from .nationalgeographic import NationalGeographicTVIE
|
||||
from .naver import (
|
||||
NaverIE,
|
||||
NaverLiveIE,
|
||||
NaverNowIE,
|
||||
)
|
||||
from .nba import (
|
||||
NBAIE,
|
||||
@ -1257,7 +1169,6 @@ from .nebula import (
|
||||
NebulaSubscriptionsIE,
|
||||
)
|
||||
from .nekohacker import NekoHackerIE
|
||||
from .nerdcubed import NerdCubedFeedIE
|
||||
from .nest import (
|
||||
NestClipIE,
|
||||
NestIE,
|
||||
@ -1275,11 +1186,6 @@ from .neteasemusic import (
|
||||
NetEaseMusicProgramIE,
|
||||
NetEaseMusicSingerIE,
|
||||
)
|
||||
from .netverse import (
|
||||
NetverseIE,
|
||||
NetversePlaylistIE,
|
||||
NetverseSearchIE,
|
||||
)
|
||||
from .netzkino import NetzkinoIE
|
||||
from .newgrounds import (
|
||||
NewgroundsIE,
|
||||
@ -1389,11 +1295,6 @@ from .ntvcojp import NTVCoJpCUIE
|
||||
from .ntvde import NTVDeIE
|
||||
from .ntvru import NTVRuIE
|
||||
from .nubilesporn import NubilesPornIE
|
||||
from .nuum import (
|
||||
NuumLiveIE,
|
||||
NuumMediaIE,
|
||||
NuumTabIE,
|
||||
)
|
||||
from .nuvid import NuvidIE
|
||||
from .nytimes import (
|
||||
NYTimesArticleIE,
|
||||
@ -1426,7 +1327,6 @@ from .onet import (
|
||||
OnetMVPIE,
|
||||
OnetPlIE,
|
||||
)
|
||||
from .onionstudios import OnionStudiosIE
|
||||
from .onsen import OnsenIE
|
||||
from .opencast import (
|
||||
OpencastIE,
|
||||
@ -1437,7 +1337,6 @@ from .openrec import (
|
||||
OpenRecIE,
|
||||
OpenRecMovieIE,
|
||||
)
|
||||
from .ora import OraTVIE
|
||||
from .orf import (
|
||||
ORFIPTVIE,
|
||||
ORFONIE,
|
||||
@ -1511,26 +1410,18 @@ from .pinterest import (
|
||||
PinterestCollectionIE,
|
||||
PinterestIE,
|
||||
)
|
||||
from .piramidetv import (
|
||||
PiramideTVChannelIE,
|
||||
PiramideTVIE,
|
||||
)
|
||||
from .planetmarathi import PlanetMarathiIE
|
||||
from .platzi import (
|
||||
PlatziCourseIE,
|
||||
PlatziIE,
|
||||
)
|
||||
from .playerfm import PlayerFmIE
|
||||
from .playplustv import PlayPlusTVIE
|
||||
from .playsuisse import PlaySuisseIE
|
||||
from .playtvak import PlaytvakIE
|
||||
from .playwire import PlaywireIE
|
||||
from .pluralsight import (
|
||||
PluralsightCourseIE,
|
||||
PluralsightIE,
|
||||
)
|
||||
from .plutotv import PlutoTVIE
|
||||
from .plvideo import PlVideoIE
|
||||
from .plyr import PlyrEmbedIE
|
||||
from .podbayfm import (
|
||||
PodbayFMChannelIE,
|
||||
@ -1574,7 +1465,6 @@ from .prankcast import (
|
||||
from .premiershiprugby import PremiershipRugbyIE
|
||||
from .presstv import PressTVIE
|
||||
from .projectveritas import ProjectVeritasIE
|
||||
from .prosiebensat1 import ProSiebenSat1IE
|
||||
from .prx import (
|
||||
PRXAccountIE,
|
||||
PRXSeriesIE,
|
||||
@ -1586,7 +1476,6 @@ from .puhutv import (
|
||||
PuhuTVIE,
|
||||
PuhuTVSerieIE,
|
||||
)
|
||||
from .puls4 import Puls4IE
|
||||
from .pyvideo import PyvideoIE
|
||||
from .qdance import QDanceIE
|
||||
from .qingting import QingTingIE
|
||||
@ -1610,10 +1499,6 @@ from .radiocanada import (
|
||||
RadioCanadaAudioVideoIE,
|
||||
RadioCanadaIE,
|
||||
)
|
||||
from .radiocomercial import (
|
||||
RadioComercialIE,
|
||||
RadioComercialPlaylistIE,
|
||||
)
|
||||
from .radiode import RadioDeIE
|
||||
from .radiofrance import (
|
||||
FranceCultureIE,
|
||||
@ -1678,7 +1563,6 @@ from .redbulltv import (
|
||||
RedBullTVRrnContentIE,
|
||||
)
|
||||
from .reddit import RedditIE
|
||||
from .redge import RedCDNLivxIE
|
||||
from .redgifs import (
|
||||
RedGifsIE,
|
||||
RedGifsSearchIE,
|
||||
@ -1692,13 +1576,11 @@ from .rentv import (
|
||||
from .restudy import RestudyIE
|
||||
from .reuters import ReutersIE
|
||||
from .reverbnation import ReverbNationIE
|
||||
from .rheinmaintv import RheinMainTVIE
|
||||
from .ridehome import RideHomeIE
|
||||
from .rinsefm import (
|
||||
RinseFMArtistPlaylistIE,
|
||||
RinseFMIE,
|
||||
)
|
||||
from .rmcdecouverte import RMCDecouverteIE
|
||||
from .rockstargames import RockstarGamesIE
|
||||
from .rokfin import (
|
||||
RokfinChannelIE,
|
||||
@ -1815,7 +1697,6 @@ from .senategov import (
|
||||
SenateGovIE,
|
||||
SenateISVPIE,
|
||||
)
|
||||
from .sendtonews import SendtoNewsIE
|
||||
from .servus import ServusIE
|
||||
from .sevenplus import SevenPlusIE
|
||||
from .sexu import SexuIE
|
||||
@ -1828,7 +1709,6 @@ from .shahid import (
|
||||
ShahidShowIE,
|
||||
)
|
||||
from .sharepoint import SharePointIE
|
||||
from .sharevideos import ShareVideosEmbedIE
|
||||
from .shemaroome import ShemarooMeIE
|
||||
from .shiey import ShieyIE
|
||||
from .showroomlive import ShowRoomLiveIE
|
||||
@ -1873,7 +1753,6 @@ from .smotrim import (
|
||||
SmotrimPlaylistIE,
|
||||
)
|
||||
from .snapchat import SnapchatSpotlightIE
|
||||
from .snotr import SnotrIE
|
||||
from .softwhiteunderbelly import SoftWhiteUnderbellyIE
|
||||
from .sohu import (
|
||||
SohuIE,
|
||||
@ -1923,7 +1802,6 @@ from .spreaker import (
|
||||
SpreakerIE,
|
||||
SpreakerShowIE,
|
||||
)
|
||||
from .springboardplatform import SpringboardPlatformIE
|
||||
from .sproutvideo import (
|
||||
SproutVideoIE,
|
||||
VidsIoIE,
|
||||
@ -1940,7 +1818,6 @@ from .stacommu import (
|
||||
TheaterComplexTownVODIE,
|
||||
)
|
||||
from .stageplus import StagePlusVODConcertIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .startrek import StarTrekIE
|
||||
from .startv import StarTVIE
|
||||
from .steam import (
|
||||
@ -1948,10 +1825,6 @@ from .steam import (
|
||||
SteamCommunityIE,
|
||||
SteamIE,
|
||||
)
|
||||
from .stitcher import (
|
||||
StitcherIE,
|
||||
StitcherShowIE,
|
||||
)
|
||||
from .storyfire import (
|
||||
StoryFireIE,
|
||||
StoryFireSeriesIE,
|
||||
@ -1961,7 +1834,6 @@ from .streaks import StreaksIE
|
||||
from .streamable import StreamableIE
|
||||
from .streamcz import StreamCZIE
|
||||
from .streetvoice import StreetVoiceIE
|
||||
from .stretchinternet import StretchInternetIE
|
||||
from .stripchat import StripchatIE
|
||||
from .stv import STVPlayerIE
|
||||
from .subsplash import (
|
||||
@ -1979,8 +1851,6 @@ from .svt import (
|
||||
SVTPlayIE,
|
||||
SVTSeriesIE,
|
||||
)
|
||||
from .swearnet import SwearnetEpisodeIE
|
||||
from .syvdk import SYVDKIE
|
||||
from .sztvhu import SztvHuIE
|
||||
from .tagesschau import TagesschauIE
|
||||
from .taptap import (
|
||||
@ -2039,10 +1909,6 @@ from .telequebec import (
|
||||
)
|
||||
from .teletask import TeleTaskIE
|
||||
from .telewebion import TelewebionIE
|
||||
from .tempo import (
|
||||
IVXPlayerIE,
|
||||
TempoIE,
|
||||
)
|
||||
from .tencent import (
|
||||
IflixEpisodeIE,
|
||||
IflixSeriesIE,
|
||||
@ -2068,7 +1934,6 @@ from .theguardian import (
|
||||
TheGuardianPodcastPlaylistIE,
|
||||
)
|
||||
from .thehighwire import TheHighWireIE
|
||||
from .theholetv import TheHoleTvIE
|
||||
from .theintercept import TheInterceptIE
|
||||
from .theplatform import (
|
||||
ThePlatformFeedIE,
|
||||
@ -2120,12 +1985,6 @@ from .toypics import (
|
||||
ToypicsIE,
|
||||
ToypicsUserIE,
|
||||
)
|
||||
from .traileraddict import TrailerAddictIE
|
||||
from .triller import (
|
||||
TrillerIE,
|
||||
TrillerShortIE,
|
||||
TrillerUserIE,
|
||||
)
|
||||
from .trovo import (
|
||||
TrovoChannelClipIE,
|
||||
TrovoChannelVodIE,
|
||||
@ -2208,7 +2067,6 @@ from .tvplay import (
|
||||
TVPlayHomeIE,
|
||||
TVPlayIE,
|
||||
)
|
||||
from .tvplayer import TVPlayerIE
|
||||
from .tvw import (
|
||||
TvwIE,
|
||||
TvwNewsIE,
|
||||
@ -2248,12 +2106,8 @@ from .udemy import (
|
||||
UdemyIE,
|
||||
)
|
||||
from .udn import UDNEmbedIE
|
||||
from .ufctv import (
|
||||
UFCTVIE,
|
||||
UFCArabiaIE,
|
||||
)
|
||||
from .ufctv import UFCTVIE
|
||||
from .ukcolumn import UkColumnIE
|
||||
from .uktvplay import UKTVPlayIE
|
||||
from .uliza import (
|
||||
UlizaPlayerIE,
|
||||
UlizaPortalIE,
|
||||
@ -2283,7 +2137,6 @@ from .ustudio import (
|
||||
UstudioEmbedIE,
|
||||
UstudioIE,
|
||||
)
|
||||
from .utreon import UtreonIE
|
||||
from .varzesh3 import Varzesh3IE
|
||||
from .vbox7 import Vbox7IE
|
||||
from .veo import VeoIE
|
||||
@ -2308,20 +2161,7 @@ from .videocampus_sachsen import (
|
||||
VideocampusSachsenIE,
|
||||
ViMPPlaylistIE,
|
||||
)
|
||||
from .videodetective import VideoDetectiveIE
|
||||
from .videofyme import VideofyMeIE
|
||||
from .videoken import (
|
||||
VideoKenCategoryIE,
|
||||
VideoKenIE,
|
||||
VideoKenPlayerIE,
|
||||
VideoKenPlaylistIE,
|
||||
VideoKenTopicIE,
|
||||
)
|
||||
from .videomore import (
|
||||
VideomoreIE,
|
||||
VideomoreSeasonIE,
|
||||
VideomoreVideoIE,
|
||||
)
|
||||
from .videoken import VideoKenPlayerIE
|
||||
from .videopress import VideoPressIE
|
||||
from .vidflex import VidflexIE
|
||||
from .vidio import (
|
||||
@ -2351,10 +2191,6 @@ from .vimeo import (
|
||||
VimeoUserIE,
|
||||
VimeoWatchLaterIE,
|
||||
)
|
||||
from .vimm import (
|
||||
VimmIE,
|
||||
VimmRecordingIE,
|
||||
)
|
||||
from .viously import ViouslyIE
|
||||
from .viqeo import ViqeoIE
|
||||
from .visir import VisirIE
|
||||
@ -2372,7 +2208,6 @@ from .vk import (
|
||||
VKWallPostIE,
|
||||
)
|
||||
from .vocaroo import VocarooIE
|
||||
from .vodpl import VODPlIE
|
||||
from .vodplatform import VODPlatformIE
|
||||
from .voicy import (
|
||||
VoicyChannelIE,
|
||||
@ -2404,11 +2239,6 @@ from .vtv import (
|
||||
VTVIE,
|
||||
VTVGoIE,
|
||||
)
|
||||
from .vuclip import VuClipIE
|
||||
from .vvvvid import (
|
||||
VVVVIDIE,
|
||||
VVVVIDShowIE,
|
||||
)
|
||||
from .walla import WallaIE
|
||||
from .washingtonpost import (
|
||||
WashingtonPostArticleIE,
|
||||
@ -2418,7 +2248,6 @@ from .wat import WatIE
|
||||
from .wdr import (
|
||||
WDRIE,
|
||||
WDRElefantIE,
|
||||
WDRMobileIE,
|
||||
WDRPageIE,
|
||||
)
|
||||
from .webcamerapl import WebcameraplIE
|
||||
@ -2445,7 +2274,6 @@ from .weverse import (
|
||||
WeverseMomentIE,
|
||||
)
|
||||
from .wevidi import WeVidiIE
|
||||
from .weyyak import WeyyakIE
|
||||
from .whowatch import WhoWatchIE
|
||||
from .whyp import WhypIE
|
||||
from .wikimedia import WikimediaIE
|
||||
@ -2494,7 +2322,6 @@ from .ximalaya import (
|
||||
from .xinpianchang import XinpianchangIE
|
||||
from .xminus import XMinusIE
|
||||
from .xnxx import XNXXIE
|
||||
from .xstream import XstreamIE
|
||||
from .xvideos import (
|
||||
XVideosIE,
|
||||
XVideosQuickiesIE,
|
||||
@ -2618,10 +2445,6 @@ from .zdf import (
|
||||
ZDFIE,
|
||||
ZDFChannelIE,
|
||||
)
|
||||
from .zee5 import (
|
||||
Zee5IE,
|
||||
Zee5SeriesIE,
|
||||
)
|
||||
from .zeenews import ZeeNewsIE
|
||||
from .zenporn import ZenPornIE
|
||||
from .zetland import ZetlandDKArticleIE
|
||||
|
||||
@ -1,96 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class AirTVIE(InfoExtractor):
    """Extractor for air.tv watch pages.

    Videos whose Next.js page data carries a ``youtube_id`` are delegated to
    YoutubeIE; all others are built from the source entries in that data.
    """

    _VALID_URL = r'https?://www\.air\.tv/watch\?v=(?P<id>\w+)'
    _TESTS = [{
        # without youtube_id
        'url': 'https://www.air.tv/watch?v=W87jcWleSn2hXZN47zJZsQ',
        'info_dict': {
            'id': 'W87jcWleSn2hXZN47zJZsQ',
            'ext': 'mp4',
            'release_date': '20221003',
            'release_timestamp': 1664792603,
            'channel_id': 'vgfManQlRQKgoFQ8i8peFQ',
            'title': 'md5:c12d49ed367c3dadaa67659aff43494c',
            'upload_date': '20221003',
            'duration': 151,
            'view_count': int,
            'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg',
            'timestamp': 1664792603,
        },
    }, {
        # with youtube_id
        'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q',
        'info_dict': {
            'id': '2ZTqmpee-bQ',
            'ext': 'mp4',
            'comment_count': int,
            'tags': 'count:11',
            'channel_follower_count': int,
            'like_count': int,
            'uploader': 'Newsflare',
            'thumbnail': 'https://i.ytimg.com/vi_webp/2ZTqmpee-bQ/maxresdefault.webp',
            'availability': 'public',
            'title': 'Geese Chase Alligator Across Golf Course',
            'uploader_id': 'NewsflareBreaking',
            'channel_url': 'https://www.youtube.com/channel/UCzSSoloGEz10HALUAbYhngQ',
            'description': 'md5:99b21d9cea59330149efbd9706e208f5',
            'age_limit': 0,
            'channel_id': 'UCzSSoloGEz10HALUAbYhngQ',
            'uploader_url': 'http://www.youtube.com/user/NewsflareBreaking',
            'view_count': int,
            'categories': ['News & Politics'],
            'live_status': 'not_live',
            'playable_in_embed': True,
            'channel': 'Newsflare',
            'duration': 37,
            'upload_date': '20180511',
        },
    }]

    def _get_formats_and_subtitle(self, json_data, video_id):
        """Build ``(formats, subtitles)`` from the source entries of *json_data*.

        Entries that are not mappings, or that carry no ``src``, are skipped so
        that no format with a missing URL is emitted and the m3u8 extractor is
        never handed ``None``.
        """
        formats, subtitles = [], {}
        for source in traverse_obj(json_data, 'sources', 'sources_desktop', ...) or []:
            src = source.get('src') if isinstance(source, dict) else None
            if not src:
                continue
            # Prefer the URL's extension; fall back to the declared mimetype.
            ext = determine_ext(src, mimetype2ext(source.get('type')))
            if ext == 'm3u8':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(src, video_id)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            else:
                formats.append({'url': src, 'ext': ext})
        return formats, subtitles

    def _real_extract(self, url):
        """Extract the video at *url*, or hand off to YoutubeIE when a ``youtube_id`` is present."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        nextjs_json = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['initialState']['videos'][display_id]
        youtube_id = nextjs_json.get('youtube_id')
        if youtube_id:
            return self.url_result(
                f'https://www.youtube.com/watch?v={youtube_id}', YoutubeIE)

        formats, subtitles = self._get_formats_and_subtitle(nextjs_json, display_id)
        return {
            'id': display_id,
            'title': nextjs_json.get('title') or self._html_search_meta('og:title', webpage),
            'formats': formats,
            'subtitles': subtitles,
            'description': nextjs_json.get('description') or None,
            'duration': int_or_none(nextjs_json.get('duration')),
            'thumbnails': [
                {'url': thumbnail}
                for thumbnail in traverse_obj(nextjs_json, ('default_thumbnails', ...))],
            'channel_id': traverse_obj(nextjs_json, 'channel', 'channel_slug'),
            'timestamp': parse_iso8601(nextjs_json.get('created')),
            'release_timestamp': parse_iso8601(nextjs_json.get('published')),
            'view_count': int_or_none(nextjs_json.get('views')),
        }
|
||||
@ -1,83 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
dict_get,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class Alsace20TVBaseIE(InfoExtractor):
    """Shared extraction logic for alsace20.tv video pages and embeds."""

    def _extract_video(self, video_id, url=None):
        """Return the info dict for *video_id*.

        Title, formats, view count and the primary thumbnail come from the
        player JSON endpoint. When *url* is given, that page is also downloaded
        (non-fatally) to supply the description, the duration and a fallback
        thumbnail; every page-derived field is optional.
        """
        info = self._download_json(
            f'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key={video_id}&habillage=0&mode=html',
            video_id) or {}
        title = info.get('titre')

        formats = []
        for res, fmt_url in (info.get('files') or {}).items():
            if not fmt_url:
                # A null/empty entry has nothing to download and would break the
                # substring test below.
                continue
            formats.extend(
                self._extract_smil_formats(fmt_url, video_id, fatal=False)
                if '/smil:_' in fmt_url
                else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False))

        webpage = (url and self._download_webpage(url, video_id, fatal=False)) or ''
        thumbnail = url_or_none(dict_get(info, ('image', 'preview')) or self._og_search_thumbnail(webpage))
        # Six digits between '/' and '_' in the thumbnail URL are read as YYMMDD.
        upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None)
        upload_date = unified_strdate(f'20{upload_date[:2]}-{upload_date[2:4]}-{upload_date[4:]}') if upload_date else None
        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': clean_html(get_element_by_class('wysiwyg', webpage)),
            'upload_date': upload_date,
            'thumbnail': thumbnail,
            # Duration is optional metadata: a page without the og tag must not
            # abort the extraction, hence default=None.
            'duration': int_or_none(self._og_search_property('video:duration', webpage, default=None)),
            'view_count': int_or_none(info.get('nb_vues')),
        }
|
||||
|
||||
|
||||
class Alsace20TVIE(Alsace20TVBaseIE):
    """Extractor for regular alsace20.tv video pages."""

    _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/(?:[\w-]+/)+[\w-]+-(?P<id>[\w]+)'
    _TESTS = [
        {
            'url': 'https://www.alsace20.tv/VOD/Actu/JT/Votre-JT-jeudi-3-fevrier-lyNHCXpYJh.html',
            'info_dict': {
                'id': 'lyNHCXpYJh',
                'ext': 'mp4',
                'description': 'md5:fc0bc4a0692d3d2dba4524053de4c7b7',
                'title': 'Votre JT du jeudi 3 février',
                'upload_date': '20220203',
                'thumbnail': r're:https?://.+\.jpg',
                'duration': 1073,
                'view_count': int,
            },
        },
    ]

    def _real_extract(self, url):
        # The page URL is forwarded so the base class can also read page metadata.
        return self._extract_video(self._match_id(url), url)
|
||||
|
||||
|
||||
class Alsace20TVEmbedIE(Alsace20TVBaseIE):
    """Extractor for alsace20.tv ``/emb/`` player URLs."""

    _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/emb/(?P<id>[\w]+)'
    _TESTS = [
        {
            'url': 'https://www.alsace20.tv/emb/lyNHCXpYJh',
            # 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
            'info_dict': {
                'id': 'lyNHCXpYJh',
                'ext': 'mp4',
                'title': 'Votre JT du jeudi 3 février',
                'upload_date': '20220203',
                'thumbnail': r're:https?://.+\.jpg',
                'view_count': int,
            },
            'params': {
                'format': 'bestvideo',
            },
        },
    ]

    def _real_extract(self, url):
        # No page URL is passed on, so only the player JSON is consulted.
        return self._extract_video(self._match_id(url))
|
||||
@ -1,98 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class AnchorFMEpisodeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://anchor\.fm/(?P<channel_name>\w+)/(?:embed/)?episodes/[\w-]+-(?P<episode_id>\w+)'
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
|
||||
_TESTS = [{
|
||||
'url': 'https://anchor.fm/lovelyti/episodes/Chrisean-Rock-takes-to-twitter-to-announce-shes-pregnant--Blueface-denies-he-is-the-father-e1tpt3d',
|
||||
'info_dict': {
|
||||
'id': 'e1tpt3d',
|
||||
'ext': 'mp3',
|
||||
'title': ' Chrisean Rock takes to twitter to announce she\'s pregnant, Blueface denies he is the father!',
|
||||
'description': 'md5:207d167de3e28ceb4ddc1ebf5a30044c',
|
||||
'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_nologo/1034827/1034827-1658438968460-5f3bfdf3601e8.jpg',
|
||||
'duration': 624.718,
|
||||
'uploader': 'Lovelyti ',
|
||||
'uploader_id': '991541',
|
||||
'channel': 'lovelyti',
|
||||
'modified_date': '20230121',
|
||||
'modified_timestamp': 1674285178,
|
||||
'release_date': '20230121',
|
||||
'release_timestamp': 1674285179,
|
||||
'episode_id': 'e1tpt3d',
|
||||
},
|
||||
}, {
|
||||
# embed url
|
||||
'url': 'https://anchor.fm/apakatatempo/embed/episodes/S2E75-Perang-Bintang-di-Balik-Kasus-Ferdy-Sambo-dan-Ismail-Bolong-e1shjqd',
|
||||
'info_dict': {
|
||||
'id': 'e1shjqd',
|
||||
'ext': 'mp3',
|
||||
'title': 'S2E75 Perang Bintang di Balik Kasus Ferdy Sambo dan Ismail Bolong',
|
||||
'description': 'md5:9e95ad9293bf00178bf8d33e9cb92c41',
|
||||
'duration': 1042.008,
|
||||
'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg',
|
||||
'release_date': '20221221',
|
||||
'release_timestamp': 1671595916,
|
||||
'modified_date': '20221221',
|
||||
'modified_timestamp': 1671590834,
|
||||
'channel': 'apakatatempo',
|
||||
'uploader': 'Podcast Tempo',
|
||||
'uploader_id': '2585461',
|
||||
'season': 'Season 2',
|
||||
'season_number': 2,
|
||||
'episode_id': 'e1shjqd',
|
||||
},
|
||||
}]
|
||||
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://podcast.tempo.co/podcast/192/perang-bintang-di-balik-kasus-ferdy-sambo-dan-ismail-bolong',
|
||||
'info_dict': {
|
||||
'id': 'e1shjqd',
|
||||
'ext': 'mp3',
|
||||
'release_date': '20221221',
|
||||
'duration': 1042.008,
|
||||
'season': 'Season 2',
|
||||
'modified_timestamp': 1671590834,
|
||||
'uploader_id': '2585461',
|
||||
'modified_date': '20221221',
|
||||
'description': 'md5:9e95ad9293bf00178bf8d33e9cb92c41',
|
||||
'season_number': 2,
|
||||
'title': 'S2E75 Perang Bintang di Balik Kasus Ferdy Sambo dan Ismail Bolong',
|
||||
'release_timestamp': 1671595916,
|
||||
'episode_id': 'e1shjqd',
|
||||
'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg',
|
||||
'uploader': 'Podcast Tempo',
|
||||
'channel': 'apakatatempo',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Build the info dict for a single episode from the anchor.fm v3 episodes API."""
    match = self._match_valid_url(url)
    channel_name = match.group('channel_name')
    episode_id = match.group('episode_id')
    api_data = self._download_json(
        f'https://anchor.fm/api/v3/episodes/{episode_id}', episode_id)

    def episode_field(key):
        # Single-key lookup below the top-level 'episode' object
        return traverse_obj(api_data, ('episode', key))

    def creator_field(key):
        # Single-key lookup below the top-level 'creator' object
        return traverse_obj(api_data, ('creator', key))

    # First available of the full description and its preview
    raw_description = traverse_obj(
        api_data, ('episode', ('description', 'descriptionPreview')), get_all=False)

    # Enclosure URL is preferred; the first entry of 'episodeAudios' is the fallback path
    media_url = traverse_obj(
        api_data, ('episode', 'episodeEnclosureUrl'), ('episodeAudios', 0, 'url'))

    return {
        'id': episode_id,
        'title': episode_field('title'),
        'url': media_url,
        'ext': 'mp3',
        'vcodec': 'none',
        'thumbnail': episode_field('episodeImage'),
        'description': clean_html(raw_description),
        # The API value is divided by 1000 to get seconds
        'duration': float_or_none(episode_field('duration'), 1000),
        'modified_timestamp': unified_timestamp(episode_field('modified')),
        'release_timestamp': int_or_none(episode_field('publishOnUnixTimestamp')),
        'episode_id': episode_id,
        'uploader': creator_field('name'),
        'uploader_id': str_or_none(creator_field('userId')),
        'season_number': int_or_none(episode_field('podcastSeasonNumber')),
        # Channel slug from the URL wins; the creator's vanity slug is the fallback
        'channel': channel_name or creator_field('vanitySlug'),
    }
|
||||
@ -1,277 +0,0 @@
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class AppleTrailersIE(InfoExtractor):
    """Extract all clips of a movie page on trailers.apple.com as a playlist.

    The per-film JSON feed is tried first; when it cannot be downloaded the
    extractor falls back to the page's ``itunes.inc`` playlist include plus
    one settings JSON per trailer.
    """
    IE_NAME = 'appletrailers'
    _VALID_URL = r'https?://(?:www\.|movie)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
    _TESTS = [{
        'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
        'info_dict': {
            'id': '5111',
            'title': 'Man of Steel',
        },
        'playlist': [
            {
                'md5': 'd97a8e575432dbcb81b7c3acb741f8a8',
                'info_dict': {
                    'id': 'manofsteel-trailer4',
                    'ext': 'mov',
                    'duration': 111,
                    'title': 'Trailer 4',
                    'upload_date': '20130523',
                    'uploader_id': 'wb',
                },
            },
            {
                'md5': 'b8017b7131b721fb4e8d6f49e1df908c',
                'info_dict': {
                    'id': 'manofsteel-trailer3',
                    'ext': 'mov',
                    'duration': 182,
                    'title': 'Trailer 3',
                    'upload_date': '20130417',
                    'uploader_id': 'wb',
                },
            },
            {
                'md5': 'd0f1e1150989b9924679b441f3404d48',
                'info_dict': {
                    'id': 'manofsteel-trailer',
                    'ext': 'mov',
                    'duration': 148,
                    'title': 'Trailer',
                    'upload_date': '20121212',
                    'uploader_id': 'wb',
                },
            },
            {
                'md5': '5fe08795b943eb2e757fa95cb6def1cb',
                'info_dict': {
                    'id': 'manofsteel-teaser',
                    'ext': 'mov',
                    'duration': 93,
                    'title': 'Teaser',
                    'upload_date': '20120721',
                    'uploader_id': 'wb',
                },
            },
        ],
    }, {
        'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
        'info_dict': {
            'id': '4489',
            'title': 'Blackthorn',
        },
        'playlist_mincount': 2,
        'expected_warnings': ['Unable to download JSON metadata'],
    }, {
        # json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json
        'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/',
        'info_dict': {
            'id': '15881',
            'title': 'Kung Fu Panda 3',
        },
        'playlist_mincount': 4,
    }, {
        'url': 'http://trailers.apple.com/ca/metropole/autrui/',
        'only_matching': True,
    }, {
        'url': 'http://movietrailers.apple.com/trailers/focus_features/kuboandthetwostrings/',
        'only_matching': True,
    }]

    # Captures the argument of the iTunes.playURL(...) call in onClick attributes
    _JSON_RE = r'iTunes.playURL\((.*?)\);'

    @staticmethod
    def _clip_id(movie, clip_title):
        # Entry id: movie slug + '-' + lowercased title stripped to ASCII alphanumerics
        return movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower()

    def _real_extract(self, url):
        """Return a playlist of the movie's clips, using the JSON feed when available."""
        mobj = self._match_valid_url(url)
        movie = mobj.group('movie')
        uploader_id = mobj.group('company')

        webpage = self._download_webpage(url, movie)
        film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
        # Non-fatal: a failed download falls through to the legacy path below
        film_data = self._download_json(
            f'http://trailers.apple.com/trailers/feeds/data/{film_id}.json',
            film_id, fatal=False)

        if film_data:
            entries = []
            for clip in film_data.get('clips', []):
                clip_title = clip['title']

                formats = []
                for version, version_data in clip.get('versions', {}).items():
                    for size, size_data in version_data.get('sizes', {}).items():
                        src = size_data.get('src')
                        if not src:
                            continue
                        formats.append({
                            'format_id': f'{version}-{size}',
                            # Rewrite e.g. '_720p.mov' to '_h720p.mov'
                            'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
                            'width': int_or_none(size_data.get('width')),
                            'height': int_or_none(size_data.get('height')),
                            # First two characters of the version key are used as the language
                            'language': version[:2],
                        })

                entries.append({
                    'id': self._clip_id(movie, clip_title),
                    'formats': formats,
                    'title': clip_title,
                    'thumbnail': clip.get('screen') or clip.get('thumb'),
                    'duration': parse_duration(clip.get('runtime') or clip.get('faded')),
                    'upload_date': unified_strdate(clip.get('posted')),
                    'uploader_id': uploader_id,
                })

            page_data = film_data.get('page', {})
            return self.playlist_result(entries, film_id, page_data.get('movie_title'))

        # Legacy path: parse the HTML playlist include as XML
        playlist_url = urllib.parse.urljoin(url, 'includes/playlists/itunes.inc')

        def fix_html(s):
            # Make the HTML fragment well-formed enough for the XML parser
            s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
            s = re.sub(r'<img ([^<]*?)/?>', r'<img \1/>', s)
            # The ' in the onClick attributes are not escaped, it couldn't be parsed
            # like: http://trailers.apple.com/trailers/wb/gravity/

            def _clean_json(m):
                # Replace raw apostrophes by their HTML entity so the attribute
                # value stays parseable
                return 'iTunes.playURL({});'.format(m.group(1).replace('\'', '&#39;'))
            s = re.sub(self._JSON_RE, _clean_json, s)
            return f'<html>{s}</html>'
        doc = self._download_xml(playlist_url, movie, transform_source=fix_html)

        playlist = []
        for li in doc.findall('./div/ul/li'):
            on_click = li.find('.//a').attrib['onClick']
            trailer_info_json = self._search_regex(
                self._JSON_RE, on_click, 'trailer info')
            trailer_info = json.loads(trailer_info_json)
            first_url = trailer_info.get('url')
            if not first_url:
                continue
            title = trailer_info['title']
            video_id = self._clip_id(movie, title)
            thumbnail = li.find('.//img').attrib['src']
            upload_date = trailer_info['posted'].replace('-', '')

            # Runtime is parsed as 'minutes:seconds'; anything else yields no duration
            runtime = trailer_info['runtime']
            m = re.search(r'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime)
            duration = None
            if m:
                duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))

            # Settings JSON is named after the file name of the first URL,
            # minus its last '_'-separated suffix
            trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
            settings_json_url = urllib.parse.urljoin(url, f'includes/settings/{trailer_id}.json')
            settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')

            formats = []
            for fmt in settings['metadata']['sizes']:
                # The src is a file pointing to the real video file
                format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', fmt['src'])
                formats.append({
                    'url': format_url,
                    'format': fmt['type'],
                    'width': int_or_none(fmt['width']),
                    'height': int_or_none(fmt['height']),
                })

            playlist.append({
                '_type': 'video',
                'id': video_id,
                'formats': formats,
                'title': title,
                'duration': duration,
                'thumbnail': thumbnail,
                'upload_date': upload_date,
                'uploader_id': uploader_id,
                'http_headers': {
                    'User-Agent': 'QuickTime compatible (yt-dlp)',
                },
            })

        return {
            '_type': 'playlist',
            'id': movie,
            'entries': playlist,
        }
|
||||
|
||||
|
||||
class AppleTrailersSectionIE(InfoExtractor):
    """Extract a trailers.apple.com home-page section (``#section=...``) as a playlist."""
    IE_NAME = 'appletrailers:section'
    # URL fragment value -> feed file name and playlist title
    _SECTIONS = {
        'justadded': {
            'feed_path': 'just_added',
            'title': 'Just Added',
        },
        'exclusive': {
            'feed_path': 'exclusive',
            'title': 'Exclusive',
        },
        'justhd': {
            'feed_path': 'just_hd',
            'title': 'Just HD',
        },
        'mostpopular': {
            'feed_path': 'most_pop',
            'title': 'Most Popular',
        },
        'moviestudios': {
            'feed_path': 'studios',
            'title': 'Movie Studios',
        },
    }
    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>{})'.format('|'.join(_SECTIONS))
    _TESTS = [{
        'url': 'http://trailers.apple.com/#section=justadded',
        'info_dict': {
            'title': 'Just Added',
            'id': 'justadded',
        },
        'playlist_mincount': 80,
    }, {
        'url': 'http://trailers.apple.com/#section=exclusive',
        'info_dict': {
            'title': 'Exclusive',
            'id': 'exclusive',
        },
        'playlist_mincount': 80,
    }, {
        'url': 'http://trailers.apple.com/#section=justhd',
        'info_dict': {
            'title': 'Just HD',
            'id': 'justhd',
        },
        'playlist_mincount': 80,
    }, {
        'url': 'http://trailers.apple.com/#section=mostpopular',
        'info_dict': {
            'title': 'Most Popular',
            'id': 'mostpopular',
        },
        'playlist_mincount': 30,
    }, {
        'url': 'http://trailers.apple.com/#section=moviestudios',
        'info_dict': {
            'title': 'Movie Studios',
            'id': 'moviestudios',
        },
        'playlist_mincount': 80,
    }]

    def _real_extract(self, url):
        """Download the section's JSON feed and return one URL entry per feed item."""
        section = self._match_id(url)
        section_info = self._SECTIONS[section]
        feed_url = f'http://trailers.apple.com/trailers/home/feeds/{section_info["feed_path"]}.json'
        section_data = self._download_json(feed_url, section)

        entries = []
        for item in section_data:
            # 'location' is appended to the site root to form the movie page URL
            entries.append(self.url_result('http://trailers.apple.com' + item['location']))
        return self.playlist_result(entries, section, section_info['title'])
|
||||
@ -1,107 +0,0 @@
|
||||
import datetime as dt
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
jwt_encode,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class ATVAtIE(InfoExtractor):
    """Extract atv.at TV pages as a ``multi_video`` result of their clips."""
    _VALID_URL = r'https?://(?:www\.)?atv\.at/tv/(?:[^/]+/){2,3}(?P<id>.*)'

    _TESTS = [{
        'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/bauer-sucht-frau/bauer-sucht-frau-staffel-18-folge-3-die-hofwochen',
        'md5': '3c3b4aaca9f63e32b35e04a9c2515903',
        'info_dict': {
            'id': 'v-ce9cgn1e70n5-1',
            'ext': 'mp4',
            'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen',
        },
    }, {
        'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1',
        'only_matching': True,
    }]

    # extracted from bootstrap.js function (search for e.encryption_key and use your browser's debugger)
    _ACCESS_ID = 'x_atv'
    _ENCRYPTION_KEY = 'Hohnaekeishoogh2omaeghooquooshia'

    def _extract_video_info(self, url, content, video):
        """Build the info dict of one clip from its page content entry and its sources entry."""
        clip_id = content.get('splitId', content['id'])
        formats = []
        for protocol, variant in video['urls'].items():
            # Only the 'clear' variant's URL is read
            source_url = try_get(variant, lambda x: x['clear']['url'])
            if not source_url:
                continue
            if protocol == 'dash':
                formats.extend(self._extract_mpd_formats(
                    source_url, clip_id, mpd_id=protocol, fatal=False))
                continue
            if protocol == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    source_url, clip_id, 'mp4', 'm3u8_native',
                    m3u8_id=protocol, fatal=False))
                continue
            # Any other protocol is offered as a direct URL
            formats.append({
                'url': source_url,
                'format_id': protocol,
            })

        return {
            'id': clip_id,
            'title': content.get('title'),
            'duration': float_or_none(content.get('duration')),
            'series': content.get('tvShowTitle'),
            'formats': formats,
        }

    def _real_extract(self, url):
        """Read the page state JSON, request the sources with a signed token and collect all clips."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        state_json = self._search_regex(
            r'<script id="state" type="text/plain">(.*)</script>', webpage, 'json_data')
        json_data = self._parse_json(state_json, video_id=video_id)

        video_title = json_data['views']['default']['page']['title']
        content_resource = json_data['views']['default']['page']['contentResource']
        content_id = content_resource[0]['id']
        # Each clip is addressed by its index in contentResource
        content_ids = []
        for index, content in enumerate(content_resource):
            content_ids.append({
                'id': index,
                'subclip_start': content['start'],
                'subclip_end': content['end'],
            })

        # Token validity window: five minutes either side of the request time
        time_of_request = dt.datetime.now()
        five_minutes = dt.timedelta(minutes=5)
        not_before = time_of_request - five_minutes
        expire = time_of_request + five_minutes
        payload = {
            'content_ids': {
                content_id: content_ids,
            },
            'secure_delivery': True,
            'iat': int(time_of_request.timestamp()),
            'nbf': int(not_before.timestamp()),
            'exp': int(expire.timestamp()),
        }
        token = jwt_encode(payload, self._ENCRYPTION_KEY, headers={'kid': self._ACCESS_ID})
        videos = self._download_json(
            'https://vas-v4.p7s1video.net/4.0/getsources',
            content_id, 'Downloading videos JSON', query={
                'token': token,
            })

        # Only the first item of the 'data' mapping is used
        video_id, videos_data = next(iter(videos['data'].items()))
        error_msg = try_get(videos_data, lambda x: x['error']['title'])
        if error_msg == 'Geo check failed':
            self.raise_geo_restricted(error_msg)
        elif error_msg:
            raise ExtractorError(error_msg)

        entries = []
        for video in videos_data:
            # video['id'] indexes back into contentResource (see content_ids above)
            entries.append(self._extract_video_info(url, content_resource[video['id']], video))

        return {
            '_type': 'multi_video',
            'id': video_id,
            'title': video_title,
            'entries': entries,
        }
|
||||
@ -1,181 +0,0 @@
|
||||
import base64
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
format_field,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class AWAANIE(InfoExtractor):
    """Dispatch awaan.ae / dcndigital.ae show URLs to the video or season extractor."""
    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<id>\d+)/(?P<season_id>\d+))?'

    def _real_extract(self, url):
        """Redirect to a video, a season, or the whole show, in that order of preference."""
        show_id, video_id, season_id = self._match_valid_url(url).groups()

        # A positive video id takes precedence over everything else
        if video_id and int(video_id) > 0:
            return self.url_result(
                f'http://awaan.ae/media/{video_id}', 'AWAANVideo')

        # Otherwise a positive season id selects that season; the show id is smuggled along
        if season_id and int(season_id) > 0:
            season_url = smuggle_url(
                f'http://awaan.ae/program/season/{season_id}',
                {'show_id': show_id})
            return self.url_result(season_url, 'AWAANSeason')

        return self.url_result(
            f'http://awaan.ae/program/{show_id}', 'AWAANSeason')
|
||||
|
||||
|
||||
class AWAANBaseIE(InfoExtractor):
    """Shared metadata parsing for the AWAAN video and live extractors."""

    def _parse_video_data(self, video_data, video_id, is_live):
        """Map an API metadata dict to the common info-dict fields.

        English fields are preferred over Arabic ones; ``title_ar`` is
        required when ``title_en`` is missing or empty.
        """
        title = video_data.get('title_en') or video_data['title_ar']
        description = video_data.get('description_en') or video_data.get('description_ar')
        # 'img' is formatted into the analytics URL template below
        thumbnail = format_field(
            video_data.get('img'), None, 'http://admin.mangomolo.com/analytics/%s')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': int_or_none(video_data.get('duration')),
            # create_time is parsed with ' ' passed as the delimiter argument
            'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
            'is_live': is_live,
            'uploader_id': video_data.get('user_id'),
        }
|
||||
|
||||
|
||||
class AWAANVideoIE(AWAANBaseIE):
    """Extract a single AWAAN video by delegating playback to the Mangomolo embed."""
    IE_NAME = 'awaan:video'
    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375',
        'md5': '5f61c33bfc7794315c671a62d43116aa',
        'info_dict': {
            'id': '17375',
            'ext': 'mp4',
            'title': 'رحلة العمر : الحلقة 1',
            'description': 'md5:0156e935d870acb8ef0a66d24070c6d6',
            'duration': 2041,
            'timestamp': 1227504126,
            'upload_date': '20081124',
            'uploader_id': '71',
        },
    }, {
        'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the video metadata and return a url_transparent result for the embed page."""
        video_id = self._match_id(url)

        video_data = self._download_json(
            f'http://admin.mangomolo.com/analytics/index.php/plus/video?id={video_id}',
            video_id, headers={'Origin': 'http://awaan.ae'})
        info = self._parse_video_data(video_data, video_id, False)

        # The embed page is addressed by id, user id and signature from the metadata
        embed_query = urllib.parse.urlencode({
            'id': video_data['id'],
            'user_id': video_data['user_id'],
            'signature': video_data['signature'],
            'countries': 'Q0M=',
            'filter': 'DENY',
        })
        embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + embed_query

        info['_type'] = 'url_transparent'
        info['url'] = embed_url
        info['ie_key'] = 'MangomoloVideo'
        return info
|
||||
|
||||
|
||||
class AWAANLiveIE(AWAANBaseIE):
    """Extract an AWAAN live channel by delegating playback to the Mangomolo live embed."""
    IE_NAME = 'awaan:live'
    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)'
    _TEST = {
        'url': 'http://awaan.ae/live/6/dubai-tv',
        'info_dict': {
            'id': '6',
            'ext': 'mp4',
            'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'upload_date': '20150107',
            'timestamp': 1420588800,
            'uploader_id': '71',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Fetch the channel details and return a url_transparent result for the embed page."""
        channel_id = self._match_id(url)

        channel_data = self._download_json(
            f'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id={channel_id}',
            channel_id, headers={'Origin': 'http://awaan.ae'})
        info = self._parse_video_data(channel_data, channel_id, True)

        def b64(value):
            # Base64-encode a string value for use as a query parameter
            return base64.b64encode(value.encode()).decode()

        embed_query = urllib.parse.urlencode({
            'id': b64(channel_data['user_id']),
            'channelid': b64(channel_data['id']),
            'signature': channel_data['signature'],
            'countries': 'Q0M=',
            'filter': 'DENY',
        })
        embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + embed_query

        info['_type'] = 'url_transparent'
        info['url'] = embed_url
        info['ie_key'] = 'MangomoloLive'
        return info
|
||||
|
||||
|
||||
class AWAANSeasonIE(InfoExtractor):
    """Extract one season of an AWAAN program as a playlist of AWAANVideo entries."""
    IE_NAME = 'awaan:season'
    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
    _TEST = {
        'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A',
        'info_dict': {
            'id': '7910',
            'title': 'محاضرات الشيخ الشعراوي',
        },
        'playlist_mincount': 27,
    }

    def _real_extract(self, url):
        """Resolve the show (and season, when given) and list the show's videos.

        For season URLs the show id comes from smuggled data when present,
        otherwise from the season_info endpoint. Without a season id in the
        URL, the show's ``default_season`` is used. The playlist title is the
        matching season's title, or None when no season entry matches.
        """
        url, smuggled_data = unsmuggle_url(url, {})
        show_id, season_id = self._match_valid_url(url).groups()

        data = {}
        if season_id:
            data['season'] = season_id
            show_id = smuggled_data.get('show_id')
            if show_id is None:
                season = self._download_json(
                    f'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id={season_id}',
                    season_id, headers={'Origin': 'http://awaan.ae'})
                show_id = season['id']
        data['show_id'] = show_id
        show = self._download_json(
            'http://admin.mangomolo.com/analytics/index.php/plus/show',
            show_id, data=urlencode_postdata(data), headers={
                'Origin': 'http://awaan.ae',
                'Content-Type': 'application/x-www-form-urlencoded',
            })
        if not season_id:
            season_id = show['default_season']

        # Start from None so a missing season entry yields an untitled playlist
        # instead of an UnboundLocalError
        title = None
        for season in show['seasons']:
            # Compare as strings: the URL-derived id is a str, while the
            # API value's type is not guaranteed to match
            if str(season['id']) == str(season_id):
                title = season.get('title_en') or season['title_ar']

        entries = []
        for video in show['videos']:
            video_id = str(video['id'])
            entries.append(self.url_result(
                f'http://awaan.ae/media/{video_id}', 'AWAANVideo', video_id))

        return self.playlist_result(entries, season_id, title)
|
||||
@ -1,89 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AxsIE(InfoExtractor):
    """Extract axs.tv videos via the myspotlight.tv dotplayer API."""
    IE_NAME = 'axs.tv'
    _VALID_URL = r'https?://(?:www\.)?axs\.tv/(?:channel/(?:[^/?#]+/)+)?video/(?P<id>[^/?#]+)'

    _TESTS = [{
        'url': 'https://www.axs.tv/video/5f4dc776b70e4f1c194f22ef/',
        'md5': '8d97736ae8e50c64df528e5e676778cf',
        'info_dict': {
            'id': '5f4dc776b70e4f1c194f22ef',
            'title': 'Small Town',
            'ext': 'mp4',
            'description': 'md5:e314d28bfaa227a4d7ec965fae19997f',
            'upload_date': '20230602',
            'timestamp': 1685729564,
            'duration': 1284.216,
            'series': 'Rock & Roll Road Trip with Sammy Hagar',
            'season': 'Season 2',
            'season_number': 2,
            'episode': '3',
            'thumbnail': 'https://images.dotstudiopro.com/5f4e9d330a0c3b295a7e8394',
        },
    }, {
        'url': 'https://www.axs.tv/channel/rock-star-interview/video/daryl-hall',
        'md5': '300ae795cd8f9984652c0949734ffbdc',
        'info_dict': {
            'id': '5f488148b70e4f392572977c',
            'display_id': 'daryl-hall',
            'title': 'Daryl Hall',
            'ext': 'mp4',
            'description': 'md5:e54ecaa0f4b5683fc9259e9e4b196628',
            'upload_date': '20230214',
            'timestamp': 1676403615,
            'duration': 2570.668,
            'series': 'The Big Interview with Dan Rather',
            'season': 'Season 3',
            'season_number': 3,
            'episode': '5',
            'thumbnail': 'https://images.dotstudiopro.com/5f4d1901f340b50d937cec32',
        },
    }]

    def _real_extract(self, url):
        """Read the video and company ids from the page, then query the player API."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The page assigns a JS object to 'mountObj' holding both ids
        mount_obj = self._search_json(
            r'mountObj\s*=', webpage, 'video ID data', display_id,
            transform_source=js_to_json)
        video_id = mount_obj['video_id']
        company_id = mount_obj['company_id']

        api_response = self._download_json(
            f'https://api.myspotlight.tv/dotplayer/video/{company_id}/{video_id}',
            video_id, query={'device_type': 'desktop_web'})
        meta = api_response['video']

        formats = self._extract_m3u8_formats(
            meta['video_m3u8'], video_id, 'mp4', m3u8_id='hls')

        # Group caption tracks by language; entries are filtered by url_or_none on srtPath
        subtitles = {}
        captions = traverse_obj(meta, ('closeCaption', lambda _, v: url_or_none(v['srtPath'])))
        for caption in captions:
            lang = caption.get('srtShortLang') or 'en'
            track = {'ext': caption.get('srtExt'), 'url': caption['srtPath']}
            subtitles.setdefault(lang, []).append(track)

        metadata = traverse_obj(meta, {
            'title': ('title', {str}),
            'description': ('description', {str}),
            'series': ('seriestitle', {str}),
            'season_number': ('season', {int}),
            'episode': ('episode', {str}),
            'duration': ('duration', {float_or_none}),
            'timestamp': ('updated_at', {parse_iso8601}),
            'thumbnail': ('thumb', {url_or_none}),
        })

        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            **metadata,
            'subtitles': subtitles,
        }
|
||||
@ -1,111 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE, YoutubeTabIE
|
||||
|
||||
|
||||
class BeatBumpVideoIE(InfoExtractor):
    """Redirect beatbump ``listen`` URLs to the matching YouTube Music watch URL."""
    _VALID_URL = r'https?://beatbump\.(?:ml|io)/listen\?id=(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://beatbump.ml/listen?id=MgNrAu2pzNs',
        'md5': '5ff3fff41d3935b9810a9731e485fe66',
        'info_dict': {
            'id': 'MgNrAu2pzNs',
            'ext': 'mp4',
            'artist': 'Stephen',
            'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
            'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
            'upload_date': '20190312',
            'categories': ['Music'],
            'playable_in_embed': True,
            'duration': 169,
            'like_count': int,
            'alt_title': 'Voyeur Girl',
            'view_count': int,
            'track': 'Voyeur Girl',
            'uploader': 'Stephen',
            'title': 'Voyeur Girl',
            'channel_follower_count': int,
            'age_limit': 0,
            'availability': 'public',
            'live_status': 'not_live',
            'album': 'it\'s too much love to know my dear',
            'channel': 'Stephen',
            'comment_count': int,
            'description': 'md5:7ae382a65843d6df2685993e90a8628f',
            'tags': 'count:11',
            'creator': 'Stephen',
            'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
            'channel_is_verified': True,
            'heatmap': 'count:100',
        },
    }, {
        'url': 'https://beatbump.io/listen?id=LDGZAprNGWo',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Hand the id over to YoutubeIE via a music.youtube.com watch URL."""
        youtube_id = self._match_id(url)
        watch_url = f'https://music.youtube.com/watch?v={youtube_id}'
        return self.url_result(watch_url, YoutubeIE, youtube_id)
|
||||
|
||||
|
||||
class BeatBumpPlaylistIE(InfoExtractor):
    """Redirect beatbump release/artist/playlist URLs to YouTube Music browse URLs."""
    _VALID_URL = r'https?://beatbump\.(?:ml|io)/(?:release\?id=|artist/|playlist/)(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://beatbump.ml/release?id=MPREb_gTAcphH99wE',
        'playlist_count': 50,
        'info_dict': {
            'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
            'availability': 'unlisted',
            'view_count': int,
            'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
            'description': '',
            'tags': [],
            'modified_date': '20231110',
        },
        'expected_warnings': ['YouTube Music is not directly supported'],
    }, {
        'url': 'https://beatbump.ml/artist/UC_aEa8K-EOJ3D6gOs7HcyNg',
        'playlist_mincount': 1,
        'params': {'flatplaylist': True},
        'info_dict': {
            'id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
            'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
            'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
            'uploader_id': '@NoCopyrightSounds',
            'channel_follower_count': int,
            'title': 'NoCopyrightSounds',
            'uploader': 'NoCopyrightSounds',
            'description': 'md5:cd4fd53d81d363d05eee6c1b478b491a',
            'channel': 'NoCopyrightSounds',
            'tags': 'count:65',
            'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
            'channel_is_verified': True,
        },
        'expected_warnings': ['YouTube Music is not directly supported'],
    }, {
        'url': 'https://beatbump.ml/playlist/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
        'playlist_mincount': 1,
        'params': {'flatplaylist': True},
        'info_dict': {
            'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
            'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
            'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
            'uploader_id': '@NoCopyrightSounds',
            'title': 'NCS : All Releases 💿',
            'uploader': 'NoCopyrightSounds',
            'availability': 'public',
            'channel': 'NoCopyrightSounds',
            'tags': [],
            'modified_date': '20231112',
            'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
        },
        'expected_warnings': ['YouTube Music is not directly supported'],
    }, {
        'url': 'https://beatbump.io/playlist/VLPLFCHGavqRG-q_2ZhmgU2XB2--ZY6irT1c',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Hand the id over to YoutubeTabIE via a music.youtube.com browse URL."""
        browse_id = self._match_id(url)
        browse_url = f'https://music.youtube.com/browse/{browse_id}'
        return self.url_result(browse_url, YoutubeTabIE, browse_id)
|
||||
@ -1,71 +0,0 @@
|
||||
import base64
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class BigflixIE(InfoExtractor):
    """Extract bigflix.com videos from the URL-quoted base64 URLs embedded in the page."""
    _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
    _TESTS = [{
        # 2 formats
        'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070',
        'info_dict': {
            'id': '16070',
            'ext': 'mp4',
            'title': 'Madarasapatinam',
            'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b',
            'formats': 'mincount:2',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # multiple formats
        'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Collect the per-height ``ContentURL_*`` formats plus the generic ``file=`` URL."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<div[^>]+class=["\']pagetitle["\'][^>]*>(.+?)</div>',
            webpage, 'title')

        def decode_url(quoted_b64_url):
            # URLs are percent-quoted base64 of the UTF-8 encoded real URL
            unquoted = urllib.parse.unquote(quoted_b64_url)
            return base64.b64decode(unquoted).decode('utf-8')

        formats = []
        content_urls = re.findall(
            r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage)
        for height, encoded_url in content_urls:
            video_url = decode_url(encoded_url)
            fmt = {
                'url': video_url,
                'format_id': f'{height}p',
                'height': int(height),
            }
            # rtmp URLs are given the flv extension
            if video_url.startswith('rtmp'):
                fmt['ext'] = 'flv'
            formats.append(fmt)

        file_url = self._search_regex(
            r'file=([^&]+)', webpage, 'video url', default=None)
        if file_url:
            video_url = decode_url(file_url)
            # Add the generic URL only when no format above already has it
            known_urls = [fmt['url'] for fmt in formats]
            if video_url not in known_urls:
                formats.append({
                    'url': video_url,
                })

        description = self._html_search_meta('description', webpage)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
        }
|
||||
@ -1,52 +0,0 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class BokeCCBaseIE(InfoExtractor):
    """Shared format extraction for pages embedding the p.bokecc.com player."""

    def _extract_bokecc_formats(self, webpage, video_id, format_id=None):
        """Return one format per ``quality`` element of the player's playinfo XML.

        The player parameters (``siteid`` and ``vid``) are read from the query
        string of the player script/embed ``src`` found in ``webpage``.
        """
        player_params_str = self._html_search_regex(
            r'<(?:script|embed)[^>]+src=(?P<q>["\'])(?:https?:)?//p\.bokecc\.com/(?:player|flash/player\.swf)\?(?P<query>.+?)(?P=q)',
            webpage, 'player params', group='query')

        player_params = urllib.parse.parse_qs(player_params_str)
        site_id = player_params['siteid'][0]
        vid = player_params['vid'][0]

        info_xml = self._download_xml(
            f'http://p.bokecc.com/servlet/playinfo?uid={site_id}&vid={vid}&m=1', video_id)

        formats = []
        for quality in info_xml.findall('./video/quality'):
            formats.append({
                'format_id': format_id,
                # The play URL is an attribute of the nested 'copy' element
                'url': quality.find('./copy').attrib['playurl'],
                'quality': int(quality.attrib['value']),
            })
        return formats
|
||||
|
||||
|
||||
class BokeCCIE(BokeCCBaseIE):
    """Extractor for union.bokecc.com ``playvideo.bo`` pages."""

    IE_DESC = 'CC视频'
    _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'

    _TESTS = [{
        'url': 'http://union.bokecc.com/playvideo.bo?vid=E0ABAE9D4F509B189C33DC5901307461&uid=FE644790DE9D154A',
        'info_dict': {
            'id': 'FE644790DE9D154A_E0ABAE9D4F509B189C33DC5901307461',
            'ext': 'flv',
            'title': 'BokeCC Video',
        },
    }]

    def _real_extract(self, url):
        """Build the video id from the ``uid``/``vid`` query values and extract formats."""
        raw_query = self._match_valid_url(url).group('query')
        qs = urllib.parse.parse_qs(raw_query)
        # Both identifiers are mandatory; without them there is nothing to look up
        if not (qs.get('vid') and qs.get('uid')):
            raise ExtractorError('Invalid URL', expected=True)

        uid = qs['uid'][0]
        vid = qs['vid'][0]
        video_id = '{}_{}'.format(uid, vid)

        page = self._download_webpage(url, video_id)
        formats = self._extract_bokecc_formats(page, video_id)

        return {
            'id': video_id,
            'title': 'BokeCC Video',  # no title provided in the webpage
            'formats': formats,
        }
|
||||
@ -1,74 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class CaffeineTVIE(InfoExtractor):
    """Extractor for caffeine.tv video pages, backed by the public activity API."""

    _VALID_URL = r'https?://(?:www\.)?caffeine\.tv/[^/?#]+/video/(?P<id>[\da-f-]+)'
    _TESTS = [{
        'url': 'https://www.caffeine.tv/TsuSurf/video/cffc0a00-e73f-11ec-8080-80017d29f26e',
        'info_dict': {
            'id': 'cffc0a00-e73f-11ec-8080-80017d29f26e',
            'ext': 'mp4',
            'title': 'GOOOOD MORNINNNNN #highlights',
            'timestamp': 1654702180,
            'upload_date': '20220608',
            'uploader': 'RahJON Wicc',
            'uploader_id': 'TsuSurf',
            'duration': 3145,
            'age_limit': 17,
            'thumbnail': 'https://www.caffeine.tv/broadcasts/776b6f84-9cd5-42e3-af1d-4a776eeed697/replay/lobby.jpg',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'tags': ['highlights', 'battlerap'],
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }]

    def _real_extract(self, url):
        """Fetch the activity JSON for the video and map it onto an info dict."""
        video_id = self._match_id(url)
        activity = self._download_json(
            f'https://api.caffeine.tv/social/public/activity/{video_id}', video_id)
        broadcast = traverse_obj(activity, ('broadcast_info', {dict})) or {}

        media_url = broadcast['video_url']
        if determine_ext(media_url) == 'm3u8':
            formats = self._extract_m3u8_formats(media_url, video_id, 'mp4')
        else:
            formats = [{'url': media_url}]

        activity_meta = traverse_obj(activity, {
            'like_count': ('like_count', {int_or_none}),
            'view_count': ('view_count', {int_or_none}),
            'comment_count': ('comment_count', {int_or_none}),
            'tags': ('tags', ..., {str}, filter),
            'uploader': ('user', 'name', {str}),
            'uploader_id': (((None, 'user'), 'username'), {str}, any),
            'is_live': ('is_live', {bool}),
        })
        broadcast_meta = traverse_obj(broadcast, {
            'title': ('broadcast_title', {str}),
            'duration': ('content_duration', {int_or_none}),
            'timestamp': ('broadcast_start_time', {parse_iso8601}),
            'thumbnail': ('preview_image_path', {urljoin(url)}),
        })

        # assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system
        rating_to_age = {
            'FOUR_PLUS': 0,
            'NINE_PLUS': 9,
            'TWELVE_PLUS': 12,
            'SEVENTEEN_PLUS': 17,
        }
        # Unknown or missing ratings fall back to the strictest limit
        age_limit = rating_to_age.get(broadcast.get('content_rating'), 17)

        return {
            'id': video_id,
            'formats': formats,
            **activity_meta,
            **broadcast_meta,
            'age_limit': age_limit,
        }
|
||||
@ -1,155 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none, int_or_none, make_archive_id, traverse_obj
|
||||
|
||||
|
||||
class CallinIE(InfoExtractor):
    """Extractor for callin.com episode pages (metadata comes from the Next.js payload)."""

    # The slug may contain digits (e.g. "episode-81-..."), so they must be part of the id
    _VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?P<id>[-a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'https://www.callin.com/episode/the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
        'info_dict': {
            'id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd',
            'title': 'The Title IX Regime and the Long March Through and Beyond the Institutions',
            'ext': 'ts',
            'display_id': 'the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
            'thumbnail': 're:https://.+\\.png',
            'description': 'First episode',
            'uploader': 'Wesley Yang',
            'timestamp': 1639404128.65,
            'upload_date': '20211213',
            'uploader_id': 'wesyang',
            'uploader_url': 'http://wesleyyang.substack.com',
            'channel': 'Conversations in Year Zero',
            'channel_id': '436d1f82ddeb30cd2306ea9156044d8d2cfdc3f1f1552d245117a42173e78553',
            'channel_url': 'https://callin.com/show/conversations-in-year-zero-oJNllRFSfx',
            'duration': 9951.936,
            'view_count': int,
            'categories': ['News & Politics', 'History', 'Technology'],
            'cast': ['Wesley Yang', 'KC Johnson', 'Gabi Abramovich'],
            'series': 'Conversations in Year Zero',
            'series_id': '436d1f82ddeb30cd2306ea9156044d8d2cfdc3f1f1552d245117a42173e78553',
            'episode': 'The Title IX Regime and the Long March Through and Beyond the Institutions',
            'episode_number': 1,
            'episode_id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd',
        },
    }, {
        'url': 'https://www.callin.com/episode/fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
        'md5': '14ede27ee2c957b7e4db93140fc0745c',
        'info_dict': {
            'id': 'c3dab47f237bf953d180d3f243477a84302798be0e0b29bc9ade6d60a69f04f5',
            'ext': 'ts',
            'title': 'FCC Commissioner Brendan Carr on Elon’s Starlink',
            'description': 'Or, why the government doesn’t like SpaceX',
            'channel': 'The Pull Request',
            'channel_url': 'https://callin.com/show/the-pull-request-ucnDJmEKAa',
            'duration': 3182.472,
            'series_id': '7e9c23156e4aecfdcaef46bfb2ed7ca268509622ec006c0f0f25d90e34496638',
            'uploader_url': 'http://thepullrequest.com',
            'upload_date': '20220902',
            'episode': 'FCC Commissioner Brendan Carr on Elon’s Starlink',
            'display_id': 'fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
            'series': 'The Pull Request',
            'channel_id': '7e9c23156e4aecfdcaef46bfb2ed7ca268509622ec006c0f0f25d90e34496638',
            'view_count': int,
            'uploader': 'Antonio García Martínez',
            'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/1ade9142625344045dc17cf523469ced1d93610762f4c886d06aa190a2f979e8.png',
            'episode_id': 'c3dab47f237bf953d180d3f243477a84302798be0e0b29bc9ade6d60a69f04f5',
            'timestamp': 1662100688.005,
        },
    }, {
        'url': 'https://www.callin.com/episode/episode-81-elites-melt-down-over-student-debt-lzxMidUnjA',
        'md5': '16f704ddbf82a27e3930533b12062f07',
        'info_dict': {
            'id': '8d06f869798f93a7814e380bceabea72d501417e620180416ff6bd510596e83c',
            'ext': 'ts',
            'title': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?',
            'description': 'Let’s talk todays episode about the primary election shake up in NYC and the elites melting down over student debt cancelation.',
            'channel': 'The DEBRIEF With Briahna Joy Gray',
            'channel_url': 'https://callin.com/show/the-debrief-with-briahna-joy-gray-siiFDzGegm',
            'duration': 10043.16,
            'series_id': '61cea58444465fd26674069703bd8322993bc9e5b4f1a6d0872690554a046ff7',
            'uploader_url': 'http://patreon.com/badfaithpodcast',
            'upload_date': '20220826',
            'episode': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?',
            'display_id': 'episode-81-elites-melt-down-over-student-debt-lzxMidUnjA',
            'series': 'The DEBRIEF With Briahna Joy Gray',
            'channel_id': '61cea58444465fd26674069703bd8322993bc9e5b4f1a6d0872690554a046ff7',
            'view_count': int,
            'uploader': 'Briahna Gray',
            'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/461ea0d86172cb6aff7d6c80fd49259cf5e64bdf737a4650f8bc24cf392ca218.png',
            'episode_id': '8d06f869798f93a7814e380bceabea72d501417e620180416ff6bd510596e83c',
            'timestamp': 1661476708.282,
        },
    }]

    def try_get_user_name(self, d):
        """Return the display name built from ``d['first']`` and ``d['last']``.

        Missing or empty parts are dropped; ``None`` is returned when neither
        part is available.
        """
        names = [d.get(n) for n in ('first', 'last')]
        # NOTE: the builtin next() accepts its default positionally only, so the
        # fallback is expressed with join/filter instead of next(..., default=None)
        return ' '.join(filter(None, names)) or None

    def _real_extract(self, url):
        """Extract a single episode, enriching it with show data when available."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        next_data = self._search_nextjs_data(webpage, display_id)
        episode = next_data['props']['pageProps']['episode']

        video_id = episode['id']
        title = episode.get('title') or self._generic_title('', webpage)
        m3u8_url = episode['m3u8']
        formats = self._extract_m3u8_formats(m3u8_url, display_id, ext='ts')

        show = traverse_obj(episode, ('show', 'title'))
        show_id = traverse_obj(episode, ('show', 'id'))
        show_url = traverse_obj(episode, ('show', 'linkObj', 'resourceUrl'))

        # The show JSON lives under a build-specific path; it is optional metadata
        show_json = None
        app_slug = (self._html_search_regex(
            '<script\\s+src=["\']/_next/static/([-_a-zA-Z0-9]+)/_',
            webpage, 'app slug', fatal=False) or next_data.get('buildId'))
        if app_slug and show_url and '/' in show_url:
            show_slug = show_url.rsplit('/', 1)[1]
            show_json_url = f'https://www.callin.com/_next/data/{app_slug}/show/{show_slug}.json'
            show_json = self._download_json(show_json_url, display_id, fatal=False)

        # Prefer the show's first host; fall back to the episode's first speaker
        host = (traverse_obj(show_json, ('pageProps', 'show', 'hosts', 0))
                or traverse_obj(episode, ('speakers', 0)))

        host_nick = traverse_obj(host, ('linkObj', 'resourceUrl'))
        host_nick = host_nick.rsplit('/', 1)[1] if (host_nick and '/' in host_nick) else None

        cast = list(filter(None, [
            self.try_get_user_name(u) for u in
            traverse_obj(episode, (('speakers', 'callerTags'), ...)) or []
        ]))

        # Episode numbers are counted back from the end of the show's episode list
        episode_list = traverse_obj(show_json, ('pageProps', 'show', 'episodes')) or []
        episode_number = next(
            (len(episode_list) - i for i, e in enumerate(episode_list) if e.get('id') == video_id),
            None)

        return {
            'id': video_id,
            # The trailing slug component was used as the video id previously
            '_old_archive_ids': [make_archive_id(self, display_id.rsplit('-', 1)[-1])],
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'thumbnail': traverse_obj(episode, ('show', 'photo')),
            'description': episode.get('description'),
            'uploader': self.try_get_user_name(host) if host else None,
            'timestamp': episode.get('publishedAt'),
            'uploader_id': host_nick,
            'uploader_url': traverse_obj(show_json, ('pageProps', 'show', 'url')),
            'channel': show,
            'channel_id': show_id,
            'channel_url': show_url,
            'duration': float_or_none(episode.get('runtime')),
            'view_count': int_or_none(episode.get('plays')),
            'categories': traverse_obj(episode, ('show', 'categorizations', ..., 'name')),
            'cast': cast or None,
            'series': show,
            'series_id': show_id,
            'episode': title,
            'episode_number': episode_number,
            'episode_id': video_id,
        }
|
||||
@ -1,155 +0,0 @@
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class CamdemyIE(InfoExtractor):
    """Extractor for single camdemy.com media pages."""

    _VALID_URL = r'https?://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
    _TESTS = [{
        # single file
        'url': 'http://www.camdemy.com/media/5181/',
        'md5': '5a5562b6a98b37873119102e052e311b',
        'info_dict': {
            'id': '5181',
            'ext': 'mp4',
            'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
            'thumbnail': r're:^https?://.*\.jpg$',
            'creator': 'ss11spring',
            'duration': 1591,
            'upload_date': '20130114',
            'view_count': int,
        },
    }, {
        # With non-empty description
        # webpage returns "No permission or not login"
        'url': 'http://www.camdemy.com/media/13885',
        'md5': '4576a3bb2581f86c61044822adbd1249',
        'info_dict': {
            'id': '13885',
            'ext': 'mp4',
            'title': 'EverCam + Camdemy QuickStart',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:2a9f989c2b153a2342acee579c6e7db6',
            'creator': 'evercam',
            'duration': 318,
        },
    }, {
        # External source (YouTube)
        'url': 'http://www.camdemy.com/media/14842',
        'info_dict': {
            'id': '2vsYQzNIsJo',
            'ext': 'mp4',
            'title': 'Excel 2013 Tutorial - How to add Password Protection',
            'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
            'upload_date': '20130211',
            'uploader': 'Hun Kim',
            'uploader_id': 'hunkimtutorials',
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Extract a media page, delegating to the external source when one is linked."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # Pages that merely wrap an external video are handed to that site's extractor
        src_from = self._html_search_regex(
            r"class=['\"]srcFrom['\"][^>]*>Sources?(?:\s+from)?\s*:\s*<a[^>]+(?:href|title)=(['\"])(?P<url>(?:(?!\1).)+)\1",
            webpage, 'external source', default=None, group='url')
        if src_from:
            return self.url_result(src_from)

        # Pass the page URL through `query` so that it is percent-encoded; appending
        # it raw would let its own '?', '&' or '#' corrupt the oEmbed request
        oembed_obj = self._download_json(
            'http://www.camdemy.com/oembed/', video_id,
            query={'format': 'json', 'url': url})

        title = oembed_obj['title']
        thumb_url = oembed_obj['thumbnail_url']
        # The media files are located in a "video/" folder next to the thumbnail
        video_folder = urllib.parse.urljoin(thumb_url, 'video/')
        file_list_doc = self._download_xml(
            urllib.parse.urljoin(video_folder, 'fileList.xml'),
            video_id, 'Downloading filelist XML')
        file_name = file_list_doc.find('./video/item/fileName').text
        video_url = urllib.parse.urljoin(video_folder, file_name)

        # Some URLs return "No permission or not login" in a webpage despite being
        # freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885)
        upload_date = unified_strdate(self._search_regex(
            r'>published on ([^<]+)<', webpage,
            'upload date', default=None))
        view_count = str_to_int(self._search_regex(
            r'role=["\']viewCnt["\'][^>]*>([\d,.]+) views',
            webpage, 'view count', default=None))
        description = self._html_search_meta(
            'description', webpage, default=None) or clean_html(
            oembed_obj.get('description'))

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumb_url,
            'description': description,
            'creator': oembed_obj.get('author_name'),
            'duration': parse_duration(oembed_obj.get('duration')),
            'upload_date': upload_date,
            'view_count': view_count,
        }
|
||||
|
||||
|
||||
class CamdemyFolderIE(InfoExtractor):
    """Playlist extractor for camdemy.com folder pages."""

    _VALID_URL = r'https?://(?:www\.)?camdemy\.com/folder/(?P<id>\d+)'
    _TESTS = [{
        # links with trailing slash
        'url': 'http://www.camdemy.com/folder/450',
        'info_dict': {
            'id': '450',
            'title': '信號與系統 2012 & 2011 (Signals and Systems)',
        },
        'playlist_mincount': 145,
    }, {
        # links without trailing slash
        # and multi-page
        'url': 'http://www.camdemy.com/folder/853',
        'info_dict': {
            'id': '853',
            'title': '科學計算 - 使用 Matlab',
        },
        'playlist_mincount': 20,
    }, {
        # with displayMode parameter. For testing the codes to add parameters
        'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg',
        'info_dict': {
            'id': '853',
            'title': '科學計算 - 使用 Matlab',
        },
        'playlist_mincount': 20,
    }]

    def _real_extract(self, url):
        """Collect every ``/media/<id>`` link of the folder into a playlist."""
        folder_id = self._match_id(url)

        # Force displayMode=list so that all links are displayed in a single page
        url_parts = urllib.parse.urlparse(url)
        params = dict(urllib.parse.parse_qsl(url_parts.query))
        params['displayMode'] = 'list'
        list_url = url_parts._replace(query=urllib.parse.urlencode(params)).geturl()

        page = self._download_webpage(list_url, folder_id)

        entries = []
        for media_path in re.findall(r"href='(/media/\d+/?)'", page):
            entries.append(self.url_result('http://www.camdemy.com' + media_path))

        # The folder name is only exposed through the "keywords" meta tag
        folder_title = self._html_search_meta('keywords', page)

        return self.playlist_result(entries, folder_id, folder_title)
|
||||
@ -1,70 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class ClippitIE(InfoExtractor):
    """Extractor for clippituser.tv clip pages."""

    _VALID_URL = r'https?://(?:www\.)?clippituser\.tv/c/(?P<id>[a-z]+)'
    _TEST = {
        'url': 'https://www.clippituser.tv/c/evmgm',
        'md5': '963ae7a59a2ec4572ab8bf2f2d2c5f09',
        'info_dict': {
            'id': 'evmgm',
            'ext': 'mp4',
            'title': 'Bye bye Brutus. #BattleBots - Clippit',
            'uploader': 'lizllove',
            'uploader_url': 'https://www.clippituser.tv/p/lizllove',
            'timestamp': 1472183818,
            'upload_date': '20160826',
            'description': 'BattleBots | ABC',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        """Scrape the clip page for its sd/hd files and basic metadata."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(r'<title.*>(.+?)</title>', webpage, 'title')

        # Listed from worst to best so that qualities() ranks 'hd' above 'sd'
        FORMATS = ('sd', 'hd')
        rank = qualities(FORMATS)
        formats = []
        for format_id in FORMATS:
            media_url = self._html_search_regex(
                rf'data-{format_id}-file="(.+?)"', webpage, 'url', fatal=False)
            if media_url:
                # The height, when present, is the file name: ".../<height>.mp4"
                height_match = re.search(r'/(?P<height>\d+)\.mp4', media_url)
                if height_match:
                    height = int(height_match.group('height'))
                else:
                    height = None
                formats.append({
                    'url': media_url,
                    'format_id': format_id,
                    'quality': rank(format_id),
                    'height': height,
                })

        uploader = self._html_search_regex(
            r'class="username".*>\s+(.+?)\n', webpage, 'uploader', fatal=False)
        if uploader:
            uploader_url = 'https://www.clippituser.tv/p/' + uploader
        else:
            uploader_url = None

        timestamp = self._html_search_regex(
            r'datetime="(.+?)"', webpage, 'date', fatal=False)
        thumbnail = self._html_search_regex(
            r'data-image="(.+?)"', webpage, 'thumbnail', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'uploader': uploader,
            'uploader_url': uploader_url,
            'timestamp': parse_iso8601(timestamp),
            'description': self._og_search_description(webpage),
            'thumbnail': thumbnail,
        }
|
||||
@ -1,113 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CONtvIE(InfoExtractor):
    """Extractor for contv.com ``details-movie`` pages (single videos and series)."""

    _VALID_URL = r'https?://(?:www\.)?contv\.com/details-movie/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'https://www.contv.com/details-movie/CEG10022949/days-of-thrills-&-laughter',
        'info_dict': {
            'id': 'CEG10022949',
            'ext': 'mp4',
            'title': 'Days Of Thrills & Laughter',
            'description': 'md5:5d6b3d0b1829bb93eb72898c734802eb',
            'upload_date': '20180703',
            'timestamp': 1530634789.61,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://www.contv.com/details-movie/CLIP-show_fotld_bts/fight-of-the-living-dead:-behind-the-scenes-bites',
        'info_dict': {
            'id': 'CLIP-show_fotld_bts',
            'title': 'Fight of the Living Dead: Behind the Scenes Bites',
        },
        'playlist_mincount': 7,
    }]

    def _real_extract(self, url):
        """Return a playlist for episodic titles, otherwise a single video info dict."""
        video_id = self._match_id(url)
        details = self._download_json(
            'http://metax.contv.live.junctiontv.net/metax/2.5/details/' + video_id,
            video_id, query={'device': 'web'})

        if details.get('type') == 'episodic':
            # Series: every episode is re-dispatched to this extractor by its own id
            seasons = self._download_json(
                'http://metax.contv.live.junctiontv.net/metax/2.5/seriesfeed/json/' + video_id,
                video_id)
            entries = []
            for season in seasons:
                for episode in season.get('episodes', []):
                    episode_id = episode.get('id')
                    if episode_id:
                        entries.append(self.url_result(
                            'https://www.contv.com/details-movie/' + episode_id,
                            CONtvIE.ie_key(), episode_id))
            return self.playlist_result(entries, video_id, details.get('title'))

        m_details = details['details']
        title = details['title']

        formats = []
        hls_url = m_details.get('media_hls_url')
        if hls_url:
            formats.extend(self._extract_m3u8_formats(
                hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
        mp4_url = m_details.get('media_mp4_url')
        if mp4_url:
            formats.append({
                'format_id': 'http',
                'url': mp4_url,
            })

        # All caption tracks are filed under English
        caption_tracks = [
            {'url': caption_url}
            for caption_url in (m_details.get('captions') or {}).values()
        ]
        subtitles = {'en': caption_tracks} if caption_tracks else {}

        thumbnails = [{
            'url': image.get('url'),
            'width': int_or_none(image.get('width')),
            'height': int_or_none(image.get('height')),
        } for image in m_details.get('images', []) if image.get('url')]

        # Use the most detailed description variant that is non-empty
        candidates = (
            m_details.get(prefix + 'description')
            for prefix in ('large_', 'medium_', 'small_', ''))
        description = next(filter(None, candidates), None)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': description,
            'timestamp': float_or_none(details.get('metax_added_on'), 1000),
            'subtitles': subtitles,
            'duration': float_or_none(m_details.get('duration'), 1000),
            'view_count': int_or_none(details.get('num_watched')),
            'like_count': int_or_none(details.get('num_fav')),
            'categories': details.get('category'),
            'tags': details.get('tags'),
            'season_number': int_or_none(details.get('season')),
            'episode_number': int_or_none(details.get('episode')),
            'release_year': int_or_none(details.get('pub_year')),
        }
|
||||
@ -1,113 +0,0 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class DroobleIE(InfoExtractor):
    """Extractor for drooble.com songs, videos, albums and user listings."""

    _VALID_URL = r'''(?x)https?://drooble\.com/(?:
        (?:(?P<user>[^/]+)/)?(?P<kind>song|videos|music/albums)/(?P<id>\d+)|
        (?P<user_2>[^/]+)/(?P<kind_2>videos|music))
    '''
    _TESTS = [{
        'url': 'https://drooble.com/song/2858030',
        'md5': '5ffda90f61c7c318dc0c3df4179eb064',
        'info_dict': {
            'id': '2858030',
            'ext': 'mp3',
            'title': 'Skankocillin',
            'upload_date': '20200801',
            'timestamp': 1596241390,
            'uploader_id': '95894',
            'uploader': 'Bluebeat Shelter',
        },
    }, {
        'url': 'https://drooble.com/karl340758/videos/2859183',
        'info_dict': {
            'id': 'J6QCQY_I5Tk',
            'ext': 'mp4',
            'title': 'Skankocillin',
            'uploader_id': 'UCrSRoI5vVyeYihtWEYua7rg',
            'description': 'md5:ffc0bd8ba383db5341a86a6cd7d9bcca',
            'upload_date': '20200731',
            'uploader': 'Bluebeat Shelter',
        },
    }, {
        'url': 'https://drooble.com/karl340758/music/albums/2858031',
        'info_dict': {
            'id': '2858031',
        },
        'playlist_mincount': 8,
    }, {
        'url': 'https://drooble.com/karl340758/music',
        'info_dict': {
            'id': 'karl340758',
        },
        'playlist_mincount': 8,
    }, {
        'url': 'https://drooble.com/karl340758/videos',
        'info_dict': {
            'id': 'karl340758',
        },
        'playlist_mincount': 8,
    }]

    def _call_api(self, method, video_id, data=None):
        """POST ``data`` as JSON to the given API method and return its payload.

        The response is read as a two-item sequence: a success flag followed by
        the payload. Raises ExtractorError when the flag is falsy.
        """
        response = self._download_json(
            f'https://drooble.com/api/dt/{method}', video_id, data=json.dumps(data).encode())
        if not response[0]:
            raise ExtractorError('Unable to download JSON metadata')
        return response[1]

    def _real_extract(self, url):
        """Return a single entry, or a playlist when several media items are found.

        Raises ExtractorError when the API returns no usable media item.
        """
        mobj = self._match_valid_url(url)
        user = mobj.group('user') or mobj.group('user_2')
        kind = mobj.group('kind') or mobj.group('kind_2')
        display_id = mobj.group('id') or user
        is_music = kind in ('music/albums', 'music')

        if mobj.group('kind_2') == 'videos':
            data = {'from_user': display_id, 'album': -1, 'limit': 18, 'offset': 0, 'order': 'new2old', 'type': 'video'}
        elif is_music:
            data = {'user': user, 'public_only': True, 'individual_limit': {'singles': 1, 'albums': 1, 'playlists': 1}}
        else:
            data = {'url_slug': display_id, 'children': 10, 'order': 'old2new'}

        method = 'getMusicOverview' if is_music else 'getElements'
        json_data = self._call_api(method, display_id, data=data)
        if is_music:
            json_data = json_data['singles']['list']

        entries = []
        for media in json_data:
            media_url = media.get('external_media_url') or media.get('link')
            if not media_url:
                # Nothing to download or delegate for this item
                continue
            if media_url.startswith('https://www.youtube.com'):
                # Externally hosted videos are delegated to the YouTube extractor
                entries.append({
                    '_type': 'url',
                    'url': media_url,
                    'ie_key': 'Youtube',
                })
                continue
            is_audio = (media.get('type') or '').lower() == 'audio'
            entries.append({
                'url': media_url,
                'id': media['id'],
                'title': media['title'],
                'duration': int_or_none(media.get('duration')),
                'timestamp': int_or_none(media.get('timestamp')),
                'album': try_get(media, lambda x: x['album']['title']),
                'uploader': try_get(media, lambda x: x['creator']['display_name']),
                'uploader_id': try_get(media, lambda x: x['creator']['id']),
                'thumbnail': media.get('image_comment'),
                'like_count': int_or_none(media.get('likes')),
                'vcodec': 'none' if is_audio else None,
                'ext': 'mp3' if is_audio else None,
            })

        if not entries:
            raise ExtractorError('No media found', expected=True)

        if len(entries) > 1:
            return self.playlist_result(entries, display_id)

        return entries[0]
|
||||
@ -1,246 +0,0 @@
|
||||
import base64
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_elements_by_class,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
def _get_elements_by_tag_and_attrib(html, tag=None, attribute=None, value=None, escape_value=True):
|
||||
"""Return the content of the tag with the specified attribute in the passed HTML document"""
|
||||
|
||||
if tag is None:
|
||||
tag = '[a-zA-Z0-9:._-]+'
|
||||
if attribute is None:
|
||||
attribute = ''
|
||||
else:
|
||||
attribute = rf'\s+(?P<attribute>{re.escape(attribute)})'
|
||||
if value is None:
|
||||
value = ''
|
||||
else:
|
||||
value = re.escape(value) if escape_value else value
|
||||
value = f'=[\'"]?(?P<value>{value})[\'"]?'
|
||||
|
||||
retlist = []
|
||||
for m in re.finditer(rf'''(?xs)
|
||||
<(?P<tag>{tag})
|
||||
(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
|
||||
{attribute}{value}
|
||||
(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
|
||||
\s*>
|
||||
(?P<content>.*?)
|
||||
</\1>
|
||||
''', html):
|
||||
retlist.append(m)
|
||||
|
||||
return retlist
|
||||
|
||||
|
||||
def _get_element_by_tag_and_attrib(html, tag=None, attribute=None, value=None, escape_value=True):
    """Return the first result of _get_elements_by_tag_and_attrib, or None if there is none"""
    matches = _get_elements_by_tag_and_attrib(html, tag, attribute, value, escape_value)
    if not matches:
        return None
    return matches[0]
|
||||
|
||||
|
||||
class DubokuIE(InfoExtractor):
    """Extractor for single duboku.io episode (``vodplay``) pages."""

    IE_NAME = 'duboku'
    IE_DESC = 'www.duboku.io'

    _VALID_URL = r'(?:https?://[^/]+\.duboku\.io/vodplay/)(?P<id>[0-9]+-[0-9-]+)\.html.*'
    _TESTS = [{
        'url': 'https://w.duboku.io/vodplay/1575-1-1.html',
        'info_dict': {
            'id': '1575-1-1',
            'ext': 'mp4',
            'series': '白色月光',
            'title': 'contains:白色月光',
            'season_number': 1,
            'episode_number': 1,
            'season': 'Season 1',
            'episode_id': '1',
            'season_id': '1',
            'episode': 'Episode 1',
        },
        'params': {
            'skip_download': 'm3u8 download',
        },
    }, {
        'url': 'https://w.duboku.io/vodplay/1588-1-1.html',
        'info_dict': {
            'id': '1588-1-1',
            'ext': 'mp4',
            'series': '亲爱的自己',
            'title': 'contains:第1集',
            'season_number': 1,
            'episode_number': 1,
            'episode': 'Episode 1',
            'season': 'Season 1',
            'episode_id': '1',
            'season_id': '1',
        },
        'params': {
            'skip_download': 'm3u8 download',
        },
    }]

    _PLAYER_DATA_PATTERN = r'player_data\s*=\s*(\{\s*(.*)})\s*;?\s*</script'

    def _real_extract(self, url):
        """Extract one episode; iframe-based players are passed on as url_transparent."""
        video_id = self._match_id(url)
        # The id has the form <series>-<season>-<episode>
        id_parts = video_id.split('-')
        series_id, season_id, episode_id = id_parts[0], id_parts[1], id_parts[2]

        webpage_url = f'https://w.duboku.io/vodplay/{video_id}.html'
        webpage_html = self._download_webpage(webpage_url, video_id)

        # extract video url

        player_data = self._parse_json(
            self._search_regex(self._PLAYER_DATA_PATTERN, webpage_html, 'player_data'),
            video_id, js_to_json)

        # extract title

        series_title = None
        title = None
        for element in get_elements_by_class('title', webpage_html):
            anchor = re.search(r'<a\s+.*>(.*)</a>', element)
            if not anchor:
                continue
            href = extract_attributes(anchor.group(0)).get('href')
            if not href:
                continue
            # Only the title element that links back to this series is relevant
            linked_id = re.search(r'/(\d+)\.html', href)
            if not linked_id or linked_id.group(1) != series_id:
                continue
            series_title = re.sub(r'[\s\r\n\t]+', ' ', clean_html(anchor.group(0)))
            title = re.sub(r'[\s\r\n\t]+', ' ', clean_html(element))
            break

        data_url = player_data.get('url')
        if not data_url:
            raise ExtractorError('Cannot find url in player_data')
        # "encrypt" selects how the URL was obfuscated by the page
        encrypt_mode = player_data.get('encrypt')
        if encrypt_mode == 1:
            data_url = urllib.parse.unquote(data_url)
        elif encrypt_mode == 2:
            data_url = urllib.parse.unquote(base64.b64decode(data_url).decode('ascii'))

        headers = {'Referer': webpage_url}
        metadata = {
            'id': video_id,
            'title': title,
            'series': series_title,
            'season_number': int_or_none(season_id),
            'season_id': season_id,
            'episode_number': int_or_none(episode_id),
            'episode_id': episode_id,
        }

        # if it is an embedded iframe, maybe it's an external source
        if player_data.get('from') == 'iframe':
            # use _type url_transparent to retain the meaningful details
            # of the video.
            return {
                '_type': 'url_transparent',
                'url': smuggle_url(data_url, {'referer': webpage_url}),
                **metadata,
            }

        formats = self._extract_m3u8_formats(data_url, video_id, 'mp4', headers=headers)

        return {
            **metadata,
            'formats': formats,
            'http_headers': headers,
        }
|
||||
|
||||
|
||||
class DubokuPlaylistIE(InfoExtractor):
    # Extractor for duboku.io series pages (/voddetail/<id>.html).
    # The page is scanned for elements whose id matches "playlist<digits>";
    # the URL fragment selects one of them, otherwise the first one found wins.
    IE_NAME = 'duboku:list'
    IE_DESC = 'www.duboku.io entire series'

    _VALID_URL = r'(?:https?://[^/]+\.duboku\.io/voddetail/)(?P<id>[0-9]+)\.html.*'
    _TESTS = [{
        'url': 'https://w.duboku.io/voddetail/1575.html',
        'info_dict': {
            'id': 'startswith:1575',
            'title': '白色月光',
        },
        'playlist_count': 12,
    }, {
        'url': 'https://w.duboku.io/voddetail/1554.html',
        'info_dict': {
            'id': 'startswith:1554',
            'title': '以家人之名',
        },
        'playlist_mincount': 30,
    }]

    def _real_extract(self, url):
        """Return a playlist result with one URL entry per episode link.

        Raises ExtractorError when the URL does not match, when the playlist
        named by the URL fragment is missing/empty, or when no playlist at
        all can be found on the page.
        """
        match = self._match_valid_url(url)
        if match is None:
            raise ExtractorError(f'Invalid URL: {url}')
        series_id = match.group('id')
        fragment = urllib.parse.urlparse(url).fragment

        # The page is always fetched from the canonical host, whatever
        # subdomain the input URL used.
        webpage_html = self._download_webpage(
            f'https://w.duboku.io/voddetail/{series_id}.html', series_id)

        # Title fallbacks: <h1 class="title">, then the "keywords" meta tag,
        # then the document <title>.
        heading = _get_element_by_tag_and_attrib(webpage_html, 'h1', 'class', 'title')
        title = unescapeHTML(heading.group('content')) if heading else None
        if not title:
            title = self._html_search_meta('keywords', webpage_html)
        if not title:
            page_title = _get_element_by_tag_and_attrib(webpage_html, 'title')
            title = unescapeHTML(page_title.group('content')) if page_title else None

        # Map each playlist container id to the list of its episode links.
        playlists = {
            container.group('value'): [
                {
                    'href': unescapeHTML(link.group('value')),
                    'title': unescapeHTML(link.group('content')),
                }
                for link in _get_elements_by_tag_and_attrib(
                    container.group('content'), 'a', 'href', value='[^\'"]+?', escape_value=False)
            ]
            for container in _get_elements_by_tag_and_attrib(
                webpage_html, attribute='id', value='playlist\\d+', escape_value=False)
        }

        # A URL fragment names the wanted playlist; without one, take the
        # first playlist in page order.
        if fragment:
            playlist_id, playlist = fragment, playlists.get(fragment)
        else:
            playlist_id, playlist = next(iter(playlists.items()), (None, None))
        if not playlist:
            raise ExtractorError(
                f'Cannot find {fragment}' if fragment else 'Cannot extract playlist')

        entries = [
            self.url_result(
                urllib.parse.urljoin('https://w.duboku.io', episode['href']),
                ie=DubokuIE.ie_key(), video_title=episode.get('title'))
            for episode in playlist
        ]
        return self.playlist_result(entries, series_id + '#' + playlist_id, title)
|
||||
@ -1,158 +0,0 @@
|
||||
import json
|
||||
import random
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class EightTracksIE(InfoExtractor):
    # Extractor for 8tracks.com mixes. Track information is fetched one track
    # at a time: every API response supplies the track id needed to build the
    # request for the next track.
    IE_NAME = '8tracks'
    _VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
    _TEST = {
        'name': 'EightTracks',
        'url': 'http://8tracks.com/ytdl/youtube-dl-test-tracks-a',
        'info_dict': {
            'id': '1336550',
            'display_id': 'youtube-dl-test-tracks-a',
            'description': "test chars: \"'/\\ä↭",
            'title': "youtube-dl test tracks \"'/\\ä↭<>",
        },
        'playlist': [
            {
                'md5': '96ce57f24389fc8734ce47f4c1abcc55',
                'info_dict': {
                    'id': '11885610',
                    'ext': 'm4a',
                    'title': "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
                    'uploader_id': 'ytdl',
                },
            },
            {
                'md5': '4ab26f05c1f7291ea460a3920be8021f',
                'info_dict': {
                    'id': '11885608',
                    'ext': 'm4a',
                    'title': "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
                    'uploader_id': 'ytdl',
                },
            },
            {
                'md5': 'd30b5b5f74217410f4689605c35d1fd7',
                'info_dict': {
                    'id': '11885679',
                    'ext': 'm4a',
                    'title': "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
                    'uploader_id': 'ytdl',
                },
            },
            {
                'md5': '4eb0a669317cd725f6bbd336a29f923a',
                'info_dict': {
                    'id': '11885680',
                    'ext': 'm4a',
                    'title': "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
                    'uploader_id': 'ytdl',
                },
            },
            {
                'md5': '1893e872e263a2705558d1d319ad19e8',
                'info_dict': {
                    'id': '11885682',
                    'ext': 'm4a',
                    'title': "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
                    'uploader_id': 'ytdl',
                },
            },
            {
                'md5': 'b673c46f47a216ab1741ae8836af5899',
                'info_dict': {
                    'id': '11885683',
                    'ext': 'm4a',
                    'title': "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
                    'uploader_id': 'ytdl',
                },
            },
            {
                'md5': '1d74534e95df54986da7f5abf7d842b7',
                'info_dict': {
                    'id': '11885684',
                    'ext': 'm4a',
                    'title': "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
                    'uploader_id': 'ytdl',
                },
            },
            {
                'md5': 'f081f47af8f6ae782ed131d38b9cd1c0',
                'info_dict': {
                    'id': '11885685',
                    'ext': 'm4a',
                    'title': "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
                    'uploader_id': 'ytdl',
                },
            },
        ],
    }

    def _real_extract(self, url):
        """Return a playlist dict with one entry per track of the mix.

        The mix metadata is read from the ``PAGE.mix`` JSON object embedded in
        the web page; tracks are then requested sequentially from the
        ``sets/<session>/play`` and ``sets/<session>/next`` endpoints.

        Raises ExtractorError when a track request still fails after the
        retries are exhausted (re-raised from the download helper).
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        data = self._parse_json(
            self._search_regex(
                r'(?s)PAGE\.mix\s*=\s*({.+?});\n', webpage, 'trax information'),
            playlist_id)

        # Random number used as the <session> path component of the API URLs
        session = str(random.randint(0, 1000000000))
        mix_id = data['id']
        track_count = data['tracks_count']
        duration = data['duration']
        # An empty mix (tracks_count == 0) or a missing duration must not
        # abort the extraction with ZeroDivisionError/TypeError; fall back to
        # the predefined average below instead.
        try:
            avg_song_duration = float(duration) / track_count
        except (TypeError, ValueError, ZeroDivisionError):
            avg_song_duration = 0
        # duration is sometimes negative, use predefined avg duration
        if avg_song_duration <= 0:
            avg_song_duration = 300
        first_url = f'http://8tracks.com/sets/{session}/play?player=sm&mix_id={mix_id}&format=jsonh'
        next_url = first_url
        entries = []

        for i in range(track_count):
            api_json = None
            download_tries = 0

            while api_json is None:
                try:
                    api_json = self._download_webpage(
                        next_url, playlist_id,
                        note='Downloading song information %d/%d' % (i + 1, track_count),
                        errnote='Failed to download song information')
                except ExtractorError:
                    if download_tries > 3:
                        raise
                    # Wait roughly one song length before asking again
                    download_tries += 1
                    self._sleep(avg_song_duration, playlist_id)

            api_data = json.loads(api_json)
            track_data = api_data['set']['track']
            entries.append({
                'id': str(track_data['id']),
                'url': track_data['track_file_stream_url'],
                'title': track_data['performer'] + ' - ' + track_data['name'],
                'raw_title': track_data['name'],
                'uploader_id': data['user']['login'],
                'ext': 'm4a',
            })

            # The next request must name the track that was just returned
            next_url = 'http://8tracks.com/sets/{}/next?player=sm&mix_id={}&format=jsonh&track_id={}'.format(
                session, mix_id, track_data['id'])
        return {
            '_type': 'playlist',
            'entries': entries,
            'id': str(mix_id),
            'display_id': playlist_id,
            'title': data.get('name'),
            'description': data.get('description'),
        }
|
||||
@ -1,81 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..networking import Request
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class EitbIE(InfoExtractor):
    # Extractor for eitb.tv video pages. Metadata and renditions come from the
    # mam.eitb.eus REST service; HLS additionally needs an auth token.
    IE_NAME = 'eitb.tv'
    _VALID_URL = r'https?://(?:www\.)?eitb\.tv/(?:eu/bideoa|es/video)/[^/]+/\d+/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/4104995148001/4090227752001/lasa-y-zabala-30-anos/',
        'md5': 'edf4436247185adee3ea18ce64c47998',
        'info_dict': {
            'id': '4090227752001',
            'ext': 'mp4',
            'title': '60 minutos (Lasa y Zabala, 30 años)',
            'description': 'Programa de reportajes de actualidad.',
            'duration': 3996.76,
            'timestamp': 1381789200,
            'upload_date': '20131014',
            'tags': list,
        },
    }

    def _real_extract(self, url):
        """Build the info dict from progressive renditions plus HLS/HDS manifests."""
        video_id = self._match_id(url)

        video = self._download_json(
            f'http://mam.eitb.eus/mam/REST/ServiceMultiweb/Video/MULTIWEBTV/{video_id}/',
            video_id, 'Downloading video JSON')

        # Only the first entry of "web_media" is used
        media = video['web_media'][0]

        # Progressive HTTP renditions; entries without a URL are ignored
        formats = []
        for rendition in media['RENDITIONS']:
            rendition_url = rendition.get('PMD_URL')
            if not rendition_url:
                continue
            bitrate = float_or_none(rendition.get('ENCODING_RATE'), 1000)
            formats.append({
                'url': rendition_url,
                'format_id': join_nonempty('http', int_or_none(bitrate)),
                'width': int_or_none(rendition.get('FRAME_WIDTH')),
                'height': int_or_none(rendition.get('FRAME_HEIGHT')),
                'tbr': bitrate,
            })

        # HLS: the manifest URL is only usable with a token appended as "hdnts"
        hls_url = media.get('HLS_SURL')
        if hls_url:
            token_request = Request(
                'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/',
                headers={'Referer': url})
            token_data = self._download_json(
                token_request, video_id, 'Downloading auth token', fatal=False)
            token = token_data.get('token') if token_data else None
            if token:
                formats.extend(self._extract_m3u8_formats(
                    f'{hls_url}?hdnts={token}', video_id, m3u8_id='hls', fatal=False))

        # HDS: the "euskalsvod" part of the URL is rewritten to "euskalvod"
        hds_url = media.get('HDS_SURL')
        if hds_url:
            manifest_base = hds_url.replace('euskalsvod', 'euskalvod')
            formats.extend(self._extract_f4m_formats(
                f'{manifest_base}?hdcore=3.7.0',
                video_id, f4m_id='hds', fatal=False))

        return {
            'id': video_id,
            'title': media.get('NAME_ES') or media.get('name') or media['NAME_EU'],
            'description': media.get('SHORT_DESC_ES') or video.get('desc_group') or media.get('SHORT_DESC_EU'),
            'thumbnail': media.get('STILL_URL') or media.get('THUMBNAIL_URL'),
            'duration': float_or_none(media.get('LENGTH'), 1000),
            'timestamp': parse_iso8601(media.get('BROADCST_DATE'), ' '),
            'tags': media.get('TAGS'),
            'formats': formats,
        }
|
||||
@ -1,61 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class EyedoTVIE(InfoExtractor):
    # Extractor for eyedo.tv "Live/Detail" pages. Metadata comes from the
    # GetLive XML API; the HLS playlist URL is derived from the video id.
    _VALID_URL = r'https?://(?:www\.)?eyedo\.tv/[^/]+/(?:#!/)?Live/Detail/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://www.eyedo.tv/en-US/#!/Live/Detail/16301',
        'md5': 'ba14f17995cdfc20c36ba40e21bf73f7',
        'info_dict': {
            'id': '16301',
            'ext': 'mp4',
            'title': 'Journée du conseil scientifique de l\'Afnic 2015',
            'description': 'md5:4abe07293b2f73efc6e1c37028d58c98',
            'uploader': 'Afnic Live',
            'uploader_id': '8023',
        },
    }
    _ROOT_URL = 'http://live.eyedo.net:1935/'

    def _real_extract(self, url):
        """Return the info dict for a live or replay eyedo.tv video.

        Raises ExtractorError (expected) when the API reports the state
        'avenir', i.e. the video is not available yet.
        """
        video_id = self._match_id(url)
        video_data = self._download_xml(f'http://eyedo.tv/api/live/GetLive/{video_id}', video_id)

        def _add_ns(path):
            # Qualify an element path with the namespace of the API response
            return self._xpath_ns(path, 'http://schemas.datacontract.org/2004/07/EyeDo.Core.Implementation.Web.ViewModels.Api')

        title = xpath_text(video_data, _add_ns('Titre'), 'title', True)
        # The element name passed here is what gets reported when the element
        # is missing, so it must describe StateLiveCode rather than the title.
        state_live_code = xpath_text(video_data, _add_ns('StateLiveCode'), 'state live code', True)
        if state_live_code == 'avenir':
            raise ExtractorError(
                f'{self.IE_NAME} said: We\'re sorry, but this video is not yet available.',
                expected=True)

        is_live = state_live_code == 'live'
        m3u8_url = None
        # http://eyedo.tv/Content/Html5/Scripts/html5view.js
        if is_live:
            # NOTE(review): 'Cdn' is looked up without the namespace applied to
            # every other field - confirm whether _add_ns('Cdn') was intended.
            if xpath_text(video_data, 'Cdn') == 'true':
                m3u8_url = f'http://rrr.sz.xlcdn.com/?account=eyedo&file=A{video_id}&type=live&service=wowza&protocol=http&output=playlist.m3u8'
            else:
                m3u8_url = self._ROOT_URL + f'w/{video_id}/eyedo_720p/playlist.m3u8'
        else:
            m3u8_url = self._ROOT_URL + f'replay-w/{video_id}/mp4:{video_id}.mp4/playlist.m3u8'

        return {
            'id': video_id,
            'title': title,
            'formats': self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native'),
            'description': xpath_text(video_data, _add_ns('Description')),
            'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))),
            'uploader': xpath_text(video_data, _add_ns('Createur')),
            'uploader_id': xpath_text(video_data, _add_ns('CreateurId')),
            'chapter': xpath_text(video_data, _add_ns('ChapitreTitre')),
            'chapter_id': xpath_text(video_data, _add_ns('ChapitreId')),
        }
|
||||
@ -12,14 +12,6 @@ class FootyRoomIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'add_ie': [StreamableIE.ie_key()],
|
||||
}, {
|
||||
'url': 'http://footyroom.com/matches/75817984/georgia-vs-germany/review',
|
||||
'info_dict': {
|
||||
'id': '75817984',
|
||||
'title': 'VIDEO Georgia 0 - 2 Germany',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
'add_ie': ['Playwire'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -38,13 +30,6 @@ class FootyRoomIE(InfoExtractor):
|
||||
payload = video.get('payload')
|
||||
if not payload:
|
||||
continue
|
||||
playwire_url = self._html_search_regex(
|
||||
r'data-config="([^"]+)"', payload,
|
||||
'playwire url', default=None)
|
||||
if playwire_url:
|
||||
entries.append(self.url_result(self._proto_relative_url(
|
||||
playwire_url, 'http:'), 'Playwire'))
|
||||
|
||||
streamable_url = StreamableIE._extract_url(payload)
|
||||
if streamable_url:
|
||||
entries.append(self.url_result(
|
||||
|
||||
@ -1,56 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import month_by_name
|
||||
|
||||
|
||||
class FranceInterIE(InfoExtractor):
    # Extractor for franceinter.fr "emissions" pages (audio only).
    _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)'

    _TEST = {
        'url': 'https://www.franceinter.fr/emissions/affaires-sensibles/affaires-sensibles-07-septembre-2016',
        'md5': '9e54d7bdb6fdc02a841007f8a975c094',
        'info_dict': {
            'id': 'affaires-sensibles/affaires-sensibles-07-septembre-2016',
            'ext': 'mp3',
            'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
            'description': 'md5:401969c5d318c061f86bda1fa359292b',
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20160907',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for one broadcast page.

        The audio URL is read from the ``data-url`` attribute of the first
        button inside the ``page-diffusion`` block; the upload date is parsed
        from the French "<day> <month name> <year>" text of the
        ``cover-emission-period`` element and is None when it cannot be parsed.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(
            r'(?s)<div[^>]+class=["\']page-diffusion["\'][^>]*>.*?<button[^>]+data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
            webpage, 'video url', group='url')

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)

        upload_date = None
        upload_date_str = self._search_regex(
            r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
            webpage, 'upload date', fatal=False)
        if upload_date_str:
            # The regex guarantees exactly three whitespace-separated tokens
            day, month_name, year = upload_date_str.split()
            month = month_by_name(month_name, lang='fr')
            # An unrecognised month name used to be emitted as "00", giving an
            # invalid YYYYMMDD value; leave the date unset in that case.
            if month:
                upload_date = f'{year}{month:02d}{int(day):02d}'

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'formats': [{
                'url': video_url,
                'vcodec': 'none',
            }],
        }
|
||||
@ -1,73 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
|
||||
|
||||
class FujiTVFODPlus7IE(InfoExtractor):
    # Extractor for fod.fujitv.co.jp episode pages.
    _VALID_URL = r'https?://fod\.fujitv\.co\.jp/title/(?P<sid>[0-9a-z]{4})/(?P<id>[0-9a-z]+)'
    _BASE_URL = 'https://i.fod.fujitv.co.jp/'
    # Total bitrate -> (width, height) of the corresponding variant
    _BITRATE_MAP = {
        300: (320, 180),
        800: (640, 360),
        1200: (1280, 720),
        2000: (1280, 720),
        4000: (1920, 1080),
    }

    _TESTS = [{
        'url': 'https://fod.fujitv.co.jp/title/5d40/5d40110076',
        'info_dict': {
            'id': '5d40110076',
            'ext': 'ts',
            'title': '#1318 『まる子、まぼろしの洋館を見る』の巻',
            'series': 'ちびまる子ちゃん',
            'series_id': '5d40',
            'description': 'md5:b3f51dbfdda162ac4f789e0ff4d65750',
            'thumbnail': 'https://i.fod.fujitv.co.jp/img/program/5d40/episode/5d40110076_a.jpg',
        },
    }, {
        'url': 'https://fod.fujitv.co.jp/title/5d40/5d40810083',
        'info_dict': {
            'id': '5d40810083',
            'ext': 'ts',
            'title': '#1324 『まる子とオニの子』の巻/『結成!2月をムダにしない会』の巻',
            'description': 'md5:3972d900b896adc8ab1849e310507efa',
            'series': 'ちびまる子ちゃん',
            'series_id': '5d40',
            'thumbnail': 'https://i.fod.fujitv.co.jp/img/program/5d40/episode/5d40810083_a.jpg'},
        'skip': 'Video available only in one week',
    }]

    def _real_extract(self, url):
        """Return the info dict for one episode.

        Formats come from the ``abrjson_v2`` selector list. Title, series and
        description are only filled in when the ``CT`` cookie is available and
        the metadata request succeeds; otherwise they are left as None.
        """
        series_id, video_id = self._match_valid_url(url).groups()
        # Request the page first; the 'CT' cookie is read from the jar afterwards
        self._request_webpage(HEADRequest(url), video_id)
        json_info = {}
        token = self._get_cookies(url).get('CT')
        if token:
            # With fatal=False a failed request yields a falsy value instead of
            # a dict; normalise it so the .get() calls below cannot fail.
            json_info = self._download_json(
                f'https://fod-sp.fujitv.co.jp/apps/api/episode/detail/?ep_id={video_id}&is_premium=false',
                video_id, headers={'x-authorization': f'Bearer {token.value}'}, fatal=False) or {}
        else:
            self.report_warning(f'The token cookie is needed to extract video metadata. {self._login_hint("cookies")}')
        formats, subtitles = [], {}
        src_json = self._download_json(f'{self._BASE_URL}abrjson_v2/tv_android/{video_id}', video_id)
        for src in src_json['video_selector']:
            if not src.get('url'):
                continue
            fmt, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, 'ts')
            for f in fmt:
                # _BITRATE_MAP tuples are (width, height); unknown bitrates map
                # to an empty tuple and therefore add nothing.
                f.update(dict(zip(('width', 'height'),
                                  self._BITRATE_MAP.get(f.get('tbr'), ()), strict=False)))
            formats.extend(fmt)
            subtitles = self._merge_subtitles(subtitles, subs)

        return {
            'id': video_id,
            'title': json_info.get('ep_title'),
            'series': json_info.get('lu_title'),
            'series_id': series_id,
            'description': json_info.get('ep_description'),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnail': f'{self._BASE_URL}img/program/{series_id}/episode/{video_id}_a.jpg',
            '_format_sort_fields': ('tbr', ),
        }
|
||||
@ -1,70 +1,13 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
parse_codecs,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class GabTVIE(InfoExtractor):
    # Extractor for tv.gab.com channel episode pages. The episode id is the
    # last dash-separated token of the URL slug.
    _VALID_URL = r'https?://tv\.gab\.com/channel/[^/]+/view/(?P<id>[a-z0-9-]+)'
    _TESTS = [{
        'url': 'https://tv.gab.com/channel/wurzelroot/view/why-was-america-in-afghanistan-61217eacea5665de450d0488',
        'info_dict': {
            'id': '61217eacea5665de450d0488',
            'ext': 'mp4',
            'title': 'WHY WAS AMERICA IN AFGHANISTAN - AMERICA FIRST AGAINST AMERICAN OLIGARCHY',
            'uploader': 'Wurzelroot',
            'uploader_id': '608fb0a85738fd1974984f7d',
            'thumbnail': 'https://tv.gab.com/image/61217eacea5665de450d0488',
        },
    }]

    def _real_extract(self, url):
        """Scrape the episode page and build one format per advertised resolution."""
        video_id = self._match_id(url).split('-')[-1]
        webpage = self._download_webpage(url, video_id)

        # Episode metadata is carried in data-* attributes of the page
        channel_id = self._search_regex(r'data-channel-id=\"(?P<channel_id>[^\"]+)', webpage, 'channel_id')
        channel_name = self._search_regex(r'data-channel-name=\"(?P<channel_id>[^\"]+)', webpage, 'channel_name')
        title = self._search_regex(r'data-episode-title=\"(?P<channel_id>[^\"]+)', webpage, 'title')
        view_key = self._search_regex(r'data-view-key=\"(?P<channel_id>[^\"]+)', webpage, 'view_key')
        raw_description = self._html_search_regex(
            self._meta_regex('description'), webpage, 'description', group='content')
        description = clean_html(raw_description) or None

        resolution_labels = re.findall(
            rf'<a\ data-episode-id=\"{video_id}\"\ data-resolution=\"(?P<resolution>[^\"]+)', webpage)

        formats = []
        for label in resolution_labels:
            if 'audio-' in label:
                # "audio-<n>" labels get a fixed height of 144 and a low quality rank
                extra = {
                    'abr': str_to_int(label.replace('audio-', '')),
                    'height': 144,
                    'quality': -10,
                }
            else:
                # "<n>p" labels carry the frame height
                extra = {'height': str_to_int(label.replace('p', ''))}
            formats.append({
                'url': f'https://tv.gab.com/media/{video_id}?viewKey={view_key}&r={label}',
                'format_id': label,
                'vcodec': 'h264',
                'acodec': 'aac',
                'ext': 'mp4',
                **extra,
            })

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'thumbnail': f'https://tv.gab.com/image/{video_id}',
        }
|
||||
|
||||
|
||||
class GabIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gab\.com/[^/]+/posts/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
|
||||
@ -1,84 +0,0 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_podcast_url,
|
||||
int_or_none,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class GooglePodcastsBaseIE(InfoExtractor):
    # Shared helpers for the Google Podcasts extractors.
    _VALID_URL_BASE = r'https?://podcasts\.google\.com/feed/'

    def _batch_execute(self, func_id, video_id, params):
        """POST one call to the ``batchexecute`` endpoint and decode its payload.

        The outermost ``[...]`` span of the response body is parsed as JSON;
        element ``[0][2]`` of that structure is itself a JSON string, which is
        decoded and returned.
        """
        form_data = urlencode_postdata({
            'f.req': json.dumps([[[func_id, json.dumps(params), None, '1']]]),
        })

        def outermost_array(body):
            # Keep only the widest bracketed span of the raw response text
            return self._search_regex(r'(?s)(\[.+\])', body, 'data')

        envelope = self._download_json(
            'https://podcasts.google.com/_/PodcastsUi/data/batchexecute',
            video_id, data=form_data, transform_source=outermost_array)
        return json.loads(envelope[0][2])

    def _extract_episode(self, episode):
        """Map the positional ``episode`` array onto an info dict.

        Every field is mandatory (a missing index raises) except ``creator``,
        which is read defensively.
        """
        episode_id = episode[4][3]
        title = episode[8]
        audio_url = clean_podcast_url(episode[13])
        thumbnail = episode[2]
        description = episode[9]
        creator = try_get(episode, lambda x: x[14])
        timestamp = int_or_none(episode[11])
        duration = int_or_none(episode[12])
        series = episode[1]
        return {
            'id': episode_id,
            'title': title,
            'url': audio_url,
            'thumbnail': thumbnail,
            'description': description,
            'creator': creator,
            'timestamp': timestamp,
            'duration': duration,
            'series': series,
        }
|
||||
|
||||
|
||||
class GooglePodcastsIE(GooglePodcastsBaseIE):
    # Single-episode extractor: /feed/<feed_url>/episode/<id>
    IE_NAME = 'google:podcasts'
    _VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<feed_url>[^/]+)/episode/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA/episode/MzBlNWRlN2UtOWE4Yy00ODcwLTk2M2MtM2JlMmUyNmViOTRh',
        'md5': 'fa56b2ee8bd0703e27e42d4b104c4766',
        'info_dict': {
            'id': '30e5de7e-9a8c-4870-963c-3be2e26eb94a',
            'ext': 'mp3',
            'title': 'WWDTM New Year 2021',
            'description': 'We say goodbye to 2020 with Christine Baranksi, Doug Jones, Jonna Mendez, and Kellee Edwards.',
            'upload_date': '20210102',
            'timestamp': 1609606800,
            'duration': 2901,
            'series': "Wait Wait... Don't Tell Me!",
        },
    }

    def _real_extract(self, url):
        """Fetch one episode through the 'oNjqVe' batch call and convert it."""
        feed_token, guid_token = self._match_valid_url(url).groups()
        response = self._batch_execute(
            'oNjqVe', guid_token, [feed_token, guid_token])
        # The episode array is the second element of the decoded payload
        return self._extract_episode(response[1])
|
||||
|
||||
|
||||
class GooglePodcastsFeedIE(GooglePodcastsBaseIE):
    # Whole-feed extractor: /feed/<id>
    IE_NAME = 'google:podcasts:feed'
    _VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<id>[^/?&#]+)/?(?:[?#&]|$)'
    _TEST = {
        'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA',
        'info_dict': {
            'title': "Wait Wait... Don't Tell Me!",
            'description': "NPR's weekly current events quiz. Have a laugh and test your news knowledge while figuring out what's real and what we've made up.",
        },
        'playlist_mincount': 20,
    }

    def _real_extract(self, url):
        """Fetch the feed through the 'ncqJEe' batch call and list its episodes."""
        feed_token = self._match_id(url)
        data = self._batch_execute('ncqJEe', feed_token, [feed_token])

        # Episodes are read from data[1][0]; a missing path means no entries
        episode_rows = try_get(data, lambda x: x[1][0]) or []
        entries = [self._extract_episode(row) for row in episode_rows]

        # Feed metadata is read from data[3]: index 0 is used as the title,
        # index 2 as the description
        feed = try_get(data, lambda x: x[3]) or []
        playlist_title = try_get(feed, lambda x: x[0])
        playlist_description = try_get(feed, lambda x: x[2])
        return self.playlist_result(
            entries, playlist_title=playlist_title,
            playlist_description=playlist_description)
|
||||
@ -1,47 +0,0 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class GoshgayIE(InfoExtractor):
    # Extractor for goshgay.com video pages; the media URL is taken from the
    # flashvars of the embedded flash player.
    _VALID_URL = r'https?://(?:www\.)?goshgay\.com/video(?P<id>\d+?)($|/)'
    _TEST = {
        'url': 'http://www.goshgay.com/video299069/diesel_sfw_xxx_video',
        'md5': '4b6db9a0a333142eb9f15913142b0ed1',
        'info_dict': {
            'id': '299069',
            'ext': 'flv',
            'title': 'DIESEL SFW XXX Video',
            'thumbnail': r're:^http://.*\.jpg$',
            'duration': 80,
            'age_limit': 18,
        },
    }

    def _real_extract(self, url):
        """Scrape title, duration and the flashvars query string from the page."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<h2>(.*?)<', webpage, 'title')
        duration_text = self._html_search_regex(
            r'<span class="duration">\s*-?\s*(.*?)</span>',
            webpage, 'duration', fatal=False)
        duration = parse_duration(duration_text)

        # flashvars is a URL-encoded query string; parse_qs maps each key to
        # a list of values
        flashvars_text = self._html_search_regex(
            r'<embed.+?id="flash-player-embed".+?flashvars="([^"]+)"',
            webpage, 'flashvars')
        flashvars = urllib.parse.parse_qs(flashvars_text)
        thumbnail = flashvars.get('url_bigthumb', [None])[0]

        return {
            'id': video_id,
            # mandatory: a missing 'flv_url' raises KeyError
            'url': flashvars['flv_url'][0],
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'age_limit': 18,
        }
|
||||
@ -1,32 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class GPUTechConfIE(InfoExtractor):
    # Extractor for GTC 2015 on-demand session pages. It only locates the
    # session XML and delegates to the 'DigitallySpeaking' extractor.
    _VALID_URL = r'https?://on-demand\.gputechconf\.com/gtc/2015/video/S(?P<id>\d+)\.html'
    _TEST = {
        'url': 'http://on-demand.gputechconf.com/gtc/2015/video/S5156.html',
        'md5': 'a8862a00a0fd65b8b43acc5b8e33f798',
        'info_dict': {
            'id': '5156',
            'ext': 'mp4',
            'title': 'Coordinating More Than 3 Million CUDA Threads for Social Network Analysis',
            'duration': 1219,
        },
    }

    def _real_extract(self, url):
        """Return a url_transparent result pointing at the session's XML file."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # 'rootPath' is optional in the page script and has a fixed fallback;
        # 'xmlFileId' is required
        root_path = self._search_regex(
            r'var\s+rootPath\s*=\s*"([^"]+)', webpage, 'root path',
            default='http://evt.dispeak.com/nvidia/events/gtc15/')
        xml_file_id = self._search_regex(
            r'var\s+xmlFileId\s*=\s*"([^"]+)', webpage, 'xml file id')
        xml_url = f'{root_path}xml/{xml_file_id}.xml'

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': xml_url,
            'ie_key': 'DigitallySpeaking',
        }
|
||||
@ -1,183 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_count,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class HKETVIE(InfoExtractor):
    # Extracts one video from an hkedcity.net ETV resource page. The page
    # supplies the parameters for a POST to the media handler, whose JSON
    # response carries the sources, caption tracks and basic metadata.
    IE_NAME = 'hketv'
    IE_DESC = '香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau'
    _GEO_BYPASS = False
    _GEO_COUNTRIES = ['HK']
    _VALID_URL = r'https?://(?:www\.)?hkedcity\.net/etv/resource/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.hkedcity.net/etv/resource/2932360618',
        'md5': 'f193712f5f7abb208ddef3c5ea6ed0b7',
        'info_dict': {
            'id': '2932360618',
            'ext': 'mp4',
            'title': '喜閱一生(共享閱讀樂) (中、英文字幕可供選擇)',
            'description': 'md5:d5286d05219ef50e0613311cbe96e560',
            'upload_date': '20181024',
            'duration': 900,
            'subtitles': 'count:2',
        },
        'skip': 'Geo restricted to HK',
    }, {
        'url': 'https://www.hkedcity.net/etv/resource/972641418',
        'md5': '1ed494c1c6cf7866a8290edad9b07dc9',
        'info_dict': {
            'id': '972641418',
            'ext': 'mp4',
            'title': '衣冠楚楚 (天使系列之一)',
            'description': 'md5:10bb3d659421e74f58e5db5691627b0f',
            'upload_date': '20070109',
            'duration': 907,
            'subtitles': {},
        },
        'skip': 'Geo restricted to HK',
    }]

    # Maps a caption track's 'label' to the key used in the subtitles dict;
    # labels that are not listed here are used as the key unchanged.
    _CC_LANGS = {
        '中文(繁體中文)': 'zh-Hant',
        '中文(简体中文)': 'zh-Hans',
        'English': 'en',
        'Bahasa Indonesia': 'id',
        '\u0939\u093f\u0928\u094d\u0926\u0940': 'hi',
        '\u0928\u0947\u092a\u093e\u0932\u0940': 'ne',
        'Tagalog': 'tl',
        '\u0e44\u0e17\u0e22': 'th',
        '\u0627\u0631\u062f\u0648': 'ur',
    }
    # Maps a source's 'label' to the frame height reported for that format
    _FORMAT_HEIGHTS = {
        'SD': 360,
        'HD': 720,
    }
    _APPS_BASE_URL = 'https://apps.hkedcity.net'

    def _real_extract(self, url):
        """Build the info dict for the ETV resource at *url*.

        Raises ExtractorError (or the geo-restriction error raised by
        raise_geo_restricted) when the media handler does not report both
        'success' and 'access'.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = (
            self._html_search_meta(
                ('ed_title', 'search.ed_title'), webpage, default=None)
            # The only named group in this pattern is `id`; requesting a group
            # the pattern does not define raises IndexError on a match.
            or self._search_regex(
                r'data-favorite_title_(?:eng|chi)=(["\'])(?P<id>(?:(?!\1).)+)\1',
                webpage, 'title', default=None, group='id')
            or self._html_search_regex(
                r'<h1>([^<]+)</h1>', webpage, 'title', default=None)
            or self._og_search_title(webpage)
        )

        file_id = self._search_regex(
            r'post_var\[["\']file_id["\']\s*\]\s*=\s*(.+?);',
            webpage, 'file ID')
        curr_url = self._search_regex(
            r'post_var\[["\']curr_url["\']\s*\]\s*=\s*"(.+?)";',
            webpage, 'curr URL')
        data = {
            'action': 'get_info',
            'curr_url': curr_url,
            'file_id': file_id,
            'video_url': file_id,
        }

        response = self._download_json(
            self._APPS_BASE_URL + '/media/play/handler.php', video_id,
            data=urlencode_postdata(data),
            headers=merge_dicts({
                'Content-Type': 'application/x-www-form-urlencoded'},
                self.geo_verification_headers()))

        # Evaluate the refusal first so it is reported as such even when the
        # response has no 'result' key or no error message.
        if not response.get('success') or not response.get('access'):
            error = clean_html(response.get('access_err_msg')) or ''
            if 'Video streaming is not available in your country' in error:
                self.raise_geo_restricted(
                    msg=error, countries=self._GEO_COUNTRIES)
            else:
                raise ExtractorError(
                    error or 'Access to this video was denied', expected=True)

        result = response['result']

        formats = []

        width = int_or_none(result.get('width'))
        height = int_or_none(result.get('height'))

        playlist0 = result['playlist'][0]
        for fmt in playlist0['sources']:
            file_url = urljoin(self._APPS_BASE_URL, fmt.get('file'))
            if not file_url:
                continue
            # If we ever wanted to provide the final resolved URL that
            # does not require cookies, albeit with a shorter lifespan:
            #     urlh = self._downloader.urlopen(file_url)
            #     resolved_url = urlh.url
            label = fmt.get('label')
            h = self._FORMAT_HEIGHTS.get(label)
            # Scale the reported width to the height mapped from the label
            w = h * width // height if h and width and height else None
            formats.append({
                'format_id': label,
                'ext': fmt.get('type'),
                'url': file_url,
                'width': w,
                'height': h,
            })

        subtitles = {}
        tracks = try_get(playlist0, lambda x: x['tracks'], list) or []
        for track in tracks:
            if not isinstance(track, dict):
                continue
            # str_or_none() yields either a str or None, so one check suffices
            track_kind = str_or_none(track.get('kind'))
            if not track_kind or track_kind.lower() not in ('captions', 'subtitles'):
                continue
            track_url = urljoin(self._APPS_BASE_URL, track.get('file'))
            if not track_url:
                continue
            track_label = track.get('label')
            subtitles.setdefault(self._CC_LANGS.get(
                track_label, track_label), []).append({
                    'url': self._proto_relative_url(track_url),
                    'ext': 'srt',
            })

        # Likes (best effort: a failed request leaves like_count as None)
        emotion = self._download_json(
            'https://emocounter.hkedcity.net/handler.php', video_id,
            data=urlencode_postdata({
                'action': 'get_emotion',
                'data[bucket_id]': 'etv',
                'data[identifier]': video_id,
            }),
            headers={'Content-Type': 'application/x-www-form-urlencoded'},
            fatal=False) or {}
        like_count = int_or_none(try_get(
            emotion, lambda x: x['data']['emotion_data'][0]['count']))

        return {
            'id': video_id,
            'title': title,
            'description': self._html_search_meta(
                'description', webpage, fatal=False),
            'upload_date': unified_strdate(self._html_search_meta(
                'ed_date', webpage, fatal=False), day_first=False),
            'duration': int_or_none(result.get('length')),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnail': urljoin(self._APPS_BASE_URL, result.get('image')),
            'view_count': parse_count(result.get('view_count')),
            'like_count': like_count,
        }
|
||||
@ -1,115 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import traverse_obj, try_call, url_or_none
|
||||
|
||||
|
||||
class IdolPlusIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?idolplus\.com/z[us]/(?:concert/|contents/?\?(?:[^#]+&)?albumId=)(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://idolplus.com/zs/contents?albumId=M012077298PPV00',
        'md5': '2ace3f4661c943a2f7e79f0b88cea1e7',
        'info_dict': {
            'id': 'M012077298PPV00',
            'ext': 'mp4',
            'title': '[MultiCam] Aegyo on Top of Aegyo (IZ*ONE EATING TRIP)',
            'release_date': '20200707',
            'formats': 'count:65',
        },
        'params': {'format': '532-KIM_MINJU'},
    }, {
        'url': 'https://idolplus.com/zs/contents?albumId=M01232H058PPV00&catId=E9TX5',
        'info_dict': {
            'id': 'M01232H058PPV00',
            'ext': 'mp4',
            'title': 'YENA (CIRCLE CHART MUSIC AWARDS 2022 RED CARPET)',
            'release_date': '20230218',
            'formats': 'count:5',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # live stream
        'url': 'https://idolplus.com/zu/contents?albumId=M012323174PPV00',
        'info_dict': {
            'id': 'M012323174PPV00',
            'ext': 'mp4',
            'title': 'Hanteo Music Awards 2022 DAY2',
            'release_date': '20230211',
            'formats': 'count:5',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://idolplus.com/zs/concert/M012323039PPV00',
        'info_dict': {
            'id': 'M012323039PPV00',
            'ext': 'mp4',
            'title': 'CIRCLE CHART MUSIC AWARDS 2022',
            'release_date': '20230218',
            'formats': 'count:5',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Build the info dict for the album id matched in *url*."""
        video_id = self._match_id(url)
        view_data = self._download_json(
            'https://idolplus.com/api/zs/viewdata/ruleset/build', video_id,
            headers={'App_type': 'web', 'Country_Code': 'KR'}, query={
                'rulesetId': 'contents',
                'albumId': video_id,
                'distribute': 'PRD',
                'loggedIn': 'false',
                'region': 'zs',
                'countryGroup': '00010',
                'lang': 'en',
                'saId': '999999999998',
            })
        pending = traverse_obj(view_data, ('data', 'viewData', ...))

        # Use the list as a stack: pop entries until one of type 'player'
        # turns up, pushing each entry's nested 'dataList' along the way.
        player_data = {}
        while pending:
            player_data = pending.pop()
            if traverse_obj(player_data, 'type') == 'player':
                break
            if traverse_obj(player_data, ('dataList', ...)):
                pending.extend(player_data['dataList'])

        main_m3u8_url = traverse_obj(player_data, (
            'vodPlayerList', 'vodProfile', 0, 'vodServer', 0, 'video_url', {url_or_none}))
        formats = self._extract_m3u8_formats(main_m3u8_url, video_id)

        subtitles = {}
        captions = traverse_obj(player_data, ('vodPlayerList', 'caption')) or []
        for caption in captions:
            lang = caption.get('lang') or 'und'
            subtitles.setdefault(lang, []).append({
                'url': caption.get('smi_url'),
                'ext': 'vtt',
            })

        # Add member multicams as alternative formats
        has_cuesheet = traverse_obj(player_data, ('detail', 'has_cuesheet')) == 'Y'
        if has_cuesheet and traverse_obj(player_data, ('detail', 'is_omni_member')) == 'Y':
            cuesheet_json = self._download_json(
                'https://idolplus.com/gapi/contents/v1.0/content/cuesheet', video_id,
                'Downloading JSON metadata for member multicams',
                headers={'App_type': 'web', 'Country_Code': 'KR'}, query={
                    'ALBUM_ID': video_id,
                    'COUNTRY_GRP': '00010',
                    'LANG': 'en',
                    'SA_ID': '999999999998',
                    'COUNTRY_CODE': 'KR',
                })
            cuesheet = traverse_obj(cuesheet_json, ('data', 'cuesheet_item', 0))

            for member in traverse_obj(cuesheet, ('members', ...)):
                # 'omni_view_index' is converted to a zero-based list position
                index = try_call(lambda: int(member['omni_view_index']) - 1)
                member_video_url = traverse_obj(
                    cuesheet, ('omni_view', index, 'cdn_url', 0, 'url', {url_or_none}))
                if not member_video_url:
                    continue
                member_formats = self._extract_m3u8_formats(
                    member_video_url, video_id, note=f'Downloading m3u8 for multicam {member["name"]}')
                # Suffix each format id with the member name to keep them distinct
                for member_format in member_formats:
                    member_format['format_id'] = f'{member_format["format_id"]}-{member["name"].replace(" ", "_")}'
                formats.extend(member_formats)

        return {
            'id': video_id,
            'title': traverse_obj(player_data, ('detail', 'albumName')),
            'formats': formats,
            'subtitles': subtitles,
            'release_date': traverse_obj(player_data, ('detail', 'broadcastDate')),
        }
|
||||
@ -1,7 +1,7 @@
|
||||
import base64
|
||||
import urllib.parse
|
||||
|
||||
from .bokecc import BokeCCBaseIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
@ -10,7 +10,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class InfoQIE(BokeCCBaseIE):
|
||||
class InfoQIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?infoq\.com/(?:[^/]+/)+(?P<id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
@ -117,14 +117,10 @@ class InfoQIE(BokeCCBaseIE):
|
||||
video_title = self._html_extract_title(webpage)
|
||||
video_description = self._html_search_meta('description', webpage, 'description')
|
||||
|
||||
if '/cn/' in url:
|
||||
# for China videos, HTTP video URL exists but always fails with 403
|
||||
formats = self._extract_bokecc_formats(webpage, video_id)
|
||||
else:
|
||||
formats = (
|
||||
self._extract_rtmp_video(webpage)
|
||||
+ self._extract_http_video(webpage)
|
||||
+ self._extract_http_audio(webpage, video_id))
|
||||
formats = (
|
||||
self._extract_rtmp_video(webpage)
|
||||
+ self._extract_http_video(webpage)
|
||||
+ self._extract_http_audio(webpage, video_id))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@ -1,58 +0,0 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_qs
|
||||
|
||||
|
||||
class InternetVideoArchiveIE(InfoExtractor):
    _VALID_URL = r'https?://video\.internetvideoarchive\.net/(?:player|flash/players)/.*?\?.*?publishedid.*?'

    _TEST = {
        'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?customerid=69249&publishedid=194487&reporttag=vdbetatitle&playerid=641&autolist=0&domain=www.videodetective.com&maxrate=high&minrate=low&socialplayer=false',
        'info_dict': {
            'id': '194487',
            'ext': 'mp4',
            'title': 'Kick-Ass 2',
            'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    @staticmethod
    def _build_json_url(query):
        """Return the player configuration URL with *query* appended as its query string."""
        return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query

    def _real_extract(self, url):
        """Build the info dict from the 'publishedid' and 'customerid' query parameters of *url*."""
        query = parse_qs(url)
        video_id = query['publishedid'][0]
        data = self._download_json(
            'https://video.internetvideoarchive.net/videojs7/videojs7.ivasettings.ashx',
            video_id, data=json.dumps({
                'customerid': query['customerid'][0],
                'publishedid': video_id,
            }).encode())
        title = data['Title']
        formats = self._extract_m3u8_formats(
            data['VideoUrl'], video_id, 'mp4',
            'm3u8_native', m3u8_id='hls', fatal=False)

        # The HLS extraction above is non-fatal and can return no formats;
        # only derive the sibling manifests when there is a URL to start from.
        file_url = formats[0]['url'] if formats else ''
        if '.ism/' in file_url:
            def replace_url(manifest):
                # Swap everything after '.ism/' (up to the query) for *manifest*
                return re.sub(r'\.ism/[^?]+', '.ism/' + manifest, file_url)

            formats.extend(self._extract_f4m_formats(
                replace_url('.f4m'), video_id, f4m_id='hds', fatal=False))
            formats.extend(self._extract_mpd_formats(
                replace_url('.mpd'), video_id, mpd_id='dash', fatal=False))
            formats.extend(self._extract_ism_formats(
                replace_url('Manifest'), video_id, ism_id='mss', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': data.get('PosterUrl'),
            'description': data.get('Description'),
        }
|
||||
@ -1,111 +0,0 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class IzleseneIE(InfoExtractor):
    _VALID_URL = r'''(?x)
        https?://(?:(?:www|m)\.)?izlesene\.com/
        (?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+)
        '''
    _TESTS = [
        {
            'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694',
            'md5': '4384f9f0ea65086734b881085ee05ac2',
            'info_dict': {
                'id': '7599694',
                'ext': 'mp4',
                'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
                'description': 'md5:253753e2655dde93f59f74b572454f6d',
                'thumbnail': r're:^https?://.*\.jpg',
                'uploader_id': 'pelikzzle',
                'timestamp': int,
                'upload_date': '20140702',
                'duration': 95.395,
                'age_limit': 0,
            },
        },
        {
            'url': 'http://www.izlesene.com/video/tarkan-dortmund-2006-konseri/17997',
            'md5': '97f09b6872bffa284cb7fa4f6910cb72',
            'info_dict': {
                'id': '17997',
                'ext': 'mp4',
                'title': 'Tarkan Dortmund 2006 Konseri',
                'thumbnail': r're:^https://.*\.jpg',
                'uploader_id': 'parlayankiz',
                'timestamp': int,
                'upload_date': '20061112',
                'duration': 253.666,
                'age_limit': 0,
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dict from the `videoObj` JSON embedded in the video page."""
        video_id = self._match_id(url)

        # Always fetch the canonical /video/ page, whichever URL form matched
        webpage = self._download_webpage(f'http://www.izlesene.com/video/{video_id}', video_id)

        video = self._parse_json(
            self._search_regex(
                r'videoObj\s*=\s*({.+?})\s*;\s*\n', webpage, 'streams'),
            video_id)

        title = video.get('videoTitle') or self._og_search_title(webpage)

        formats = []
        for stream in video['media']['level']:
            source_url = stream.get('source')
            if not source_url or not isinstance(source_url, str):
                continue
            stream_url = urllib.parse.unquote(source_url)
            quality = stream.get('value')
            formats.append({
                'format_id': f'{quality}p' if quality else 'sd',
                'url': stream_url,
                # Take the extension from the stream itself; the page URL
                # carries no file extension, so it can only yield the default.
                'ext': determine_ext(stream_url, 'mp4'),
                'height': int_or_none(quality),
            })

        description = self._og_search_description(webpage, default=None)
        thumbnail = video.get('posterURL') or self._proto_relative_url(
            self._og_search_thumbnail(webpage), scheme='http:')

        uploader = self._html_search_regex(
            r"adduserUsername\s*=\s*'([^']+)';",
            webpage, 'uploader', fatal=False)
        timestamp = parse_iso8601(self._html_search_meta(
            'uploadDate', webpage, 'upload date'))

        # The value is divided by 1000 (scale) before being reported
        duration = float_or_none(video.get('duration') or self._html_search_regex(
            r'videoduration["\']?\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
            webpage, 'duration', fatal=False, group='value'), scale=1000)

        view_count = str_to_int(get_element_by_id('videoViewCount', webpage))
        comment_count = self._html_search_regex(
            r'comment_count\s*=\s*\'([^\']+)\';',
            webpage, 'comment_count', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader_id': uploader,
            'timestamp': timestamp,
            'duration': duration,
            'view_count': int_or_none(view_count),
            'comment_count': int_or_none(comment_count),
            'age_limit': self._family_friendly_search(webpage),
            'formats': formats,
        }
|
||||
@ -1,206 +0,0 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class KinjaEmbedIE(InfoExtractor):
    IE_NAME = 'kinja:embed'
    # Both fragments below are only ever used inside (?x) patterns, so the
    # whitespace and newlines they contain are not significant.
    _DOMAIN_REGEX = r'''(?:[^.]+\.)?
        (?:
            avclub|
            clickhole|
            deadspin|
            gizmodo|
            jalopnik|
            jezebel|
            kinja|
            kotaku|
            lifehacker|
            splinternews|
            the(?:inventory|onion|root|takeout)
        )\.com'''
    _COMMON_REGEX = r'''/
        (?:
            ajax/inset|
            embed/video
        )/iframe\?.*?\bid='''
    _VALID_URL = rf'''(?x)https?://{_DOMAIN_REGEX}{_COMMON_REGEX}
        (?P<type>
            fb|
            imgur|
            instagram|
            jwp(?:layer)?-video|
            kinjavideo|
            mcp|
            megaphone|
            soundcloud(?:-playlist)?|
            tumblr-post|
            twitch-stream|
            twitter|
            ustream-channel|
            vimeo|
            vine|
            youtube-(?:list|video)
        )-(?P<id>[^&]+)'''
    _EMBED_REGEX = [rf'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//{_DOMAIN_REGEX})?{_COMMON_REGEX}(?:(?!\1).)+)\1']
    _TESTS = [{
        'url': 'https://kinja.com/ajax/inset/iframe?id=fb-10103303356633621',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=kinjavideo-100313',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=megaphone-PPY1300931075',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-128574047',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-playlist-317413750',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=tumblr-post-160130699814-daydreams-at-midnight',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=twitch-stream-libratus_extra',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=twitter-1068875942473404422',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=ustream-channel-10414700',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=vimeo-120153502',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=vine-5BlvV5qqPrD',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-list-BCQ3KyrPjgA/PLE6509247C270A72E',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-video-00QyL0AgPAE',
        'only_matching': True,
    }]
    _WEBPAGE_TESTS = [{
        'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
        'info_dict': {
            'id': '106351',
            'ext': 'mp4',
            'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
        },
        'skip': 'Invalid URL',
    }]
    _JWPLATFORM_PROVIDER = ('cdn.jwplayer.com/v2/media/', 'JWPlatform')
    # Embed type -> (scheme-less URL prefix or %-template, extractor key)
    _PROVIDER_MAP = {
        'fb': ('facebook.com/video.php?v=', 'Facebook'),
        'imgur': ('imgur.com/', 'Imgur'),
        'instagram': ('instagram.com/p/', 'Instagram'),
        'jwplayer-video': _JWPLATFORM_PROVIDER,
        'jwp-video': _JWPLATFORM_PROVIDER,
        'megaphone': ('player.megaphone.fm/', 'Generic'),
        'soundcloud': ('api.soundcloud.com/tracks/', 'Soundcloud'),
        'soundcloud-playlist': ('api.soundcloud.com/playlists/', 'SoundcloudPlaylist'),
        'tumblr-post': ('%s.tumblr.com/post/%s', 'Tumblr'),
        'twitch-stream': ('twitch.tv/', 'TwitchStream'),
        'twitter': ('twitter.com/i/cards/tfw/v1/', 'TwitterCard'),
        'ustream-channel': ('ustream.tv/embed/', 'Ustream'),
        'vimeo': ('vimeo.com/', 'Vimeo'),
        'vine': ('vine.co/v/', 'Vine'),
        'youtube-list': ('youtube.com/embed/%s?list=%s', 'YoutubePlaylist'),
        'youtube-video': ('youtube.com/embed/', 'Youtube'),
    }

    def _real_extract(self, url):
        """Resolve a Kinja iframe embed.

        Embed types listed in _PROVIDER_MAP are delegated to the mapped
        extractor via url_result; 'kinjavideo' is served by the Kinja API and
        every remaining type by the Univision metadata API.
        """
        embed_type, video_id = self._match_valid_url(url).groups()

        provider = self._PROVIDER_MAP.get(embed_type)
        if provider:
            url_template, ie_key = provider
            video_id = urllib.parse.unquote(video_id)
            if embed_type == 'tumblr-post':
                # The id has the form '<post id>-<blog name>'
                video_id, blog = video_id.split('-', 1)
                target = url_template % (blog, video_id)
            elif embed_type == 'youtube-list':
                # The id has the form '<video id>/<playlist id>'
                video_id, playlist_id = video_id.split('/')
                target = url_template % (video_id, playlist_id)
            else:
                target = url_template + video_id
            return self.url_result('http://' + target, ie_key)

        if embed_type == 'kinjavideo':
            data = self._download_json(
                'https://kinja.com/api/core/video/views/videoById',
                video_id, query={'videoId': video_id})['data']
            title = data['title']

            formats = []
            for url_key in ('signedPlaylistUrl', 'streamingUrl'):
                m3u8_url = data.get(url_key)
                if not m3u8_url:
                    continue
                formats.extend(self._extract_m3u8_formats(
                    m3u8_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))

            poster = data.get('poster') or {}
            poster_id = poster.get('id')
            thumbnail = None
            if poster_id:
                poster_ext = poster.get('format') or 'jpg'
                thumbnail = f'https://i.kinja-img.com/gawker-media/image/upload/{poster_id}.{poster_ext}'

            return {
                'id': video_id,
                'title': title,
                'description': strip_or_none(data.get('description')),
                'formats': formats,
                'tags': data.get('tags'),
                # Divided by 1000 (the scale argument) before being reported
                'timestamp': int_or_none(try_get(
                    data, lambda x: x['postInfo']['publishTimeMillis']), 1000),
                'thumbnail': thumbnail,
                'uploader': data.get('network'),
            }

        video_data = self._download_json(
            'https://api.vmh.univision.com/metadata/v1/content/' + video_id,
            video_id)['videoMetadata']
        iptc = video_data['photoVideoMetadataIPTC']
        title = iptc['title']['en']
        fmg = video_data.get('photoVideoMetadata_fmg') or {}
        tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com'
        data = self._download_json(
            tvss_domain + '/api/v3/video-auth/url-signature-tokens',
            video_id, query={'mcpids': video_id})['data'][0]

        formats = []
        rendition_url = data.get('renditionUrl')
        if rendition_url:
            formats = self._extract_m3u8_formats(
                rendition_url, video_id, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False)

        fallback_rendition_url = data.get('fallbackRenditionUrl')
        if fallback_rendition_url:
            # The bitrate is read from a '_<digits>.mp4' suffix when present
            fallback_tbr = int_or_none(self._search_regex(
                r'_(\d+)\.mp4', fallback_rendition_url,
                'bitrate', default=None))
            formats.append({
                'format_id': 'fallback',
                'tbr': fallback_tbr,
                'url': fallback_rendition_url,
            })

        return {
            'id': video_id,
            'title': title,
            'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], str),
            'uploader': fmg.get('network'),
            'duration': int_or_none(iptc.get('fileDuration')),
            'formats': formats,
            'description': try_get(iptc, lambda x: x['description']['en'], str),
            'timestamp': parse_iso8601(iptc.get('dateReleased')),
        }
|
||||
@ -1,115 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class KooIE(InfoExtractor):
    _WORKING = False
    _VALID_URL = r'https?://(?:www\.)?kooapp\.com/koo/[^/]+/(?P<id>[^/&#$?]+)'
    _TESTS = [{  # Test for video in the comments
        'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/946c4189-bc2d-4524-b95b-43f641e2adde',
        'info_dict': {
            'id': '946c4189-bc2d-4524-b95b-43f641e2adde',
            'ext': 'mp4',
            'title': 'test for video in comment',
            'description': 'md5:daa77dc214add4da8b6ea7d2226776e7',
            'timestamp': 1632215195,
            'uploader_id': 'ytdlpTestAccount',
            'uploader': 'yt-dlpTestAccount',
            'duration': 7000,
            'upload_date': '20210921',
        },
        'params': {'skip_download': True},
    }, {  # Test for koo with long title
        'url': 'https://www.kooapp.com/koo/laxman_kumarDBFEC/33decbf7-5e1e-4bb8-bfd7-04744a064361',
        'info_dict': {
            'id': '33decbf7-5e1e-4bb8-bfd7-04744a064361',
            'ext': 'mp4',
            'title': 'md5:47a71c2337295330c5a19a8af1bbf450',
            'description': 'md5:06a6a84e9321499486dab541693d8425',
            'timestamp': 1632106884,
            'uploader_id': 'laxman_kumarDBFEC',
            'uploader': 'Laxman Kumar 🇮🇳',
            'duration': 46000,
            'upload_date': '20210920',
        },
        'params': {'skip_download': True},
    }, {  # Test for audio
        'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/a2a9c88e-ce4b-4d2d-952f-d06361c5b602',
        'info_dict': {
            'id': 'a2a9c88e-ce4b-4d2d-952f-d06361c5b602',
            'ext': 'mp4',
            'title': 'Test for audio',
            'description': 'md5:ecb9a2b6a5d34b736cecb53788cb11e8',
            'timestamp': 1632211634,
            'uploader_id': 'ytdlpTestAccount',
            'uploader': 'yt-dlpTestAccount',
            'duration': 214000,
            'upload_date': '20210921',
        },
        'params': {'skip_download': True},
    }, {  # Test for video
        'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/a3e56c53-c1ed-4ac9-ac02-ed1630e6b1d1',
        'info_dict': {
            'id': 'a3e56c53-c1ed-4ac9-ac02-ed1630e6b1d1',
            'ext': 'mp4',
            'title': 'Test for video',
            'description': 'md5:7afc4eb839074ddeb2beea5dd6fe9500',
            'timestamp': 1632211468,
            'uploader_id': 'ytdlpTestAccount',
            'uploader': 'yt-dlpTestAccount',
            'duration': 14000,
            'upload_date': '20210921',
        },
        'params': {'skip_download': True},
    }, {  # Test for link
        'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/01bf5b94-81a5-4d8e-a387-5f732022e15a',
        'skip': 'No video/audio found at the provided url.',
        'info_dict': {
            'id': '01bf5b94-81a5-4d8e-a387-5f732022e15a',
            'title': 'Test for link',
            'ext': 'none',
        },
    }, {  # Test for images
        'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/dc05d9cd-a61d-45fd-bb07-e8019d8ca8cb',
        'skip': 'No video/audio found at the provided url.',
        'info_dict': {
            'id': 'dc05d9cd-a61d-45fd-bb07-e8019d8ca8cb',
            'title': 'Test for images',
            'ext': 'none',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the koo whose id is matched in *url*."""
        video_id = self._match_id(url)
        parent_content = self._download_json(
            f'https://www.kooapp.com/apiV1/ku/{video_id}?limit=20&offset=0&showSimilarKoos=true', video_id)['parentContent']

        def first_item_id(content):
            return try_get(content, lambda x: x['items'][0]['id'])

        # Pick the first entry whose leading item carries the requested id
        item_json = next(
            content['items'][0] for content in parent_content
            if first_item_id(content) == video_id)
        media_json = item_json['mediaMap']

        formats = []
        mp4_url = media_json.get('videoMp4')
        if mp4_url:
            formats.append({
                'url': mp4_url,
                'ext': 'mp4',
            })
        hls_url = media_json.get('videoHls')
        if hls_url:
            formats.extend(self._extract_m3u8_formats(hls_url, video_id, fatal=False, ext='mp4'))
        if not formats:
            self.raise_no_formats('No video/audio found at the provided url.', expected=True)

        title = clean_html(item_json.get('title'))
        transliteration = clean_html(item_json.get('enTransliteration'))

        return {
            'id': video_id,
            'title': title,
            'description': f'{title}\n\n{transliteration}',
            'timestamp': item_json.get('createdAt'),
            'uploader_id': item_json.get('handle'),
            'uploader': item_json.get('name'),
            'duration': media_json.get('duration'),
            'formats': formats,
        }
|
||||
@ -1,9 +1,6 @@
|
||||
import base64
|
||||
import datetime as dt
|
||||
import hashlib
|
||||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_ord
|
||||
@ -14,8 +11,6 @@ from ..utils import (
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
url_basename,
|
||||
urshift,
|
||||
)
|
||||
|
||||
@ -248,114 +243,3 @@ class LePlaylistIE(InfoExtractor):
|
||||
|
||||
return self.playlist_result(entries, playlist_id, playlist_title=title,
|
||||
playlist_description=description)
|
||||
|
||||
|
||||
class LetvCloudIE(InfoExtractor):
|
||||
# Most of *.letv.com is changed to *.le.com on 2016/01/02
|
||||
# but yuntv.letv.com is kept, so also keep the extractor name
|
||||
IE_DESC = '乐视云'
|
||||
_VALID_URL = r'https?://yuntv\.letv\.com/bcloud.html\?.+'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=467623dedf',
|
||||
'md5': '26450599afd64c513bc77030ad15db44',
|
||||
'info_dict': {
|
||||
'id': 'p7jnfw5hw9_467623dedf',
|
||||
'ext': 'mp4',
|
||||
'title': 'Video p7jnfw5hw9_467623dedf',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=ec93197892&pu=2c7cd40209&auto_play=1&gpcflag=1&width=640&height=360',
|
||||
'md5': 'e03d9cc8d9c13191e1caf277e42dbd31',
|
||||
'info_dict': {
|
||||
'id': 'p7jnfw5hw9_ec93197892',
|
||||
'ext': 'mp4',
|
||||
'title': 'Video p7jnfw5hw9_ec93197892',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=187060b6fd',
|
||||
'md5': 'cb988699a776b22d4a41b9d43acfb3ac',
|
||||
'info_dict': {
|
||||
'id': 'p7jnfw5hw9_187060b6fd',
|
||||
'ext': 'mp4',
|
||||
'title': 'Video p7jnfw5hw9_187060b6fd',
|
||||
},
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def sign_data(obj):
|
||||
if obj['cf'] == 'flash':
|
||||
salt = '2f9d6924b33a165a6d8b5d3d42f4f987'
|
||||
items = ['cf', 'format', 'ran', 'uu', 'ver', 'vu']
|
||||
elif obj['cf'] == 'html5':
|
||||
salt = 'fbeh5player12c43eccf2bec3300344'
|
||||
items = ['cf', 'ran', 'uu', 'bver', 'vu']
|
||||
input_data = ''.join([item + obj[item] for item in items]) + salt
|
||||
obj['sign'] = hashlib.md5(input_data.encode()).hexdigest()
|
||||
|
||||
def _get_formats(self, cf, uu, vu, media_id):
|
||||
def get_play_json(cf, timestamp):
|
||||
data = {
|
||||
'cf': cf,
|
||||
'ver': '2.2',
|
||||
'bver': 'firefox44.0',
|
||||
'format': 'json',
|
||||
'uu': uu,
|
||||
'vu': vu,
|
||||
'ran': str(timestamp),
|
||||
}
|
||||
self.sign_data(data)
|
||||
return self._download_json(
|
||||
'http://api.letvcloud.com/gpc.php?' + urllib.parse.urlencode(data),
|
||||
media_id, f'Downloading playJson data for type {cf}')
|
||||
|
||||
play_json = get_play_json(cf, time.time())
|
||||
# The server time may be different from local time
|
||||
if play_json.get('code') == 10071:
|
||||
play_json = get_play_json(cf, play_json['timestamp'])
|
||||
|
||||
if not play_json.get('data'):
|
||||
if play_json.get('message'):
|
||||
raise ExtractorError('Letv cloud said: {}'.format(play_json['message']), expected=True)
|
||||
elif play_json.get('code'):
|
||||
raise ExtractorError('Letv cloud returned error %d' % play_json['code'], expected=True)
|
||||
else:
|
||||
raise ExtractorError('Letv cloud returned an unknown error')
|
||||
|
||||
def b64decode(s):
|
||||
return base64.b64decode(s).decode('utf-8')
|
||||
|
||||
formats = []
|
||||
for media in play_json['data']['video_info']['media'].values():
|
||||
play_url = media['play_url']
|
||||
url = b64decode(play_url['main_url'])
|
||||
decoded_url = b64decode(url_basename(url))
|
||||
formats.append({
|
||||
'url': url,
|
||||
'ext': determine_ext(decoded_url),
|
||||
'format_id': str_or_none(play_url.get('vtype')),
|
||||
'format_note': str_or_none(play_url.get('definition')),
|
||||
'width': int_or_none(play_url.get('vwidth')),
|
||||
'height': int_or_none(play_url.get('vheight')),
|
||||
})
|
||||
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
    """Extract a Letv cloud video identified by the ``uu`` and ``vu`` URL values.

    Raises ExtractorError (expected) when either value is missing from the
    URL. Formats of the 'flash' and 'html5' flavours are concatenated, in
    that order.
    """
    found = [re.search(pattern, url) for pattern in (r'uu=([\w]+)', r'vu=([\w]+)')]
    if not all(found):
        raise ExtractorError(f'Invalid URL: {url}', expected=True)

    uu, vu = (m.group(1) for m in found)
    media_id = f'{uu}_{vu}'

    formats = []
    for flavour in ('flash', 'html5'):
        formats.extend(self._get_formats(flavour, uu, vu, media_id))

    return {
        'id': media_id,
        'title': f'Video {media_id}',
        'formats': formats,
    }
|
||||
|
||||
@ -1,386 +0,0 @@
|
||||
import itertools
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
find_xpath_attr,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
xpath_attr,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
|
||||
class LivestreamIE(InfoExtractor):
|
||||
IE_NAME = 'livestream'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:new\.)?livestream\.com/
|
||||
(?:accounts/(?P<account_id>\d+)|(?P<account_name>[^/]+))
|
||||
(?:/events/(?P<event_id>\d+)|/(?P<event_name>[^/]+))?
|
||||
(?:/videos/(?P<id>\d+))?
|
||||
'''
|
||||
_EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
|
||||
'md5': '7876c5f5dc3e711b6b73acce4aac1527',
|
||||
'info_dict': {
|
||||
'id': '4719370',
|
||||
'ext': 'mp4',
|
||||
'title': 'Live from Webster Hall NYC',
|
||||
'timestamp': 1350008072,
|
||||
'upload_date': '20121012',
|
||||
'duration': 5968.0,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': r're:^http://.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://livestream.com/coheedandcambria/websterhall',
|
||||
'info_dict': {
|
||||
'id': '1585861',
|
||||
'title': 'Live From Webster Hall',
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
}, {
|
||||
'url': 'https://livestream.com/dayananda/events/7954027',
|
||||
'info_dict': {
|
||||
'title': 'Live from Mevo',
|
||||
'id': '7954027',
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}, {
|
||||
'url': 'https://livestream.com/accounts/82',
|
||||
'info_dict': {
|
||||
'id': '253978',
|
||||
'view_count': int,
|
||||
'title': 'trsr',
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'upload_date': '20120306',
|
||||
'timestamp': 1331042383,
|
||||
'thumbnail': 'http://img.new.livestream.com/videos/0000000000000372/cacbeed6-fb68-4b5e-ad9c-e148124e68a9_640x427.jpg',
|
||||
'duration': 15.332,
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://livestream.com/bsww/concacafbeachsoccercampeonato2015',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_URL_TEMPLATE = 'http://livestream.com/api/accounts/%s/events/%s'
|
||||
|
||||
def _parse_smil_formats_and_subtitles(
        self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
    """Build FLV format dicts from the ``<video>`` nodes of a SMIL document.

    The base URL is read from the ``<meta name="httpBase">`` element and
    falls back to the Akamai VOD host when that element is absent. Only
    ``smil`` and ``namespace`` are used; the remaining parameters are
    accepted for signature compatibility and ignored here.

    Returns a ``(formats, subtitles)`` tuple; ``subtitles`` is always ``{}``.
    """
    base_ele = find_xpath_attr(
        smil, self._xpath_ns('.//meta', namespace), 'name', 'httpBase')
    base = base_ele.get('content') if base_ele is not None else 'http://livestreamvod-f.akamaihd.net/'

    formats = []
    for vn in smil.findall(self._xpath_ns('.//video', namespace)):
        tbr = int_or_none(vn.attrib.get('system-bitrate'), 1000)
        furl = update_url_query(urllib.parse.urljoin(base, vn.attrib['src']), {
            'v': '3.0.3',
            'fp': 'WIN% 14,0,0,145',
        })
        if 'clipBegin' in vn.attrib:
            furl += '&ssek=' + vn.attrib['clipBegin']
        formats.append({
            'url': furl,
            # '%d' % None raises TypeError, so a node without a usable
            # bitrate gets the bare 'smil' id instead of crashing.
            'format_id': 'smil_%d' % tbr if tbr is not None else 'smil',
            'ext': 'flv',
            'tbr': tbr,
            'preference': -1000,  # Strictly inferior than all other formats?
        })
    return formats, {}
|
||||
|
||||
def _extract_video_info(self, video_data):
    """Convert one video object of the Livestream API into an info dict.

    Formats are gathered, in order, from the progressive URLs, the SMIL
    manifest, the HLS manifest and the HDS manifest. Manifest failures are
    non-fatal.
    """
    video_id = str(video_data['id'])

    formats = []
    for format_id, url_key in (('sd', 'progressive_url'), ('hd', 'progressive_url_hd')):
        progressive_url = video_data.get(url_key)
        if not progressive_url:
            continue
        ext = determine_ext(progressive_url)
        if ext == 'm3u8':
            # HLS manifests are handled through 'm3u8_url' below
            continue
        # The number right before the extension is taken as the bitrate
        bitrate = int_or_none(self._search_regex(
            rf'(\d+)\.{ext}', progressive_url, 'bitrate', default=None))
        formats.append({
            'url': progressive_url,
            'format_id': format_id,
            'tbr': bitrate,
            'ext': ext,
        })

    smil_url = video_data.get('smil_url')
    if smil_url:
        formats.extend(self._extract_smil_formats(smil_url, video_id, fatal=False))

    hls_url = video_data.get('m3u8_url')
    if hls_url:
        formats.extend(self._extract_m3u8_formats(
            hls_url, video_id, 'mp4', 'm3u8_native',
            m3u8_id='hls', fatal=False))

    hds_url = video_data.get('f4m_url')
    if hds_url:
        formats.extend(self._extract_f4m_formats(
            hds_url, video_id, f4m_id='hds', fatal=False))

    comments = []
    for item in video_data.get('comments', {}).get('data', []):
        comments.append({
            'author_id': item.get('author_id'),
            'author': item.get('author', {}).get('full_name'),
            'id': item.get('id'),
            'text': item['text'],
            'timestamp': parse_iso8601(item.get('created_at')),
        })

    return {
        'id': video_id,
        'formats': formats,
        'title': video_data['caption'],
        'description': video_data.get('description'),
        'thumbnail': video_data.get('thumbnail_url'),
        # divided by 1000 to get seconds
        'duration': float_or_none(video_data.get('duration'), 1000),
        'timestamp': parse_iso8601(video_data.get('publish_at')),
        'like_count': video_data.get('likes', {}).get('total'),
        'comment_count': video_data.get('comments', {}).get('total'),
        'view_count': video_data.get('views'),
        'comments': comments,
    }
|
||||
|
||||
def _extract_stream_info(self, stream_info):
    """Build an info dict for a broadcast from its ``stream_info`` object.

    Formats come from the SMIL ``play_url``, the HLS ``m3u8_url``
    (non-fatal) and the ``rtsp_url``, in that order.
    """
    broadcast_id = str(stream_info['broadcast_id'])
    is_live = stream_info.get('is_live')

    formats = []
    if (play_url := stream_info.get('play_url')):
        formats.extend(self._extract_smil_formats(play_url, broadcast_id))
    if (hls_url := stream_info.get('m3u8_url')):
        formats.extend(self._extract_m3u8_formats(
            hls_url, broadcast_id, 'mp4', 'm3u8_native',
            m3u8_id='hls', fatal=False))
    if (rtsp_url := stream_info.get('rtsp_url')):
        formats.append({
            'url': rtsp_url,
            'format_id': 'rtsp',
        })

    return {
        'id': broadcast_id,
        'formats': formats,
        'title': stream_info['stream_title'],
        'thumbnail': stream_info.get('thumbnail_url'),
        'is_live': is_live,
    }
|
||||
|
||||
def _generate_event_playlist(self, event_data):
    """Yield the playlist entries of one event.

    When the event carries ``stream_info`` the single entry is the info dict
    built by ``_extract_stream_info``. Otherwise the event feed is paged
    through and one URL result is yielded per video item.
    """
    event_id = str(event_data['id'])
    account_id = str(event_data['owner_account_id'])
    feed_root_url = self._API_URL_TEMPLATE % (account_id, event_id) + '/feed.json'

    stream_info = event_data.get('stream_info')
    if stream_info:
        # This function is a generator: a plain `return value` would only end
        # the iteration and silently drop the stream, so it must be yielded.
        yield self._extract_stream_info(stream_info)
        return

    last_video = None
    for page_num in itertools.count(1):
        if last_video is None:
            info_url = feed_root_url
        else:
            # Continue paging from the last video seen on the previous page
            info_url = f'{feed_root_url}?&id={last_video}&newer=-1&type=video'
        feed_items = self._download_json(
            info_url, event_id, f'Downloading page {page_num}')['data']
        videos_info = [item['data'] for item in feed_items if item['type'] == 'video']
        if not videos_info:
            break
        for video in videos_info:
            v_id = str(video['id'])
            yield self.url_result(
                f'http://livestream.com/accounts/{account_id}/events/{event_id}/videos/{v_id}',
                LivestreamIE, v_id, video.get('caption'))
        last_video = videos_info[-1]['id']
|
||||
|
||||
def _real_extract(self, url):
    """Dispatch a livestream.com URL to video, event or account extraction.

    A URL with a video id yields a single video; one with only an event
    yields that event's playlist; anything else is treated as an account and
    yields the entries of its upcoming and past events.
    """
    groups = self._match_valid_url(url).groupdict()
    video_id = groups['id']
    event = groups['event_id'] or groups['event_name']
    account = groups['account_id'] or groups['account_name']
    api_url = f'http://livestream.com/api/accounts/{account}'

    if video_id:
        return self._extract_video_info(self._download_json(
            f'{api_url}/events/{event}/videos/{video_id}', video_id))

    if event:
        event_data = self._download_json(f'{api_url}/events/{event}', None)
        return self.playlist_result(
            self._generate_event_playlist(event_data), str(event_data['id']), event_data['full_name'])

    account_data = self._download_json(api_url, None)
    events = traverse_obj(account_data, (('upcoming_events', 'past_events'), 'data', ...))
    entries = itertools.chain.from_iterable(map(self._generate_event_playlist, events))
    return self.playlist_result(
        entries, account_data.get('id'), account_data.get('full_name'))
|
||||
|
||||
|
||||
# The original version of Livestream uses a different system
|
||||
class LivestreamOriginalIE(InfoExtractor):
|
||||
IE_NAME = 'livestream:original'
|
||||
_VALID_URL = r'''(?x)https?://original\.livestream\.com/
|
||||
(?P<user>[^/\?#]+)(?:/(?P<type>video|folder)
|
||||
(?:(?:\?.*?Id=|/)(?P<id>.*?)(&|$))?)?
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://original.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
|
||||
'info_dict': {
|
||||
'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
|
||||
'ext': 'mp4',
|
||||
'title': 'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
|
||||
'duration': 771.301,
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://original.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3',
|
||||
'info_dict': {
|
||||
'id': 'a07bf706-d0e4-4e75-a747-b021d84f2fd3',
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}, {
|
||||
# live stream
|
||||
'url': 'http://original.livestream.com/znsbahamas',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_video_info(self, user, video_id):
    """Fetch the clip details XML of a clip and return its metadata.

    The result has ``id``, ``title``, ``thumbnail``, ``duration`` and
    ``view_count``; it contains no formats.
    """
    info = self._download_xml(
        f'http://x{user}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={video_id}',
        video_id)
    item = info.find('channel').find('item')

    media_ns = {'media': 'http://search.yahoo.com/mrss'}
    ls_ns = {'ls': 'http://api.channel.livestream.com/2.0'}

    return {
        'id': video_id,
        'title': xpath_text(item, 'title'),
        'thumbnail': xpath_attr(
            item, xpath_with_ns('media:thumbnail', media_ns), 'url'),
        'duration': float_or_none(xpath_attr(
            item, xpath_with_ns('media:content', media_ns), 'duration')),
        'view_count': int_or_none(xpath_text(
            item, xpath_with_ns('ls:viewsCount', ls_ns))),
    }
|
||||
|
||||
def _extract_video_formats(self, video_data, video_id):
    """Collect formats from the getstream JSON.

    Looks at ``progressiveUrl`` (plain HTTP), ``httpUrl`` (HLS manifest,
    non-fatal) and ``rtspUrl``, in that order.
    """
    formats = []

    if (http_url := video_data.get('progressiveUrl')):
        formats.append({
            'url': http_url,
            'format_id': 'http',
        })

    if (hls_url := video_data.get('httpUrl')):
        formats.extend(self._extract_m3u8_formats(
            hls_url, video_id, 'mp4', 'm3u8_native',
            m3u8_id='hls', fatal=False))

    if (rtsp_url := video_data.get('rtspUrl')):
        formats.append({
            'url': rtsp_url,
            'format_id': 'rtsp',
        })

    return formats
|
||||
|
||||
def _extract_folder(self, url, folder_id):
    """Return a playlist of the links found on a folder page.

    Collected links are the hrefs of ``<li class="folder">`` anchors and of
    anchors pointing at the livestre.am shortener, de-duplicated in page
    order and resolved against ``url``.
    """
    webpage = self._download_webpage(url, folder_id)
    hrefs = re.findall(
        r'''(?x)(?:
            <li\s+class="folder">\s*<a\s+href="|
            <a\s+href="(?=https?://livestre\.am/)
        )([^"]+)"''', webpage)

    entries = []
    for href in orderedSet(hrefs):
        entries.append({
            '_type': 'url',
            'url': urllib.parse.urljoin(url, href),
        })

    return self.playlist_result(entries, folder_id)
|
||||
|
||||
def _real_extract(self, url):
    """Extract a clip, a folder or a channel stream from original.livestream.com.

    Folder URLs are delegated to ``_extract_folder``. For everything else the
    getstream JSON is downloaded; metadata comes from the clip details when
    the URL carries a content id and from the channel web page otherwise.
    """
    mobj = self._match_valid_url(url)
    user, url_type, content_id = mobj.group('user'), mobj.group('type'), mobj.group('id')

    if url_type == 'folder':
        return self._extract_folder(url, content_id)

    # this url is used on mobile devices
    stream_url = f'http://x{user}x.api.channel.livestream.com/3.0/getstream.json'
    if content_id:
        stream_url += f'?id={content_id}'
        info = self._extract_video_info(user, content_id)
    else:
        # No clip id: the user name doubles as the id of the channel stream
        content_id = user
        webpage = self._download_webpage(url, content_id)
        info = {
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'thumbnail': self._search_regex(r'channelLogo\.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None),
        }

    video_data = self._download_json(stream_url, content_id)
    is_live = video_data.get('isLive')
    info.update({
        'id': content_id,
        'title': info['title'],
        'formats': self._extract_video_formats(video_data, content_id),
        'is_live': is_live,
    })
    return info
|
||||
|
||||
|
||||
# The server doesn't support HEAD requests, so the generic extractor
# can't detect the redirection
|
||||
class LivestreamShortenerIE(InfoExtractor):
|
||||
IE_NAME = 'livestream:shortener'
|
||||
IE_DESC = False # Do not list
|
||||
_VALID_URL = r'https?://livestre\.am/(?P<id>.+)'
|
||||
|
||||
def _real_extract(self, url):
    """Resolve a livestre.am short link to the URL advertised by its page."""
    short_id = self._match_id(url)
    page = self._download_webpage(url, short_id)
    # The target is taken from the page's Open Graph URL
    target_url = self._og_search_url(page)
    return self.url_result(target_url)
|
||||
@ -1,325 +0,0 @@
|
||||
import itertools
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class LyndaBaseIE(InfoExtractor):
|
||||
_SIGNIN_URL = 'https://www.lynda.com/signin/lynda'
|
||||
_PASSWORD_URL = 'https://www.lynda.com/signin/password'
|
||||
_USER_URL = 'https://www.lynda.com/signin/user'
|
||||
_ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
|
||||
_NETRC_MACHINE = 'lynda'
|
||||
|
||||
@staticmethod
|
||||
def _check_error(json_string, key_or_keys):
|
||||
keys = [key_or_keys] if isinstance(key_or_keys, str) else key_or_keys
|
||||
for key in keys:
|
||||
error = json_string.get(key)
|
||||
if error:
|
||||
raise ExtractorError(f'Unable to login: {error}', expected=True)
|
||||
|
||||
def _perform_login_step(self, form_html, fallback_action_url, extra_form_data, note, referrer_url):
    """Submit one form of the sign-in flow and return ``(response, action_url)``.

    The POST target is the form's ``action`` attribute, or
    ``fallback_action_url`` when the form has none; relative targets are
    resolved against the sign-in URL. The hidden inputs of the form are sent
    together with ``extra_form_data``, which takes precedence.
    """
    post_url = self._search_regex(
        r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_html,
        'post url', default=fallback_action_url, group='url')
    if not post_url.startswith('http'):
        post_url = urllib.parse.urljoin(self._SIGNIN_URL, post_url)

    payload = self._hidden_inputs(form_html)
    payload.update(extra_form_data)

    request_headers = {
        'Referer': referrer_url,
        'X-Requested-With': 'XMLHttpRequest',
    }
    # 418 and 500 are accepted so that the JSON body can still be inspected
    # for error fields below
    response = self._download_json(
        post_url, None, note,
        data=urlencode_postdata(payload),
        headers=request_headers, expected_status=(418, 500))

    self._check_error(response, ('email', 'password', 'ErrorMessage'))

    return response, post_url
|
||||
|
||||
def _perform_login(self, username, password):
    """Log in to lynda.com with the two-step (email, then password) form flow.

    Returns without submitting anything when the sign-in page already shows a
    logged-in session.
    """
    # Step 1: download signin page
    signin_page = self._download_webpage(
        self._SIGNIN_URL, None, 'Downloading signin page')

    # Already logged in
    if any(re.search(p, signin_page) for p in (
            r'isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')):
        return

    # Step 2: submit email
    signin_form = self._search_regex(
        r'(?s)(<form[^>]+data-form-name=["\']signin["\'][^>]*>.+?</form>)',
        signin_page, 'signin form')
    # The helper is defined as _perform_login_step; the former call to
    # self._login_step referenced a method that does not exist.
    signin_page, signin_url = self._perform_login_step(
        signin_form, self._PASSWORD_URL, {'email': username},
        'Submitting email', self._SIGNIN_URL)

    # Step 3: submit password
    # The JSON response of step 2 carries the password form under 'body'
    password_form = signin_page['body']
    self._perform_login_step(
        password_form, self._USER_URL, {'email': username, 'password': password},
        'Submitting password', signin_url)
|
||||
|
||||
|
||||
class LyndaIE(LyndaBaseIE):
|
||||
IE_NAME = 'lynda'
|
||||
IE_DESC = 'lynda.com videos'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?(?:lynda\.com|educourse\.ga)/
|
||||
(?:
|
||||
(?:[^/]+/){2,3}(?P<course_id>\d+)|
|
||||
player/embed
|
||||
)/
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
|
||||
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
|
||||
# md5 is unstable
|
||||
'info_dict': {
|
||||
'id': '114408',
|
||||
'ext': 'mp4',
|
||||
'title': 'Using the exercise files',
|
||||
'duration': 68,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://educourse.ga/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Willkommen-Grundlagen-guten-Gestaltung/393570/393572-4.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Status="NotFound", Message="Transcript not found"
|
||||
'url': 'https://www.lynda.com/ASP-NET-tutorials/What-you-should-know/5034180/2811512-4.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _raise_unavailable(self, video_id):
    """Report through ``raise_login_required`` that ``video_id`` is members-only."""
    message = f'Video {video_id} is only available for members'
    self.raise_login_required(message)
|
||||
|
||||
def _real_extract(self, url):
    """Extract a single lynda.com video.

    The player JSON is tried first (non-fatally). If it yields nothing, the
    course "play" endpoint plus the conviva metadata endpoint are used
    instead. Otherwise formats are built from the ``Formats`` and
    ``PrioritizedStreams`` members of the player JSON and subtitles are
    requested separately.

    Raises ExtractorError when the player JSON contains a ``Status`` member.
    """
    mobj = self._match_valid_url(url)
    video_id = mobj.group('id')
    # course_id is None for player/embed URLs (that alternative of
    # _VALID_URL has no course_id group)
    course_id = mobj.group('course_id')

    query = {
        'videoId': video_id,
        'type': 'video',
    }

    video = self._download_json(
        'https://www.lynda.com/ajax/player', video_id,
        'Downloading video JSON', fatal=False, query=query)

    # Fallback scenario
    if not video:
        query['courseId'] = course_id

        play = self._download_json(
            f'https://www.lynda.com/ajax/course/{course_id}/{video_id}/play', video_id, 'Downloading play JSON')

        if not play:
            self._raise_unavailable(video_id)

        formats = []
        # Each item of `play` is read for a 'name' and a 'urls' mapping of
        # format id to URL
        for formats_dict in play:
            urls = formats_dict.get('urls')
            if not isinstance(urls, dict):
                continue
            cdn = formats_dict.get('name')
            for format_id, format_url in urls.items():
                if not format_url:
                    continue
                formats.append({
                    'url': format_url,
                    'format_id': f'{cdn}-{format_id}' if cdn else format_id,
                    # the format id is also used as the height
                    'height': int_or_none(format_id),
                })

        # Metadata for the fallback path comes from a separate endpoint
        conviva = self._download_json(
            'https://www.lynda.com/ajax/player/conviva', video_id,
            'Downloading conviva JSON', query=query)

        return {
            'id': video_id,
            'title': conviva['VideoTitle'],
            'description': conviva.get('VideoDescription'),
            'release_year': int_or_none(conviva.get('ReleaseYear')),
            'duration': int_or_none(conviva.get('Duration')),
            'creator': conviva.get('Author'),
            'formats': formats,
        }

    # A 'Status' member is treated as an error response
    if 'Status' in video:
        raise ExtractorError(
            'lynda returned error: {}'.format(video['Message']), expected=True)

    if video.get('HasAccess') is False:
        self._raise_unavailable(video_id)

    # Prefer the id reported by the API over the one parsed from the URL
    video_id = str(video.get('ID') or video_id)
    duration = int_or_none(video.get('DurationInSeconds'))
    title = video['Title']

    formats = []

    fmts = video.get('Formats')
    if fmts:
        formats.extend([{
            'url': f['Url'],
            'ext': f.get('Extension'),
            'width': int_or_none(f.get('Width')),
            'height': int_or_none(f.get('Height')),
            'filesize': int_or_none(f.get('FileSize')),
            'format_id': str(f.get('Resolution')) if f.get('Resolution') else None,
        } for f in fmts if f.get('Url')])

    # 'PrioritizedStreams' is iterated as a mapping of stream id to a
    # mapping of format id to URL
    prioritized_streams = video.get('PrioritizedStreams')
    if prioritized_streams:
        for prioritized_stream_id, prioritized_stream in prioritized_streams.items():
            formats.extend([{
                'url': video_url,
                'height': int_or_none(format_id),
                'format_id': f'{prioritized_stream_id}-{format_id}',
            } for format_id, video_url in prioritized_stream.items()])

    self._check_formats(formats, video_id)

    subtitles = self.extract_subtitles(video_id)

    return {
        'id': video_id,
        'title': title,
        'duration': duration,
        'subtitles': subtitles,
        'formats': formats,
    }
|
||||
|
||||
def _fix_subtitles(self, subs):
|
||||
srt = ''
|
||||
seq_counter = 0
|
||||
for seq_current, seq_next in itertools.pairwise(subs):
|
||||
m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
|
||||
if m_current is None:
|
||||
continue
|
||||
m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
|
||||
if m_next is None:
|
||||
continue
|
||||
appear_time = m_current.group('timecode')
|
||||
disappear_time = m_next.group('timecode')
|
||||
text = seq_current['Caption'].strip()
|
||||
if text:
|
||||
seq_counter += 1
|
||||
srt += f'{seq_counter}\r\n{appear_time} --> {disappear_time}\r\n{text}\r\n\r\n'
|
||||
if srt:
|
||||
return srt
|
||||
|
||||
def _get_subtitles(self, video_id):
    """Download the transcript of ``video_id`` and return it as SRT subtitles.

    Returns ``{'en': [...]}`` with one SRT entry, or ``{}`` when the
    transcript is missing, not parsable or converts to nothing.
    """
    transcript_url = f'https://www.lynda.com/ajax/player?videoId={video_id}&type=transcript'
    raw = self._download_webpage(
        transcript_url, video_id, 'Downloading subtitles JSON', fatal=False)
    # A missing transcript is signalled inside the response body
    if not raw or 'Status="NotFound"' in raw:
        return {}

    cues = self._parse_json(raw, video_id, fatal=False)
    if not cues:
        return {}

    srt = self._fix_subtitles(cues)
    return {'en': [{'ext': 'srt', 'data': srt}]} if srt else {}
|
||||
|
||||
|
||||
class LyndaCourseIE(LyndaBaseIE):
|
||||
IE_NAME = 'lynda:course'
|
||||
IE_DESC = 'lynda.com online courses'
|
||||
|
||||
# Course link equals to welcome/introduction video link of same course
|
||||
# We will recognize it as course link
|
||||
_VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P<coursepath>(?:[^/]+/){2,3}(?P<courseid>\d+))-2\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.lynda.com/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Extract a lynda.com course as a playlist of its videos.

    The course JSON is tried first (non-fatally). If it is unavailable the
    course web page is scraped for ``data-video-id`` attributes instead.
    Videos flagged ``HasAccess: False`` are skipped and counted in a warning.

    Raises ExtractorError when the API reports the course as ``NotFound``.
    """
    mobj = self._match_valid_url(url)
    course_path = mobj.group('coursepath')
    course_id = mobj.group('courseid')

    def video_page_url(video_id):
        # Built with an f-string rather than a '%s' template: course_path
        # comes from the URL and may contain '%' (percent-encoding), which
        # would break %-formatting.
        return f'https://www.lynda.com/{course_path}/{video_id}-4.html'

    course = self._download_json(
        f'https://www.lynda.com/ajax/player?courseId={course_id}&type=course',
        course_id, 'Downloading course JSON', fatal=False)

    if not course:
        webpage = self._download_webpage(url, course_id)
        entries = [
            self.url_result(
                video_page_url(video_id), ie=LyndaIE.ie_key(),
                video_id=video_id)
            for video_id in re.findall(
                r'data-video-id=["\'](\d+)', webpage)]
        return self.playlist_result(
            entries, course_id,
            self._og_search_title(webpage, fatal=False),
            self._og_search_description(webpage))

    if course.get('Status') == 'NotFound':
        raise ExtractorError(
            f'Course {course_id} does not exist', expected=True)

    unaccessible_videos = 0
    entries = []

    # Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
    # by single video API anymore

    for chapter in course['Chapters']:
        for video in chapter.get('Videos', []):
            if video.get('HasAccess') is False:
                unaccessible_videos += 1
                continue
            video_id = video.get('ID')
            if video_id:
                entries.append({
                    '_type': 'url_transparent',
                    'url': video_page_url(video_id),
                    'ie_key': LyndaIE.ie_key(),
                    'chapter': chapter.get('Title'),
                    'chapter_number': int_or_none(chapter.get('ChapterIndex')),
                    'chapter_id': str(chapter.get('ID')),
                })

    if unaccessible_videos > 0:
        self.report_warning(
            f'{unaccessible_videos} videos are only available for members (or paid members) '
            f'and will not be downloaded. {self._ACCOUNT_CREDENTIALS_HINT}')

    course_title = course.get('Title')
    course_description = course.get('Description')

    return self.playlist_result(entries, course_id, course_title, course_description)
|
||||
@ -1,121 +0,0 @@
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_resolution,
|
||||
try_get,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
class MicrosoftStreamIE(InfoExtractor):
|
||||
IE_NAME = 'microsoftstream'
|
||||
IE_DESC = 'Microsoft Stream'
|
||||
_VALID_URL = r'https?://(?:web|www|msit)\.microsoftstream\.com/video/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://web.microsoftstream.com/video/6e51d928-4f46-4f1c-b141-369925e37b62?list=user&userId=f5491e02-e8fe-4e34-b67c-ec2e79a6ecc0',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://msit.microsoftstream.com/video/b60f5987-aabd-4e1c-a42f-c559d138f2ca',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _get_all_subtitles(self, api_url, video_id, headers):
    """Fetch the text tracks of a video and split them by origin.

    Returns a dict with ``subtitles`` and ``automatic_captions``; tracks
    flagged ``autoGenerated`` go to the latter. Tracks without a language or
    URL are ignored. A failed download yields two empty mappings.
    """
    subtitles = {}
    automatic_captions = {}
    # The request is non-fatal, so the result may not be a dict on failure;
    # `or {}` keeps the .get() below from raising AttributeError.
    response = self._download_json(
        f'{api_url}/videos/{video_id}/texttracks', video_id,
        note='Downloading subtitles JSON', fatal=False, headers=headers,
        query={'api-version': '1.4-private'}) or {}
    for track in response.get('value') or []:
        if not track.get('language') or not track.get('url'):
            continue
        sub_dict = automatic_captions if track.get('autoGenerated') else subtitles
        sub_dict.setdefault(track['language'], []).append({
            'ext': 'vtt',
            'url': track.get('url'),
        })
    return {
        'subtitles': subtitles,
        'automatic_captions': automatic_captions,
    }
|
||||
|
||||
def extract_all_subtitles(self, *args, **kwargs):
    """Fetch subtitles only when a subtitle-related option is enabled.

    Forwards all arguments to ``_get_all_subtitles`` if any of the
    ``writesubtitles``, ``writeautomaticsub`` or ``listsubtitles`` params is
    set; otherwise returns ``{}`` without any request.
    """
    param_lookups = (
        ('writesubtitles', False),
        ('writeautomaticsub', False),
        ('listsubtitles',),
    )
    # any() stops at the first truthy param, like the chained `or` it mirrors
    if not any(self.get_param(*lookup) for lookup in param_lookups):
        return {}
    return self._get_all_subtitles(*args, **kwargs)
|
||||
|
||||
def _real_extract(self, url):
    """Extract a Microsoft Stream video.

    The access token and API gateway URL are read from the web page, then the
    video object is fetched from the API with a Bearer token. Formats come
    from the HLS, DASH and Smooth Streaming entries of ``playbackUrls``.
    """
    video_id = self._match_id(url)
    webpage = self._download_webpage(url, video_id)
    # A page without this exact title is taken to mean that cookies are needed
    if '<title>Microsoft Stream</title>' not in webpage:
        self.raise_login_required(method='cookies')

    access_token = self._html_search_regex(r'"AccessToken":"(.+?)"', webpage, 'access token')
    api_url = self._html_search_regex(r'"ApiGatewayUri":"(.+?)"', webpage, 'api url')

    # The same headers are reused for the API call, the manifests and the
    # subtitle request
    headers = {'Authorization': f'Bearer {access_token}'}

    video_data = self._download_json(
        f'{api_url}/videos/{video_id}', video_id,
        headers=headers, query={
            '$expand': 'creator,tokens,status,liveEvent,extensions',
            'api-version': '1.4-private',
        })
    # Prefer the id reported by the API over the one parsed from the URL
    video_id = video_data.get('id') or video_id
    language = video_data.get('language')

    thumbnails = []
    for thumbnail_id in ('extraSmall', 'small', 'medium', 'large'):
        thumbnail_url = try_get(video_data, lambda x: x['posterImage'][thumbnail_id]['url'], str)
        if not thumbnail_url:
            continue
        thumb = {
            'id': thumbnail_id,
            'url': thumbnail_url,
        }
        # The URL basename is base64 (padding restored below); str() of the
        # decoded bytes gives their b'...' repr, and that text is what is
        # handed to parse_resolution.
        thumb_name = url_basename(thumbnail_url)
        thumb_name = str(base64.b64decode(thumb_name + '=' * (-len(thumb_name) % 4)))
        thumb.update(parse_resolution(thumb_name))
        thumbnails.append(thumb)

    formats = []
    # Pick the manifest parser by MIME type; other types are ignored
    for playlist in video_data['playbackUrls']:
        if playlist['mimeType'] == 'application/vnd.apple.mpegurl':
            formats.extend(self._extract_m3u8_formats(
                playlist['playbackUrl'], video_id,
                ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls',
                fatal=False, headers=headers))
        elif playlist['mimeType'] == 'application/dash+xml':
            formats.extend(self._extract_mpd_formats(
                playlist['playbackUrl'], video_id, mpd_id='dash',
                fatal=False, headers=headers))
        elif playlist['mimeType'] == 'application/vnd.ms-sstr+xml':
            formats.extend(self._extract_ism_formats(
                playlist['playbackUrl'], video_id, ism_id='mss',
                fatal=False, headers=headers))
    # Combine each format with the video's language via merge_dicts
    formats = [merge_dicts(f, {'language': language}) for f in formats]

    return {
        'id': video_id,
        'title': video_data['name'],
        'description': video_data.get('description'),
        'uploader': try_get(video_data, lambda x: x['creator']['name'], str),
        'uploader_id': try_get(video_data, (lambda x: x['creator']['mail'],
                                            lambda x: x['creator']['id']), str),
        'thumbnails': thumbnails,
        # contributes 'subtitles' and 'automatic_captions', or nothing
        **self.extract_all_subtitles(api_url, video_id, headers),
        'timestamp': parse_iso8601(video_data.get('created')),
        'duration': parse_duration(try_get(video_data, lambda x: x['media']['duration'])),
        'webpage_url': f'https://web.microsoftstream.com/video/{video_id}',
        'view_count': try_get(video_data, lambda x: x['metrics']['views'], int),
        'like_count': try_get(video_data, lambda x: x['metrics']['likes'], int),
        'comment_count': try_get(video_data, lambda x: x['metrics']['comments'], int),
        'formats': formats,
    }
|
||||
@ -1,45 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_codecs,
|
||||
)
|
||||
|
||||
|
||||
class MinotoIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:minoto:|https?://(?:play|iframe|embed)\.minoto-video\.com/(?P<player_id>[0-9]+)/)(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
def _real_extract(self, url):
    """Extract a Minoto video from its player JSON.

    ``minoto:`` URLs carry no player id, in which case player '1' is used.
    HLS entries are expanded through the manifest (non-fatal); every other
    file with a URL becomes a single format.
    """
    mobj = self._match_valid_url(url)
    video_id = mobj.group('id')
    player_id = mobj.group('player_id') or '1'

    video_data = self._download_json(
        f'http://play.minoto-video.com/{player_id}/{video_id}.js', video_id)
    metadata = video_data['video-metadata']

    formats = []
    for media_file in video_data['video-files']:
        file_url = media_file.get('url')
        if not file_url:
            continue
        container = media_file.get('container')
        if container == 'hls':
            formats.extend(self._extract_m3u8_formats(file_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
            continue
        profile = media_file.get('profile') or {}
        formats.append({
            'format_id': profile.get('name-short'),
            'format_note': profile.get('name'),
            'url': file_url,
            'container': container,
            'tbr': int_or_none(media_file.get('bitrate')),
            'filesize': int_or_none(media_file.get('filesize')),
            'width': int_or_none(media_file.get('width')),
            'height': int_or_none(media_file.get('height')),
            **parse_codecs(media_file.get('codecs')),
        })

    return {
        'id': video_id,
        'title': metadata['title'],
        'description': metadata.get('description'),
        'thumbnail': metadata.get('video-poster', {}).get('url'),
        'formats': formats,
    }
|
||||
@ -1,52 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class MojvideoIE(InfoExtractor):
    """Extractor for mojvideo.com video pages, backed by the site's player API."""

    _VALID_URL = r'https?://(?:www\.)?mojvideo\.com/video-(?P<display_id>[^/]+)/(?P<id>[a-f0-9]+)'
    _TEST = {
        'url': 'http://www.mojvideo.com/video-v-avtu-pred-mano-rdecelaska-alfi-nipic/3d1ed4497707730b2906',
        'md5': 'f7fd662cc8ce2be107b0d4f2c0483ae7',
        'info_dict': {
            'id': '3d1ed4497707730b2906',
            'display_id': 'v-avtu-pred-mano-rdecelaska-alfi-nipic',
            'ext': 'mp4',
            'title': 'V avtu pred mano rdečelaska - Alfi Nipič',
            'thumbnail': r're:^http://.*\.jpg$',
            'duration': 242,
        },
    }

    def _real_extract(self, url):
        """Fetch the player API response and scrape the media fields out of it.

        Raises an expected ``ExtractorError`` carrying the site's own error
        description when the response contains ``<error>true</error>``.
        """
        display_id, video_id = self._match_valid_url(url).group('display_id', 'id')

        # The response is handled as plain text and scraped with regexes
        # because the XML it contains is malformed.
        api_url = f'http://www.mojvideo.com/playerapi.php?v={video_id}&t=1'
        playerapi = self._download_webpage(api_url, display_id)

        if '<error>true</error>' in playerapi:
            reason = self._html_search_regex(
                r'<errordesc>([^<]*)</errordesc>', playerapi, 'error description', fatal=False)
            raise ExtractorError(f'{self.IE_NAME} said: {reason}', expected=True)

        info = {
            'id': video_id,
            'display_id': display_id,
        }
        # Lookups are kept in their original order so warnings and the fatal
        # "video URL" failure surface in the same sequence.
        info['title'] = self._html_extract_title(playerapi)
        info['url'] = self._html_search_regex(
            r'<file>([^<]+)</file>', playerapi, 'video URL')
        info['thumbnail'] = self._html_search_regex(
            r'<preview>([^<]+)</preview>', playerapi, 'thumbnail', fatal=False)
        raw_duration = self._html_search_regex(
            r'<duration>([^<]+)</duration>', playerapi, 'duration', fatal=False)
        info['duration'] = parse_duration(raw_duration)
        return info
|
||||
@ -1,289 +0,0 @@
|
||||
import datetime as dt
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
remove_end,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class MotherlessIE(InfoExtractor):
    """Extractor for single motherless.com video pages."""

    _VALID_URL = r'https?://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/|G[VIG]?[A-F0-9]+/)?(?P<id>[A-F0-9]+)'
    _TESTS = [{
        'url': 'http://motherless.com/EE97006',
        'md5': 'cb5e7438f7a3c4e886b7bccc1292a3bc',
        'info_dict': {
            'id': 'EE97006',
            'ext': 'mp4',
            'title': 'Dogging blond Brit getting glazed (comp)',
            'categories': ['UK', 'slag', 'whore', 'dogging', 'cunt', 'cumhound', 'big tits', 'Pearl Necklace'],
            'upload_date': '20230519',
            'uploader_id': 'deathbird',
            'thumbnail': r're:https?://.*\.jpg',
            'age_limit': 18,
            'comment_count': int,
            'view_count': int,
            'like_count': int,
        },
        'params': {
            # Incomplete cert chains
            'nocheckcertificate': True,
        },
    }, {
        'url': 'http://motherless.com/532291B',
        'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
        'info_dict': {
            'id': '532291B',
            'ext': 'mp4',
            'title': 'Amazing girl playing the omegle game, PERFECT!',
            'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen',
                           'game', 'hairy'],
            'upload_date': '20140622',
            'uploader_id': 'Sulivana7x',
            'thumbnail': r're:https?://.*\.jpg',
            'age_limit': 18,
        },
        'skip': '404',
    }, {
        'url': 'http://motherless.com/g/cosplay/633979F',
        'expected_exception': 'ExtractorError',
    }, {
        'url': 'http://motherless.com/8B4BBC1',
        'info_dict': {
            'id': '8B4BBC1',
            'ext': 'mp4',
            'title': 'VIDEO00441.mp4',
            'categories': [],
            'upload_date': '20160214',
            'uploader_id': 'NMWildGirl',
            'thumbnail': r're:https?://.*\.jpg',
            'age_limit': 18,
            'like_count': int,
            'comment_count': int,
            'view_count': int,
        },
        'params': {
            'nocheckcertificate': True,
        },
    }, {
        # see https://motherless.com/videos/recent for recent videos with
        # uploaded date in "ago" format
        'url': 'https://motherless.com/3C3E2CF',
        'info_dict': {
            'id': '3C3E2CF',
            'ext': 'mp4',
            'title': 'a/ Hot Teens',
            'categories': list,
            'upload_date': '20210104',
            'uploader_id': 'anonymous',
            'thumbnail': r're:https?://.*\.jpg',
            'age_limit': 18,
            'like_count': int,
            'comment_count': int,
            'view_count': int,
        },
        'params': {
            'nocheckcertificate': True,
        },
    }]

    def _real_extract(self, url):
        """Scrape a video page and return its info dict.

        Raises an expected ``ExtractorError`` when the page carries one of the
        "not found" markers or the "friends only" notice.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        not_found_markers = (
            '<title>404 - MOTHERLESS.COM<',
            ">The page you're looking for cannot be found.<",
            '<div class="error-page',
        )
        for marker in not_found_markers:
            if marker in webpage:
                raise ExtractorError(f'Video {video_id} does not exist', expected=True)

        if '>The content you are trying to view is for friends only.' in webpage:
            raise ExtractorError(f'Video {video_id} is for friends only', expected=True)

        title = self._html_search_regex(
            (r'(?s)<div[^>]+\bclass=["\']media-meta-title[^>]+>(.+?)</div>',
             r'id="view-upload-title">\s+([^<]+)<'), webpage, 'title')

        video_url = self._html_search_regex(
            (r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
             r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
            webpage, 'video URL', default=None, group='url')
        if not video_url:
            # Nothing found in the page: fall back to the fixed CDN URL pattern
            video_url = f'http://cdn4.videos.motherlessmedia.com/videos/{video_id}.mp4?fs=opencloud'

        age_limit = self._rta_search(webpage)

        raw_views = self._html_search_regex(
            (r'>([\d,.]+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'),
            webpage, 'view count', fatal=False)
        view_count = str_to_int(raw_views)

        raw_likes = self._html_search_regex(
            (r'>([\d,.]+)\s+Favorites<',
             r'<strong>Favorited</strong>\s+([^<]+)<'),
            webpage, 'like count', fatal=False)
        like_count = str_to_int(raw_likes)

        absolute_date = self._search_regex(
            r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<', webpage,
            'upload date', default=None)
        upload_date = unified_strdate(absolute_date)
        if not upload_date:
            # Recent uploads show a relative "<N>h ago" / "<N>d ago" stamp instead
            uploaded_ago = self._search_regex(
                r'>\s*(\d+[hd])\s+[aA]go\b', webpage, 'uploaded ago',
                default=None)
            if uploaded_ago:
                amount, unit = int(uploaded_ago[:-1]), uploaded_ago[-1]
                # The regex above only admits 'h' or 'd' as the unit
                span = {'hours': amount} if unit == 'h' else {'days': amount}
                upload_date = (dt.datetime.now(dt.timezone.utc) - dt.timedelta(**span)).strftime('%Y%m%d')

        comment_nodes = re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage)
        comment_count = len(comment_nodes)

        uploader_id = self._html_search_regex(
            (r'''<span\b[^>]+\bclass\s*=\s*["']username\b[^>]*>([^<]+)</span>''',
             r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)'''),
            webpage, 'uploader_id', fatal=False)

        # Categories come from the comma-separated "keywords" meta tag
        keywords = self._html_search_meta('keywords', webpage, default='')
        categories = []
        for keyword in keywords.split(','):
            keyword = keyword.strip()
            if keyword:
                categories.append(keyword)

        return {
            'id': video_id,
            'title': title,
            'upload_date': upload_date,
            'uploader_id': uploader_id,
            'thumbnail': self._og_search_thumbnail(webpage),
            'categories': categories,
            'view_count': view_count,
            'like_count': like_count,
            'comment_count': comment_count,
            'age_limit': age_limit,
            'url': video_url,
        }
|
||||
|
||||
|
||||
class MotherlessPaginatedIE(InfoExtractor):
    """Shared paging logic for motherless.com listing pages.

    Subclasses supply ``_correct_path`` and may override ``_correct_title``
    and ``_EXTRA_QUERY``.
    """

    _EXTRA_QUERY = {}
    _PAGE_SIZE = 60

    def _correct_path(self, url, item_id):
        """Return the canonical listing URL for ``item_id``; subclasses must override."""
        raise NotImplementedError('This method must be implemented by subclasses')

    def _correct_title(self, title, /):
        """Return ``title`` cut at the first ' - Videos', or None for a falsy title."""
        if not title:
            return None
        head, _, _ = title.partition(' - Videos')
        return head

    def _extract_entries(self, webpage, base):
        """Yield a url_result for every video link found in ``webpage``."""
        link_pattern = r'href="[^"]*(?P<href>/[A-F0-9]+)"\s+title="(?P<title>[^"]+)'
        for link in re.finditer(link_pattern, webpage):
            entry_url = urllib.parse.urljoin(base, link.group('href'))
            entry_id = MotherlessIE.get_temp_id(entry_url)
            if not entry_id:
                continue
            yield self.url_result(entry_url, MotherlessIE, entry_id, link.group('title'))

    def _real_extract(self, url):
        """Build a lazily paged playlist for the listing at ``url``."""
        item_id = self._match_id(url)
        real_url = self._correct_path(url, item_id)
        first_page = self._download_webpage(real_url, item_id, 'Downloading page 1')

        def get_page(idx):
            page = idx + 1
            if idx:
                html = self._download_webpage(
                    real_url, item_id, note=f'Downloading page {page}',
                    query={'page': page, **self._EXTRA_QUERY})
            else:
                # Page 1 was already downloaded above; reuse it
                html = first_page
            yield from self._extract_entries(html, real_url)

        entries = OnDemandPagedList(get_page, self._PAGE_SIZE)
        playlist_title = self._correct_title(self._html_extract_title(first_page))
        return self.playlist_result(entries, item_id, playlist_title)
|
||||
|
||||
|
||||
class MotherlessGroupIE(MotherlessPaginatedIE):
    """Playlist extractor for motherless.com group pages (``/g``, ``/gv``, ``/gi``, ...)."""

    _VALID_URL = r'https?://(?:www\.)?motherless\.com/g[vifm]?/(?P<id>[a-z0-9_]+)/?(?:$|[#?])'
    _TESTS = [{
        'url': 'http://motherless.com/gv/movie_scenes',
        'info_dict': {
            'id': 'movie_scenes',
            'title': 'Movie Scenes',
        },
        'playlist_mincount': 540,
    }, {
        'url': 'http://motherless.com/g/sex_must_be_funny',
        'info_dict': {
            'id': 'sex_must_be_funny',
            'title': 'Sex must be funny',
        },
        'playlist_count': 0,
    }, {
        'url': 'https://motherless.com/gv/beautiful_cock',
        'info_dict': {
            'id': 'beautiful_cock',
            'title': 'Beautiful Cock',
        },
        'playlist_mincount': 371,
    }]

    def _correct_path(self, url, item_id):
        """Return the ``/gv/<id>`` listing URL on the same host as ``url``."""
        group_path = f'/gv/{item_id}'
        return urllib.parse.urljoin(url, group_path)
|
||||
|
||||
|
||||
class MotherlessGalleryIE(MotherlessPaginatedIE):
    """Playlist extractor for motherless.com gallery pages (``/G``, ``/GV``, ``/GI``, ``/GG``)."""

    _VALID_URL = r'https?://(?:www\.)?motherless\.com/G[VIG]?(?P<id>[A-F0-9]+)/?(?:$|[#?])'
    _TESTS = [{
        'url': 'https://motherless.com/GV338999F',
        'info_dict': {
            'id': '338999F',
            'title': 'Random',
        },
        'playlist_mincount': 100,
    }, {
        'url': 'https://motherless.com/GVABD6213',
        'info_dict': {
            'id': 'ABD6213',
            'title': 'Cuties',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://motherless.com/GVBCF7622',
        'info_dict': {
            'id': 'BCF7622',
            'title': 'Vintage',
        },
        'playlist_count': 0,
    }, {
        'url': 'https://motherless.com/G035DE2F',
        'info_dict': {
            'id': '035DE2F',
            'title': 'General',
        },
        'playlist_mincount': 234,
    }]

    def _correct_path(self, url, item_id):
        """Return the ``/GV<id>`` listing URL on the same host as ``url``."""
        gallery_path = f'/GV{item_id}'
        return urllib.parse.urljoin(url, gallery_path)

    def _correct_title(self, title, /):
        """Strip the site suffix from the page title."""
        site_suffix = ' | MOTHERLESS.COM ™'
        return remove_end(title, site_suffix)
|
||||
|
||||
|
||||
class MotherlessUploaderIE(MotherlessPaginatedIE):
    """Playlist extractor for a motherless.com uploader's page (``/u/<name>``)."""

    _VALID_URL = r'https?://(?:www\.)?motherless\.com/u/(?P<id>\w+)/?(?:$|[?#])'
    # Sent with every follow-up page request, matching the ``?t=v`` that
    # ``_correct_path`` puts on the first URL.
    _EXTRA_QUERY = {'t': 'v'}
    _TESTS = [{
        'url': 'https://motherless.com/u/Mrgo4hrs2023',
        'info_dict': {
            'id': 'Mrgo4hrs2023',
            'title': "Mrgo4hrs2023's Uploads",
        },
        'playlist_mincount': 32,
    }, {
        'url': 'https://motherless.com/u/Happy_couple?t=v',
        'info_dict': {
            'id': 'Happy_couple',
            'title': "Happy_couple's Uploads",
        },
        'playlist_mincount': 8,
    }]

    def _correct_path(self, url, item_id):
        """Return the ``/u/<id>?t=v`` listing URL on the same host as ``url``."""
        uploader_path = f'/u/{item_id}?t=v'
        return urllib.parse.urljoin(url, uploader_path)
|
||||
@ -1,43 +0,0 @@
|
||||
from .jixie import JixieBaseIE
|
||||
|
||||
|
||||
class MoviewPlayIE(JixieBaseIE):
    """Extractor for moview.id ``/play/`` pages; delegates to the Jixie base extractor."""

    _VALID_URL = r'https?://www\.moview\.id/play/\d+/(?P<id>[\w-]+)'
    _TESTS = [{
        # drm hls, only use direct link
        'url': 'https://www.moview.id/play/174/Candy-Monster',
        'info_dict': {
            'id': '146182',
            'ext': 'mp4',
            'display_id': 'Candy-Monster',
            'uploader_id': 'Mo165qXUUf',
            'duration': 528.2,
            'title': 'Candy Monster',
            'description': 'Mengapa Candy Monster ingin mengambil permen Chloe?',
            'thumbnail': 'https://video.jixie.media/1034/146182/146182_1280x720.jpg',
        },
    }, {
        # non-drm hls
        'url': 'https://www.moview.id/play/75/Paris-Van-Java-Episode-16',
        'info_dict': {
            'id': '28210',
            'ext': 'mp4',
            'duration': 2595.666667,
            'display_id': 'Paris-Van-Java-Episode-16',
            'uploader_id': 'Mo165qXUUf',
            'thumbnail': 'https://video.jixie.media/1003/28210/28210_1280x720.jpg',
            'description': 'md5:2a5e18d98eef9b39d7895029cac96c63',
            'title': 'Paris Van Java Episode 16',
        },
    }]

    def _real_extract(self, url):
        """Find the ``video_id`` assignment in the page and hand off to the Jixie helper."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        id_pattern = r'video_id\s*=\s*"(?P<video_id>[^"]+)'
        jixie_id = self._search_regex(id_pattern, webpage, 'video_id')

        return self._extract_data_from_jixie_id(display_id, jixie_id, webpage)
|
||||
@ -1,38 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class MoviezineIE(InfoExtractor):
    """Extractor for moviezine.se video pages."""

    _VALID_URL = r'https?://(?:www\.)?moviezine\.se/video/(?P<id>[^?#]+)'

    _TEST = {
        'url': 'http://www.moviezine.se/video/205866',
        'info_dict': {
            'id': '205866',
            'ext': 'mp4',
            'title': 'Oculus - Trailer 1',
            'description': 'md5:40cc6790fc81d931850ca9249b40e8a4',
            'thumbnail': r're:http://.*\.jpg',
        },
    }

    def _real_extract(self, url):
        """Combine the page's OpenGraph description with fields scraped from the JS player API."""
        video_id = self._match_valid_url(url).group('id')

        webpage = self._download_webpage(url, video_id)
        player_api_url = f'http://www.moviezine.se/api/player.js?video={video_id}'
        jsplayer = self._download_webpage(player_api_url, video_id, 'Downloading js api player')

        # The player script exposes a single file URL, reported as the "sd" format
        media_url = self._html_search_regex(r'file: "(.+?)",', jsplayer, 'file')
        sd_format = {
            'format_id': 'sd',
            'url': media_url,
            'quality': 0,
            'ext': 'mp4',
        }

        title = self._search_regex(r'title: "(.+?)",', jsplayer, 'title')
        thumbnail = self._search_regex(r'image: "(.+?)",', jsplayer, 'image')

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': [sd_format],
            'description': self._og_search_description(webpage),
        }
|
||||
@ -1,174 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
date_from_str,
|
||||
format_field,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class MusicdexBaseIE(InfoExtractor):
    """Shared helper for the musicdex.org extractors."""

    def _return_info(self, track_json, album_json, video_id):
        """Build a track info dict from a track object and its album object.

        The result is always attributed to ``MusicdexSongIE`` through the
        ``extractor`` / ``extractor_key`` fields, whichever extractor calls this.
        """
        site_template = 'https://www.musicdex.org/%s'
        track_name = track_json.get('name')

        def names_of(items):
            # Collect the 'name' of each object; a missing/None list yields []
            return [item.get('name') for item in items or []]

        release_year = try_get(
            album_json, lambda x: date_from_str(unified_strdate(x['release_date'])).year)

        return {
            'id': str(video_id),
            'title': track_name,
            'track': track_name,
            'description': track_json.get('description'),
            'track_number': track_json.get('number'),
            'url': format_field(track_json, 'url', site_template),
            'duration': track_json.get('duration'),
            'genres': names_of(track_json.get('genres')),
            'like_count': track_json.get('likes_count'),
            'view_count': track_json.get('plays'),
            'artists': names_of(track_json.get('artists')),
            'album_artists': names_of(album_json.get('artists')),
            'thumbnail': format_field(album_json, 'image', site_template),
            'album': album_json.get('name'),
            'release_year': release_year,
            'extractor_key': MusicdexSongIE.ie_key(),
            'extractor': 'MusicdexSong',
        }
|
||||
|
||||
|
||||
class MusicdexSongIE(MusicdexBaseIE):
    """Extractor for a single musicdex.org track page."""

    _VALID_URL = r'https?://(?:www\.)?musicdex\.org/track/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.musicdex.org/track/306/dual-existence',
        'info_dict': {
            'id': '306',
            'ext': 'mp3',
            'title': 'dual existence',
            'description': '#NIPPONSEI @ IRC.RIZON.NET',
            'track': 'dual existence',
            'track_number': 1,
            'duration': 266000,
            'genres': ['Anime'],
            'like_count': int,
            'view_count': int,
            'artists': ['fripSide'],
            'album_artists': ['fripSide'],
            'thumbnail': 'https://www.musicdex.org/storage/album/9iDIam1DHTVqUG4UclFIEq1WAFGXfPW4y0TtZa91.png',
            'album': 'To Aru Kagaku no Railgun T OP2 Single - dual existence',
            'release_year': 2020,
        },
        'params': {'skip_download': True},
    }]

    def _real_extract(self, url):
        """Fetch the track JSON and convert it using the album embedded in it (if any)."""
        video_id = self._match_id(url)
        api_url = f'https://www.musicdex.org/secure/tracks/{video_id}?defaultRelations=true'
        track = self._download_json(api_url, video_id)['track']
        album = track.get('album') or {}
        return self._return_info(track, album, video_id)
|
||||
|
||||
|
||||
class MusicdexAlbumIE(MusicdexBaseIE):
    """Playlist extractor for a musicdex.org album page."""

    _VALID_URL = r'https?://(?:www\.)?musicdex\.org/album/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.musicdex.org/album/56/tenmon-and-eiichiro-yanagi-minori/ef-a-tale-of-memories-original-soundtrack-2-fortissimo',
        'playlist_mincount': 28,
        'info_dict': {
            'id': '56',
            'genres': ['OST'],
            'view_count': int,
            'artists': ['TENMON & Eiichiro Yanagi / minori'],
            'title': 'ef - a tale of memories Original Soundtrack 2 ~fortissimo~',
            'release_year': 2008,
            'thumbnail': 'https://www.musicdex.org/storage/album/2rSHkyYBYfB7sbvElpEyTMcUn6toY7AohOgJuDlE.jpg',
        },
    }]

    def _real_extract(self, url):
        """Fetch the album JSON and return a playlist of its tracks.

        Tracks without a truthy ``id`` are left out of the playlist.
        """
        playlist_id = self._match_id(url)
        api_url = f'https://www.musicdex.org/secure/albums/{playlist_id}?defaultRelations=true'
        album = self._download_json(api_url, playlist_id)['album']

        entries = []
        for track in album.get('tracks') or []:
            if track.get('id'):
                entries.append(self._return_info(track, album, track['id']))

        genre_names = [genre.get('name') for genre in album.get('genres') or []]
        artist_names = [artist.get('name') for artist in album.get('artists') or []]

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': album.get('name'),
            'description': album.get('description'),
            'genres': genre_names,
            'view_count': album.get('plays'),
            'artists': artist_names,
            'thumbnail': format_field(album, 'image', 'https://www.musicdex.org/%s'),
            'release_year': try_get(album, lambda x: date_from_str(unified_strdate(x['release_date'])).year),
            'entries': entries,
        }
|
||||
|
||||
|
||||
class MusicdexPageIE(MusicdexBaseIE):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
    """Base for paginated musicdex.org listings; subclasses provide ``_API_URL``."""

    def _entries(self, playlist_id):
        """Yield every item of the paginated API, following ``next_page_url`` until it is falsy."""
        page_url = self._API_URL % playlist_id
        while page_url:
            pagination = self._download_json(page_url, playlist_id)['pagination']
            items = pagination.get('data') or []
            yield from items
            page_url = pagination.get('next_page_url')
|
||||
|
||||
|
||||
class MusicdexArtistIE(MusicdexPageIE):
    """Playlist extractor for a musicdex.org artist page (all tracks of all albums)."""

    _VALID_URL = r'https?://(?:www\.)?musicdex\.org/artist/(?P<id>\d+)'
    _API_URL = 'https://www.musicdex.org/secure/artists/%s/albums?page=1'

    _TESTS = [{
        'url': 'https://www.musicdex.org/artist/11/fripside',
        'playlist_mincount': 28,
        'info_dict': {
            'id': '11',
            'view_count': int,
            'title': 'fripSide',
            'thumbnail': 'https://www.musicdex.org/storage/artist/ZmOz0lN2vsweegB660em3xWffCjLPmTQHqJls5Xx.jpg',
        },
    }]

    def _real_extract(self, url):
        """Fetch artist metadata, then walk the paginated albums and flatten their tracks.

        Tracks without a truthy ``id`` are left out of the playlist.
        """
        playlist_id = self._match_id(url)
        artist = self._download_json(
            f'https://www.musicdex.org/secure/artists/{playlist_id}', playlist_id)['artist']

        entries = [
            self._return_info(track, album, track['id'])
            for album in self._entries(playlist_id)
            for track in album.get('tracks') or []
            if track.get('id')
        ]

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': artist.get('name'),
            'view_count': artist.get('plays'),
            'thumbnail': format_field(artist, 'image_small', 'https://www.musicdex.org/%s'),
            'entries': entries,
        }
|
||||
|
||||
|
||||
class MusicdexPlaylistIE(MusicdexPageIE):
    """Playlist extractor for a musicdex.org user playlist page."""

    _VALID_URL = r'https?://(?:www\.)?musicdex\.org/playlist/(?P<id>\d+)'
    _API_URL = 'https://www.musicdex.org/secure/playlists/%s/tracks?perPage=10000&page=1'

    _TESTS = [{
        'url': 'https://www.musicdex.org/playlist/9/test',
        'playlist_mincount': 73,
        'info_dict': {
            'id': '9',
            'view_count': int,
            'title': 'Test',
            'thumbnail': 'https://www.musicdex.org/storage/album/jXATI79f0IbQ2sgsKYOYRCW3zRwF3XsfHhzITCuJ.jpg',
            'description': 'Test 123 123 21312 32121321321321312',
        },
    }]

    def _real_extract(self, url):
        """Fetch playlist metadata, then collect every track from the paginated tracks API.

        Tracks without a truthy ``id`` are left out of the playlist.
        """
        playlist_id = self._match_id(url)
        playlist = self._download_json(
            f'https://www.musicdex.org/secure/playlists/{playlist_id}', playlist_id)['playlist']

        entries = []
        # ``_entries`` returns a generator object, which is always truthy, so
        # iterating it directly is equivalent to ``self._entries(...) or []``.
        for track in self._entries(playlist_id):
            if not track.get('id'):
                continue
            entries.append(self._return_info(track, track.get('album') or {}, track['id']))

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist.get('name'),
            'description': playlist.get('description'),
            'view_count': playlist.get('plays'),
            'thumbnail': format_field(playlist, 'image', 'https://www.musicdex.org/%s'),
            'entries': entries,
        }
|
||||
@ -1,64 +1,4 @@
|
||||
from .common import InfoExtractor
|
||||
from .fox import FOXIE
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
class NationalGeographicVideoIE(InfoExtractor):
    """Extractor for video.nationalgeographic.com pages; hands off to ThePlatform."""

    IE_NAME = 'natgeo:video'
    _VALID_URL = r'https?://video\.nationalgeographic\.com/.*?'

    _TESTS = [{
        'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
        'md5': '730855d559abbad6b42c2be1fa584917',
        'info_dict': {
            'id': '0000014b-70a1-dd8c-af7f-f7b559330001',
            'ext': 'mp4',
            'title': 'Mating Crabs Busted by Sharks',
            'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
            'timestamp': 1423523799,
            'upload_date': '20150209',
            'uploader': 'NAGS',
        },
        'add_ie': ['ThePlatform'],
        'skip': 'Redirects to main page',
    }, {
        'url': 'http://video.nationalgeographic.com/wild/when-sharks-attack/the-real-jaws',
        'md5': '6a3105eb448c070503b3105fb9b320b5',
        'info_dict': {
            'id': 'ngc-I0IauNSWznb_UV008GxSbwY35BZvgi2e',
            'ext': 'mp4',
            'title': 'The Real Jaws',
            'description': 'md5:8d3e09d9d53a85cd397b4b21b2c77be6',
            'timestamp': 1433772632,
            'upload_date': '20150608',
            'uploader': 'NAGS',
        },
        'add_ie': ['ThePlatform'],
        'skip': 'Redirects to main page',
    }]

    def _real_extract(self, url):
        """Read the player's ``data-guid`` from the page and return a url_transparent result."""
        # The last path component of the URL serves as the display name in download messages
        name = url_basename(url)
        webpage = self._download_webpage(url, name)

        guid = self._search_regex(
            r'id="(?:videoPlayer|player-container)"[^>]+data-guid="([^"]+)"',
            webpage, 'guid')

        platform_url = smuggle_url(
            f'http://link.theplatform.com/s/ngs/media/guid/2423130747/{guid}?mbr=true',
            {'force_smil_url': True})

        return {
            '_type': 'url_transparent',
            'ie_key': 'ThePlatform',
            'url': platform_url,
            'id': guid,
        }
|
||||
|
||||
|
||||
class NationalGeographicTVIE(FOXIE): # XXX: Do not subclass from concrete IE
|
||||
|
||||
@ -13,11 +13,9 @@ from ..utils import (
|
||||
dict_get,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
@ -284,142 +282,3 @@ class NaverLiveIE(NaverBaseIE):
|
||||
}), get_all=False),
|
||||
'is_live': True,
|
||||
}
|
||||
|
||||
|
||||
class NaverNowIE(NaverBaseIE):
    """Extractor for now.naver.com shows, their replays (VODs) and highlight clips."""

    IE_NAME = 'navernow'
    _VALID_URL = r'https?://now\.naver\.com/s/now\.(?P<id>\w+)'
    _API_URL = 'https://apis.naver.com/now_web/oldnow_web/v4'
    _TESTS = [{
        'url': 'https://now.naver.com/s/now.4759?shareReplayId=26331132#replay=',
        'md5': 'e05854162c21c221481de16b2944a0bc',
        'info_dict': {
            'id': '4759-26331132',
            'title': '아이키X노제\r\n💖꽁냥꽁냥💖(1)',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1650369600,
            'upload_date': '20220419',
            'uploader_id': 'now',
            'view_count': int,
            'uploader_url': 'https://now.naver.com/show/4759',
            'uploader': '아이키의 떰즈업',
        },
        'params': {
            'noplaylist': True,
        },
    }, {
        'url': 'https://now.naver.com/s/now.4759?shareHightlight=26601461#highlight=',
        'md5': '9f6118e398aa0f22b2152f554ea7851b',
        'info_dict': {
            'id': '4759-26601461',
            'title': '아이키: 나 리정한테 흔들렸어,,, 질투 폭발하는 노제 여보😾 [아이키의 떰즈업]ㅣ네이버 NOW.',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20220504',
            'timestamp': 1651648311,
            'uploader_id': 'now',
            'view_count': int,
            'uploader_url': 'https://now.naver.com/show/4759',
            'uploader': '아이키의 떰즈업',
        },
        'params': {
            'noplaylist': True,
        },
    }, {
        'url': 'https://now.naver.com/s/now.4759',
        'info_dict': {
            'id': '4759',
            'title': '아이키의 떰즈업',
        },
        'playlist_mincount': 101,
    }, {
        'url': 'https://now.naver.com/s/now.4759?shareReplayId=26331132#replay',
        'info_dict': {
            'id': '4759',
            'title': '아이키의 떰즈업',
        },
        'playlist_mincount': 101,
    }, {
        'url': 'https://now.naver.com/s/now.4759?shareHightlight=26601461#highlight=',
        'info_dict': {
            'id': '4759',
            'title': '아이키의 떰즈업',
        },
        'playlist_mincount': 101,
    }, {
        'url': 'https://now.naver.com/s/now.kihyunplay?shareReplayId=30573291#replay',
        'only_matching': True,
    }]

    def _extract_replay(self, show_id, replay_id):
        """Return the info dict for one replay (VOD) of a show."""
        vod_url = f'{self._API_URL}/shows/now.{show_id}/vod/{replay_id}'
        vod_info = self._download_json(vod_url, replay_id)
        in_key = self._download_json(f'{vod_url}/inkey', replay_id)['inKey']

        episode_fields = {
            'id': f'{show_id}-{replay_id}',
            'title': traverse_obj(vod_info, ('episode', 'title')),
            'timestamp': unified_timestamp(traverse_obj(vod_info, ('episode', 'start_time'))),
            'thumbnail': vod_info.get('thumbnail_image_url'),
        }
        video_fields = self._extract_video_info(replay_id, vod_info['video_id'], in_key)
        return merge_dicts(episode_fields, video_fields)

    def _extract_show_replays(self, show_id):
        """Yield every replay of a show, paging until a short page is returned."""
        page_size = 15
        for page in itertools.count(1):
            show_vod_info = self._download_json(
                f'{self._API_URL}/vod-shows/now.{show_id}', show_id,
                query={'page': page, 'page_size': page_size},
                note=f'Downloading JSON vod list for show {show_id} - page {page}',
            )['response']['result']
            vod_list = show_vod_info.get('vod_list') or []
            for vod in vod_list:
                yield self._extract_replay(show_id, vod['id'])

            # Fewer items than requested means this was the last page
            if len(vod_list) < page_size:
                return

    def _extract_show_highlights(self, show_id, highlight_id=None):
        """Yield highlight clips of a show; with ``highlight_id`` only the matching clip."""
        page_size = 10
        for page in itertools.count(1):
            highlights_videos = self._download_json(
                f'{self._API_URL}/shows/now.{show_id}/highlights/videos/', show_id,
                query={'page': page, 'page_size': page_size},
                note=f'Downloading JSON highlights for show {show_id} - page {page}')
            results = highlights_videos.get('results') or []

            for highlight in results:
                wanted = not highlight_id or highlight.get('clip_no') == int(highlight_id)
                if not wanted:
                    continue
                clip_fields = {
                    'id': f'{show_id}-{highlight["clip_no"]}',
                    'title': highlight.get('title'),
                    'timestamp': unified_timestamp(highlight.get('regdate')),
                    'thumbnail': highlight.get('thumbnail_url'),
                }
                yield merge_dicts(clip_fields, self._extract_video_info(
                    highlight['clip_no'], highlight['video_id'], highlight['video_inkey']))

            # Fewer items than requested means this was the last page
            if len(results) < page_size:
                return

    def _extract_highlight(self, show_id, highlight_id):
        """Return the single highlight with ``highlight_id`` or raise ``ExtractorError``."""
        matches = self._extract_show_highlights(show_id, highlight_id)
        try:
            return next(matches)
        except StopIteration:
            raise ExtractorError(f'Unable to find highlight {highlight_id} for show {show_id}')

    def _real_extract(self, url):
        """Dispatch to a single highlight, a single replay, or the whole-show playlist."""
        show_id = self._match_id(url)
        qs = urllib.parse.parse_qs(urllib.parse.urlparse(url).query)

        # 'shareHightlight' (sic) is the query key the original code reads
        if not self._yes_playlist(show_id, qs.get('shareHightlight')):
            return self._extract_highlight(show_id, qs['shareHightlight'][0])
        if not self._yes_playlist(show_id, qs.get('shareReplayId')):
            return self._extract_replay(show_id, qs['shareReplayId'][0])

        show_info = self._download_json(
            f'{self._API_URL}/shows/now.{show_id}/', show_id,
            note=f'Downloading JSON vod list for show {show_id}')

        all_entries = itertools.chain(
            self._extract_show_replays(show_id), self._extract_show_highlights(show_id))
        return self.playlist_result(all_entries, show_id, show_info.get('title'))
|
||||
|
||||
@ -1,38 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import parse_iso8601, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class NerdCubedFeedIE(InfoExtractor):
    """Playlist extractor for the nerdcubed.co.uk front page video feed."""

    _VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/?(?:$|[#?])'
    _TEST = {
        'url': 'http://www.nerdcubed.co.uk/',
        'info_dict': {
            'id': 'nerdcubed-feed',
            'title': 'nerdcubed.co.uk feed',
        },
        'playlist_mincount': 5500,
    }

    def _extract_video(self, feed_entry):
        """Turn one feed entry into a url_transparent result pointing at YouTube."""
        watch_url = f'https://www.youtube.com/watch?v={feed_entry["id"]}'
        metadata = traverse_obj(feed_entry, {
            'id': ('id', {str}),
            'title': ('title', {str}),
            'description': ('description', {str}),
            'timestamp': ('publishedAt', {parse_iso8601}),
            'channel': ('source', 'name', {str}),
            'channel_id': ('source', 'id', {str}),
            'channel_url': ('source', 'url', {str}),
            'thumbnail': ('thumbnail', 'source', {url_or_none}),
        })
        return self.url_result(watch_url, YoutubeIE, **metadata, url_transparent=True)

    def _real_extract(self, url):
        """Download the feed JSON and wrap every entry that has an ``id`` into a playlist."""
        video_id = 'nerdcubed-feed'
        feed = self._download_json('https://www.nerdcubed.co.uk/_/cdn/videos.json', video_id)

        feed_entries = traverse_obj(feed, ('videos', lambda _, v: v['id']))
        results = (self._extract_video(entry) for entry in feed_entries)
        return self.playlist_result(results, video_id, 'nerdcubed.co.uk feed')
|
||||
@ -1,281 +0,0 @@
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
from .dailymotion import DailymotionIE
|
||||
from ..utils import smuggle_url, traverse_obj
|
||||
|
||||
|
||||
class NetverseBaseIE(InfoExtractor):
    """Shared API helpers for the netverse.id extractors."""

    # Maps the endpoint name used by callers to the API path segment
    _ENDPOINTS = {
        'watch': 'watchvideo',
        'video': 'watchvideo',
        'webseries': 'webseries',
        'season': 'webseason_videos',
    }

    def _call_api(self, slug, endpoint, query={}, season_id='', display_id=None):
        """Download JSON from the medias v2 API for ``slug`` via the named ``endpoint``.

        ``display_id`` is used for download messages and falls back to ``slug``.
        """
        api_path = self._ENDPOINTS[endpoint]
        api_url = f'https://api.netverse.id/medias/api/v2/{api_path}/{slug}/{season_id}'
        return self._download_json(api_url, display_id or slug, query=query)

    def _get_comments(self, video_id):
        """Yield comment dicts page by page until the reported last page is reached.

        Page downloads are non-fatal; a failed page is treated as empty.
        """
        comments_url = f'https://api.netverse.id/mediadetails/api/v3/videos/comments/{video_id}'
        last_page_number = None
        page = 0
        while True:
            page += 1
            comment_data = self._download_json(
                comments_url, video_id, data=b'', fatal=False, query={'page': page},
                note=f'Downloading JSON comment metadata page {page}') or {}
            yield from traverse_obj(comment_data, ('response', 'comments', 'data', ..., {
                'id': '_id',
                'text': 'comment',
                'author_id': 'customer_id',
                'author': ('customer', 'name'),
                'author_thumbnail': ('customer', 'profile_picture'),
            }))

            # Keep looking for the page count until a response provides it;
            # with no count known, stop after the current page.
            last_page_number = last_page_number or traverse_obj(
                comment_data, ('response', 'comments', 'last_page'))
            if page >= (last_page_number or 0):
                return
|
||||
|
||||
|
||||
class NetverseIE(NetverseBaseIE):
    """Extractor for single netverse.id /watch/ and /video/ pages.

    The page's API record supplies a Dailymotion URL; extraction is delegated
    to DailymotionIE as a url_transparent result with Netverse metadata on top.
    """

    _VALID_URL = r'https?://(?:\w+\.)?netverse\.id/(?P<type>watch|video)/(?P<display_id>[^/?#&]+)'
    _TESTS = [{
        # Watch video
        'url': 'https://www.netverse.id/watch/waktu-indonesia-bercanda-edisi-spesial-lebaran-2016',
        'info_dict': {
            'id': 'k4yhqUwINAGtmHx3NkL',
            'title': 'Waktu Indonesia Bercanda - Edisi Spesial Lebaran 2016',
            'ext': 'mp4',
            'season': 'Season 2016',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
            'episode_number': 22,
            'episode': 'Episode 22',
            'uploader_id': 'x2ir3vq',
            'age_limit': 0,
            'tags': [],
            'view_count': int,
            'display_id': 'waktu-indonesia-bercanda-edisi-spesial-lebaran-2016',
            'duration': 2990,
            'upload_date': '20210722',
            'timestamp': 1626919804,
            'like_count': int,
            'uploader': 'Net Prime',
        },
    }, {
        # series
        'url': 'https://www.netverse.id/watch/jadoo-seorang-model',
        'info_dict': {
            'id': 'x88izwc',
            'title': 'Jadoo Seorang Model',
            'ext': 'mp4',
            'season': 'Season 2',
            'description': 'md5:8a74f70812cca267e19ee0635f0af835',
            'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
            'episode_number': 2,
            'episode': 'Episode 2',
            'view_count': int,
            'like_count': int,
            'display_id': 'jadoo-seorang-model',
            'uploader_id': 'x2ir3vq',
            'duration': 635,
            'timestamp': 1646372927,
            'tags': ['PG069497-hellojadooseason2eps2'],
            'upload_date': '20220304',
            'uploader': 'Net Prime',
            'age_limit': 0,
        },
        'skip': 'video get Geo-blocked for some country',
    }, {
        # non www host
        'url': 'https://netverse.id/watch/tetangga-baru',
        'info_dict': {
            'id': 'k4CNGz7V0HJ7vfwZbXy',
            'ext': 'mp4',
            'title': 'Tetangga Baru',
            'season': 'Season 1',
            'description': 'md5:23fcf70e97d461d3029d25d59b2ccfb9',
            'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
            'episode_number': 1,
            'episode': 'Episode 1',
            'timestamp': 1624538169,
            'view_count': int,
            'upload_date': '20210624',
            'age_limit': 0,
            'uploader_id': 'x2ir3vq',
            'like_count': int,
            'uploader': 'Net Prime',
            'tags': ['PG008534', 'tetangga', 'Baru'],
            'display_id': 'tetangga-baru',
            'duration': 1406,
        },
    }, {
        # /video url
        # (the expected title lives in info_dict only; a duplicate top-level
        # 'title' key was removed because it is not a test-case field)
        'url': 'https://www.netverse.id/video/pg067482-hellojadoo-season1',
        'info_dict': {
            'id': 'x887jzz',
            'ext': 'mp4',
            'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
            'season': 'Season 1',
            'episode_number': 1,
            'description': 'md5:d4f627b3e7a3f9acdc55f6cdd5ea41d5',
            'title': 'Namaku Choi Jadoo',
            'episode': 'Episode 1',
            'age_limit': 0,
            'like_count': int,
            'view_count': int,
            'tags': ['PG067482', 'PG067482-HelloJadoo-season1'],
            'duration': 780,
            'display_id': 'pg067482-hellojadoo-season1',
            'uploader_id': 'x2ir3vq',
            'uploader': 'Net Prime',
            'timestamp': 1645764984,
            'upload_date': '20220225',
        },
        'skip': 'This video get Geo-blocked for some country',
    }, {
        # video with comments
        'url': 'https://netverse.id/video/episode-1-season-2016-ok-food',
        'info_dict': {
            'id': 'k6hetBPiQMljSxxvAy7',
            'ext': 'mp4',
            'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
            'display_id': 'episode-1-season-2016-ok-food',
            'like_count': int,
            'description': '',
            'duration': 1471,
            'age_limit': 0,
            'timestamp': 1642405848,
            'episode_number': 1,
            'season': 'Season 2016',
            'uploader_id': 'x2ir3vq',
            'title': 'Episode 1 - Season 2016 - Ok Food',
            'upload_date': '20220117',
            'tags': [],
            'view_count': int,
            'episode': 'Episode 1',
            'uploader': 'Net Prime',
            'comment_count': int,
        },
        'params': {
            'getcomments': True,
        },
    }, {
        # video with multiple page comment
        'url': 'https://netverse.id/video/match-island-eps-1-fix',
        'info_dict': {
            'id': 'x8aznjc',
            'ext': 'mp4',
            'like_count': int,
            'tags': ['Match-Island', 'Pd00111'],
            'display_id': 'match-island-eps-1-fix',
            'view_count': int,
            'episode': 'Episode 1',
            'uploader': 'Net Prime',
            'duration': 4070,
            'timestamp': 1653068165,
            'description': 'md5:e9cf3b480ad18e9c33b999e3494f223f',
            'age_limit': 0,
            'title': 'Welcome To Match Island',
            'upload_date': '20220520',
            'episode_number': 1,
            'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
            'uploader_id': 'x2ir3vq',
            'season': 'Season 1',
            'comment_count': int,
        },
        'params': {
            'getcomments': True,
        },
    }]

    def _real_extract(self, url):
        """Look up the page's API record and hand the Dailymotion URL to DailymotionIE.

        Raises KeyError if the API response lacks 'response'/'videos' or the
        video record lacks 'dailymotion_url'.
        """
        display_id, sites_type = self._match_valid_url(url).group('display_id', 'type')
        program_json = self._call_api(display_id, sites_type)
        videos = program_json['response']['videos']

        return {
            '_type': 'url_transparent',
            'ie_key': DailymotionIE.ie_key(),
            # The embedder is smuggled along so DailymotionIE can include it in its query
            'url': smuggle_url(videos['dailymotion_url'], {'query': {'embedder': 'https://www.netverse.id'}}),
            'display_id': display_id,
            'title': videos.get('title'),
            'season': videos.get('season_name'),
            'thumbnail': traverse_obj(videos, ('program_detail', 'thumbnail_image')),
            'description': traverse_obj(videos, ('program_detail', 'description')),
            'episode_number': videos.get('episode_order'),
            # Comments are keyed by the page slug and fetched through _get_comments
            '__post_extractor': self.extract_comments(display_id),
        }
|
||||
|
||||
|
||||
class NetversePlaylistIE(NetverseBaseIE):
    """Playlist extractor for netverse.id /webseries/ pages."""

    _VALID_URL = r'https?://(?:\w+\.)?netverse\.id/(?P<type>webseries)/(?P<display_id>[^/?#&]+)'
    _TESTS = [{
        # multiple season
        'url': 'https://netverse.id/webseries/tetangga-masa-gitu',
        'info_dict': {
            'id': 'tetangga-masa-gitu',
            'title': 'Tetangga Masa Gitu',
        },
        'playlist_count': 519,
    }, {
        # single season
        'url': 'https://netverse.id/webseries/kelas-internasional',
        'info_dict': {
            'id': 'kelas-internasional',
            'title': 'Kelas Internasional',
        },
        'playlist_count': 203,
    }]

    def parse_playlist(self, json_data, playlist_id):
        """Yield a NetverseIE url result for every video slug in every season page.

        The season endpoint is queried with the slug of the first related
        video; an IndexError is raised if the response lists none.
        """
        related_slug = traverse_obj(json_data, ('related', 'data', ..., 'slug'))[0]
        for season_id in traverse_obj(json_data, ('seasons', ..., 'id')):
            # An unpaged request is made first just to learn how many pages exist
            season_json = self._call_api(
                related_slug, 'season', display_id=playlist_id, season_id=season_id)
            page_count = season_json['response']['season_list']['last_page']

            for page_number in range(1, page_count + 1):
                page_json = self._call_api(
                    related_slug, 'season', query={'page': page_number},
                    season_id=season_id, display_id=playlist_id)
                page_slugs = traverse_obj(page_json, ('response', ..., 'data', ..., 'slug'))
                for video_slug in page_slugs:
                    yield self.url_result(f'https://www.netverse.id/video/{video_slug}', NetverseIE)

    def _real_extract(self, url):
        """Fetch the webseries record and return its videos as a playlist."""
        playlist_id, sites_type = self._match_valid_url(url).group('display_id', 'type')
        playlist_data = self._call_api(playlist_id, sites_type)

        entries = self.parse_playlist(playlist_data['response'], playlist_id)
        series_slug = traverse_obj(playlist_data, ('response', 'webseries_info', 'slug'))
        series_title = traverse_obj(playlist_data, ('response', 'webseries_info', 'title'))
        return self.playlist_result(entries, series_slug, series_title)
|
||||
|
||||
|
||||
class NetverseSearchIE(SearchInfoExtractor):
    """Search extractor for netverse.id, reachable through the 'netsearch' key."""

    _SEARCH_KEY = 'netsearch'

    _TESTS = [{
        'url': 'netsearch10:tetangga',
        'info_dict': {
            'id': 'tetangga',
            'title': 'tetangga',
        },
        'playlist_count': 10,
    }]

    def _search_results(self, query):
        """Yield NetverseIE url results for `query`, one API page at a time."""
        total_pages = None
        for page in itertools.count(1):
            page_json = self._download_json(
                'https://api.netverse.id/search/elastic/search', query,
                query={'q': query, 'page': page}, note=f'Downloading page {page}')

            hits = traverse_obj(page_json, ('response', 'data', ...))
            yield from (
                self.url_result(f'https://netverse.id/video/{hit["slug"]}', NetverseIE)
                for hit in hits)

            # Remember the first truthy page count the API reports
            if not total_pages:
                total_pages = traverse_obj(page_json, ('response', 'lastpage'))
            # Stop on an empty page or once the reported last page is reached
            if not hits or page >= (total_pages or 0):
                break
|
||||
@ -1,201 +0,0 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
UserNotLive,
|
||||
filter_dict,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class NuumBaseIE(InfoExtractor):
    """Shared API access and metadata parsing for the nuum.ru extractors."""

    def _call_api(self, path, video_id, description, query={}):
        """Download `path` from the nuum.ru v2 API and return its 'result' member.

        Raises ExtractorError when the response carries a truthy 'error' value.
        """
        payload = self._download_json(
            f'https://nuum.ru/api/v2/{path}', video_id, query=query,
            note=f'Downloading {description} metadata',
            errnote=f'Unable to download {description} metadata')
        error = payload.get('error')
        if error:
            raise ExtractorError(f'API returned error: {error!r}')
        return payload['result']

    def _get_channel_info(self, channel_name):
        """Fetch the public broadcast record for `channel_name`."""
        channel_query = {
            'with_extra': 'true',
            'channel_name': channel_name,
            'with_deleted': 'true',
        }
        return self._call_api(
            'broadcasts/public', video_id=channel_name, description='channel',
            query=channel_query)

    def _parse_video_data(self, container, extract_formats=True):
        """Build an info dict from a media container record.

        Only the first stream and its first media item are inspected. When
        `extract_formats` is false no m3u8 manifest is downloaded and the
        'formats'/'subtitles' keys are dropped from the result.
        """
        stream = traverse_obj(container, ('media_container_streams', 0, {dict})) or {}
        media = traverse_obj(stream, ('stream_media', 0, {dict})) or {}
        # The archive URL is preferred over the plain media URL
        media_url = traverse_obj(media, (
            'media_meta', ('media_archive_url', 'media_url'), {url_or_none}), get_all=False)

        video_id = str(container['media_container_id'])
        is_live = media.get('media_status') == 'RUNNING'
        headers = {'Referer': 'https://nuum.ru/'}

        if extract_formats:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                media_url, video_id, 'mp4', live=is_live, headers=headers)
        else:
            formats = subtitles = None

        info = {
            'id': video_id,
            'is_live': is_live,
            'formats': formats,
            'subtitles': subtitles,
            'http_headers': headers,
        }
        info.update(traverse_obj(container, {
            'title': ('media_container_name', {str}),
            'description': ('media_container_description', {str}),
            'timestamp': ('created_at', {parse_iso8601}),
            'channel': ('media_container_channel', 'channel_name', {str}),
            'channel_id': ('media_container_channel', 'channel_id', {str_or_none}),
        }))
        info.update(traverse_obj(stream, {
            'view_count': ('stream_total_viewers', {int_or_none}),
            'concurrent_view_count': ('stream_current_viewers', {int_or_none}),
        }))
        info.update(traverse_obj(media, {
            'duration': ('media_duration', {int_or_none}),
            'thumbnail': ('media_meta', ('media_preview_archive_url', 'media_preview_url'), {url_or_none}),
        }, get_all=False))
        # filter_dict strips the entries that ended up as None
        return filter_dict(info)
|
||||
|
||||
|
||||
class NuumMediaIE(NuumBaseIE):
    """Extractor for individual nuum.ru stream, video and clip pages."""

    IE_NAME = 'nuum:media'
    _VALID_URL = r'https?://nuum\.ru/(?:streams|videos|clips)/(?P<id>[\d]+)'
    _TESTS = [{
        'url': 'https://nuum.ru/streams/1592713-7-days-to-die',
        'only_matching': True,
    }, {
        'url': 'https://nuum.ru/videos/1567547-toxi-hurtz',
        'md5': 'ce28837a5bbffe6952d7bfd3d39811b0',
        'info_dict': {
            'id': '1567547',
            'ext': 'mp4',
            'title': 'Toxi$ - Hurtz',
            'description': '',
            'timestamp': 1702631651,
            'upload_date': '20231215',
            'thumbnail': r're:^https?://.+\.jpg',
            'view_count': int,
            'concurrent_view_count': int,
            'channel_id': '6911',
            'channel': 'toxis',
            'duration': 116,
        },
    }, {
        'url': 'https://nuum.ru/clips/1552564-pro-misu',
        'md5': 'b248ae1565b1e55433188f11beeb0ca1',
        'info_dict': {
            'id': '1552564',
            'ext': 'mp4',
            'title': 'Про Мису 🙃',
            'timestamp': 1701971828,
            'upload_date': '20231207',
            'thumbnail': r're:^https?://.+\.jpg',
            'view_count': int,
            'concurrent_view_count': int,
            'channel_id': '3320',
            'channel': 'Misalelik',
            'duration': 41,
        },
    }]

    def _real_extract(self, url):
        """Fetch the media container for the numeric id in `url` and parse it."""
        media_id = self._match_id(url)
        container = self._call_api(f'media-containers/{media_id}', media_id, 'media')
        return self._parse_video_data(container)
|
||||
|
||||
|
||||
class NuumLiveIE(NuumBaseIE):
    """Extractor for the current broadcast of a nuum.ru channel page."""

    IE_NAME = 'nuum:live'
    _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/?(?:$|[#?])'
    _TESTS = [{
        'url': 'https://nuum.ru/channel/mts_live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the channel's media container, reported under NuumMediaIE's name.

        Raises UserNotLive only when the API explicitly reports
        `channel_is_live` as False; a missing flag does not abort extraction.
        """
        channel_name = self._match_id(url)
        channel_info = self._get_channel_info(channel_name)
        live_flag = traverse_obj(channel_info, ('channel', 'channel_is_live'))
        if live_flag is False:
            raise UserNotLive(video_id=channel_name)

        parsed = self._parse_video_data(channel_info['media_container'])
        # Keys coming from the parsed data take precedence over these defaults
        result = {
            'webpage_url': f'https://nuum.ru/streams/{parsed["id"]}',
            'extractor_key': NuumMediaIE.ie_key(),
            'extractor': NuumMediaIE.IE_NAME,
        }
        result.update(parsed)
        return result
|
||||
|
||||
|
||||
class NuumTabIE(NuumBaseIE):
    """Playlist extractor for the streams/videos/clips tabs of a nuum.ru channel."""

    IE_NAME = 'nuum:tab'
    _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/(?P<type>streams|videos|clips)'
    _TESTS = [{
        'url': 'https://nuum.ru/channel/dankon_/clips',
        'info_dict': {
            'id': 'dankon__clips',
            'title': 'Dankon_',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'https://nuum.ru/channel/dankon_/videos',
        'info_dict': {
            'id': 'dankon__videos',
            'title': 'Dankon_',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://nuum.ru/channel/dankon_/streams',
        'info_dict': {
            'id': 'dankon__streams',
            'title': 'Dankon_',
        },
        'playlist_mincount': 1,
    }]

    _PAGE_SIZE = 50

    def _fetch_page(self, channel_id, tab_type, tab_id, page):
        """Yield url results for one zero-based `page` of the given tab."""
        # Container types requested from the API for each tab
        container_types = {
            'clips': ['SHORT_VIDEO', 'REVIEW_VIDEO'],
            'videos': ['LONG_VIDEO'],
            'streams': ['SINGLE'],
        }
        page_query = {
            'limit': self._PAGE_SIZE,
            'offset': page * self._PAGE_SIZE,
            'channel_id': channel_id,
            'media_container_status': 'STOPPED',
            'media_container_type': container_types[tab_type],
        }

        containers = self._call_api(
            'media-containers', video_id=tab_id,
            description=f'{tab_type} tab page {page + 1}', query=page_query)
        for item in traverse_obj(containers, (..., {dict})):
            # Formats are left for NuumMediaIE to extract when the entry is processed
            item_info = self._parse_video_data(item, extract_formats=False)
            yield self.url_result(
                f'https://nuum.ru/videos/{item_info["id"]}', NuumMediaIE, **item_info)

    def _real_extract(self, url):
        """Return an on-demand paged playlist for the channel tab in `url`."""
        channel_name, tab_type = self._match_valid_url(url).group('id', 'type')
        tab_id = f'{channel_name}_{tab_type}'
        channel = self._get_channel_info(channel_name)['channel']

        page_fetcher = functools.partial(
            self._fetch_page, channel['channel_id'], tab_type, tab_id)
        return self.playlist_result(
            OnDemandPagedList(page_fetcher, self._PAGE_SIZE),
            playlist_id=tab_id, playlist_title=channel.get('channel_name'))
|
||||
@ -1,41 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
|
||||
|
||||
class OnionStudiosIE(InfoExtractor):
    """Extractor for onionstudios.com videos, resolved to Kinja embeds."""

    _VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:video(?:s/[^/]+-|/)|embed\?.*\bid=)(?P<id>\d+)(?!-)'
    _EMBED_REGEX = [r'(?s)<(?:iframe|bulbs-video)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/(?:embed.+?|video/\d+\.json))\1']

    _TESTS = [{
        'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
        'md5': '5a118d466d62b5cd03647cf2c593977f',
        'info_dict': {
            'id': '3459881',
            'ext': 'mp4',
            'title': 'Hannibal charges forward, stops for a cocktail',
            'description': 'md5:545299bda6abf87e5ec666548c6a9448',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'a.v. club',
            'upload_date': '20150619',
            'timestamp': 1434728546,
        },
    }, {
        'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
        'only_matching': True,
    }, {
        'url': 'http://www.onionstudios.com/video/6139.json',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Map the Onion Studios id to its MCP id and delegate to KinjaEmbed."""
        video_id = self._match_id(url)

        # The embed script carries a `window.mcpMapping` object keyed by video id
        embed_script = self._download_webpage(
            'http://onionstudios.com/embed/dc94dc2899fe644c0e7241fa04c1b732.js',
            video_id)
        mapping_js = self._search_regex(
            r'window\.mcpMapping\s*=\s*({.+?});', embed_script, 'MCP Mapping')
        mapping = self._parse_json(mapping_js, video_id, js_to_json)
        mcp_id = str(mapping[video_id]['mcp_id'])

        return self.url_result(
            f'http://kinja.com/ajax/inset/iframe?id=mcp-{mcp_id}',
            'KinjaEmbed', mcp_id)
|
||||
@ -1,72 +0,0 @@
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
get_element_by_attribute,
|
||||
qualities,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class OraTVIE(InfoExtractor):
    """Extractor for ora.tv and unsafespeech.com video pages."""

    _VALID_URL = r'https?://(?:www\.)?(?:ora\.tv|unsafespeech\.com)/([^/]+/)*(?P<id>[^/\?#]+)'
    _TESTS = [{
        'url': 'https://www.ora.tv/larrykingnow/2015/12/16/vine-youtube-stars-zach-king-king-bach-on-their-viral-videos-0_36jupg6090pq',
        'md5': 'fa33717591c631ec93b04b0e330df786',
        'info_dict': {
            'id': '50178',
            'ext': 'mp4',
            'title': 'Vine & YouTube Stars Zach King & King Bach On Their Viral Videos!',
            'description': 'md5:ebbc5b1424dd5dba7be7538148287ac1',
        },
    }, {
        'url': 'http://www.unsafespeech.com/video/2016/5/10/student-self-censorship-and-the-thought-police-on-university-campuses-0_6622bnkppw4d',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract HLS and derived HTTP formats, or defer to YouTube when no HLS stream is listed."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        video_data = self._search_regex(
            r'"(?:video|current)"\s*:\s*({[^}]+?})', webpage, 'current video')
        m3u8_url = self._search_regex(
            r'hls_stream"?\s*:\s*"([^"]+)', video_data, 'm3u8 url', None)

        # Without an HLS stream the page is expected to reference a YouTube video
        if not m3u8_url:
            youtube_id = self._search_regex(
                r'"youtube_id"\s*:\s*"([^"]+)', webpage, 'youtube id')
            return self.url_result(youtube_id, 'Youtube')

        formats = self._extract_m3u8_formats(
            m3u8_url, display_id, 'mp4', 'm3u8_native',
            m3u8_id='hls', fatal=False)

        # similar to GameSpotIE
        # The manifest path embeds a comma-separated quality list; each quality
        # is substituted back into the path to form a direct HTTP URL.
        qualities_re = r'((,[a-z]+\d+)+,?)'
        m3u8_path = urllib.parse.urlparse(m3u8_url).path
        quality_ids = self._search_regex(
            qualities_re, m3u8_path, 'qualities').strip(',').split(',')
        # Drop the leading slash and the first path component
        relative_path = m3u8_path[1:].split('/', 1)[1]
        relative_template = re.sub(qualities_re, r'%s', relative_path)
        relative_template = relative_template.replace('.csmil/master.m3u8', '')
        http_template = urllib.parse.urljoin(
            'http://videocdn-pmd.ora.tv/', relative_template)
        rank = qualities(
            ['mobile400', 'basic400', 'basic600', 'sd900', 'sd1200', 'sd1500', 'hd720', 'hd1080'])
        formats.extend({
            'url': http_template % quality_id,
            'format_id': quality_id,
            'quality': rank(quality_id),
        } for quality_id in quality_ids)

        return {
            'id': self._search_regex(
                r'"id"\s*:\s*(\d+)', video_data, 'video id', default=display_id),
            'display_id': display_id,
            'title': unescapeHTML(self._og_search_title(webpage)),
            'description': get_element_by_attribute(
                'class', 'video_txt_decription', webpage),
            'thumbnail': self._proto_relative_url(self._search_regex(
                r'"thumb"\s*:\s*"([^"]+)', video_data, 'thumbnail', None)),
            'formats': formats,
        }
|
||||
@ -1,99 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601, smuggle_url, unsmuggle_url, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class PiramideTVIE(InfoExtractor):
    """Extractor for piramide.tv video pages, optionally following the 'next video' chain."""

    _VALID_URL = r'https?://piramide\.tv/video/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://piramide.tv/video/wWtBAORdJUTh',
        'info_dict': {
            'id': 'wWtBAORdJUTh',
            'ext': 'mp4',
            'title': 'md5:79f9c8183ea6a35c836923142cf0abcc',
            'description': '',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/W86PgQDn/thumbnails/B9gpIxkH.jpg',
            'channel': 'León Picarón',
            'channel_id': 'leonpicaron',
            'timestamp': 1696460362,
            'upload_date': '20231004',
        },
    }, {
        'url': 'https://piramide.tv/video/wcYn6li79NgN',
        'info_dict': {
            'id': 'wcYn6li79NgN',
            'ext': 'mp4',
            'title': 'ACEPTO TENER UN BEBE CON MI NOVIA\u2026? | Parte 1',
            'description': '',
            'channel': 'ARTA GAME',
            'channel_id': 'arta_game',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/cnEdGp5X/thumbnails/rHAaWfP7.jpg',
            'timestamp': 1703434976,
            'upload_date': '20231224',
        },
    }]

    def _extract_video(self, video_id):
        """Return `(next_video_id, info_dict)` for `video_id`.

        Both the metadata and the manifest downloads are non-fatal.
        """
        video_data = self._download_json(
            f'https://hermes.piramide.tv/video/data/{video_id}', video_id, fatal=False)
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            f'https://cdn.piramide.tv/video/{video_id}/manifest.m3u8', video_id, fatal=False)

        following_id = traverse_obj(video_data, ('video', 'next_video', 'id', {str}))
        api_metadata = traverse_obj(video_data, ('video', {
            'id': ('id', {str}),
            'title': ('title', {str}),
            'description': ('description', {str}),
            'thumbnail': ('media', 'thumbnail', {url_or_none}),
            'channel': ('channel', 'name', {str}),
            'channel_id': ('channel', 'id', {str}),
            'timestamp': ('date', {parse_iso8601}),
        }))
        # Values reported by the API (including 'id') override the defaults
        info = {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
        }
        info.update(api_metadata)
        return following_id, info

    def _entries(self, video_id):
        """Yield info dicts along the 'next video' chain, stopping at an already-visited id."""
        seen_ids = {video_id}
        following_id, info = self._extract_video(video_id)
        yield info
        while following_id and following_id not in seen_ids:
            seen_ids.add(following_id)
            following_id, info = self._extract_video(following_id)
            yield info

    def _real_extract(self, url):
        """Return a playlist of chained videos, or just the single video when no playlist is wanted."""
        url, smuggled_data = unsmuggle_url(url, {})
        video_id = self._match_id(url)
        if not self._yes_playlist(video_id, video_id, smuggled_data):
            _, single_info = self._extract_video(video_id)
            return single_info
        return self.playlist_result(self._entries(video_id), video_id)
|
||||
|
||||
|
||||
class PiramideTVChannelIE(InfoExtractor):
    """Playlist extractor for piramide.tv channel pages."""

    _VALID_URL = r'https?://piramide\.tv/channel/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://piramide.tv/channel/thekalo',
        'playlist_mincount': 10,
        'info_dict': {
            'id': 'thekalo',
        },
    }]

    def _entries(self, channel_name):
        """Yield a url result for every listed channel video that has an id."""
        listing = self._download_json(
            f'https://hermes.piramide.tv/channel/list/{channel_name}/date/100000', channel_name)
        for item in traverse_obj(listing, ('videos', lambda _, v: v['id'])):
            # 'force_noplaylist' is smuggled so each entry resolves to a single video
            video_url = smuggle_url(
                f'https://piramide.tv/video/{item["id"]}', {'force_noplaylist': True})
            listing_metadata = traverse_obj(item, {
                'id': ('id', {str}),
                'title': ('title', {str}),
                'description': ('description', {str}),
            })
            yield self.url_result(video_url, **listing_metadata)

    def _real_extract(self, url):
        """Return the channel's videos as a playlist identified by the channel name."""
        channel_name = self._match_id(url)
        return self.playlist_result(self._entries(channel_name), channel_name)
|
||||
@ -1,72 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
try_get,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class PlanetMarathiIE(InfoExtractor):
    """Playlist extractor for planetmarathi.com title pages."""

    _VALID_URL = r'https?://(?:www\.)?planetmarathi\.com/titles/(?P<id>[^/#&?$]+)'
    _TESTS = [{
        'url': 'https://www.planetmarathi.com/titles/ek-unad-divas',
        'playlist_mincount': 2,
        'info_dict': {
            'id': 'ek-unad-divas',
        },
        'playlist': [{
            'info_dict': {
                'id': 'ASSETS-MOVIE-ASSET-01_ek-unad-divas',
                'ext': 'mp4',
                'title': 'ek unad divas',
                'alt_title': 'चित्रपट',
                'description': 'md5:41c7ed6b041c2fea9820a3f3125bd881',
                'episode_number': 1,
                'duration': 5539,
                'upload_date': '20210829',
            },
        }],  # Trailer skipped
    }, {
        'url': 'https://www.planetmarathi.com/titles/baap-beep-baap-season-1',
        'playlist_mincount': 10,
        'info_dict': {
            'id': 'baap-beep-baap-season-1',
        },
        'playlist': [{
            'info_dict': {
                'id': 'ASSETS-CHARACTER-PROFILE-SEASON-01-ASSET-01_baap-beep-baap-season-1',
                'ext': 'mp4',
                'title': 'Manohar Kanhere',
                'alt_title': 'मनोहर कान्हेरे',
                'description': 'md5:285ed45d5c0ab5522cac9a043354ebc6',
                'season_number': 1,
                'episode_number': 1,
                'duration': 29,
                'upload_date': '20210829',
            },
        }],  # Trailers, Episodes, other Character profiles skipped
    }]

    def _asset_entry(self, asset, playlist_id):
        """Build the info dict for one asset of the title `playlist_id`."""
        title = asset['mediaAssetName']['en']
        # Assets generically named 'Movie' get a title derived from the page slug
        if title == 'Movie':
            title = playlist_id.replace('-', ' ')
        entry_id = f'{asset["sk"]}_{playlist_id}'.replace('#', '-')
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(asset['mediaAssetURL'], entry_id)
        return {
            'id': entry_id,
            'title': title,
            'alt_title': try_get(asset, lambda x: x['mediaAssetName']['mr']),
            'description': try_get(asset, lambda x: x['mediaAssetDescription']['en']),
            'season_number': asset.get('mediaAssetSeason'),
            'episode_number': asset.get('mediaAssetIndexForAssetType'),
            'duration': asset.get('mediaAssetDurationInSeconds'),
            'upload_date': unified_strdate(asset.get('created')),
            'formats': formats,
            'subtitles': subtitles,
        }

    def _real_extract(self, url):
        """Download the title's asset list and return every asset as a playlist entry."""
        playlist_id = self._match_id(url)
        assets = self._download_json(
            f'https://www.planetmarathi.com/api/v1/titles/{playlist_id}/assets', playlist_id)['assets']
        # All manifests are fetched up front, so the entry list is fully built before returning
        entries = [self._asset_entry(asset, playlist_id) for asset in assets]
        return self.playlist_result(entries, playlist_id=playlist_id)
|
||||
@ -1,100 +0,0 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import PUTRequest
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import ExtractorError, clean_html, int_or_none
|
||||
|
||||
|
||||
class PlayPlusTVIE(InfoExtractor):
    """Extractor for playplus.tv / playplus.com VOD pages (login required)."""

    _VALID_URL = r'https?://(?:www\.)?playplus\.(?:com|tv)/VOD/(?P<project_id>[0-9]+)/(?P<id>[0-9a-f]{32})'
    _TEST = {
        'url': 'https://www.playplus.tv/VOD/7572/db8d274a5163424e967f35a30ddafb8e',
        'md5': 'd078cb89d7ab6b9df37ce23c647aef72',
        'info_dict': {
            'id': 'db8d274a5163424e967f35a30ddafb8e',
            'ext': 'mp4',
            'title': 'Capítulo 179 - Final',
            'description': 'md5:01085d62d8033a1e34121d3c3cabc838',
            'timestamp': 1529992740,
            'upload_date': '20180626',
        },
        'skip': 'Requires account credential',
    }
    _NETRC_MACHINE = 'playplustv'
    _GEO_COUNTRIES = ['BR']
    # Bearer token obtained by _perform_login; None until a login succeeds
    _token = None
    # Id of the account's first profile, set by _perform_login
    _profile_id = None

    def _call_api(self, resource, video_id=None, query=None):
        """Download JSON for `resource` from the media API using the bearer token."""
        return self._download_json('https://api.playplus.tv/api/media/v2/get' + resource, video_id, headers={
            'Authorization': 'Bearer ' + self._token,
        }, query=query)

    def _perform_login(self, username, password):
        """Log in with email/password and store the token and first profile id.

        Raises an expected ExtractorError carrying the API's error message
        when the login request is answered with HTTP 401.
        """
        req = PUTRequest(
            'https://api.playplus.tv/api/web/login', json.dumps({
                'email': username,
                'password': password,
            }).encode(), {
                'Content-Type': 'application/json; charset=utf-8',
            })

        try:
            self._token = self._download_json(req, None)['token']
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                raise ExtractorError(self._parse_json(
                    e.cause.response.read(), None)['errorMessage'], expected=True)
            raise

        # Stored under the attribute declared on the class (was `_profile`)
        self._profile_id = self._call_api('Profiles')['list'][0]['_id']

    def _real_initialize(self):
        """Refuse to proceed without a token, i.e. without a successful login."""
        if not self._token:
            self.raise_login_required(method='password')

    def _real_extract(self, url):
        """Fetch the media record for the project/media ids in `url` and build the info dict."""
        project_id, media_id = self._match_valid_url(url).groups()
        media = self._call_api(
            'Media', media_id, {
                'profileId': self._profile_id,
                'projectId': project_id,
                'mediaId': media_id,
            })['obj']
        title = media['title']

        formats = []
        for f in media.get('files', []):
            f_url = f.get('url')
            if not f_url:
                continue
            file_info = f.get('fileInfo') or {}
            formats.append({
                'url': f_url,
                'width': int_or_none(file_info.get('width')),
                'height': int_or_none(file_info.get('height')),
            })

        thumbnails = []
        for thumb in media.get('thumbs', []):
            thumb_url = thumb.get('url')
            if not thumb_url:
                continue
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

        return {
            'id': media_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': clean_html(media.get('description')) or media.get('shortDescription'),
            # publishDate is divided by 1000 before use (presumably milliseconds — confirm)
            'timestamp': int_or_none(media.get('publishDate'), 1000),
            'view_count': int_or_none(media.get('numberOfViews')),
            'comment_count': int_or_none(media.get('numberOfComments')),
            'tags': media.get('tags'),
        }
|
||||
@ -1,79 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
|
||||
class PlaywireIE(InfoExtractor):
    # Matches config.playwire.com / cdn.playwire.com URLs and captures the
    # numeric publisher and video IDs
    _VALID_URL = r'https?://(?:config|cdn)\.playwire\.com(?:/v2)?/(?P<publisher_id>\d+)/(?:videos/v2|embed|config)/(?P<id>\d+)'
    _EMBED_REGEX = [r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1']

    _TESTS = [{
        'url': 'http://config.playwire.com/14907/videos/v2/3353705/player.json',
        'md5': 'e6398701e3595888125729eaa2329ed9',
        'info_dict': {
            'id': '3353705',
            'ext': 'mp4',
            'title': 'S04_RM_UCL_Rus',
            'thumbnail': r're:^https?://.*\.png$',
            'duration': 145.94,
        },
        'skip': 'Invalid URL',
    }, {
        # m3u8 in f4m
        'url': 'http://config.playwire.com/21772/videos/v2/4840492/zeus.json',
        'info_dict': {
            'id': '4840492',
            'ext': 'mp4',
            'title': 'ITV EL SHOW FULL',
        },
        'skip': 'Invalid URL',
    }, {
        # Multiple resolutions while bitrates missing
        'url': 'http://cdn.playwire.com/11625/embed/85228.html',
        'only_matching': True,
    }, {
        'url': 'http://config.playwire.com/12421/videos/v2/3389892/zeus.json',
        'only_matching': True,
    }, {
        'url': 'http://cdn.playwire.com/v2/12342/config/1532636.json',
        'only_matching': True,
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
        'info_dict': {
            'id': '3519514',
            'ext': 'mp4',
            'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
        },
        'skip': 'Site no longer embeds Playwire',
    }]

    def _real_extract(self, url):
        """Download the ``zeus.json`` player config and return the info dict."""
        publisher_id, video_id = self._match_valid_url(url).group('publisher_id', 'id')

        config_url = f'http://config.playwire.com/{publisher_id}/videos/v2/{video_id}/zeus.json'
        player = self._download_json(config_url, video_id)

        title = player['settings']['title']
        duration = float_or_none(player.get('duration'), 1000)

        content = player['content']
        thumbnail = content.get('poster')
        manifest_url = content['media']['f4m']

        formats = self._extract_f4m_formats(manifest_url, video_id, m3u8_id='hls')
        for fmt in formats:
            # Formats that already carry a bitrate or resolution need no hint
            if dict_get(fmt, ['tbr', 'width', 'height']):
                continue
            # Otherwise rank by the "-hd." marker in the URL
            fmt['quality'] = 1 if '-hd.' in fmt['url'] else 0

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
|
||||
@ -1,130 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
parse_resolution,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class PlVideoIE(InfoExtractor):
    IE_DESC = 'Платформа'
    # Matches both regular watch pages (?v=<id>) and /shorts/<id> pages
    _VALID_URL = r'https?://(?:www\.)?plvideo\.ru/(?:watch\?(?:[^#]+&)?v=|shorts/)(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://plvideo.ru/watch?v=Y5JzUzkcQTMK',
        'md5': 'fe8e18aca892b3b31f3bf492169f8a26',
        'info_dict': {
            'id': 'Y5JzUzkcQTMK',
            'ext': 'mp4',
            'thumbnail': 'https://img.plvideo.ru/images/fp-2024-images/v/cover/37/dd/37dd00a4c96c77436ab737e85947abd7/original663a4a3bb713e5.33151959.jpg',
            'title': 'Presidente de Cuba llega a Moscú en una visita de trabajo',
            'channel': 'RT en Español',
            'channel_id': 'ZH4EKqunVDvo',
            'media_type': 'video',
            'comment_count': int,
            'tags': ['rusia', 'cuba', 'russia', 'miguel díaz-canel'],
            'description': 'md5:a1a395d900d77a86542a91ee0826c115',
            'release_timestamp': 1715096124,
            'channel_is_verified': True,
            'like_count': int,
            'timestamp': 1715095911,
            'duration': 44320,
            'view_count': int,
            'dislike_count': int,
            'upload_date': '20240507',
            'modified_date': '20240701',
            'channel_follower_count': int,
            'modified_timestamp': 1719824073,
        },
    }, {
        'url': 'https://plvideo.ru/shorts/S3Uo9c-VLwFX',
        'md5': '7d8fa2279406c69d2fd2a6fc548a9805',
        'info_dict': {
            'id': 'S3Uo9c-VLwFX',
            'ext': 'mp4',
            'channel': 'Romaatom',
            'tags': 'count:22',
            'dislike_count': int,
            'upload_date': '20241130',
            'description': 'md5:452e6de219bf2f32bb95806c51c3b364',
            'duration': 58433,
            'modified_date': '20241130',
            'thumbnail': 'https://img.plvideo.ru/images/fp-2024-11-cover/S3Uo9c-VLwFX/f9318999-a941-482b-b700-2102a7049366.jpg',
            'media_type': 'shorts',
            'like_count': int,
            'modified_timestamp': 1732961458,
            'channel_is_verified': True,
            'channel_id': 'erJyyTIbmUd1',
            'timestamp': 1732961355,
            'comment_count': int,
            'title': 'Белоусов отменил приказы о кадровом резерве на гражданской службе',
            'channel_follower_count': int,
            'view_count': int,
            'release_timestamp': 1732961458,
        },
    }]

    def _real_extract(self, url):
        """Fetch the video JSON from the API and assemble the info dict."""
        video_id = self._match_id(url)

        video_data = self._download_json(
            f'https://api.g1.plvideo.ru/v1/videos/{video_id}?Aud=18', video_id)

        # One HLS format per entry of item.profiles that has a valid `hls` URL;
        # the profile key doubles as format ID and is parsed for a resolution
        profiles = traverse_obj(video_data, (
            'item', 'profiles', {dict.items}, lambda _, v: url_or_none(v[1]['hls'])))
        formats = [{
            'format_id': quality,
            'ext': 'mp4',
            'protocol': 'm3u8_native',
            **traverse_obj(data, {
                'url': 'hls',
                'fps': ('fps', {float_or_none}),
                'aspect_ratio': ('aspectRatio', {float_or_none}),
            }),
            **parse_resolution(quality),
        } for quality, data in profiles]

        # A livestream URL marks the item as live and contributes more formats
        livestream_url = traverse_obj(video_data, ('item', 'livestream', 'url', {url_or_none}))
        is_live = bool(livestream_url)
        if is_live:
            formats.extend(self._extract_m3u8_formats(livestream_url, video_id, 'mp4', live=True))

        # Language keys ending in "-auto" go to automatic captions, with the
        # suffix stripped; everything else is a regular subtitle track
        subtitles, automatic_captions = {}, {}
        subtitle_items = traverse_obj(video_data, (
            'item', 'subtitles', {dict.items}, lambda _, v: url_or_none(v[1])))
        for lang, sub_url in subtitle_items:
            is_auto = lang.endswith('-auto')
            target = automatic_captions if is_auto else subtitles
            target.setdefault(lang[:-5] if is_auto else lang, []).append({
                'url': sub_url,
            })

        metadata = traverse_obj(video_data, ('item', {
            'id': ('id', {str}),
            'title': ('title', {str}),
            'description': ('description', {str}),
            'thumbnail': ('cover', 'paths', 'original', 'src', {url_or_none}),
            'duration': ('uploadFile', 'videoDuration', {int_or_none}),
            'channel': ('channel', 'name', {str}),
            'channel_id': ('channel', 'id', {str}),
            'channel_follower_count': ('channel', 'stats', 'subscribers', {int_or_none}),
            'channel_is_verified': ('channel', 'verified', {bool}),
            'tags': ('tags', ..., {str}),
            'timestamp': ('createdAt', {parse_iso8601}),
            'release_timestamp': ('publishedAt', {parse_iso8601}),
            'modified_timestamp': ('updatedAt', {parse_iso8601}),
            'view_count': ('stats', 'viewTotalCount', {int_or_none}),
            'like_count': ('stats', 'likeCount', {int_or_none}),
            'dislike_count': ('stats', 'dislikeCount', {int_or_none}),
            'comment_count': ('stats', 'commentCount', {int_or_none}),
            'media_type': ('type', {str}),
        }))

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'automatic_captions': automatic_captions,
            'is_live': is_live,
            **metadata,
        }
|
||||
@ -1,496 +0,0 @@
|
||||
import hashlib
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
merge_dicts,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class ProSiebenSat1BaseIE(InfoExtractor):
    # Shared format extraction for extractors backed by the "vas" video API.
    # `_extract_video_info` reads `_TOKEN`, `_SALT` and `_CLIENT_NAME`, which
    # are not defined here and must be provided by subclasses. When
    # `_ACCESS_ID` is set, `_ENCRYPTION_KEY` and `_IV` are read as well.
    _GEO_BYPASS = False
    _ACCESS_ID = None
    _SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear'
    # The endpoint name ('protocols' / 'urls') is appended directly to this prefix
    _V4_BASE_URL = 'https://vas-v4.p7s1video.net/4.0/get'

    def _extract_video_info(self, url, clip_id):
        """Return ``{'duration': ..., 'formats': [...]}`` for *clip_id*.

        *url* is passed to the API as ``client_location`` and is part of the
        hashed client IDs. When ``_ACCESS_ID`` is set the v4 API is tried
        first; if that yields no formats, the v2 ``sources`` API is used.

        Raises ``ExtractorError`` (expected) when the v2 URL lookup reports a
        non-zero ``status_code``.
        """
        client_location = url

        video = self._download_json(
            'http://vas.sim-technik.de/vas/live/v2/videos',
            clip_id, 'Downloading videos JSON', query={
                'access_token': self._TOKEN,
                'client_location': client_location,
                'client_name': self._CLIENT_NAME,
                'ids': clip_id,
            })[0]

        if not self.get_param('allow_unplayable_formats') and video.get('is_protected') is True:
            self.report_drm(clip_id)

        formats = []
        if self._ACCESS_ID:
            # The concatenation order is part of the token that gets hashed
            raw_ct = self._ENCRYPTION_KEY + clip_id + self._IV + self._ACCESS_ID
            protocols = self._download_json(
                self._V4_BASE_URL + 'protocols', clip_id,
                'Downloading protocols JSON',
                headers=self.geo_verification_headers(), query={
                    'access_id': self._ACCESS_ID,
                    'client_token': hashlib.sha1((raw_ct).encode()).hexdigest(),
                    'video_id': clip_id,
                }, fatal=False, expected_status=(403,)) or {}
            error = protocols.get('error') or {}
            if error.get('title') == 'Geo check failed':
                self.raise_geo_restricted(countries=['AT', 'CH', 'DE'])
            server_token = protocols.get('server_token')
            if server_token:
                urls = (self._download_json(
                    self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={
                        'access_id': self._ACCESS_ID,
                        'client_token': hashlib.sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(),
                        'protocols': self._SUPPORTED_PROTOCOLS,
                        'server_token': server_token,
                        'video_id': clip_id,
                    }, fatal=False) or {}).get('urls') or {}
                for protocol, variant in urls.items():
                    # `or {}` also covers a `clear` entry that is present but null
                    source_url = (variant.get('clear') or {}).get('url')
                    if not source_url:
                        continue
                    if protocol == 'dash':
                        formats.extend(self._extract_mpd_formats(
                            source_url, clip_id, mpd_id=protocol, fatal=False))
                    elif protocol == 'hls':
                        formats.extend(self._extract_m3u8_formats(
                            source_url, clip_id, 'mp4', 'm3u8_native',
                            m3u8_id=protocol, fatal=False))
                    else:
                        formats.append({
                            'url': source_url,
                            'format_id': protocol,
                        })
        if not formats:
            # Fall back to the v2 API: one URL lookup per source ID
            source_ids = [str(source['id']) for source in video['sources']]

            client_id = self._SALT[:2] + hashlib.sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode()).hexdigest()

            sources = self._download_json(
                f'http://vas.sim-technik.de/vas/live/v2/videos/{clip_id}/sources',
                clip_id, 'Downloading sources JSON', query={
                    'access_token': self._TOKEN,
                    'client_id': client_id,
                    'client_location': client_location,
                    'client_name': self._CLIENT_NAME,
                })
            server_id = sources['server_id']

            def fix_bitrate(bitrate):
                # Values that are exact multiples of 1000 are divided by 1000;
                # missing or zero bitrates become None
                bitrate = int_or_none(bitrate)
                if not bitrate:
                    return None
                return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate

            for source_id in source_ids:
                client_id = self._SALT[:2] + hashlib.sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode()).hexdigest()
                urls = self._download_json(
                    f'http://vas.sim-technik.de/vas/live/v2/videos/{clip_id}/sources/url',
                    clip_id, 'Downloading urls JSON', fatal=False, query={
                        'access_token': self._TOKEN,
                        'client_id': client_id,
                        'client_location': client_location,
                        'client_name': self._CLIENT_NAME,
                        'server_id': server_id,
                        'source_ids': source_id,
                    })
                if not urls:
                    continue
                if urls.get('status_code') != 0:
                    raise ExtractorError('This video is unavailable', expected=True)
                urls_sources = urls['sources']
                if isinstance(urls_sources, dict):
                    urls_sources = urls_sources.values()
                for source in urls_sources:
                    source_url = source.get('url')
                    if not source_url:
                        continue
                    protocol = source.get('protocol')
                    mimetype = source.get('mimetype')
                    if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
                        formats.extend(self._extract_f4m_formats(
                            source_url, clip_id, f4m_id='hds', fatal=False))
                    elif mimetype == 'application/x-mpegURL':
                        formats.extend(self._extract_m3u8_formats(
                            source_url, clip_id, 'mp4', 'm3u8_native',
                            m3u8_id='hls', fatal=False))
                    elif mimetype == 'application/dash+xml':
                        formats.extend(self._extract_mpd_formats(
                            source_url, clip_id, mpd_id='dash', fatal=False))
                    else:
                        # A source without a bitrate is still usable:
                        # fix_bitrate() maps a missing value to None
                        tbr = fix_bitrate(source.get('bitrate'))
                        if protocol in ('rtmp', 'rtmpe'):
                            mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
                            if not mobj:
                                continue
                            path = mobj.group('path')
                            # NOTE(review): rfind() returns -1 when 'mp4:' is
                            # absent, which splits off only the last character
                            mp4colon_index = path.rfind('mp4:')
                            app = path[:mp4colon_index]
                            play_path = path[mp4colon_index:]
                            formats.append({
                                'url': '{}/{}'.format(mobj.group('url'), app),
                                'app': app,
                                'play_path': play_path,
                                'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
                                'page_url': 'http://www.prosieben.de',
                                'tbr': tbr,
                                'ext': 'flv',
                                'format_id': join_nonempty('rtmp', tbr),
                            })
                        else:
                            formats.append({
                                'url': source_url,
                                'tbr': tbr,
                                'format_id': join_nonempty('http', tbr),
                            })

        return {
            'duration': float_or_none(video.get('duration')),
            'formats': formats,
        }
|
||||
|
||||
|
||||
class ProSiebenSat1IE(ProSiebenSat1BaseIE):
    # Scrapes clip and playlist pages of the sites matched by _VALID_URL and
    # hands the discovered clip IDs to ProSiebenSat1BaseIE._extract_video_info
    IE_NAME = 'prosiebensat1'
    IE_DESC = 'ProSiebenSat.1 Digital'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?
                        (?:
                            (?:beta\.)?
                            (?:
                                prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|advopedia
                            )\.(?:de|at|ch)|
                            ran\.de|fem\.com|advopedia\.de|galileo\.tv/video
                        )
                        /(?P<id>.+)
                    '''

    _TESTS = [
        {
            # Tests changes introduced in https://github.com/ytdl-org/youtube-dl/pull/6242
            # in response to fixing https://github.com/ytdl-org/youtube-dl/issues/6215:
            # - malformed f4m manifest support
            # - proper handling of URLs starting with `https?://` in 2.0 manifests
            # - recursive child f4m manifests extraction
            'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
            'info_dict': {
                'id': '2104602',
                'ext': 'mp4',
                'title': 'CIRCUS HALLIGALLI - Episode 18 - Staffel 2',
                'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
                'upload_date': '20131231',
                'duration': 5845.04,
                'series': 'CIRCUS HALLIGALLI',
                'season_number': 2,
                'episode': 'Episode 18 - Staffel 2',
                'episode_number': 18,
            },
        },
        {
            'url': 'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html',
            'info_dict': {
                'id': '2570327',
                'ext': 'mp4',
                'title': 'Lady-Umstyling für Audrina',
                'description': 'md5:4c16d0c17a3461a0d43ea4084e96319d',
                'upload_date': '20131014',
                'duration': 606.76,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'Seems to be broken',
        },
        {
            'url': 'http://www.prosiebenmaxx.de/tv/experience/video/144-countdown-fuer-die-autowerkstatt-ganze-folge',
            'info_dict': {
                'id': '2429369',
                'ext': 'mp4',
                'title': 'Countdown für die Autowerkstatt',
                'description': 'md5:809fc051a457b5d8666013bc40698817',
                'upload_date': '20140223',
                'duration': 2595.04,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'This video is unavailable',
        },
        {
            'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip',
            'info_dict': {
                'id': '2904997',
                'ext': 'mp4',
                'title': 'Sexy laufen in Ugg Boots',
                'description': 'md5:edf42b8bd5bc4e5da4db4222c5acb7d6',
                'upload_date': '20140122',
                'duration': 245.32,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'This video is unavailable',
        },
        {
            'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip',
            'info_dict': {
                'id': '2906572',
                'ext': 'mp4',
                'title': 'Im Interview: Kai Wiesinger',
                'description': 'md5:e4e5370652ec63b95023e914190b4eb9',
                'upload_date': '20140203',
                'duration': 522.56,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'This video is unavailable',
        },
        {
            'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge',
            'info_dict': {
                'id': '2992323',
                'ext': 'mp4',
                'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',
                'description': 'md5:2669cde3febe9bce13904f701e774eb6',
                'upload_date': '20141014',
                'duration': 2410.44,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'This video is unavailable',
        },
        {
            'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge',
            'info_dict': {
                'id': '3004256',
                'ext': 'mp4',
                'title': 'Schalke: Tönnies möchte Raul zurück',
                'description': 'md5:4b5b271d9bcde223b54390754c8ece3f',
                'upload_date': '20140226',
                'duration': 228.96,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'This video is unavailable',
        },
        {
            'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
            'info_dict': {
                'id': '2572814',
                'ext': 'mp4',
                'title': 'The Voice of Germany - Andreas Kümmert: Rocket Man',
                'description': 'md5:6ddb02b0781c6adf778afea606652e38',
                'timestamp': 1382041620,
                'upload_date': '20131017',
                'duration': 469.88,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.fem.com/videos/beauty-lifestyle/kurztrips-zum-valentinstag',
            'info_dict': {
                'id': '2156342',
                'ext': 'mp4',
                'title': 'Kurztrips zum Valentinstag',
                'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.',
                'duration': 307.24,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist',
            'info_dict': {
                'id': '439664',
                'title': 'Episode 8 - Ganze Folge - Playlist',
                'description': 'md5:63b8963e71f481782aeea877658dec84',
            },
            'playlist_count': 2,
            'skip': 'This video is unavailable',
        },
        {
            # title in <h2 class="subtitle">
            'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip',
            'info_dict': {
                'id': '4895826',
                'ext': 'mp4',
                'title': 'Jetzt erst enthüllt: Das Geheimnis von Emma Stones Oscar-Robe',
                'description': 'md5:e5ace2bc43fadf7b63adc6187e9450b9',
                'upload_date': '20170302',
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'geo restricted to Germany',
        },
        {
            # geo restricted to Germany
            'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge',
            'only_matching': True,
        },
        {
            # geo restricted to Germany
            'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge',
            'only_matching': True,
        },
        {
            # geo restricted to Germany
            'url': 'https://www.galileo.tv/video/diese-emojis-werden-oft-missverstanden',
            'only_matching': True,
        },
        {
            'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel',
            'only_matching': True,
        },
        {
            'url': 'http://www.advopedia.de/videos/lenssen-klaert-auf/lenssen-klaert-auf-folge-8-staffel-3-feiertage-und-freie-tage',
            'only_matching': True,
        },
    ]

    # Credentials read by ProSiebenSat1BaseIE._extract_video_info
    _TOKEN = 'prosieben'
    _SALT = '01!8d8F_)r9]4s[qeuXfP%'
    _CLIENT_NAME = 'kolibri-2.0.19-splec4'

    _ACCESS_ID = 'x_prosiebenmaxx-de'
    _ENCRYPTION_KEY = 'Eeyeey9oquahthainoofashoyoikosag'
    _IV = 'Aeluchoc6aevechuipiexeeboowedaok'

    # Each list below is tried in order against the page HTML
    _CLIPID_REGEXES = [
        r'"clip_id"\s*:\s+"(\d+)"',
        r'clipid: "(\d+)"',
        r'clip[iI]d=(\d+)',
        r'clip[iI][dD]\s*=\s*["\'](\d+)',
        r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
        r'proMamsId"\s*:\s*"(\d+)',
    ]
    _TITLE_REGEXES = [
        r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
        r'<header class="clearfix">\s*<h3>(.+?)</h3>',
        r'<!-- start video -->\s*<h1>(.+?)</h1>',
        r'<h1 class="att-name">\s*(.+?)</h1>',
        r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>',
        r'<h2 class="video-title" itemprop="name">\s*(.+?)</h2>',
        r'<div[^>]+id="veeseoTitle"[^>]*>(.+?)</div>',
        r'<h2[^>]+class="subtitle"[^>]*>([^<]+)</h2>',
    ]
    _DESCRIPTION_REGEXES = [
        r'<p itemprop="description">\s*(.+?)</p>',
        r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>',
        r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>',
        r'<p class="att-description">\s*(.+?)\s*</p>',
        r'<p class="video-description" itemprop="description">\s*(.+?)</p>',
        r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>',
    ]
    _UPLOAD_DATE_REGEXES = [
        r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"',
        r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr',
        r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
        r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',
    ]
    _PAGE_TYPE_REGEXES = [
        r'<meta name="page_type" content="([^"]+)">',
        r"'itemType'\s*:\s*'([^']*)'",
    ]
    _PLAYLIST_ID_REGEXES = [
        r'content[iI]d=(\d+)',
        r"'itemId'\s*:\s*'([^']*)'",
    ]
    _PLAYLIST_CLIP_REGEXES = [
        r'(?s)data-qvt=.+?<a href="([^"]+)"',
    ]

    def _extract_clip(self, url, webpage):
        """Return the info dict for a single-clip page.

        The clip ID and metadata are scraped from *webpage*. The result merges
        the formats/duration from ``_extract_video_info``, the scraped fields
        and any JSON-LD data, in that order.
        """
        clip_id = self._html_search_regex(
            self._CLIPID_REGEXES, webpage, 'clip id')
        title = self._html_search_regex(
            self._TITLE_REGEXES, webpage, 'title',
            default=None) or self._og_search_title(webpage)
        info = self._extract_video_info(url, clip_id)
        description = self._html_search_regex(
            self._DESCRIPTION_REGEXES, webpage, 'description', default=None)
        if description is None:
            description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        upload_date = unified_strdate(
            self._html_search_meta('og:published_time', webpage,
                                   'upload date', default=None)
            or self._html_search_regex(self._UPLOAD_DATE_REGEXES,
                                       webpage, 'upload date', default=None))

        json_ld = self._search_json_ld(webpage, clip_id, default={})

        return merge_dicts(info, {
            'id': clip_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }, json_ld)

    def _extract_playlist(self, url, webpage):
        """Return a playlist built from the page's ``contentResources`` array.

        Items with neither ``id`` nor ``upc`` are skipped.
        """
        playlist_id = self._html_search_regex(
            self._PLAYLIST_ID_REGEXES, webpage, 'playlist id')
        playlist = self._parse_json(
            self._search_regex(
                r'var\s+contentResources\s*=\s*(\[.+?\]);\s*</script',
                webpage, 'playlist'),
            playlist_id)
        entries = []
        for item in playlist:
            clip_id = item.get('id') or item.get('upc')
            if not clip_id:
                continue
            info = self._extract_video_info(url, clip_id)
            # `or {}` also covers a `teaser` entry that is present but null
            teaser = item.get('teaser') or {}
            info.update({
                'id': clip_id,
                'title': item.get('title') or teaser.get('headline'),
                'description': teaser.get('description'),
                'thumbnail': item.get('poster'),
                'duration': float_or_none(item.get('duration')),
                'series': item.get('tvShowTitle'),
                'uploader': item.get('broadcastPublisher'),
            })
            entries.append(info)
        return self.playlist_result(entries, playlist_id)

    def _real_extract(self, url):
        """Dispatch to clip or playlist extraction based on the page type.

        The page type defaults to ``clip`` when it cannot be found. Any other
        type raises an expected ``ExtractorError``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        page_type = self._search_regex(
            self._PAGE_TYPE_REGEXES, webpage,
            'page type', default='clip').lower()
        if page_type == 'clip':
            return self._extract_clip(url, webpage)
        elif page_type == 'playlist':
            return self._extract_playlist(url, webpage)
        else:
            raise ExtractorError(
                f'Unsupported page type {page_type}', expected=True)
|
||||
@ -1,50 +0,0 @@
|
||||
from .prosiebensat1 import ProSiebenSat1BaseIE
|
||||
from ..utils import parse_duration, unified_strdate
|
||||
|
||||
|
||||
class Puls4IE(ProSiebenSat1BaseIE):
    # The captured "id" is the whole URL path after the host
    _VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>[^?#&]+)'
    _TESTS = [{
        'url': 'http://www.puls4.com/2-minuten-2-millionen/staffel-3/videos/2min2miotalk/Tobias-Homberger-von-myclubs-im-2min2miotalk-118118',
        'md5': 'fd3c6b0903ac72c9d004f04bc6bb3e03',
        'info_dict': {
            'id': '118118',
            'ext': 'flv',
            'title': 'Tobias Homberger von myclubs im #2min2miotalk',
            'description': 'md5:f9def7c5e8745d6026d8885487d91955',
            'upload_date': '20160830',
            'uploader': 'PULS_4',
        },
    }, {
        'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident.-Norbert-Hofer',
        'only_matching': True,
    }, {
        'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident-Analyse-des-Interviews-mit-Norbert-Hofer-416598',
        'only_matching': True,
    }]
    # Credentials read by ProSiebenSat1BaseIE._extract_video_info
    _TOKEN = 'puls4'
    _SALT = '01!kaNgaiNgah1Ie4AeSha'
    _CLIENT_NAME = ''

    def _real_extract(self, url):
        """Resolve the page path to its media JSON and return the info dict."""
        path = self._match_id(url)

        # First request: page description, whose first content entry points
        # at the media JSON
        page = self._download_json(
            'http://www.puls4.com/api/json-fe/page/' + path, path)
        content_path = page['content'][0]['url']

        # Second request: the media JSON itself
        content = self._download_json(
            'http://www.puls4.com' + content_path, content_path)
        media = content['mediaCurrent']
        player_content = media['playerContent']

        info = self._extract_video_info(url, player_content['id'])
        return {
            **info,
            'id': str(media['objectId']),
            'title': player_content['title'],
            'description': media.get('description'),
            'thumbnail': media.get('previewLink'),
            'upload_date': unified_strdate(media.get('date')),
            'duration': parse_duration(player_content.get('duration')),
            'episode': player_content.get('episodePartName'),
            'show': media.get('channel'),
            'season_id': player_content.get('seasonId'),
            'uploader': player_content.get('sourceCompany'),
        }
|
||||
@ -1,154 +0,0 @@
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_html_by_class,
|
||||
get_element_text_and_html_by_tag,
|
||||
get_elements_html_by_class,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
try_call,
|
||||
unified_strdate,
|
||||
update_url,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class RadioComercialIE(InfoExtractor):
    # Single podcast episode: /podcasts/<show>/<season>/<episode slug>, where
    # the season number may carry a leading "t"
    _VALID_URL = r'https?://(?:www\.)?radiocomercial\.pt/podcasts/[^/?#]+/t?(?P<season>\d+)/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://radiocomercial.pt/podcasts/o-homem-que-mordeu-o-cao/t6/taylor-swift-entranhando-se-que-nem-uma-espada-no-ventre-dos-fas#page-content-wrapper',
        'md5': '5f4fe8e485b29d2e8fd495605bc2c7e4',
        'info_dict': {
            'id': 'taylor-swift-entranhando-se-que-nem-uma-espada-no-ventre-dos-fas',
            'ext': 'mp3',
            'title': 'Taylor Swift entranhando-se que nem uma espada no ventre dos fãs.',
            'release_date': '20231025',
            'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
            'season': 'Season 6',
            'season_number': 6,
        },
    }, {
        'url': 'https://radiocomercial.pt/podcasts/convenca-me-num-minuto/t3/convenca-me-num-minuto-que-os-lobisomens-existem',
        'md5': '47e96c273aef96a8eb160cd6cf46d782',
        'info_dict': {
            'id': 'convenca-me-num-minuto-que-os-lobisomens-existem',
            'ext': 'mp3',
            'title': 'Convença-me num minuto que os lobisomens existem',
            'release_date': '20231026',
            'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
            'season': 'Season 3',
            'season_number': 3,
        },
    }, {
        'url': 'https://radiocomercial.pt/podcasts/inacreditavel-by-ines-castel-branco/t2/o-desastre-de-aviao',
        'md5': '69be64255420fec23b7259955d771e54',
        'info_dict': {
            'id': 'o-desastre-de-aviao',
            'ext': 'mp3',
            'title': 'O desastre de avião',
            'description': 'md5:8a82beeb372641614772baab7246245f',
            'release_date': '20231101',
            'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
            'season': 'Season 2',
            'season_number': 2,
        },
        'params': {
            # inconsistent md5
            'skip_download': True,
        },
    }, {
        'url': 'https://radiocomercial.pt/podcasts/tnt-todos-no-top/2023/t-n-t-29-de-outubro',
        'md5': '91d32d4d4b1407272068b102730fc9fa',
        'info_dict': {
            'id': 't-n-t-29-de-outubro',
            'ext': 'mp3',
            'title': 'T.N.T 29 de outubro',
            'release_date': '20231029',
            'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
            'season': 'Season 2023',
            'season_number': 2023,
        },
    }]

    def _real_extract(self, url):
        """Scrape the episode page and return its info dict."""
        mobj = self._match_valid_url(url)
        video_id, season = mobj.group('id'), mobj.group('season')
        webpage = self._download_webpage(url, video_id)

        title = self._html_extract_title(webpage)
        description = self._og_search_description(webpage, default=None)

        # The release date is the "date" element inside the "descriptions" block
        descriptions_html = get_element_html_by_class('descriptions', webpage) or ''
        release_date = unified_strdate(get_element_by_class('date', descriptions_html))

        thumbnail = self._og_search_thumbnail(webpage)
        season_number = int_or_none(season)

        # The media URL is the href of the element with class "audiofile"
        audio_attrs = extract_attributes(get_element_html_by_class('audiofile', webpage) or '')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'release_date': release_date,
            'thumbnail': thumbnail,
            'season_number': season_number,
            'url': audio_attrs.get('href'),
        }
|
||||
|
||||
|
||||
class RadioComercialPlaylistIE(InfoExtractor):
    """Playlist extractor for radiocomercial.pt podcast pages, optionally limited to one season."""

    _VALID_URL = r'https?://(?:www\.)?radiocomercial\.pt/podcasts/(?P<id>[\w-]+)(?:/t?(?P<season>\d+))?/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://radiocomercial.pt/podcasts/convenca-me-num-minuto/t3',
        'info_dict': {
            'id': 'convenca-me-num-minuto_t3',
            'title': 'Convença-me num Minuto - Temporada 3',
        },
        'playlist_mincount': 32,
    }, {
        'url': 'https://radiocomercial.pt/podcasts/o-homem-que-mordeu-o-cao',
        'info_dict': {
            'id': 'o-homem-que-mordeu-o-cao',
            'title': 'O Homem Que Mordeu o Cão',
        },
        'playlist_mincount': 19,
    }, {
        'url': 'https://radiocomercial.pt/podcasts/as-minhas-coisas-favoritas',
        'info_dict': {
            'id': 'as-minhas-coisas-favoritas',
            'title': 'As Minhas Coisas Favoritas',
        },
        'playlist_mincount': 131,
    }, {
        'url': 'https://radiocomercial.pt/podcasts/tnt-todos-no-top/t2023',
        'info_dict': {
            'id': 'tnt-todos-no-top_t2023',
            'title': 'TNT - Todos No Top - Temporada 2023',
        },
        'playlist_mincount': 39,
    }]

    def _entries(self, url, playlist_id):
        """Yield episode URLs found on the numbered sub-pages ``{url}/1``, ``{url}/2``, ...

        Iteration ends when a page download fails with HTTP 404 or when a
        page contains no ``tm-ouvir-podcast`` elements. Any other download
        error is re-raised.
        """
        page_num = 0
        while True:
            page_num += 1
            try:
                listing = self._download_webpage(
                    f'{url}/{page_num}', playlist_id, f'Downloading page {page_num}')
            except ExtractorError as err:
                page_missing = isinstance(err.cause, HTTPError) and err.cause.status == 404
                if not page_missing:
                    raise
                return

            episode_tags = get_elements_html_by_class('tm-ouvir-podcast', listing)
            if not episode_tags:
                return

            hrefs = traverse_obj(episode_tags, (..., {extract_attributes}, 'href'))
            for href in hrefs:
                candidate = urljoin(url, href)
                # Only links that the single-episode extractor accepts are yielded.
                if RadioComercialIE.suitable(candidate):
                    yield candidate

    def _real_extract(self, url):
        """Return a playlist of the episodes linked from a podcast (or season) page."""
        mobj = self._match_valid_url(url)
        podcast = mobj.group('id')
        season = mobj.group('season')
        playlist_id = join_nonempty(podcast, season, delim='_t')

        # Query string and fragment are dropped before the URL is used as the
        # base for the numbered sub-pages.
        base_url = update_url(url, query=None, fragment=None)
        webpage = self._download_webpage(base_url, playlist_id)

        name = try_call(lambda: get_element_text_and_html_by_tag('h1', webpage)[0])
        if name == season:
            title = name
        else:
            title = join_nonempty(name, season, delim=' - Temporada ')

        entries = self._entries(base_url, playlist_id)
        return self.playlist_from_matches(entries, playlist_id, title, ie=RadioComercialIE)
|
||||
@ -1,134 +0,0 @@
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_qs,
|
||||
update_url_query,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class RedCDNLivxIE(InfoExtractor):
    _VALID_URL = r'https?://[^.]+\.(?:dcs\.redcdn|atmcdn)\.pl/(?:live(?:dash|hls|ss)|nvr)/o2/(?P<tenant>[^/?#]+)/(?P<id>[^?#]+)\.livx'
    IE_NAME = 'redcdnlivx'

    _TESTS = [{
        'url': 'https://r.dcs.redcdn.pl/livedash/o2/senat/ENC02/channel.livx?indexMode=true&startTime=638272860000&stopTime=638292544000',
        'info_dict': {
            'id': 'ENC02-638272860000-638292544000',
            'ext': 'mp4',
            'title': 'ENC02',
            'duration': 19683.982,
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://r.dcs.redcdn.pl/livedash/o2/sejm/ENC18/live.livx?indexMode=true&startTime=722333096000&stopTime=722335562000',
        'info_dict': {
            'id': 'ENC18-722333096000-722335562000',
            'ext': 'mp4',
            'title': 'ENC18',
            'duration': 2463.995,
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://r.dcs.redcdn.pl/livehls/o2/sportevolution/live/triathlon2018/warsaw.livx/playlist.m3u8?startTime=550305000000&stopTime=550327620000',
        'info_dict': {
            'id': 'triathlon2018-warsaw-550305000000-550327620000',
            'ext': 'mp4',
            'title': 'triathlon2018/warsaw',
            'duration': 22619.98,
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://n-25-12.dcs.redcdn.pl/nvr/o2/sejm/Migacz-ENC01/1.livx?startTime=722347200000&stopTime=722367345000',
        'only_matching': True,
    }, {
        'url': 'https://redir.atmcdn.pl/nvr/o2/sejm/ENC08/1.livx?startTime=503831270000&stopTime=503840040000',
        'only_matching': True,
    }]

    '''
    Known methods (first in url path):
    - `livedash` - DASH MPD
    - `livehls` - HTTP Live Streaming
    - `livess` - IIS Smooth Streaming
    - `nvr` - CCTV mode, directly returns a file, typically flv, avc1, aac
    - `sc` - shoutcast/icecast (audio streams, like radio)
    '''

    def _real_extract(self, url):
        """Collect formats for a .livx stream by probing each known delivery method.

        The tenant and stream path come from the URL; ``startTime`` and
        ``stopTime`` are taken from its query string when present. The Smooth
        Streaming manifest is fetched first because its root attributes also
        supply the duration and live flag.
        """
        mobj = self._match_valid_url(url)
        tenant = mobj.group('tenant')
        path = mobj.group('id')
        query = parse_qs(url)
        start_time = traverse_obj(query, ('startTime', 0, {int_or_none}))
        stop_time = traverse_obj(query, ('stopTime', 0, {int_or_none}))

        # Path suffix appended after ".livx" for the methods that need one.
        mode_suffixes = {
            'livess': '/manifest',
            'livehls': '/playlist.m3u8',
        }

        def livx_mode(mode):
            """Build the r.dcs.redcdn.pl URL for ``mode`` with the current time range."""
            file_qs = {}
            if start_time:
                file_qs['startTime'] = start_time
            if stop_time:
                file_qs['stopTime'] = stop_time
            if mode == 'nvr':
                file_qs['nolimit'] = 1
            elif mode != 'sc':
                file_qs['indexMode'] = 'true'
            suffix = mode_suffixes.get(mode, '')
            return update_url_query(f'https://r.dcs.redcdn.pl/{mode}/o2/{tenant}/{path}.livx{suffix}', file_qs)

        # There is no id or title for a transmission, so both are derived
        # from the stream path and the requested time range.
        title = path
        for fragment in ('/live', 'live/', '/channel', 'channel/'):
            title = title.replace(fragment, '')
        title = title.strip('/')
        video_id = join_nonempty(title.replace('/', '-'), start_time, stop_time)

        # The manifest is downloaded separately here, instead of through
        # _extract_ism_formats, so that its stream metadata can be read too.
        ism_res = self._download_xml_handle(
            livx_mode('livess'), video_id,
            note='Downloading ISM manifest',
            errnote='Failed to download ISM manifest',
            fatal=False)
        if ism_res is False:
            ism_doc = None
            formats = []
        else:
            ism_doc, ism_urlh = ism_res
            formats, _ = self._parse_ism_formats_and_subtitles(ism_doc, ism_urlh.url, 'ss')

        # HEAD request with every status accepted; the format is only added
        # when the final response is a 200.
        nvr_urlh = self._request_webpage(
            HEADRequest(livx_mode('nvr')), video_id, 'Follow flv file redirect', fatal=False,
            expected_status=lambda _: True)
        if nvr_urlh and nvr_urlh.status == 200:
            direct_format = {
                'url': nvr_urlh.url,
                'ext': 'flv',
                'format_id': 'direct-0',
                'preference': -1,  # might be slow
            }
            formats.append(direct_format)

        dash_formats = self._extract_mpd_formats(
            livx_mode('livedash'), video_id, mpd_id='dash', fatal=False)
        formats.extend(dash_formats)
        hls_formats = self._extract_m3u8_formats(
            livx_mode('livehls'), video_id, m3u8_id='hls', ext='mp4', fatal=False)
        formats.extend(hls_formats)

        # 10000000 is the fallback when the manifest carries no usable TimeScale.
        time_scale = traverse_obj(ism_doc, ('@TimeScale', {int_or_none})) or 10000000
        duration = traverse_obj(
            ism_doc, ('@Duration', {float_or_none(scale=time_scale)})) or None

        is_live = traverse_obj(ism_doc, '@IsLive') == 'TRUE'
        live_status = 'is_live' if is_live else ('was_live' if duration else None)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'duration': duration,
            'live_status': live_status,
        }
|
||||
@ -1,94 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import extract_attributes, merge_dicts, remove_end
|
||||
|
||||
|
||||
class RheinMainTVIE(InfoExtractor):
    """Extractor for rheinmaintv.de programme pages (``/sendungen/.../vom-DD.MM.YYYY``)."""

    _VALID_URL = r'https?://(?:www\.)?rheinmaintv\.de/sendungen/(?:[\w-]+/)*(?P<video_id>(?P<display_id>[\w-]+)/vom-\d{2}\.\d{2}\.\d{4}(?:/\d+)?)'
    _TESTS = [{
        'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/auf-dem-weg-zur-deutschen-meisterschaft/vom-07.11.2022/',
        'info_dict': {
            'id': 'auf-dem-weg-zur-deutschen-meisterschaft-vom-07.11.2022',
            'ext': 'ismv',  # ismv+isma will be merged into mp4
            'alt_title': 'Auf dem Weg zur Deutschen Meisterschaft',
            'title': 'Auf dem Weg zur Deutschen Meisterschaft',
            'upload_date': '20221108',
            'view_count': int,
            'display_id': 'auf-dem-weg-zur-deutschen-meisterschaft',
            'thumbnail': r're:^https://.+\.jpg',
            'description': 'md5:48c59b74192bc819a9b34af1d5ed1eb9',
            'timestamp': 1667933057,
            'duration': 243.0,
        },
        'params': {'skip_download': 'ism'},
    }, {
        'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften/vom-14.11.2022/',
        'info_dict': {
            'id': 'formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften-vom-14.11.2022',
            'ext': 'ismv',
            'title': 'Formationsgemeinschaft Rhein-Main bei den Deutschen Meisterschaften',
            'timestamp': 1668526214,
            'display_id': 'formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften',
            'alt_title': 'Formationsgemeinschaft Rhein-Main bei den Deutschen Meisterschaften',
            'view_count': int,
            'thumbnail': r're:^https://.+\.jpg',
            'duration': 345.0,
            'description': 'md5:9370ba29526984006c2cba1372e5c5a0',
            'upload_date': '20221115',
        },
        'params': {'skip_download': 'ism'},
    }, {
        'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/casino-mainz-bei-den-deutschen-meisterschaften/vom-14.11.2022/',
        'info_dict': {
            'id': 'casino-mainz-bei-den-deutschen-meisterschaften-vom-14.11.2022',
            'ext': 'ismv',
            'title': 'Casino Mainz bei den Deutschen Meisterschaften',
            'view_count': int,
            'timestamp': 1668527402,
            'alt_title': 'Casino Mainz bei den Deutschen Meisterschaften',
            'upload_date': '20221115',
            'display_id': 'casino-mainz-bei-den-deutschen-meisterschaften',
            'duration': 348.0,
            'thumbnail': r're:^https://.+\.jpg',
            'description': 'md5:70fc1660eeba96da17199e5bdff4c0aa',
        },
        'params': {'skip_download': 'ism'},
    }, {
        'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/bricks4kids/vom-22.06.2022/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract ISM formats and metadata from a programme page.

        Metadata is combined from three places on the page: the adjacent
        ``<source>``/``<img>`` tags, the JSON-LD blocks, and OpenGraph/title
        tags as fallbacks. Values set explicitly here are passed to
        ``merge_dicts`` ahead of the parsed JSON-LD dict.
        """
        mobj = self._match_valid_url(url)
        display_id = mobj.group('display_id')
        # The matched id contains slashes; they are replaced with dashes.
        video_id = mobj.group('video_id').replace('/', '-')
        webpage = self._download_webpage(url, video_id)

        source_tag, img_tag = self._search_regex(
            r'(?s)(?P<source><source[^>]*>)(?P<img><img[^>]*>)',
            webpage, 'video', group=('source', 'img'))
        source = extract_attributes(source_tag)
        img = extract_attributes(img_tag)

        raw_json_ld = list(self._yield_json_ld(webpage, video_id))
        json_ld = self._json_ld(raw_json_ld, video_id)
        json_ld.pop('url', None)

        # Prefer the <source> tag's src; otherwise take embedUrl from the
        # first raw JSON-LD object of type VideoObject. next() is called
        # without a default, so it raises StopIteration when none exists.
        ism_manifest_url = source.get('src') or next(
            entry.get('embedUrl') for entry in raw_json_ld
            if entry.get('@type') == 'VideoObject')
        formats, subtitles = self._extract_ism_formats_and_subtitles(ism_manifest_url, video_id)

        # Title fallbacks, tried in order and evaluated lazily.
        title = (
            self._html_search_regex(
                r'<h1><span class="title">([^<]*)</span>',
                webpage, 'headline', default=None)
            or img.get('title')
            or json_ld.get('title')
            or self._og_search_title(webpage)
            or remove_end(self._html_extract_title(webpage), ' -'))
        description = json_ld.get('description') or self._og_search_description(webpage)

        if 'src' in img:
            thumbnails = [{'url': img['src']}]
        else:
            thumbnails = json_ld.get('thumbnails')

        return merge_dicts({
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'alt_title': img.get('alt'),
            'description': description,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
        }, json_ld)
|
||||
@ -1,69 +0,0 @@
|
||||
import urllib.parse
|
||||
|
||||
from .brightcove import BrightcoveLegacyIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
|
||||
|
||||
class RMCDecouverteIE(InfoExtractor):
    """Extractor for rmcdecouverte.bfmtv.com pages; delegates playback to Brightcove."""

    _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/(?:[^?#]*_(?P<id>\d+)|mediaplayer-direct)/?(?:[#?]|$)'

    _TESTS = [{
        'url': 'https://rmcdecouverte.bfmtv.com/vestiges-de-guerre_22240/les-bunkers-secrets-domaha-beach_25303/',
        'info_dict': {
            'id': '6250879771001',
            'ext': 'mp4',
            'title': 'LES BUNKERS SECRETS D´OMAHA BEACH',
            'uploader_id': '1969646226001',
            'description': 'md5:aed573ca24abde62a148e0eba909657d',
            'timestamp': 1619622984,
            'upload_date': '20210428',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://rmcdecouverte.bfmtv.com/wheeler-dealers-occasions-a-saisir/program_2566/',
        'info_dict': {
            'id': '5983675500001',
            'ext': 'mp4',
            'title': 'CORVETTE',
            'description': 'md5:c1e8295521e45ffebf635d6a7658f506',
            'uploader_id': '1969646226001',
            'upload_date': '20181226',
            'timestamp': 1545861635,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'only available for a week',
    }, {
        'url': 'https://rmcdecouverte.bfmtv.com/avions-furtifs-la-technologie-de-lextreme_10598',
        'only_matching': True,
    }, {
        # The website accepts any URL as long as it has _\d+ at the end
        'url': 'https://rmcdecouverte.bfmtv.com/any/thing/can/go/here/_10598',
        'only_matching': True,
    }, {
        # live, geo restricted, bypassable
        'url': 'https://rmcdecouverte.bfmtv.com/mediaplayer-direct/',
        'only_matching': True,
    }]
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Find the Brightcove video id on the page and hand off to ``BrightcoveNew``.

        The id is read from a legacy Brightcove embed URL when the page has
        one, otherwise from a ``data-video-id`` attribute.
        """
        # The "mediaplayer-direct" URL form has no numeric id, so 'direct' is used.
        display_id = self._match_valid_url(url).group('id') or 'direct'
        webpage = self._download_webpage(url, display_id)

        legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
        if not legacy_url:
            brightcove_id = self._search_regex(
                r'data-video-id=["\'](\d+)', webpage, 'brightcove id')
        else:
            legacy_query = urllib.parse.urlparse(legacy_url).query
            brightcove_id = urllib.parse.parse_qs(legacy_query)['@videoPlayer'][0]

        # 'geo_countries': ['FR'] is smuggled along with the player URL.
        player_url = smuggle_url(
            self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
            {'geo_countries': ['FR']})
        return self.url_result(player_url, 'BrightcoveNew', brightcove_id)
|
||||
@ -1,7 +1,6 @@
|
||||
import datetime as dt
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .redge import RedCDNLivxIE
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
join_nonempty,
|
||||
@ -27,6 +26,7 @@ def rfc3339_to_atende(date):
|
||||
|
||||
|
||||
class SejmIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = (
|
||||
r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp(?:\?[^#]*)?#(?P<id>[\dA-F]+)',
|
||||
r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp\?(?:[^#]+&)?unid=(?P<id>[\dA-F]+)',
|
||||
@ -185,7 +185,7 @@ class SejmIE(InfoExtractor):
|
||||
entries.append({
|
||||
**common_info,
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': RedCDNLivxIE.ie_key(),
|
||||
'ie_key': 'redcdnlivx',
|
||||
'id': stream_id,
|
||||
'title': join_nonempty(title, stream_id, delim=' - '),
|
||||
})
|
||||
|
||||
@ -1,105 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_protocol,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class SendtoNewsIE(InfoExtractor):
    """Playlist extractor for embed.sendtonews.com ``embedplayer.php`` players."""

    _WORKING = False
    _VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P<id>[0-9A-Za-z-]+)'

    _TEST = {
        # From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/
        'url': 'http://embed.sendtonews.com/player2/embedplayer.php?SC=GxfCe0Zo7D-175909-5588&type=single&autoplay=on&sound=YES',
        'info_dict': {
            'id': 'GxfCe0Zo7D-175909-5588',
        },
        'playlist_count': 8,
        # test the first video only to prevent lengthy tests
        'playlist': [{
            'info_dict': {
                'id': '240385',
                'ext': 'mp4',
                'title': 'Indians introduce Encarnacion',
                'description': 'Indians president of baseball operations Chris Antonetti and Edwin Encarnacion discuss the slugger\'s three-year contract with Cleveland',
                'duration': 137.898,
                'thumbnail': r're:https?://.*\.jpg$',
                'upload_date': '20170105',
                'timestamp': 1483649762,
            },
        }],
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    _URL_TEMPLATE = '//embed.sendtonews.com/player2/embedplayer.php?SC=%s'

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield the player URL for the first ``responsiveembed.php`` script tag in ``webpage``.

        Only the first match is considered; nothing is yielded when the page
        has no such script tag.
        """
        embed = re.search(r'''(?x)<script[^>]+src=([\'"])
            (?:https?:)?//embed\.sendtonews\.com/player/responsiveembed\.php\?
            .*\bSC=(?P<SC>[0-9a-zA-Z-]+).*
            \1>''', webpage)
        if not embed:
            return
        yield cls._URL_TEMPLATE % embed.group('SC')

    def _real_extract(self, url):
        """Download the player's playlist JSON and build one entry per video."""
        playlist_id = self._match_id(url)

        # Same URL with the script swapped to data_read.php and cmd=loadInitial added.
        data_url = update_url_query(
            url.replace('embedplayer.php', 'data_read.php'),
            {'cmd': 'loadInitial'})
        playlist_data = self._download_json(data_url, playlist_id)

        thumbnail_keys = (('normal', 'thumbnailUrl'), ('small', 'smThumbnailUrl'))

        entries = []
        for video in playlist_data['playlistData'][0]:
            info_dict = self._parse_jwplayer_data(
                video['jwconfiguration'],
                require_title=False, m3u8_id='hls', rtmp_params={'no_resume': True})

            # Formats lacking a bitrate get one parsed from a "/<digits>k/"
            # path segment of their URL, when the URL has such a segment.
            for fmt in info_dict['formats']:
                if fmt.get('tbr'):
                    continue
                bitrate = int_or_none(self._search_regex(
                    r'/(\d+)k/', fmt['url'], 'bitrate', default=None))
                if bitrate:
                    fmt.update({
                        'format_id': f'{determine_protocol(fmt)}-{bitrate}',
                        'tbr': bitrate,
                    })

            thumbnails = [
                {'id': thumb_id, 'url': video[key]}
                for thumb_id, key in thumbnail_keys
                if video.get(key)
            ]

            info_dict.update({
                'title': video['S_headLine'].strip(),
                'description': unescapeHTML(video.get('S_fullStory')),
                'thumbnails': thumbnails,
                'duration': float_or_none(video.get('SM_length')),
                'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '),
                # 'tbr' was explicitly set to be preferred over 'height' originally,
                # So this is being kept unless someone can confirm this is unnecessary
                '_format_sort_fields': ('tbr', 'res'),
            })
            entries.append(info_dict)

        return self.playlist_result(entries, playlist_id)
|
||||
@ -1,6 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ShareVideosEmbedIE(InfoExtractor):
    """Embed-only extractor: it has no URL pattern of its own, just an iframe regex."""

    # No direct URLs are matched.
    _VALID_URL = False
    # Matches an <iframe> whose src points at embed.share-videos.se and
    # carries a numeric "uid" query parameter.
    _EMBED_REGEX = [
        r'<iframe[^>]+?\bsrc\s*=\s*(["\'])'
        r'(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
    ]
|
||||
@ -1,68 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
parse_filesize,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class SnotrIE(InfoExtractor):
    """Extractor for snotr.com video pages."""

    # The scheme used to be written ``http?``, which makes the final "p"
    # optional and therefore never matches "https". ``https?`` accepts both
    # http and https URLs.
    _VALID_URL = r'https?://(?:www\.)?snotr\.com/video/(?P<id>\d+)/([\w]+)'
    _TESTS = [{
        'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks',
        'info_dict': {
            'id': '13708',
            'ext': 'mp4',
            'title': 'Drone flying through fireworks!',
            'duration': 248,
            'filesize_approx': 40700000,
            'description': 'A drone flying through Fourth of July Fireworks',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'expected_warnings': ['description'],
    }, {
        'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10',
        'info_dict': {
            'id': '530',
            'ext': 'mp4',
            'title': 'David Letteman - George W. Bush Top 10',
            'duration': 126,
            'filesize_approx': 8500000,
            'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }]

    def _real_extract(self, url):
        """Extract the HTML5 media entry and page statistics for one video.

        Title and description come from OpenGraph tags. View count, duration
        and approximate file size are scraped from labelled ``<p>`` blocks
        and looked up non-fatally.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)

        description = self._og_search_description(webpage)
        # The first parsed HTML5 media entry is used as the base info dict.
        info_dict = self._parse_html5_media_entries(
            url, webpage, video_id, m3u8_entry_protocol='m3u8_native')[0]

        view_count = str_to_int(self._html_search_regex(
            r'<p[^>]*>\s*<strong[^>]*>Views:</strong>\s*<span[^>]*>([\d,\.]+)',
            webpage, 'view count', fatal=False))

        duration = parse_duration(self._html_search_regex(
            r'<p[^>]*>\s*<strong[^>]*>Length:</strong>\s*<span[^>]*>([\d:]+)',
            webpage, 'duration', fatal=False))

        filesize_approx = parse_filesize(self._html_search_regex(
            r'<p[^>]*>\s*<strong[^>]*>Filesize:</strong>\s*<span[^>]*>([^<]+)',
            webpage, 'filesize', fatal=False))

        info_dict.update({
            'id': video_id,
            'description': description,
            'title': title,
            'view_count': view_count,
            'duration': duration,
            'filesize_approx': filesize_approx,
        })

        return info_dict
|
||||
@ -1,122 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
xpath_attr,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class SpringboardPlatformIE(InfoExtractor):
    """Extractor for cms.springboardplatform.com previews, iframe embeds and RSS feed URLs."""

    _VALID_URL = r'''(?x)
                    https?://
                        cms\.springboardplatform\.com/
                        (?:
                            (?:previews|embed_iframe)/(?P<index>\d+)/video/(?P<id>\d+)|
                            xml_feeds_advanced/index/(?P<index_2>\d+)/rss3/(?P<id_2>\d+)
                        )
                    '''
    _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cms\.springboardplatform\.com/embed_iframe/\d+/video/\d+.*?)\1']
    _TESTS = [{
        'url': 'http://cms.springboardplatform.com/previews/159/video/981017/0/0/1',
        'md5': '5c3cb7b5c55740d482561099e920f192',
        'info_dict': {
            'id': '981017',
            'ext': 'mp4',
            'title': 'Redman "BUD like YOU" "Usher Good Kisser" REMIX',
            'description': 'Redman "BUD like YOU" "Usher Good Kisser" REMIX',
            'thumbnail': r're:https?://.+\.jpg',
            'timestamp': 1409132328,
            'upload_date': '20140827',
            'duration': 193,
        },
        'skip': 'Invalid URL',
    }, {
        'url': 'http://cms.springboardplatform.com/embed_iframe/159/video/981017/rab007/rapbasement.com/1/1',
        'only_matching': True,
    }, {
        'url': 'http://cms.springboardplatform.com/embed_iframe/20/video/1731611/ki055/kidzworld.com/10',
        'only_matching': True,
    }, {
        'url': 'http://cms.springboardplatform.com/xml_feeds_advanced/index/159/rss3/981017/0/0/1/',
        'only_matching': True,
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.kidzworld.com/article/30935-trolls-the-beat-goes-on-interview-skylar-astin-and-amanda-leighton',
        'info_dict': {
            'id': '1731611',
            'ext': 'mp4',
            'title': 'Official Trailer | TROLLS: THE BEAT GOES ON!',
        },
        'skip': 'Invalid URL',
    }]

    def _real_extract(self, url):
        """Read the video's RSS feed and return an HTTP format plus a derived HLS format.

        Raises:
            ExtractorError: (expected) when the feed's media URL contains
                ``error_video.mp4``.
        """
        mobj = self._match_valid_url(url)
        # Either alternative of _VALID_URL may have matched, so both group
        # name variants are checked.
        video_id = mobj.group('id') or mobj.group('id_2')
        index = mobj.group('index') or mobj.group('index_2')

        feed = self._download_xml(
            f'http://cms.springboardplatform.com/xml_feeds_advanced/index/{index}/rss3/{video_id}', video_id)

        item = xpath_element(feed, './/item', 'item', fatal=True)
        content = xpath_element(
            item, './{http://search.yahoo.com/mrss/}content', 'content',
            fatal=True)
        title = unescapeHTML(xpath_text(item, './title', 'title', fatal=True))

        video_url = content.attrib['url']
        if 'error_video.mp4' in video_url:
            raise ExtractorError(
                f'Video {video_id} no longer exists', expected=True)

        http_format = {
            'url': video_url,
            'format_id': 'http',
            'tbr': int_or_none(content.get('bitrate')),
            'filesize': int_or_none(content.get('fileSize')),
            'width': int_or_none(content.get('width')),
            'height': int_or_none(content.get('height')),
        }
        # The HLS variant copies the HTTP format's fields; its URL swaps a
        # leading "cdn." host label for "hls." and appends ".m3u8".
        hls_format = {
            **http_format,
            'url': re.sub(r'(https?://)cdn\.', r'\1hls.', video_url) + '.m3u8',
            'ext': 'mp4',
            'format_id': 'hls',
            'protocol': 'm3u8_native',
        }

        return {
            'id': video_id,
            'title': title,
            'description': unescapeHTML(xpath_text(
                item, './description', 'description')),
            'thumbnail': xpath_attr(
                item, './{http://search.yahoo.com/mrss/}thumbnail', 'url',
                'thumbnail'),
            'timestamp': unified_timestamp(xpath_text(
                item, './{http://cms.springboardplatform.com/namespaces.html}created',
                'timestamp')),
            'duration': int_or_none(content.get('duration')),
            'formats': [http_format, hls_format],
        }
|
||||
@ -1,89 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
orderedSet,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class StanfordOpenClassroomIE(InfoExtractor):
    """Extractor for openclassroom.stanford.edu: single videos, course pages and the root page."""

    IE_NAME = 'stanfordoc'
    IE_DESC = 'Stanford Open ClassRoom'
    _VALID_URL = r'https?://openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
    _TEST = {
        'url': 'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
        'md5': '544a9468546059d4e80d76265b0443b8',
        'info_dict': {
            'id': 'PracticalUnix_intro-environment',
            'ext': 'mp4',
            'title': 'Intro Environment',
        },
    }

    def _real_extract(self, url):
        """Dispatch on which URL groups matched.

        - course and video present: return a single video info dict built
          from the video's metadata XML;
        - only course present: return a playlist of the course's video pages;
        - neither: return a playlist of all course pages linked from the
          home page.
        """
        mobj = self._match_valid_url(url)
        course = mobj.group('course')
        video = mobj.group('video')

        def linked_results(link_regex, page):
            """Turn de-duplicated relative links found in ``page`` into url results."""
            return [
                self.url_result(
                    f'http://openclassroom.stanford.edu/MainFolder/{unescapeHTML(href)}',
                ) for href in orderedSet(re.findall(link_regex, page))
            ]

        if course and video:  # A specific video
            info = {
                'id': f'{course}_{video}',
                'uploader': None,
                'upload_date': None,
            }
            base_url = f'http://openclassroom.stanford.edu/MainFolder/courses/{course}/videos/'
            mdoc = self._download_xml(f'{base_url}{video}.xml', info['id'])
            try:
                info['title'] = mdoc.findall('./title')[0].text
                info['url'] = base_url + mdoc.findall('./videoFile')[0].text
            except IndexError:
                # Either <title> or <videoFile> is missing from the XML.
                raise ExtractorError('Invalid metadata XML file')
            return info

        if course:  # A course page
            info = {
                'id': course,
                '_type': 'playlist',
                'uploader': None,
                'upload_date': None,
            }
            coursepage = self._download_webpage(
                url, info['id'],
                note='Downloading course info page',
                errnote='Unable to download course info page')

            # The title falls back to the course id when no <h1> is found.
            info['title'] = self._html_search_regex(
                r'<h1>([^<]+)</h1>', coursepage, 'title', default=info['id'])
            info['description'] = self._html_search_regex(
                r'(?s)<description>([^<]+)</description>',
                coursepage, 'description', fatal=False)
            info['entries'] = linked_results(
                r'<a href="(VideoPage\.php\?[^"]+)">', coursepage)
            return info

        # Root page
        info = {
            'id': 'Stanford OpenClassroom',
            '_type': 'playlist',
            'uploader': None,
            'upload_date': None,
        }
        info['title'] = info['id']

        root_url = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
        rootpage = self._download_webpage(
            root_url, info['id'],
            errnote='Unable to download course info page')
        info['entries'] = linked_results(
            r'<a href="(CoursePage\.php\?[^"]+)">', rootpage)
        return info
|
||||
@ -1,141 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
clean_podcast_url,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class StitcherBaseIE(InfoExtractor):
    """Shared API and metadata helpers for the Stitcher extractors."""

    _VALID_URL_BASE = r'https?://(?:www\.)?stitcher\.com/(?:podcast|show)/'

    def _call_api(self, path, video_id, query):
        """Fetch ``path`` from api.prod.stitcher.com and return the response's ``data`` member.

        Raises:
            ExtractorError: (expected) with the first API error message, when
                the response carries one.
        """
        response = self._download_json(
            f'https://api.prod.stitcher.com/{path}',
            video_id, query=query)
        error_message = try_get(response, lambda x: x['errors'][0]['message'])
        if not error_message:
            return response['data']
        raise ExtractorError(error_message, expected=True)

    def _extract_description(self, data):
        """Return the cleaned description, preferring ``html_description`` over ``description``."""
        raw_description = data.get('html_description') or data.get('description')
        return clean_html(raw_description)

    def _extract_audio_url(self, episode):
        """Return the episode's ``audio_url``, or its ``guid`` as fallback, passed through ``url_or_none``."""
        candidate = episode.get('audio_url') or episode.get('guid')
        return url_or_none(candidate)

    def _extract_show_info(self, show):
        """Return the show-level fields (thumbnail and series title) shared by its episodes."""
        thumbnail = show.get('image_base_url')
        series = show.get('title')
        return {
            'thumbnail': thumbnail,
            'series': series,
        }

    def _extract_episode(self, episode, audio_url, show_info):
        """Build an episode info dict; keys in ``show_info`` override the episode's own."""
        return {
            'id': str(episode['id']),
            'display_id': episode.get('slug'),
            'title': episode['title'].strip(),
            'description': self._extract_description(episode),
            'duration': int_or_none(episode.get('duration')),
            'url': clean_podcast_url(audio_url),
            'vcodec': 'none',
            'timestamp': int_or_none(episode.get('date_published')),
            'season_number': int_or_none(episode.get('season')),
            'season_id': str_or_none(episode.get('season_id')),
            **show_info,
        }
|
||||
|
||||
|
||||
class StitcherIE(StitcherBaseIE):
    """Extractor for a single Stitcher podcast episode."""

    _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?:[^/]+/)+e(?:pisode)?/(?:[^/#?&]+-)?(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true',
        'md5': 'e9635098e0da10b21a0e2b85585530f6',
        'info_dict': {
            'id': '40789481',
            'ext': 'mp3',
            'title': 'Machine Learning Mastery and Cancer Clusters',
            'description': 'md5:547adb4081864be114ae3831b4c2b42f',
            'duration': 1604,
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20151008',
            'timestamp': 1444285800,
            'series': 'Talking Machines',
        },
    }, {
        'url': 'http://www.stitcher.com/podcast/panoply/vulture-tv/e/the-rare-hourlong-comedy-plus-40846275?autoplay=true',
        'info_dict': {
            'id': '40846275',
            'display_id': 'the-rare-hourlong-comedy-plus',
            'ext': 'mp3',
            'title': "The CW's 'Crazy Ex-Girlfriend'",
            'description': 'md5:04f1e2f98eb3f5cbb094cea0f9e19b17',
            'duration': 2235,
            'thumbnail': r're:^https?://.*\.jpg',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Page Not Found',
    }, {
        # escaped title
        'url': 'http://www.stitcher.com/podcast/marketplace-on-stitcher/e/40910226?autoplay=true',
        'only_matching': True,
    }, {
        'url': 'http://www.stitcher.com/podcast/panoply/getting-in/e/episode-2a-how-many-extracurriculars-should-i-have-40876278?autoplay=true',
        'only_matching': True,
    }, {
        'url': 'https://www.stitcher.com/show/threedom/episode/circles-on-a-stick-200212584',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Look up the episode by its numeric ID and return its info dict."""
        episode_id = self._match_id(url)
        api_data = self._call_api(
            'shows/episodes', episode_id, {'episode_ids': episode_id})
        episode = api_data['episodes'][0]

        audio_url = self._extract_audio_url(episode)
        if not audio_url:
            # No usable audio URL in the API response is reported as a login requirement
            self.raise_login_required()

        show = try_get(api_data, lambda x: x['shows'][0], dict) or {}
        show_info = self._extract_show_info(show)
        return self._extract_episode(episode, audio_url, show_info)
|
||||
|
||||
|
||||
class StitcherShowIE(StitcherBaseIE):
    """Extractor for a whole Stitcher show, returned as a playlist of its episodes."""

    _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?P<id>[^/#?&]+)/?(?:[?#&]|$)'
    _TESTS = [{
        'url': 'http://www.stitcher.com/podcast/the-talking-machines',
        'info_dict': {
            'id': 'the-talking-machines',
            'title': 'Talking Machines',
            'description': 'md5:831f0995e40f26c10231af39cf1ebf0b',
        },
        'playlist_mincount': 106,
    }, {
        'url': 'https://www.stitcher.com/show/the-talking-machines',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch all episodes of the show and build a playlist from the playable ones."""
        show_slug = self._match_id(url)
        api_data = self._call_api(
            f'search/show/{show_slug}/allEpisodes', show_slug, {'count': 10000})
        show = try_get(api_data, lambda x: x['shows'][0], dict) or {}
        show_info = self._extract_show_info(show)

        # Pair each episode with its audio URL lazily; episodes without one are dropped
        episodes_with_audio = (
            (episode, self._extract_audio_url(episode))
            for episode in (api_data.get('episodes') or []))
        entries = [
            self._extract_episode(episode, audio_url, show_info)
            for episode, audio_url in episodes_with_audio
            if audio_url
        ]

        playlist_title = show.get('title')
        playlist_description = self._extract_description(show)
        return self.playlist_result(
            entries, show_slug, playlist_title, playlist_description)
|
||||
@ -1,35 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class StretchInternetIE(InfoExtractor):
    """Extractor for events on the Stretch Internet portal."""

    _VALID_URL = r'https?://portal\.stretchinternet\.com/[^/]+/(?:portal|full)\.htm\?.*?\beventId=(?P<id>\d+)'
    _TEST = {
        'url': 'https://portal.stretchinternet.com/umary/portal.htm?eventId=573272&streamType=video',
        'info_dict': {
            'id': '573272',
            'ext': 'mp4',
            'title': 'UNIVERSITY OF MARY WRESTLING VS UPPER IOWA',
            # 'timestamp': 1575668361,
            # 'upload_date': '20191206',
            'uploader_id': '99997',
        },
    }

    def _real_extract(self, url):
        """Resolve the media URL and the event metadata for the event ID in ``url``."""
        video_id = self._match_id(url)

        # The media location comes from one service, the event metadata from another
        media_listing = self._download_json(
            'https://core.stretchlive.com/trinity/event/tcg/' + video_id,
            video_id)
        media_url = media_listing[0]['media'][0]['url']

        event_response = self._download_json(
            'https://neo-client.stretchinternet.com/portal-ws/getEvent.json',
            video_id, query={'eventID': video_id, 'token': 'asdf'})
        event = event_response['event']

        return {
            'id': video_id,
            'title': event['title'],
            # TODO: parse US timezone abbreviations
            # 'timestamp': event.get('dateTimeString'),
            # The scheme is prepended here; the API value is used without one
            'url': 'https://' + media_url,
            'uploader_id': event.get('ownerID'),
        }
|
||||
@ -1,45 +0,0 @@
|
||||
from .vidyard import VidyardBaseIE
|
||||
from ..utils import ExtractorError, int_or_none, make_archive_id
|
||||
|
||||
|
||||
class SwearnetEpisodeIE(VidyardBaseIE):
    """Extractor for Swearnet episodes, whose video is looked up through the Vidyard helpers."""

    _VALID_URL = r'https?://www\.swearnet\.com/shows/(?P<id>[\w-]+)/seasons/(?P<season_num>\d+)/episodes/(?P<episode_num>\d+)'
    _TESTS = [{
        'url': 'https://www.swearnet.com/shows/gettin-learnt-with-ricky/seasons/1/episodes/1',
        'info_dict': {
            'id': 'wicK2EOzjOdxkUXGDIgcPw',
            'display_id': '232819',
            'ext': 'mp4',
            'episode_number': 1,
            'episode': 'Episode 1',
            'duration': 719,
            'description': r're:Are you drunk and high and craving a grilled cheese sandwich.+',
            'season': 'Season 1',
            'title': 'Episode 1 - Grilled Cheese Sammich',
            'season_number': 1,
            'thumbnail': 'https://cdn.vidyard.com/thumbnails/custom/0dd74f9b-388a-452e-b570-b407fb64435b_small.jpg',
            'tags': ['Getting Learnt with Ricky', 'drunk', 'grilled cheese', 'high'],
            '_old_archive_ids': ['swearnetepisode 232819'],
        },
    }]

    def _real_extract(self, url):
        """Find the page's ``externalid`` and return the first chapter's video info."""
        url_match = self._match_valid_url(url)
        slug, season_number, episode_number = url_match.group('id', 'season_num', 'episode_num')
        webpage = self._download_webpage(url, slug)

        try:
            external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
        except ExtractorError:
            # A page without an external ID that shows an upgrade prompt needs a login
            if 'Upgrade Now' in webpage:
                self.raise_login_required()
            raise

        video_json = self._fetch_video_json(external_id)
        info = self._process_video_json(video_json['chapters'][0], external_id)

        legacy_id = info.get('display_id')
        if legacy_id:
            # Record the archive ID built from display_id alongside the current ID
            info['_old_archive_ids'] = [make_archive_id(self, legacy_id)]

        return {
            **info,
            'season_number': int_or_none(season_number),
            'episode_number': int_or_none(episode_number),
        }
|
||||
@ -1,33 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import traverse_obj
|
||||
|
||||
|
||||
class SYVDKIE(InfoExtractor):
    """Extractor for 24syv.dk episode pages (audio only)."""

    _VALID_URL = r'https?://(?:www\.)?24syv\.dk/episode/(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://24syv.dk/episode/isabella-arendt-stiller-op-for-de-konservative-2',
        'md5': '429ce5a423dd4b1e1d0bf3a569558089',
        'info_dict': {
            'id': '12215',
            'display_id': 'isabella-arendt-stiller-op-for-de-konservative-2',
            'ext': 'mp3',
            'title': 'Isabella Arendt stiller op for De Konservative',
            'description': 'md5:f5fa6a431813bf37284f3412ad7c6c06',
        },
    }]

    def _real_extract(self, url):
        """Read the episode details from the page's Next.js data and return the info dict."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        nextjs_data = self._search_nextjs_data(webpage, display_id)
        episode = nextjs_data['props']['pageProps']['episodeDetails'][0]

        return {
            'id': str(episode['id']),
            'vcodec': 'none',
            'ext': 'mp3',
            'url': episode['details']['enclosure'],
            'display_id': display_id,
            'title': traverse_obj(episode, ('title', 'rendered')),
            'description': traverse_obj(episode, ('details', 'post_title')),
        }
|
||||
@ -1,114 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
)
|
||||
|
||||
|
||||
class IVXPlayerIE(InfoExtractor):
    """Extractor for ``ivxplayer:<video_id>:<player_key>`` pseudo-URLs and ``<ivs-player>`` embeds."""

    _VALID_URL = r'ivxplayer:(?P<video_id>\d+):(?P<player_key>\w+)'
    _TESTS = [{
        'url': 'ivxplayer:2366065:4a89dfe6bc8f002596b1dfbd600730b1',
        'info_dict': {
            'id': '2366065',
            'ext': 'mp4',
            'duration': 112,
            'upload_date': '20221204',
            'title': 'Film Indonesia di Disney Content Showcase Asia Pacific 2022',
            'timestamp': 1670151746,
            'thumbnail': 'https://ivx-image.ivideosmart.com/serve/image/video/2366065?width=300',
        },
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.cantika.com/video/31737/film-indonesia-di-disney-content-showcase-asia-pacific-2022',
        'info_dict': {
            'id': '2374200',
            'ext': 'mp4',
            'duration': 110,
            'title': 'Serial Indonesia di Disney Content Showcase Asia Pacific 2022',
            'timestamp': 1670639416,
            'upload_date': '20221210',
            'thumbnail': 'https://ivx-image.ivideosmart.com/serve/image/video/2374200?width=300',
        },
    }, {
        'url': 'https://www.gooto.com/video/11437/wuling-suv-ramai-dikunjungi-di-giias-2018',
        'info_dict': {
            'id': '892109',
            'ext': 'mp4',
            'title': 'Wuling SUV Ramai Dikunjungi di GIIAS 2018',
            'upload_date': '20180811',
            'description': 'md5:6d901483d0aacc664aecb4489719aafa',
            'duration': 75,
            'timestamp': 1534011263,
            'thumbnail': 'https://ivx-image.ivideosmart.com/serve/image/video/892109?width=300',
        },
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield an ``ivxplayer:`` URL for the first ``<ivs-player>`` tag found in ``webpage``.

        When the generator is resumed after that yield it raises
        ``cls.StopExtraction``; nothing is yielded if no tag matches.
        """
        # more info at https://player.ivideosmart.com/ivsplayer/v4/dist/js/loader.js
        embed_match = re.search(
            r'<ivs-player\s*[^>]+data-ivs-key\s*=\s*"(?P<player_key>[\w]+)\s*[^>]+\bdata-ivs-vid="(?P<video_id>[\w-]+)',
            webpage)
        if not embed_match:
            return
        yield f'ivxplayer:{embed_match.group("video_id")}:{embed_match.group("player_key")}'
        raise cls.StopExtraction

    def _real_extract(self, url):
        """Download the player JSON for the video and build formats and metadata from it."""
        video_id, player_key = self._match_valid_url(url).group('video_id', 'player_key')
        player_json = self._download_json(
            f'https://ivxplayer.ivideosmart.com/prod/video/{video_id}?key={player_key}', video_id)

        m3u8_url = player_json['player']['video_url']
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id)

        return {
            'id': str(player_json['ivx']['id']),
            'title': traverse_obj(player_json, ('ivx', 'name')),
            'description': traverse_obj(player_json, ('ivx', 'description')),
            'duration': int_or_none(traverse_obj(player_json, ('ivx', 'duration'))),
            'timestamp': parse_iso8601(traverse_obj(player_json, ('ivx', 'published_at'))),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnail': traverse_obj(player_json, ('ivx', 'thumbnail_url')),
        }
|
||||
|
||||
|
||||
class TempoIE(InfoExtractor):
    """Extractor for video.tempo.co pages, which embed an IVX player."""

    _VALID_URL = r'https?://video\.tempo\.co/\w+/\d+/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://video.tempo.co/read/30058/anies-baswedan-ajukan-banding-putusan-ptun-batalkan-ump-dki',
        'info_dict': {
            'id': '2144275',
            'display_id': 'anies-baswedan-ajukan-banding-putusan-ptun-batalkan-ump-dki',
            'ext': 'mp4',
            'title': 'Anies Baswedan Ajukan Banding Putusan PTUN Batalkan UMP DKI',
            'duration': 85,
            'description': 'md5:a6822b7c4c874fa7e5bd63e96a387b66',
            'thumbnail': 'https://statik.tempo.co/data/2022/07/27/id_1128287/1128287_720.jpg',
            'timestamp': 1658907970,
            'upload_date': '20220727',
            'tags': ['Anies Baswedan', ' PTUN', ' PTUN | Pengadilan Tata Usaha Negara', ' PTUN Batalkan UMP DKI', ' UMP DKI'],
        },
    }]

    def _real_extract(self, url):
        """Hand the embedded player over to IVXPlayerIE, adding page-level metadata."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The embed URL has the form "ivxplayer:<video_id>:<player_key>"
        embed_url = next(IVXPlayerIE._extract_embed_urls(url, webpage))
        _, video_id, player_key = embed_url.split(':')

        json_ld_data = self._search_json_ld(webpage, display_id)

        thumbnail = (
            self._html_search_meta('twitter:image:src', webpage)
            or self._og_search_thumbnail(webpage))
        # Keywords are split on commas as-is; surrounding whitespace is kept
        tags = try_call(lambda: self._html_search_meta('keywords', webpage).split(','))
        description = (
            json_ld_data.get('description')
            or self._html_search_meta(('description', 'twitter:description'), webpage)
            or self._og_search_description(webpage))

        return self.url_result(
            f'ivxplayer:{video_id}:{player_key}', display_id=display_id,
            thumbnail=thumbnail, tags=tags, description=description,
            url_transparent=True)
|
||||
@ -1,35 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import extract_attributes, remove_end
|
||||
|
||||
|
||||
class TheHoleTvIE(InfoExtractor):
    """Extractor for the-hole.tv episode pages."""

    _VALID_URL = r'https?://(?:www\.)?the-hole\.tv/episodes/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://the-hole.tv/episodes/gromkii-vopros-sergey-orlov',
        'md5': 'fea6682f47786f3ae5a6cbd635ec4bf9',
        'info_dict': {
            'id': 'gromkii-vopros-sergey-orlov',
            'ext': 'mp4',
            'title': 'Сергей Орлов — Громкий вопрос',
            'thumbnail': 'https://assets-cdn.the-hole.tv/images/t8gan4n6zn627e7wni11b2uemqts',
            'description': 'md5:45741a9202331f995d9fb76996759379',
        },
    }]

    def _real_extract(self, url):
        """Read the player element's data attributes and extract its HLS formats."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The stream source and poster are data attributes of the player <div>
        player_tag = self._search_regex(
            r'(<div[^>]*\bdata-controller="player"[^>]*>)', webpage, 'video player')
        player_attrs = extract_attributes(player_tag)

        source_url = player_attrs['data-player-source-value']
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            source_url, video_id, 'mp4')

        page_title = self._html_extract_title(webpage)
        return {
            'id': video_id,
            'title': remove_end(page_title, ' — The Hole'),
            'description': self._og_search_description(webpage),
            'thumbnail': player_attrs.get('data-player-poster-value'),
            'formats': formats,
            'subtitles': subtitles,
        }
|
||||
@ -1,61 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class TrailerAddictIE(InfoExtractor):
    """Extractor for traileraddict.com trailer and clip pages (marked as not working)."""

    _WORKING = False
    _VALID_URL = r'(?:https?://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
    _TEST = {
        'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
        'md5': '41365557f3c8c397d091da510e73ceb4',
        'info_dict': {
            'id': '76184',
            'ext': 'mp4',
            'title': 'Prince Avalanche Trailer',
            'description': 'Trailer for Prince Avalanche.\n\nTwo highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind.',
        },
    }

    def _real_extract(self, url):
        """Scrape the trailer page, then resolve the media URL via the site's info endpoint.

        Returns an info dict with id, url, title, thumbnail, description and
        view_count. The view count and description are optional (non-fatal);
        the video ID, download URL and thumbnail are required.
        """
        mobj = self._match_valid_url(url)
        name = mobj.group('movie') + '/' + mobj.group('trailer_name')
        webpage = self._download_webpage(url, name)

        title = self._html_extract_title(webpage, 'video title').replace(' - Trailer Addict', '')

        view_count_str = self._search_regex(
            r'<span class="views_n">([0-9,.]+)</span>',
            webpage, 'view count', fatal=False)
        # The pattern admits both "," and "." as separators, so strip every
        # non-digit; an optional field must never abort extraction in int().
        view_count_digits = re.sub(r'\D', '', view_count_str or '')
        view_count = int(view_count_digits) if view_count_digits else None

        video_id = self._search_regex(
            r'<param\s+name="movie"\s+value="/emb/([0-9]+)"\s*/>',
            webpage, 'video id')

        # Presence of (no)watchplus function indicates HD quality is available.
        # The opening parenthesis is escaped so that it is matched literally
        # instead of forming an empty regex group.
        if re.search(r'function (?:no)?watchplus\s*\(', webpage):
            fvar = 'fvarhd'
        else:
            fvar = 'fvar'

        info_url = f'http://www.traileraddict.com/{fvar}.php?tid={video_id}'
        info_webpage = self._download_webpage(info_url, video_id, 'Downloading the info webpage')

        # The info page is a query-string style payload with an encoded "?"
        final_url = self._search_regex(
            r'&fileurl=(.+)', info_webpage, 'Download url').replace('%3F', '?')
        thumbnail_url = self._search_regex(
            r'&image=(.+?)&', info_webpage, 'thumbnail url')

        description = self._html_search_regex(
            r'(?s)<div class="synopsis">.*?<div class="movie_label_info"[^>]*>(.*?)</div>',
            webpage, 'description', fatal=False)

        return {
            'id': video_id,
            'url': final_url,
            'title': title,
            'thumbnail': thumbnail_url,
            'description': description,
            'view_count': view_count,
        }
|
||||
@ -1,329 +0,0 @@
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
UnsupportedError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_resolution,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class TrillerBaseIE(InfoExtractor):
    """Shared login, comment and video-parsing logic for the Triller extractors."""

    _NETRC_MACHINE = 'triller'
    _API_BASE_URL = 'https://social.triller.co/v1.5'
    # Class-level dict: the Authorization header stored here is shared by all Triller extractors
    _API_HEADERS = {'Origin': 'https://triller.co'}

    def _perform_login(self, username, password):
        """Log in with the given credentials and store the bearer token in ``_API_HEADERS``.

        Does nothing if an Authorization header is already present.
        """
        if self._API_HEADERS.get('Authorization'):
            return

        json_headers = {**self._API_HEADERS, 'Content-Type': 'application/json'}

        username_payload = json.dumps({'username': username}, separators=(',', ':')).encode()
        username_response = self._download_json(
            f'{self._API_BASE_URL}/api/user/is-valid-username', None, note='Checking username',
            fatal=False, expected_status=400, headers=json_headers, data=username_payload)
        user_check = traverse_obj(username_response, 'status')

        if user_check:  # endpoint returns `"status":false` if username exists
            raise ExtractorError('Unable to login: Invalid username', expected=True)

        credentials_payload = json.dumps({
            'username': username,
            'password': password,
        }, separators=(',', ':')).encode()
        login = self._download_json(
            f'{self._API_BASE_URL}/user/auth', None, note='Logging in', fatal=False,
            expected_status=400, headers=json_headers, data=credentials_payload) or {}

        if not login.get('auth_token'):
            if login.get('error') == 1008:
                raise ExtractorError('Unable to login: Incorrect password', expected=True)
            raise ExtractorError('Unable to login')

        self._API_HEADERS['Authorization'] = f'Bearer {login["auth_token"]}'

    def _get_comments(self, video_id, limit=15):
        """Yield comment dicts for ``video_id``; yields nothing if none are returned."""
        comments_json = self._download_json(
            f'{self._API_BASE_URL}/api/videos/{video_id}/comments_v2',
            video_id, fatal=False, note='Downloading comments API JSON',
            headers=self._API_HEADERS, query={'limit': limit}) or {}
        if not comments_json.get('comments'):
            return

        comment_fields = {
            'id': ('id', {str_or_none}),
            'text': 'body',
            'author': ('author', 'username'),
            'author_id': ('author', 'user_id'),
            'timestamp': ('timestamp', {unified_timestamp}),
        }
        yield from traverse_obj(comments_json, ('comments', ..., comment_fields))

    def _parse_video_info(self, video_info, username, user_id, display_id=None):
        """Convert one API video object into an info dict.

        ``display_id`` falls back to the video's ``video_uuid``. Formats are
        gathered, in order, from the HLS ``transcoded_url``, the ``video_set``
        list, the direct video/stream URL and the audio URL.
        """
        video_id = str(video_info['id'])
        display_id = display_id or video_info.get('video_uuid')

        # Any media URL containing "/copyright/" marks the video as removed
        removed_for_copyright = traverse_obj(video_info, (
            None, ('transcoded_url', 'video_url', 'stream_url', 'audio_url'),
            {lambda x: re.search(r'/copyright/', x)}), get_all=False)
        if removed_for_copyright:
            self.raise_no_formats('This video has been removed due to licensing restrictions', expected=True)

        def base_format(media_url):
            # Fields that can be derived from the URL alone
            return {
                'url': media_url,
                'ext': determine_ext(media_url),
                'format_id': url_basename(media_url).split('.')[0],
            }

        formats = []

        if determine_ext(video_info.get('transcoded_url')) == 'm3u8':
            hls_formats = self._extract_m3u8_formats(
                video_info['transcoded_url'], video_id, 'mp4', m3u8_id='hls', fatal=False)
            formats.extend(hls_formats)

        formats.extend({
            **base_format(variant['url']),
            **parse_resolution(variant.get('resolution')),
            'vcodec': variant.get('codec'),
            'vbr': int_or_none(variant.get('bitrate'), 1000),
        } for variant in traverse_obj(video_info, ('video_set', lambda _, v: url_or_none(v['url']))))

        video_url = traverse_obj(video_info, 'video_url', 'stream_url', expected_type=url_or_none)
        if video_url:
            dimensions = traverse_obj(video_info, {
                'width': 'width',
                'height': 'height',
                'filesize': 'filesize',
            }, expected_type=int_or_none)
            formats.append({
                **base_format(video_url),
                'vcodec': 'h264',
                **dimensions,
            })

        audio_url = url_or_none(video_info.get('audio_url'))
        if audio_url:
            formats.append(base_format(audio_url))

        comment_count = traverse_obj(video_info, ('comment_count', {int_or_none}))

        metadata = traverse_obj(video_info, {
            'title': ('description', {lambda x: x.replace('\r\n', ' ')}),
            'description': 'description',
            'creator': ((('user'), ('users', lambda _, v: str(v['user_id']) == user_id)), 'name'),
            'thumbnail': ('thumbnail_url', {url_or_none}),
            'timestamp': ('timestamp', {unified_timestamp}),
            'duration': ('duration', {int_or_none}),
            'view_count': ('play_count', {int_or_none}),
            'like_count': ('likes_count', {int_or_none}),
            'artist': 'song_artist',
            'track': 'song_title',
        }, get_all=False)

        return {
            'id': video_id,
            'display_id': display_id,
            'uploader': username,
            'uploader_id': user_id or traverse_obj(video_info, ('user', 'user_id', {str_or_none})),
            'webpage_url': urljoin(f'https://triller.co/@{username}/video/', display_id),
            'uploader_url': f'https://triller.co/@{username}',
            # Results are always attributed to TrillerIE, whichever extractor produced them
            'extractor_key': TrillerIE.ie_key(),
            'extractor': TrillerIE.IE_NAME,
            'formats': formats,
            'comment_count': comment_count,
            '__post_extractor': self.extract_comments(video_id, comment_count),
            **metadata,
        }
|
||||
|
||||
|
||||
class TrillerIE(TrillerBaseIE):
    """Extractor for a single Triller video addressed by username and UUID."""

    _VALID_URL = r'''(?x)
            https?://(?:www\.)?triller\.co/
            @(?P<username>[\w.]+)/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})
        '''
    _TESTS = [{
        'url': 'https://triller.co/@theestallion/video/2358fcd7-3df2-4c77-84c8-1d091610a6cf',
        'md5': '228662d783923b60d78395fedddc0a20',
        'info_dict': {
            'id': '71595734',
            'ext': 'mp4',
            'title': 'md5:9a2bf9435c5c4292678996a464669416',
            'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
            'description': 'md5:9a2bf9435c5c4292678996a464669416',
            'uploader': 'theestallion',
            'uploader_id': '18992236',
            'creator': 'Megan Thee Stallion',
            'timestamp': 1660598222,
            'upload_date': '20220815',
            'duration': 47,
            'view_count': int,
            'like_count': int,
            'artist': 'Megan Thee Stallion',
            'track': 'Her',
            'uploader_url': 'https://triller.co/@theestallion',
            'comment_count': int,
        },
        'skip': 'This video has been removed due to licensing restrictions',
    }, {
        'url': 'https://triller.co/@charlidamelio/video/46c6fcfa-aa9e-4503-a50c-68444f44cddc',
        'md5': '874055f462af5b0699b9dbb527a505a0',
        'info_dict': {
            'id': '71621339',
            'ext': 'mp4',
            'title': 'md5:4c91ea82760fe0fffb71b8c3aa7295fc',
            'display_id': '46c6fcfa-aa9e-4503-a50c-68444f44cddc',
            'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
            'description': 'md5:4c91ea82760fe0fffb71b8c3aa7295fc',
            'uploader': 'charlidamelio',
            'uploader_id': '1875551',
            'creator': 'charli damelio',
            'timestamp': 1660773354,
            'upload_date': '20220817',
            'duration': 16,
            'view_count': int,
            'like_count': int,
            'artist': 'Dixie',
            'track': 'Someone to Blame',
            'uploader_url': 'https://triller.co/@charlidamelio',
            'comment_count': int,
        },
    }, {
        'url': 'https://triller.co/@theestallion/video/07f35f38-1f51-48e2-8c5f-f7a8e829988f',
        'md5': 'af7b3553e4b8bfca507636471ee2eb41',
        'info_dict': {
            'id': '71837829',
            'ext': 'mp4',
            'title': 'UNGRATEFUL VIDEO OUT NOW 👏🏾👏🏾👏🏾 💙💙 link my bio #womeninhiphop',
            'display_id': '07f35f38-1f51-48e2-8c5f-f7a8e829988f',
            'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
            'description': 'UNGRATEFUL VIDEO OUT NOW 👏🏾👏🏾👏🏾 💙💙 link my bio\r\n #womeninhiphop',
            'uploader': 'theestallion',
            'uploader_id': '18992236',
            'creator': 'Megan Thee Stallion',
            'timestamp': 1662486178,
            'upload_date': '20220906',
            'duration': 30,
            'view_count': int,
            'like_count': int,
            'artist': 'Unknown',
            'track': 'Unknown',
            'uploader_url': 'https://triller.co/@theestallion',
            'comment_count': int,
        },
    }]

    def _real_extract(self, url):
        """Fetch the video object for the UUID in ``url`` and parse it."""
        url_match = self._match_valid_url(url)
        username, display_id = url_match.group('username', 'id')

        api_response = self._download_json(
            f'{self._API_BASE_URL}/api/videos/{display_id}', display_id,
            headers=self._API_HEADERS)
        video_info = api_response['videos'][0]

        # No user ID is known here; _parse_video_info falls back to the video's own user data
        return self._parse_video_info(video_info, username, None, display_id)
|
||||
|
||||
|
||||
class TrillerUserIE(TrillerBaseIE):
    """Extractor for a Triller user profile, returned as a playlist of the user's videos."""

    _VALID_URL = r'https?://(?:www\.)?triller\.co/@(?P<id>[\w.]+)/?(?:$|[#?])'
    _TESTS = [{
        'url': 'https://triller.co/@theestallion',
        'playlist_mincount': 12,
        'info_dict': {
            'id': '18992236',
            'title': 'theestallion',
            'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
        },
    }, {
        'url': 'https://triller.co/@charlidamelio',
        'playlist_mincount': 150,
        'info_dict': {
            'id': '1875551',
            'title': 'charlidamelio',
            'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
        },
    }]

    def _real_initialize(self):
        """Create a guest session unless an Authorization header is already set."""
        if self._API_HEADERS.get('Authorization'):
            return

        guest = self._download_json(
            f'{self._API_BASE_URL}/user/create_guest', None,
            note='Creating guest session', data=b'', headers=self._API_HEADERS, query={
                'platform': 'Web',
                'app_version': '',
            })
        if not guest.get('auth_token'):
            raise ExtractorError('Unable to fetch required auth token for user extraction')

        self._API_HEADERS['Authorization'] = f'Bearer {guest["auth_token"]}'

    def _entries(self, username, user_id, limit=6):
        """Yield parsed videos page by page, paging on the last video's timestamp."""
        query = {'limit': limit}
        for page_num in itertools.count(1):
            page = self._download_json(
                f'{self._API_BASE_URL}/api/users/{user_id}/videos',
                username, note=f'Downloading user video list page {page_num}',
                headers=self._API_HEADERS, query=query)

            for video_info in traverse_obj(page, ('videos', ...)):
                yield self._parse_video_info(video_info, username, user_id)

            # The next page is requested relative to the last video of this one
            last_timestamp = traverse_obj(page, ('videos', -1, 'timestamp'))
            query['before_time'] = last_timestamp
            if not last_timestamp:
                break

    def _real_extract(self, url):
        """Resolve the username to a user ID and return a lazily evaluated playlist."""
        username = self._match_id(url)

        user_response = self._download_json(
            f'{self._API_BASE_URL}/api/users/by_username/{username}',
            username, note='Downloading user info', headers=self._API_HEADERS)
        user_info = traverse_obj(user_response, ('user', {dict})) or {}

        if user_info.get('private') and user_info.get('followed_by_me') not in (True, 'true'):
            raise ExtractorError('This user profile is private', expected=True)
        if traverse_obj(user_info, (('blocked_by_user', 'blocking_user'), {bool}), get_all=False):
            raise ExtractorError('The author of the video is blocked', expected=True)

        user_id = str_or_none(user_info.get('user_id'))
        if not user_id:
            raise ExtractorError('Unable to extract user ID')

        return self.playlist_result(
            self._entries(username, user_id), user_id, username,
            thumbnail=user_info.get('avatar_url'))
|
||||
|
||||
|
||||
class TrillerShortIE(InfoExtractor):
    """Extractor for v.triller.co short links, which redirect to the real video URL."""

    _VALID_URL = r'https?://v\.triller\.co/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://v.triller.co/WWZNWk',
        'md5': '5eb8dc2c971bd8cd794ec9e8d5e9d101',
        'info_dict': {
            'id': '66210052',
            'ext': 'mp4',
            'title': 'md5:2dfc89d154cd91a4a18cd9582ba03e16',
            'display_id': 'f4480e1f-fb4e-45b9-a44c-9e6c679ce7eb',
            'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
            'description': 'md5:2dfc89d154cd91a4a18cd9582ba03e16',
            'uploader': 'statefairent',
            'uploader_id': '487545193',
            'creator': 'Official Summer Fair of LA',
            'timestamp': 1629655457,
            'upload_date': '20210822',
            'duration': 19,
            'view_count': int,
            'like_count': int,
            'artist': 'Unknown',
            'track': 'Unknown',
            'uploader_url': 'https://triller.co/@statefairent',
            'comment_count': int,
        },
    }]

    def _real_extract(self, url):
        """Follow the short link with a HEAD request and delegate to the resolved URL."""
        short_id = self._match_id(url)
        head_response = self._request_webpage(HEADRequest(url), short_id)
        real_url = head_response.url

        if not self.suitable(real_url):
            return self.url_result(real_url)
        # Prevent infinite loop in case redirect fails
        raise UnsupportedError(real_url)
|
||||
@ -1,79 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class TVPlayerIE(InfoExtractor):
    """Extractor for live channels on tvplayer.com."""

    _VALID_URL = r'https?://(?:www\.)?tvplayer\.com/watch/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://tvplayer.com/watch/bbcone',
        'info_dict': {
            'id': '89',
            'ext': 'mp4',
            'title': r're:^BBC One [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Resolve the channel's live stream through the context and stream APIs."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Channel name and resource ID are data attributes of the current-channel <div>
        channel_tag = self._search_regex(
            r'(<div[^>]+class="[^"]*current-channel[^"]*"[^>]*>)',
            webpage, 'channel element')
        current_channel = extract_attributes(channel_tag)
        title = current_channel['data-name']
        resource_id = current_channel['data-id']

        token = self._search_regex(
            r'data-token=(["\'])(?P<token>(?!\1).+)\1', webpage,
            'token', group='token')

        context = self._download_json(
            'https://tvplayer.com/watch/context', display_id,
            'Downloading JSON context', query={
                'resource': resource_id,
                'gen': token,
            })

        validate = context['validate']
        platform = try_get(
            context, lambda x: x['platform']['key'], str) or 'firefox'

        stream_request = urlencode_postdata({
            'id': resource_id,
            'service': 1,
            'platform': platform,
            'validate': validate,
        })
        try:
            response = self._download_json(
                'http://api.tvplayer.com/api/v2/stream/live',
                display_id, 'Downloading JSON stream', headers={
                    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                }, data=stream_request)['tvplayer']['response']
        except ExtractorError as e:
            if not isinstance(e.cause, HTTPError):
                raise
            # The body of an HTTP error response carries the service's own error message
            response = self._parse_json(
                e.cause.response.read().decode(), resource_id)['tvplayer']['response']
            raise ExtractorError(
                f'{self.IE_NAME} said: {response["error"]}', expected=True)

        formats = self._extract_m3u8_formats(response['stream'], display_id, 'mp4')

        return {
            'id': resource_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'is_live': True,
        }
|
||||
@ -5,9 +5,3 @@ class UFCTVIE(ImgGamingBaseIE):
|
||||
_VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:(?:app|www)\.)?(?:ufc\.tv|(?:ufc)?fightpass\.com)|ufcfightpass\.img(?:dge|gaming)\.com'
|
||||
_NETRC_MACHINE = 'ufctv'
|
||||
_REALM = 'ufc'
|
||||
|
||||
|
||||
class UFCArabiaIE(ImgGamingBaseIE):
    # Host part substituted into the shared ImgGaming URL template:
    # ufcarabia.ae / ufcarabia.com, optionally prefixed by app. or www.
    _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:(?:app|www)\.)?ufcarabia\.(?:ae|com)'
    # Machine name used for .netrc credential lookup
    _NETRC_MACHINE = 'ufcarabia'
    # NOTE(review): realm identifier consumed by ImgGamingBaseIE; exact use not visible here
    _REALM = 'admufc'
|
||||
|
||||
@ -1,36 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class UKTVPlayIE(InfoExtractor):
    # The numeric id is either the ?video= query value or the last path segment
    _VALID_URL = r'https?://uktvplay\.(?:uktv\.)?co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://uktvplay.uktv.co.uk/shows/world-at-war/c/200/watch-online/?video=2117008346001',
        'info_dict': {
            'id': '2117008346001',
            'ext': 'mp4',
            'title': 'Pincers',
            'description': 'Pincers',
            'uploader_id': '1242911124001',
            'upload_date': '20130124',
            'timestamp': 1359049267,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'expected_warnings': ['Failed to download MPD manifest'],
    }, {
        'url': 'https://uktvplay.uktv.co.uk/shows/africa/watch-online/5983349675001',
        'only_matching': True,
    }, {
        'url': 'https://uktvplay.co.uk/shows/hornby-a-model-world/series-1/episode-1/6276739790001?autoplaying=true',
        'only_matching': True,
    }]
    # Alternative player, kept for reference:
    # BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/1242911124001/OrCyvJ2gyL_default/index.html?videoId=%s'
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911124001/H1xnMOqP_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Hand the video id over to the BrightcoveNew extractor."""
        video_id = self._match_id(url)
        brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % video_id
        return self.url_result(brightcove_url, ie='BrightcoveNew', video_id=video_id)
|
||||
@ -71,6 +71,8 @@ class KnownDRMIE(UnsupportedInfoExtractor):
|
||||
r'watch\.telusoriginals\.com',
|
||||
r'video\.unext\.jp',
|
||||
r'www\.web\.nhk',
|
||||
r'fod\.fujitv\.co\.jp',
|
||||
r'zee5\.com',
|
||||
)
|
||||
|
||||
_TESTS = [{
|
||||
@ -252,6 +254,14 @@ class KnownDRMIE(UnsupportedInfoExtractor):
|
||||
# https://github.com/yt-dlp/yt-dlp/issues/14620
|
||||
'url': 'https://www.web.nhk/tv/an/72hours/pl/series-tep-W3W8WRN8M3/ep/QW8ZY6146V',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# https://github.com/yt-dlp/yt-dlp/issues/7064
|
||||
# https://github.com/yt-dlp/yt-dlp/issues/10264
|
||||
'url': 'https://fod.fujitv.co.jp/title/709f/709f130001/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.zee5.com/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -291,6 +301,8 @@ class KnownPiracyIE(UnsupportedInfoExtractor):
|
||||
r'einthusan\.(?:tv|com|ca)',
|
||||
r'yourupload\.com',
|
||||
r'xanimu\.com',
|
||||
r'musicdex\.org',
|
||||
r'duboku\.io',
|
||||
)
|
||||
|
||||
_TESTS = [{
|
||||
|
||||
@ -1,98 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class UtreonIE(InfoExtractor):
|
||||
IE_NAME = 'playeur'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:utreon|playeur)\.com/v/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://utreon.com/v/z_I7ikQbuDw',
|
||||
'info_dict': {
|
||||
'id': 'z_I7ikQbuDw',
|
||||
'ext': 'mp4',
|
||||
'title': 'Freedom Friday meditation - Rising in the wind',
|
||||
'description': 'md5:a9bf15a42434a062fe313b938343ad1b',
|
||||
'uploader': 'Heather Dawn Elemental Health',
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'release_date': '20210723',
|
||||
'duration': 586,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://utreon.com/v/jerJw5EOOVU',
|
||||
'info_dict': {
|
||||
'id': 'jerJw5EOOVU',
|
||||
'ext': 'mp4',
|
||||
'title': 'When I\'m alone, I love to reflect in peace, to make my dreams come true... [Quotes and Poems]',
|
||||
'description': 'md5:4026aa3a2c10169c3649926ac8ef62b6',
|
||||
'uploader': 'Frases e Poemas Quotes and Poems',
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'release_date': '20210723',
|
||||
'duration': 60,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://utreon.com/v/C4ZxXhYBBmE',
|
||||
'info_dict': {
|
||||
'id': 'C4ZxXhYBBmE',
|
||||
'ext': 'mp4',
|
||||
'title': 'Biden’s Capital Gains Tax Rate to Test World’s Highest',
|
||||
'description': 'md5:995aa9ad0733c0e5863ebdeff954f40e',
|
||||
'uploader': 'Nomad Capitalist',
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'release_date': '20210723',
|
||||
'duration': 884,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://utreon.com/v/Y-stEH-FBm8',
|
||||
'info_dict': {
|
||||
'id': 'Y-stEH-FBm8',
|
||||
'ext': 'mp4',
|
||||
'title': 'Creeper-Chan Pranks Steve! 💚 [MINECRAFT ANIME]',
|
||||
'description': 'md5:7a48450b0d761b96dec194be0c5ecb5f',
|
||||
'uploader': 'Merryweather Comics',
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'release_date': '20210718',
|
||||
'duration': 151,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://playeur.com/v/Wzqp-UrxSeu',
|
||||
'info_dict': {
|
||||
'id': 'Wzqp-UrxSeu',
|
||||
'ext': 'mp4',
|
||||
'title': 'Update: Clockwork Basilisk Books on the Way!',
|
||||
'description': 'md5:d9756b0b1884c904655b0e170d17cea5',
|
||||
'uploader': 'Forgotten Weapons',
|
||||
'release_date': '20240208',
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'duration': 262,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Extract one video from the Playeur API.

    Every entry of the response's ``videos`` mapping with a valid URL
    becomes a format. ``title`` and ``videos`` are required; all other
    metadata is optional.
    """
    video_id = self._match_id(url)
    json_data = self._download_json(
        'https://api.playeur.com/v1/videos/' + video_id, video_id)

    formats = []
    for format_key, format_url in json_data['videos'].items():
        if not url_or_none(format_url):
            continue
        # The quality label is the second '_'-separated component of the key.
        # Fall back to the whole key rather than raising IndexError when the
        # key has no underscore.
        key_parts = format_key.split('_')
        format_id = key_parts[1] if len(key_parts) > 1 else format_key
        formats.append({
            'url': format_url,
            'format_id': format_id,
            # Drop the trailing unit character (e.g. "720p" -> 720);
            # int_or_none() gives None for non-numeric labels instead of
            # aborting the whole extraction with ValueError.
            'height': int_or_none(format_id[:-1]),
        })

    thumbnail = url_or_none(dict_get(json_data, ('cover_image_url', 'preview_image_url')))

    return {
        'id': video_id,
        'title': json_data['title'],
        'formats': formats,
        'description': str_or_none(json_data.get('description')),
        'duration': int_or_none(json_data.get('duration')),
        'uploader': str_or_none(try_get(json_data, lambda x: x['channel']['title'])),
        'thumbnail': thumbnail,
        'release_date': unified_strdate(json_data.get('published_datetime')),
    }
|
||||
@ -1,7 +1,6 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .xstream import XstreamIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
@ -9,7 +8,8 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class VGTVIE(XstreamIE): # XXX: Do not subclass from concrete IE
|
||||
class VGTVIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet'
|
||||
_GEO_BYPASS = False
|
||||
|
||||
|
||||
@ -1,27 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from .internetvideoarchive import InternetVideoArchiveIE
|
||||
|
||||
|
||||
class VideoDetectiveIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.videodetective.com/movies/kick-ass-2/194487',
        'info_dict': {
            'id': '194487',
            'ext': 'mp4',
            'title': 'Kick-Ass 2',
            'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Delegate to InternetVideoArchiveIE using the fixed customer id."""
        video_id = self._match_id(url)
        archive_query = f'customerid=69249&publishedid={video_id}'
        archive_url = InternetVideoArchiveIE._build_json_url(archive_query)
        return self.url_result(archive_url, ie=InternetVideoArchiveIE.ie_key())
|
||||
@ -1,51 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class VideofyMeIE(InfoExtractor):
    _WORKING = False
    _VALID_URL = r'https?://(?:www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)'
    IE_NAME = 'videofy.me'

    _TEST = {
        'url': 'http://www.videofy.me/thisisvideofyme/1100701',
        'md5': 'c77d700bdc16ae2e9f3c26019bd96143',
        'info_dict': {
            'id': '1100701',
            'ext': 'mp4',
            'title': 'This is VideofyMe',
            'description': '',
            'upload_date': '20130326',
            'timestamp': 1364288959,
            'uploader': 'VideofyMe',
            'uploader_id': 'thisisvideofyme',
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
    }

    def _real_extract(self, url):
        """Build the info dict from the player-info JSON for this video id."""
        video_id = self._match_id(url)

        player_info = self._download_json(
            f'http://vf-player-info-loader.herokuapp.com/{video_id}.json', video_id)
        config = player_info['videoinfo']

        video = config.get('video')
        blog = config.get('blog', {})

        # Required fields are read first so a malformed response fails on them
        title = video['title']
        media_url = video['sources']['source']['url']

        return {
            'id': video_id,
            'title': title,
            'url': media_url,
            'thumbnail': video.get('thumb'),
            'description': video.get('description'),
            'timestamp': parse_iso8601(video.get('date')),
            'uploader': blog.get('name'),
            'uploader_id': blog.get('identifier'),
            # Only the first run of digits in the "views" text is used
            'view_count': int_or_none(self._search_regex(
                r'([0-9]+)', video.get('views'), 'view count', fatal=False)),
            'like_count': int_or_none(video.get('likes')),
            'comment_count': int_or_none(video.get('nrOfComments')),
        }
|
||||
@ -1,190 +1,15 @@
|
||||
import base64
|
||||
import functools
|
||||
import math
|
||||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .slideslive import SlidesLiveIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
InAdvancePagedList,
|
||||
int_or_none,
|
||||
remove_start,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class VideoKenBaseIE(InfoExtractor):
|
||||
_ORGANIZATIONS = {
|
||||
'videos.icts.res.in': 'icts',
|
||||
'videos.cncf.io': 'cncf',
|
||||
'videos.neurips.cc': 'neurips',
|
||||
}
|
||||
_BASE_URL_RE = rf'https?://(?P<host>{"|".join(map(re.escape, _ORGANIZATIONS))})/'
|
||||
|
||||
_PAGE_SIZE = 12
|
||||
|
||||
def _get_org_id_and_api_key(self, org, video_id):
    """Return ``(id, apikey)`` from the VideoKen details record of ``org``.

    ``video_id`` is only passed through to the download helper.
    """
    org_details = self._download_json(
        f'https://analytics.videoken.com/api/videolake/{org}/details', video_id,
        note='Downloading organization ID and API key',
        headers={'Accept': 'application/json'})
    org_id, api_key = org_details['id'], org_details['apikey']
    return org_id, api_key
|
||||
|
||||
def _create_slideslive_url(self, video_url, video_id, referer):
    """Build a SlidesLive embed URL, optionally tagged with the embedding page.

    Returns None when neither a URL nor an id is given. A missing URL, or
    one pointing at the ``embed/sign-in`` page, is replaced by an embed URL
    derived from ``video_id``. A valid ``referer`` is added as query
    parameters; otherwise the URL is returned as is.
    """
    if not (video_url or video_id):
        return None

    if not video_url or 'embed/sign-in' in video_url:
        slideslive_id = remove_start(video_id, 'slideslive-')
        video_url = f'https://slideslive.com/embed/{slideslive_id}'

    if not url_or_none(referer):
        return video_url

    container_origin = f'https://{urllib.parse.urlparse(referer).hostname}'
    return update_url_query(video_url, {
        'embed_parent_url': referer,
        'embed_container_origin': container_origin,
    })
|
||||
|
||||
def _extract_videos(self, videos, url):
    """Yield a url_result for every usable entry under ``videos``/``results``.

    Entries without an id are skipped, as are non-YouTube entries without an
    embeddable URL. ``url`` is the page being extracted and is passed on as
    the referer for SlidesLive embeds.
    """
    for video in traverse_obj(videos, (('videos', 'results'), ...)):
        video_id = traverse_obj(video, 'youtube_id', 'videoid')
        if not video_id:
            continue

        if traverse_obj(video, 'type', 'source') == 'youtube':
            # YouTube entries are handed over by bare video id
            yield self.url_result(video_id, 'Youtube', video_id)
            continue

        video_url = traverse_obj(video, 'embed_url', 'embeddableurl', expected_type=url_or_none)
        if not video_url:
            continue

        ie_key = None
        if urllib.parse.urlparse(video_url).hostname == 'slideslive.com':
            ie_key = SlidesLiveIE
            video_url = self._create_slideslive_url(video_url, video_id, url)
        yield self.url_result(video_url, ie_key, video_id)
|
||||
|
||||
|
||||
class VideoKenIE(VideoKenBaseIE):
|
||||
_VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'(?:(?:topic|category)/[^/#?]+/)?video/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
# neurips -> videoken -> slideslive
|
||||
'url': 'https://videos.neurips.cc/video/slideslive-38922815',
|
||||
'info_dict': {
|
||||
'id': '38922815',
|
||||
'ext': 'mp4',
|
||||
'title': 'Efficient Processing of Deep Neural Network: from Algorithms to Hardware Architectures',
|
||||
'timestamp': 1630939331,
|
||||
'upload_date': '20210906',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||
'thumbnails': 'count:330',
|
||||
'chapters': 'count:329',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
'expected_warnings': ['Failed to download VideoKen API JSON'],
|
||||
}, {
|
||||
# neurips -> videoken -> slideslive -> youtube
|
||||
'url': 'https://videos.neurips.cc/topic/machine%20learning/video/slideslive-38923348',
|
||||
'info_dict': {
|
||||
'id': '2Xa_dt78rJE',
|
||||
'ext': 'mp4',
|
||||
'display_id': '38923348',
|
||||
'title': 'Machine Education',
|
||||
'description': 'Watch full version of this video at https://slideslive.com/38923348.',
|
||||
'channel': 'SlidesLive Videos - G2',
|
||||
'channel_id': 'UCOExahQQ588Da8Nft_Ltb9w',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCOExahQQ588Da8Nft_Ltb9w',
|
||||
'uploader': 'SlidesLive Videos - G2',
|
||||
'uploader_id': 'UCOExahQQ588Da8Nft_Ltb9w',
|
||||
'uploader_url': 'http://www.youtube.com/channel/UCOExahQQ588Da8Nft_Ltb9w',
|
||||
'duration': 2504,
|
||||
'timestamp': 1618922125,
|
||||
'upload_date': '20200131',
|
||||
'age_limit': 0,
|
||||
'channel_follower_count': int,
|
||||
'view_count': int,
|
||||
'availability': 'unlisted',
|
||||
'live_status': 'not_live',
|
||||
'playable_in_embed': True,
|
||||
'categories': ['People & Blogs'],
|
||||
'tags': [],
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|webp)',
|
||||
'thumbnails': 'count:78',
|
||||
'chapters': 'count:77',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
'expected_warnings': ['Failed to download VideoKen API JSON'],
|
||||
}, {
|
||||
# icts -> videoken -> youtube
|
||||
'url': 'https://videos.icts.res.in/topic/random%20variable/video/zysIsojYdvc',
|
||||
'info_dict': {
|
||||
'id': 'zysIsojYdvc',
|
||||
'ext': 'mp4',
|
||||
'title': 'Small-worlds, complex networks and random graphs (Lecture 3) by Remco van der Hofstad',
|
||||
'description': 'md5:87433069d79719eeadc1962cc2ace00b',
|
||||
'channel': 'International Centre for Theoretical Sciences',
|
||||
'channel_id': 'UCO3xnVTHzB7l-nc8mABUJIQ',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCO3xnVTHzB7l-nc8mABUJIQ',
|
||||
'uploader': 'International Centre for Theoretical Sciences',
|
||||
'uploader_id': 'ICTStalks',
|
||||
'uploader_url': 'http://www.youtube.com/user/ICTStalks',
|
||||
'duration': 3372,
|
||||
'upload_date': '20191004',
|
||||
'age_limit': 0,
|
||||
'live_status': 'not_live',
|
||||
'availability': 'public',
|
||||
'playable_in_embed': True,
|
||||
'channel_follower_count': int,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'categories': ['Science & Technology'],
|
||||
'tags': [],
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|webp)',
|
||||
'thumbnails': 'count:42',
|
||||
'chapters': 'count:20',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://videos.cncf.io/category/478/video/IL4nxbmUIX8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://videos.cncf.io/topic/kubernetes/video/YAM2d7yTrrI',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://videos.icts.res.in/video/d7HuP_abpKU',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Resolve a single VideoKen video page to its hosting extractor.

    The VideoKen API is queried non-fatally. If it returns nothing, or
    returns a record no entry can be built from, the id itself decides:
    ``slideslive-<n>`` ids go to SlidesLive and 11-character ids are
    treated as YouTube ids.

    Raises:
        ExtractorError: if neither the API nor the id yields a target.
    """
    hostname, video_id = self._match_valid_url(url).group('host', 'id')
    org_id, _ = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], video_id)
    details = self._download_json(
        'https://analytics.videoken.com/api/videoinfo_private', video_id, query={
            'videoid': video_id,
            'org_id': org_id,
        }, headers={'Accept': 'application/json'}, note='Downloading VideoKen API JSON',
        errnote='Failed to download VideoKen API JSON', fatal=False)

    if details:
        # _extract_videos() may yield nothing (e.g. a record without an id or
        # embed URL). Use a default so StopIteration does not escape and the
        # id-based fallbacks below still get their chance.
        entry = next(self._extract_videos({'videos': [details]}, url), None)
        if entry:
            return entry

    # fallback for API error 400 response (or an unusable API record)
    if video_id.startswith('slideslive-'):
        return self.url_result(
            self._create_slideslive_url(None, video_id, url), SlidesLiveIE, video_id)
    if re.match(r'^[\w-]{11}$', video_id):
        return self.url_result(video_id, 'Youtube', video_id)
    raise ExtractorError('Unable to extract without VideoKen API response')
|
||||
|
||||
|
||||
class VideoKenPlayerIE(VideoKenBaseIE):
|
||||
class VideoKenPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://player\.videoken\.com/embed/slideslive-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://player.videoken.com/embed/slideslive-38968434',
|
||||
@ -203,135 +28,19 @@ class VideoKenPlayerIE(VideoKenBaseIE):
|
||||
},
|
||||
}]
|
||||
|
||||
def _create_slideslive_url(self, video_url, video_id, referer):
    """Return the SlidesLive embed URL for a VideoKen player entry.

    Yields None without a URL or id. The URL is rebuilt from ``video_id``
    when it is missing or is the ``embed/sign-in`` page. A valid
    ``referer`` is recorded in the query string.
    """
    if not (video_url or video_id):
        return None

    needs_rebuild = not video_url or 'embed/sign-in' in video_url
    if needs_rebuild:
        video_url = f'https://slideslive.com/embed/{remove_start(video_id, "slideslive-")}'

    if url_or_none(referer):
        referer_host = urllib.parse.urlparse(referer).hostname
        embed_params = {
            'embed_parent_url': referer,
            'embed_container_origin': f'https://{referer_host}',
        }
        return update_url_query(video_url, embed_params)
    return video_url
|
||||
|
||||
def _real_extract(self, url):
    """Forward a VideoKen player embed to the SlidesLive extractor."""
    video_id = self._match_id(url)
    # No source URL is known here, so the embed URL is built from the id
    slideslive_url = self._create_slideslive_url(None, video_id, url)
    return self.url_result(slideslive_url, SlidesLiveIE, video_id)
|
||||
|
||||
|
||||
class VideoKenPlaylistIE(VideoKenBaseIE):
|
||||
_VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'(?:category/\d+/)?playlist/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://videos.icts.res.in/category/1822/playlist/381',
|
||||
'playlist_mincount': 117,
|
||||
'info_dict': {
|
||||
'id': '381',
|
||||
'title': 'Cosmology - The Next Decade',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Return a playlist of all items in a VideoKen playlist."""
    match = self._match_valid_url(url)
    hostname, playlist_id = match.group('host', 'id')
    org_id, _ = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], playlist_id)

    items_url = f'https://analytics.videoken.com/api/{org_id}/playlistitems/{playlist_id}/'
    videos = self._download_json(
        items_url, playlist_id,
        headers={'Accept': 'application/json'}, note='Downloading API JSON')

    entries = self._extract_videos(videos, url)
    return self.playlist_result(entries, playlist_id, videos.get('title'))
|
||||
|
||||
|
||||
class VideoKenCategoryIE(VideoKenBaseIE):
|
||||
_VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'category/(?P<id>\d+)/?(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'https://videos.icts.res.in/category/1822/',
|
||||
'playlist_mincount': 500,
|
||||
'info_dict': {
|
||||
'id': '1822',
|
||||
'title': 'Programs',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://videos.neurips.cc/category/350/',
|
||||
'playlist_mincount': 34,
|
||||
'info_dict': {
|
||||
'id': '350',
|
||||
'title': 'NeurIPS 2018',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://videos.cncf.io/category/479/',
|
||||
'playlist_mincount': 328,
|
||||
'info_dict': {
|
||||
'id': '479',
|
||||
'title': 'KubeCon + CloudNativeCon Europe\'19',
|
||||
},
|
||||
}]
|
||||
|
||||
def _get_category_page(self, category_id, org_id, page=1, note=None):
    """Download one page of a category listing.

    The request is non-fatal: an empty dict is returned when the download
    produces nothing.
    """
    page_query = {
        'category_id': category_id,
        'page_number': page,
        'length': self._PAGE_SIZE,
    }
    page_data = self._download_json(
        f'https://analytics.videoken.com/api/videolake/{org_id}/category_videos', category_id,
        fatal=False, note=note or f'Downloading category page {page}',
        query=page_query, headers={'Accept': 'application/json'})
    return page_data or {}
|
||||
|
||||
def _entries(self, category_id, org_id, url, page):
    """Yield the entries of one category page."""
    # ``page`` is shifted by one before the API request
    page_data = self._get_category_page(category_id, org_id, page + 1)
    yield from self._extract_videos(page_data, url)
|
||||
|
||||
def _real_extract(self, url):
    """Return a lazily paged playlist of every video in a category.

    The first page request supplies the category name and total record
    count; the remaining pages are fetched on demand.

    Raises:
        ExtractorError: if the category info has no usable record count.
    """
    hostname, category_id = self._match_valid_url(url).group('host', 'id')
    org_id, _ = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], category_id)
    category_info = self._get_category_page(category_id, org_id, note='Downloading category info')

    # _get_category_page() is non-fatal and returns {} on failure, so report
    # a clear extraction error instead of an opaque KeyError.
    total_records = int_or_none(category_info.get('recordsTotal'))
    if total_records is None:
        raise ExtractorError('Unable to download category info')

    total_pages = math.ceil(total_records / self._PAGE_SIZE)
    return self.playlist_result(
        InAdvancePagedList(
            functools.partial(self._entries, category_id, org_id, url),
            total_pages, self._PAGE_SIZE),
        # The name is only the playlist title, so it is treated as optional
        category_id, category_info.get('category_name'))
|
||||
|
||||
|
||||
class VideoKenTopicIE(VideoKenBaseIE):
|
||||
_VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'topic/(?P<id>[^/#?]+)/?(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'https://videos.neurips.cc/topic/machine%20learning/',
|
||||
'playlist_mincount': 500,
|
||||
'info_dict': {
|
||||
'id': 'machine_learning',
|
||||
'title': 'machine learning',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://videos.icts.res.in/topic/gravitational%20waves/',
|
||||
'playlist_mincount': 77,
|
||||
'info_dict': {
|
||||
'id': 'gravitational_waves',
|
||||
'title': 'gravitational waves',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://videos.cncf.io/topic/prometheus/',
|
||||
'playlist_mincount': 134,
|
||||
'info_dict': {
|
||||
'id': 'prometheus',
|
||||
'title': 'prometheus',
|
||||
},
|
||||
}]
|
||||
|
||||
def _get_topic_page(self, topic, org_id, search_id, api_key, page=1, note=None):
    """Download one page of topic search results.

    The request is non-fatal: an empty dict is returned when the download
    produces nothing.
    """
    search_query = {
        'orgid': org_id,
        'size': self._PAGE_SIZE,
        'query': topic,
        'page': page,
        'sort': 'upload_desc',
        'filter': 'all',
        'token': api_key,
        'is_topic': 'true',
        'category': '',
        'searchid': search_id,
    }
    results = self._download_json(
        'https://es.videoken.com/api/v1.0/get_results', topic, fatal=False,
        query=search_query, headers={'Accept': 'application/json'},
        note=note or f'Downloading topic page {page}')
    return results or {}
|
||||
|
||||
def _entries(self, topic, org_id, search_id, api_key, url, page):
    """Yield the entries of one topic result page."""
    # ``page`` is shifted by one before the API request
    page_data = self._get_topic_page(topic, org_id, search_id, api_key, page + 1)
    yield from self._extract_videos(page_data, url)
|
||||
|
||||
def _real_extract(self, url):
    """Return a lazily paged playlist of every video tagged with a topic.

    The playlist id is the URL-decoded topic with spaces replaced by
    underscores; the decoded topic itself is the title.

    Raises:
        ExtractorError: if the topic info has no usable page count.
    """
    hostname, topic_id = self._match_valid_url(url).group('host', 'id')
    topic = urllib.parse.unquote(topic_id)
    topic_id = topic.replace(' ', '_')
    org_id, api_key = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], topic)
    # The search id embeds the topic and the current time, so it differs per run
    search_id = base64.b64encode(f':{topic}:{int(time.time())}:transient'.encode()).decode()

    topic_info = self._get_topic_page(
        topic, org_id, search_id, api_key, note='Downloading topic info')
    # _get_topic_page() is non-fatal and returns {} on failure; a missing or
    # non-numeric page count must not reach InAdvancePagedList as None.
    total_pages = int_or_none(topic_info.get('total_no_of_pages'))
    if total_pages is None:
        raise ExtractorError('Unable to download topic info')

    return self.playlist_result(InAdvancePagedList(
        functools.partial(self._entries, topic, org_id, search_id, api_key, url),
        total_pages, self._PAGE_SIZE), topic_id, topic)
|
||||
|
||||
@ -1,304 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_qs,
|
||||
)
|
||||
|
||||
|
||||
class VideomoreBaseIE(InfoExtractor):
|
||||
_API_BASE_URL = 'https://more.tv/api/v3/web/'
|
||||
_VALID_URL_BASE = r'https?://(?:videomore\.ru|more\.tv)/'
|
||||
|
||||
def _download_page_data(self, display_id):
    """Fetch the PageData record for a site path and return its data payload."""
    page = self._download_json(
        f'{self._API_BASE_URL}PageData', display_id,
        query={'url': '/' + display_id})
    return page['attributes']['response']['data']
|
||||
|
||||
def _track_url_result(self, track):
    """Turn an API track record into a url_result for VideomoreIE."""
    vod = track['trackVod']
    # Prefer the player link; the plain link is required when it is absent
    player_url = vod.get('playerLink')
    if not player_url:
        player_url = vod['link']
    return self.url_result(player_url, VideomoreIE.ie_key(), vod.get('hubId'))
|
||||
|
||||
|
||||
class VideomoreIE(InfoExtractor):
|
||||
IE_NAME = 'videomore'
|
||||
# Accepts the internal "videomore:<id>" form and the videomore.ru /
# more.tv player URLs. Every literal dot in the host names is escaped;
# the one in "odysseus.more.tv" used to be a bare "." that matched any character.
_VALID_URL = r'''(?x)
    videomore:(?P<sid>\d+)$|
    https?://
        (?:
            videomore\.ru/
                (?:
                    embed|
                    [^/]+/[^/]+
                )/|
            (?:
                (?:player\.)?videomore\.ru|
                siren\.more\.tv/player
            )/[^/]*\?.*?\btrack_id=|
            odysseus\.more\.tv/player/(?P<partner_id>\d+)/
        )
        (?P<id>\d+)
        (?:[/?#&]|\.(?:xml|json)|$)
    '''
|
||||
_EMBED_REGEX = [r'''(?x)
|
||||
(?:
|
||||
<iframe[^>]+src=([\'"])|
|
||||
<object[^>]+data=(["\'])https?://videomore\.ru/player\.swf\?.*config=
|
||||
)(?P<url>https?://videomore\.ru/[^?#"']+/\d+(?:\.xml)?)
|
||||
''']
|
||||
_TESTS = [{
|
||||
'url': 'http://videomore.ru/kino_v_detalayah/5_sezon/367617',
|
||||
'md5': '44455a346edc0d509ac5b5a5b531dc35',
|
||||
'info_dict': {
|
||||
'id': '367617',
|
||||
'ext': 'flv',
|
||||
'title': 'Кино в деталях 5 сезон В гостях Алексей Чумаков и Юлия Ковальчук',
|
||||
'series': 'Кино в деталях',
|
||||
'episode': 'В гостях Алексей Чумаков и Юлия Ковальчук',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 2910,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'age_limit': 16,
|
||||
},
|
||||
'skip': 'The video is not available for viewing.',
|
||||
}, {
|
||||
'url': 'http://videomore.ru/embed/259974',
|
||||
'info_dict': {
|
||||
'id': '259974',
|
||||
'ext': 'mp4',
|
||||
'title': 'Молодежка 2 сезон 40 серия',
|
||||
'series': 'Молодежка',
|
||||
'season': '2 сезон',
|
||||
'episode': '40 серия',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 2789,
|
||||
'view_count': int,
|
||||
'age_limit': 16,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://videomore.ru/molodezhka/sezon_promo/341073',
|
||||
'info_dict': {
|
||||
'id': '341073',
|
||||
'ext': 'flv',
|
||||
'title': 'Промо Команда проиграла из-за Бакина?',
|
||||
'episode': 'Команда проиграла из-за Бакина?',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 29,
|
||||
'age_limit': 16,
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'The video is not available for viewing.',
|
||||
}, {
|
||||
'url': 'http://videomore.ru/elki_3?track_id=364623',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://videomore.ru/embed/364623',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://videomore.ru/video/tracks/364623.xml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://videomore.ru/video/tracks/364623.json',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://videomore.ru/video/tracks/158031/quotes/33248',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'videomore:367617',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://player.videomore.ru/?partner_id=97&track_id=736234&autoplay=0&userToken=',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://odysseus.more.tv/player/1788/352317',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://siren.more.tv/player/config?track_id=352317&partner_id=1788&user_token=',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
|
||||
def _real_extract(self, url):
    """Extract one track from the siren.more.tv player config.

    The partner id comes from the URL path, then the query string, then
    defaults to '97'. Streams listed in the config and the top-level
    ``dash_url`` / ``hls_url`` fields are all turned into formats.
    """
    mobj = self._match_valid_url(url)
    video_id = mobj.group('sid') or mobj.group('id')
    partner_id = mobj.group('partner_id') or parse_qs(url).get('partner_id', [None])[0] or '97'

    player_config = self._download_json(
        'https://siren.more.tv/player/config', video_id, query={
            'partner_id': partner_id,
            'track_id': video_id,
        })
    item = player_config['data']['playlist']['items'][0]

    series = item.get('project_name')
    season = item.get('season_name')
    episode = item.get('episode_name')
    # Without an explicit title, join whichever name parts are present
    title = item.get('title') or ' '.join(filter(None, (series, season, episode)))

    streams = item.get('streams') or []
    for protocol in ('DASH', 'HLS'):
        direct_url = item.get(f'{protocol.lower()}_url')
        if direct_url:
            streams.append({'protocol': protocol, 'url': direct_url})

    formats = []
    for stream in streams:
        stream_url = stream.get('url')
        if not stream_url:
            continue
        protocol = stream.get('protocol')
        if protocol == 'DASH':
            formats.extend(self._extract_mpd_formats(
                stream_url, video_id, mpd_id='dash', fatal=False))
        elif protocol == 'HLS':
            formats.extend(self._extract_m3u8_formats(
                stream_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))
        elif protocol == 'MSS':
            formats.extend(self._extract_ism_formats(
                stream_url, video_id, ism_id='mss', fatal=False))

    # The server-side error is only consulted when nothing was extracted
    error = None if formats else item.get('error')
    if error:
        geo_errors = (
            'Данное видео недоступно для просмотра на территории этой страны',
            'Данное видео доступно для просмотра только на территории России',
        )
        if error in geo_errors:
            self.raise_geo_restricted(countries=['RU'], metadata_available=True)
        self.raise_no_formats(error, expected=True)

    return {
        'id': video_id,
        'title': title,
        'series': series,
        'season': season,
        'episode': episode,
        'thumbnail': item.get('thumbnail_url'),
        'duration': int_or_none(item.get('duration')),
        'view_count': int_or_none(item.get('views')),
        'age_limit': int_or_none(item.get('min_age')),
        'formats': formats,
    }
|
||||
|
||||
|
||||
class VideomoreVideoIE(VideomoreBaseIE):
|
||||
IE_NAME = 'videomore:video'
|
||||
_VALID_URL = VideomoreBaseIE._VALID_URL_BASE + r'(?P<id>(?:(?:[^/]+/){2})?[^/?#&]+)(?:/*|[?#&].*?)$'
|
||||
_TESTS = [{
|
||||
# single video with og:video:iframe
|
||||
'url': 'http://videomore.ru/elki_3',
|
||||
'info_dict': {
|
||||
'id': '364623',
|
||||
'ext': 'flv',
|
||||
'title': 'Ёлки 3',
|
||||
'description': '',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 5579,
|
||||
'age_limit': 6,
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
# season single series with og:video:iframe
|
||||
'url': 'http://videomore.ru/poslednii_ment/1_sezon/14_seriya',
|
||||
'info_dict': {
|
||||
'id': '352317',
|
||||
'ext': 'mp4',
|
||||
'title': 'Последний мент 1 сезон 14 серия',
|
||||
'series': 'Последний мент',
|
||||
'season': '1 сезон',
|
||||
'episode': '14 серия',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 2464,
|
||||
'age_limit': 16,
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://videomore.ru/sejchas_v_seti/serii_221-240/226_vypusk',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# single video without og:video:iframe
|
||||
'url': 'http://videomore.ru/marin_i_ego_druzya',
|
||||
'info_dict': {
|
||||
'id': '359073',
|
||||
'ext': 'flv',
|
||||
'title': '1 серия. Здравствуй, Аквавилль!',
|
||||
'description': 'md5:c6003179538b5d353e7bcd5b1372b2d7',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 754,
|
||||
'age_limit': 6,
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'redirects to https://more.tv/',
|
||||
}, {
|
||||
'url': 'https://videomore.ru/molodezhka/6_sezon/29_seriya?utm_so',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://more.tv/poslednii_ment/1_sezon/14_seriya',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
def suitable(cls, url):
    """Decline URLs that VideomoreIE accepts; otherwise use the normal check."""
    if VideomoreIE.suitable(url):
        return False
    return super().suitable(url)
|
||||
|
||||
def _real_extract(self, url):
    """Look up the page's track record and delegate to VideomoreIE."""
    display_id = self._match_id(url)
    page_data = self._download_page_data(display_id)
    return self._track_url_result(page_data)
|
||||
|
||||
|
||||
class VideomoreSeasonIE(VideomoreBaseIE):
    """Playlist extractor for two-segment Videomore URLs (``<project>/<season>``)."""

    IE_NAME = 'videomore:season'
    _VALID_URL = VideomoreBaseIE._VALID_URL_BASE + r'(?!embed)(?P<id>[^/]+/[^/?#&]+)(?:/*|[?#&].*?)$'
    _TESTS = [{
        'url': 'http://videomore.ru/molodezhka/film_o_filme',
        'info_dict': {
            'id': 'molodezhka/film_o_filme',
            'title': 'Фильм о фильме',
        },
        'playlist_mincount': 3,
    }, {
        'url': 'http://videomore.ru/molodezhka/sezon_promo?utm_so',
        'only_matching': True,
    }, {
        'url': 'https://more.tv/molodezhka/film_o_filme',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # URLs claimed by the single-video extractors are never treated as seasons
        if VideomoreIE.suitable(url) or VideomoreVideoIE.suitable(url):
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        display_id = self._match_id(url)
        season = self._download_page_data(display_id)
        season_id = str(season['id'])

        tracks_url = self._API_BASE_URL + f'seasons/{season_id}/tracks'
        tracks = self._download_json(tracks_url, season_id)['data']

        # One URL result per track of the season
        entries = [self._track_url_result(track) for track in tracks]
        return self.playlist_result(entries, display_id, season.get('title'))
|
||||
@ -1,66 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class VimmIE(InfoExtractor):
    """Extractor for live channel streams on vimm.tv (HLS playlist per channel)."""

    IE_NAME = 'Vimm:stream'
    _VALID_URL = r'https?://(?:www\.)?vimm\.tv/(?:c/)?(?P<id>[0-9a-z-]+)$'
    _TESTS = [{
        'url': 'https://www.vimm.tv/c/calimeatwagon',
        'info_dict': {
            'id': 'calimeatwagon',
            'ext': 'mp4',
            'title': 're:^calimeatwagon [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'live_status': 'is_live',
        },
        'skip': 'Live',
    }, {
        'url': 'https://www.vimm.tv/octaafradio',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        channel_id = self._match_id(url)

        # The live playlist location is derived directly from the channel name
        playlist_url = f'https://www.vimm.tv/hls/{channel_id}.m3u8'
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            playlist_url, channel_id, 'mp4', m3u8_id='hls', live=True)

        return {
            'id': channel_id,
            'title': channel_id,
            'is_live': True,
            'formats': formats,
            'subtitles': subtitles,
        }
|
||||
|
||||
|
||||
class VimmRecordingIE(InfoExtractor):
    """Extractor for recorded vimm.tv broadcasts (``/c/<channel>?v=<video>`` URLs)."""

    IE_NAME = 'Vimm:recording'
    _VALID_URL = r'https?://(?:www\.)?vimm\.tv/c/(?P<channel_id>[0-9a-z-]+)\?v=(?P<video_id>[0-9A-Za-z]+)'
    _TESTS = [{
        'url': 'https://www.vimm.tv/c/kaldewei?v=2JZsrPTFxsSz',
        'md5': '15122ee95baa32a548e4a3e120b598f1',
        'info_dict': {
            'id': '2JZsrPTFxsSz',
            'ext': 'mp4',
            'title': 'VIMM - [DE/GER] Kaldewei Live - In Farbe und Bunt',
            'uploader_id': 'kaldewei',
        },
    }]

    def _real_extract(self, url):
        channel_id, video_id = self._match_valid_url(url).group('channel_id', 'video_id')

        # The page is only needed for its OpenGraph title
        title = self._og_search_title(self._download_webpage(url, video_id))

        playlist_url = f'https://d211qfrkztakg3.cloudfront.net/{channel_id}/{video_id}/index.m3u8'
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            playlist_url, video_id, 'mp4', m3u8_id='hls', live=False)

        return {
            'id': video_id,
            'title': title,
            'is_live': False,
            'uploader_id': channel_id,
            'formats': formats,
            'subtitles': subtitles,
        }
|
||||
@ -1,29 +0,0 @@
|
||||
from .onet import OnetBaseIE
|
||||
|
||||
|
||||
class VODPlIE(OnetBaseIE):
    """Extractor for vod.pl pages, built on the shared Onet helpers."""

    _VALID_URL = r'https?://vod\.pl/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)'

    _TESTS = [{
        'url': 'https://vod.pl/filmy/chlopaki-nie-placza/3ep3jns',
        'md5': 'a7dc3b2f7faa2421aefb0ecaabf7ec74',
        'info_dict': {
            'id': '3ep3jns',
            'ext': 'mp4',
            'title': 'Chłopaki nie płaczą',
            'description': 'md5:f5f03b84712e55f5ac9f0a3f94445224',
            'timestamp': 1463415154,
            'duration': 5765,
            'upload_date': '20160516',
        },
    }, {
        'url': 'https://vod.pl/seriale/belfer-na-planie-praca-kamery-online/2c10heh',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        mvp_id = self._search_mvp_id(webpage)
        info = self._extract_from_id(mvp_id, webpage)
        # Report the id taken from the page URL rather than the one set by the helper
        info['id'] = video_id
        return info
|
||||
@ -1,66 +0,0 @@
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class VuClipIE(InfoExtractor):
    """Extractor for vuclip.com video pages, identified by the ``cid`` query parameter."""

    _VALID_URL = r'https?://(?:m\.)?vuclip\.com/w\?.*?cid=(?P<id>[0-9]+)'

    _TEST = {
        'url': 'http://m.vuclip.com/w?cid=1129900602&bu=8589892792&frm=w&z=34801&op=0&oc=843169247&section=recommend',
        'info_dict': {
            'id': '1129900602',
            'ext': '3gp',
            'title': 'Top 10 TV Convicts',
            'duration': 733,
        },
    }

    def _real_extract(self, url):
        """Extract a single video.

        Raises:
            ExtractorError: if the site shows an error message, or if neither
                page layout yields a playable media URL.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # An interstitial may be served first; follow its "No ..." button to the real page
        ad_m = re.search(
            r'''value="No.*?" onClick="location.href='([^"']+)'"''', webpage)
        if ad_m:
            urlr = urllib.parse.urlparse(url)
            adfree_url = f'{urlr.scheme}://{urlr.netloc}{ad_m.group(1)}'
            webpage = self._download_webpage(
                adfree_url, video_id, note='Download post-ad page')

        error_msg = self._html_search_regex(
            r'<p class="message">(.*?)</p>', webpage, 'error message',
            default=None)
        if error_msg:
            raise ExtractorError(
                f'{self.IE_NAME} said: {error_msg}', expected=True)

        # The site alternates between two page types: a play-button link or HTML5 media
        video_url = self._search_regex(
            r'<a[^>]+href="([^"]+)"[^>]*><img[^>]+src="[^"]*/play\.gif',
            webpage, 'video URL', default=None)
        if video_url:
            formats = [{
                'url': video_url,
            }]
        else:
            media_entries = self._parse_html5_media_entries(url, webpage, video_id)
            if not media_entries:
                # Report a meaningful failure instead of an IndexError on an empty list
                raise ExtractorError('Unable to extract video formats')
            formats = media_entries[0]['formats']

        title = remove_end(self._html_search_regex(
            r'<title>(.*?)-\s*Vuclip</title>', webpage, 'title').strip(), ' - Video')

        duration = parse_duration(self._html_search_regex(
            r'[(>]([0-9]+:[0-9]+)(?:<span|\))', webpage, 'duration', fatal=False))

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'duration': duration,
        }
|
||||
@ -1,336 +0,0 @@
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
|
||||
class VVVVIDIE(InfoExtractor):
    """Extractor for single vvvvid.it videos.

    A connection id is fetched from the site's login endpoint on initialization
    and sent with every on-demand API request. The obfuscated ``embed_info``
    values returned by the API are decoded locally before formats are extracted.
    """

    _VALID_URL_BASE = r'https?://(?:www\.)?vvvvid\.it/(?:#!)?(?:show|anime|film|series)/'
    _VALID_URL = rf'{_VALID_URL_BASE}(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)'
    _TESTS = [{
        # video_type == 'video/vvvvid'
        'url': 'https://www.vvvvid.it/show/498/the-power-of-computing/518/505692/playstation-vr-cambiera-il-nostro-modo-di-giocare',
        'info_dict': {
            'id': '505692',
            'ext': 'mp4',
            'title': 'Playstation VR cambierà il nostro modo di giocare',
            'duration': 93,
            'series': 'The Power of Computing',
            'season_id': '518',
            'episode': 'Playstation VR cambierà il nostro modo di giocare',
            'episode_id': '4747',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'thumbnail': 'https://static.vvvvid.it/img/zoomin/28CA2409-E663-34F0-2B02E72356556EA3_500k.jpg',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # video_type == 'video/rcs'
        'url': 'https://www.vvvvid.it/#!show/376/death-note-live-action/377/482493/episodio-01',
        'info_dict': {
            'id': '482493',
            'ext': 'mp4',
            'title': 'Episodio 01',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Every video/rcs is not working even in real website',
    }, {
        # video_type == 'video/youtube'
        'url': 'https://www.vvvvid.it/show/404/one-punch-man/406/486683/trailer',
        'md5': '33e0edfba720ad73a8782157fdebc648',
        'info_dict': {
            'id': 'RzmFKUDOUgw',
            'ext': 'mp4',
            'title': 'Trailer',
            'upload_date': '20150906',
            'description': 'md5:a5e802558d35247fee285875328c0b80',
            'uploader_id': '@EMOTIONLabelChannel',
            'uploader': 'EMOTION Label Channel',
            'episode_id': '3115',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'availability': str,
            'categories': list,
            'age_limit': 0,
            'channel': 'EMOTION Label Channel',
            'channel_follower_count': int,
            'channel_id': 'UCQ5URCSs1f5Cz9rh-cDGxNQ',
            'channel_url': 'https://www.youtube.com/channel/UCQ5URCSs1f5Cz9rh-cDGxNQ',
            'comment_count': int,
            'duration': 133,
            'episode': 'Trailer',
            'heatmap': list,
            'live_status': 'not_live',
            'playable_in_embed': True,
            'season_id': '406',
            'series': 'One-Punch Man',
            'tags': list,
            'uploader_url': 'https://www.youtube.com/@EMOTIONLabelChannel',
            'thumbnail': 'https://i.ytimg.com/vi/RzmFKUDOUgw/maxresdefault.jpg',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # video_type == 'video/dash'
        'url': 'https://www.vvvvid.it/show/844/le-bizzarre-avventure-di-jojo-vento-aureo/938/527551/golden-wind',
        'info_dict': {
            'id': '527551',
            'ext': 'mp4',
            'title': 'Golden Wind',
            'duration': 1430,
            'series': 'Le bizzarre avventure di Jojo - Vento Aureo',
            'season_id': '938',
            'episode': 'Golden Wind',
            'episode_number': 1,
            'episode_id': '9089',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'thumbnail': 'https://static.vvvvid.it/img/thumbs/Dynit/Jojo/Jojo_S05Ep01-t.jpg',
            'season': 'Season 5',
            'season_number': 5,
        },
        'params': {
            'skip_download': True,
            'format': 'mp4',
        },
    }, {
        'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
        'only_matching': True,
    }]
    _conn_id = None

    @functools.cached_property
    def _headers(self):
        """Request headers: geo-verification headers plus a fixed browser User-Agent."""
        headers = dict(self.geo_verification_headers())
        headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.50 Safari/537.37'
        return headers

    def _real_initialize(self):
        """Fetch the connection id that all later API calls must carry."""
        login = self._download_json(
            'https://www.vvvvid.it/user/login', None, headers=self._headers)
        self._conn_id = login['data']['conn_id']

    def _download_info(self, show_id, path, video_id, fatal=True, query=None):
        """Query the on-demand API for *show_id* at *path* and return its ``data`` payload.

        Returns None when there is no response and *fatal* is false.
        Raises ExtractorError (expected) when the API reports ``result == 'error'``.
        """
        params = {
            'conn_id': self._conn_id,
        }
        if query:
            params.update(query)
        response = self._download_json(
            f'https://www.vvvvid.it/vvvvid/ondemand/{show_id}/{path}',
            video_id, headers=self._headers, query=params, fatal=fatal)
        if not response and not fatal:
            return None
        if response.get('result') == 'error':
            raise ExtractorError(
                f'{self.IE_NAME} said: {response["message"]}', expected=True)
        return response['data']

    def _extract_common_video_info(self, video_data):
        """Return the fields shared by single-video and playlist-entry results."""
        return {
            'thumbnail': video_data.get('thumbnail'),
            'episode_id': str_or_none(video_data.get('id')),
        }

    def _real_extract(self, url):
        show_id, season_id, video_id = self._match_valid_url(url).groups()

        episodes = self._download_info(
            show_id, f'season/{season_id}',
            video_id, query={'video_id': video_id})

        wanted_id = int(video_id)
        video_data = next(
            episode for episode in episodes if episode.get('video_id') == wanted_id)
        title = video_data['title']
        formats = []

        # vvvvid embed_info decryption algorithm is reverse engineered from function $ds(h) at vvvvid.js
        def ds(h):
            alphabet = 'MNOPIJKL89+/4567UVWXQRSTEFGHABCDcdefYZabstuvopqr0123wxyzklmnghij'

            def regroup(sextets):
                # Pack groups of four 6-bit values into up to three bytes (base64-style)
                decoded = []
                pos = 0
                total = len(sextets)
                while pos < total:
                    first = sextets[pos] << 2
                    pos += 1
                    second = third = -1
                    if pos < total:
                        first += sextets[pos] >> 4
                        pos += 1
                        if pos < total:
                            second = ((sextets[pos - 1] << 4) & 255) + (sextets[pos] >> 2)
                            pos += 1
                            if pos < total:
                                third = ((sextets[pos - 1] << 6) & 255) + sextets[pos]
                                pos += 1
                    decoded.append(first)
                    if second != -1:
                        decoded.append(second)
                    if third != -1:
                        decoded.append(third)
                return decoded

            values = [alphabet.index(char) for char in h]

            # Two backwards passes XOR-ing each value with its (cyclic) successor
            count = len(values)
            for idx in range(count * 2 - 1, -1, -1):
                values[idx % count] ^= values[(idx + 1) % count]

            return ''.join(map(chr, regroup(values)))

        info = {}

        def metadata_from_url(r_url):
            # Keyed on 'episode_number' rather than on `info` being empty: by the time
            # the thumbnail fallback runs, `info` always holds 'formats' or '_type',
            # so an emptiness check would make that fallback unreachable
            if 'episode_number' in info or not r_url:
                return
            mobj = re.search(r'_(?:S(\d+))?Ep(\d+)', r_url)
            if not mobj:
                return
            season_number, episode_number = mobj.groups()
            info['episode_number'] = int(episode_number)
            if season_number:
                info['season_number'] = int(season_number)

        video_type = video_data.get('video_type')
        is_youtube = False
        for quality in ('', '_sd'):
            embed_code = video_data.get('embed_info' + quality)
            if not embed_code:
                continue
            embed_code = ds(embed_code)
            if video_type == 'video/kenc':
                embed_code = re.sub(r'https?(://[^/]+)/z/', r'https\1/i/', embed_code).replace('/manifest.f4m', '/master.m3u8')
                kenc = self._download_json(
                    'https://www.vvvvid.it/kenc', video_id, query={
                        'action': 'kt',
                        'conn_id': self._conn_id,
                        'url': embed_code,
                    }, fatal=False) or {}
                kenc_message = kenc.get('message')
                if kenc_message:
                    embed_code += '?' + ds(kenc_message)
                formats.extend(self._extract_m3u8_formats(
                    embed_code, video_id, 'mp4', m3u8_id='hls', fatal=False))
            elif video_type == 'video/rcs':
                formats.extend(self._extract_akamai_formats(embed_code, video_id))
            elif video_type == 'video/youtube':
                # Hand over to the YouTube extractor; no local formats are collected
                info.update({
                    '_type': 'url_transparent',
                    'ie_key': YoutubeIE.ie_key(),
                    'url': embed_code,
                })
                is_youtube = True
                break
            elif video_type == 'video/dash':
                formats.extend(self._extract_m3u8_formats(
                    embed_code, video_id, 'mp4', m3u8_id='hls', fatal=False))
            else:
                formats.extend(self._extract_wowza_formats(
                    f'http://sb.top-ix.org/videomg/_definst_/mp4:{embed_code}/playlist.m3u8', video_id, skip_protocols=['f4m']))
            metadata_from_url(embed_code)

        if not is_youtube:
            info['formats'] = formats

        # Fall back to the thumbnail file name for season/episode numbers
        metadata_from_url(video_data.get('thumbnail'))
        info.update(self._extract_common_video_info(video_data))
        info.update({
            'id': video_id,
            'title': title,
            'duration': int_or_none(video_data.get('length')),
            'series': video_data.get('show_title'),
            'season_id': season_id,
            'episode': title,
            'view_count': int_or_none(video_data.get('views')),
            'like_count': int_or_none(video_data.get('video_likes')),
            'repost_count': int_or_none(video_data.get('video_shares')),
        })
        return info
|
||||
|
||||
|
||||
class VVVVIDShowIE(VVVVIDIE):  # XXX: Do not subclass from concrete IE
    """Playlist extractor for a whole vvvvid.it show (all playable episodes of all seasons)."""

    _VALID_URL = rf'(?P<base_url>{VVVVIDIE._VALID_URL_BASE}(?P<id>\d+)(?:/(?P<show_title>[^/?&#]+))?)/?(?:[?#&]|$)'
    _TESTS = [{
        'url': 'https://www.vvvvid.it/show/156/psyco-pass',
        'info_dict': {
            'id': '156',
            'title': 'Psycho-Pass',
            'description': 'md5:94d572c0bd85894b193b8aebc9a3a806',
        },
        'playlist_count': 46,
    }, {
        'url': 'https://www.vvvvid.it/show/156',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        base_url, show_id, show_title = self._match_valid_url(url).groups()

        seasons = self._download_info(
            show_id, 'seasons/', show_title)

        # The show info request is non-fatal and may yield nothing; fall back to an
        # empty mapping so the `.get()` lookups below cannot fail on None
        show_info = self._download_info(
            show_id, 'info/', show_title, fatal=False) or {}

        if not show_title:
            # Episode URLs need a title path segment; use a placeholder when none was given
            base_url += '/title'

        entries = []
        for season in (seasons or []):
            playlist_title = season.get('name') or show_info.get('title')
            for episode in season.get('episodes') or []:
                if episode.get('playable') is False:
                    continue
                season_id = str_or_none(episode.get('season_id'))
                video_id = str_or_none(episode.get('video_id'))
                if not (season_id and video_id):
                    continue
                entries.append({
                    **self._extract_common_video_info(episode),
                    '_type': 'url_transparent',
                    'ie_key': VVVVIDIE.ie_key(),
                    'url': '/'.join([base_url, season_id, video_id]),
                    'title': episode.get('title'),
                    'description': episode.get('description'),
                    'season_id': season_id,
                    'playlist_title': playlist_title,
                })

        return self.playlist_result(
            entries, show_id, show_info.get('title'), show_info.get('description'))
|
||||
@ -347,35 +347,3 @@ class WDRElefantIE(InfoExtractor):
|
||||
raise ExtractorError(
|
||||
f'{display_id} is not a video', expected=True)
|
||||
return self.url_result(zmdb_url_element.text, ie=WDRIE.ie_key())
|
||||
|
||||
|
||||
class WDRMobileIE(InfoExtractor):
    """Extractor for direct media URLs on mobile-ondemand.wdr.de.

    All metadata (id, title, age limit) is read from the URL itself; nothing is downloaded.
    """

    _VALID_URL = r'''(?x)
        https?://mobile-ondemand\.wdr\.de/
        .*?/fsk(?P<age_limit>[0-9]+)
        /[0-9]+/[0-9]+/
        (?P<id>[0-9]+)_(?P<title>[0-9]+)'''
    IE_NAME = 'wdr:mobile'
    _WORKING = False  # no such domain
    _TEST = {
        'url': 'http://mobile-ondemand.wdr.de/CMS2010/mdb/ondemand/weltweit/fsk0/42/421735/421735_4283021.mp4',
        'info_dict': {
            'title': '4283021',
            'id': '421735',
            'ext': 'mp4',
            'age_limit': 0,
        },
        'skip': 'Problems with loading data.',
    }

    def _real_extract(self, url):
        video_id, title, age_limit = self._match_valid_url(url).group('id', 'title', 'age_limit')
        return {
            'id': video_id,
            'title': title,
            'age_limit': int(age_limit),
            'url': url,
            # The media is requested with a "mobile" User-Agent
            'http_headers': {
                'User-Agent': 'mobile',
            },
        }
|
||||
|
||||
@ -1,86 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class WeyyakIE(InfoExtractor):
    """Extractor for weyyak.com episodes and movies."""

    _VALID_URL = r'https?://weyyak\.com/(?P<lang>\w+)/(?:player/)?(?P<type>episode|movie)/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://weyyak.com/en/player/episode/1341952/Ribat-Al-Hob-Episode49',
            'md5': '0caf55c1a615531c8fe60f146ae46849',
            'info_dict': {
                'id': '1341952',
                'ext': 'mp4',
                'title': 'Ribat Al Hob',
                'duration': 2771,
                'alt_title': 'رباط الحب',
                'season': 'Season 1',
                'season_number': 1,
                'episode': 'Episode 49',
                'episode_number': 49,
                'timestamp': 1485907200,
                'upload_date': '20170201',
                'thumbnail': r're:^https://content\.weyyak\.com/.+/poster-image',
                'categories': ['Drama', 'Thrillers', 'Romance'],
                'tags': 'count:8',
            },
        },
        {
            'url': 'https://weyyak.com/en/movie/233255/8-Seconds',
            'md5': 'fe740ae0f63e4d1c8a7fc147a410c564',
            'info_dict': {
                'id': '233255',
                'ext': 'mp4',
                'title': '8 Seconds',
                'duration': 6490,
                'alt_title': '8 ثواني',
                'description': 'md5:45b83a155c30b49950624c7e99600b9d',
                'age_limit': 15,
                'release_year': 2015,
                'timestamp': 1683106031,
                'upload_date': '20230503',
                'thumbnail': r're:^https://content\.weyyak\.com/.+/poster-image',
                'categories': ['Drama', 'Social'],
                'cast': ['Ceylin Adiyaman', 'Esra Inal'],
            },
        },
    ]

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        video_id, lang = mobj.group('id', 'lang')

        # Episodes and movies are served from different API endpoints
        if mobj.group('type') == 'episode':
            api_path = 'episode/'
        else:
            api_path = 'contents/moviedetails?contentkey='
        data = self._download_json(
            f'https://msapifo-prod-me.weyyak.z5.com/v1/{lang}/{api_path}{video_id}', video_id)['data']

        # The content details carry the id used to look up the HLS manifest URL
        playback = self._download_json(
            f'https://api-weyyak.akamaized.net/get_info/{data["video_id"]}',
            video_id, 'Extracting video details')
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(playback['url_video'], video_id)

        metadata = traverse_obj(data, {
            'title': ('title', {str}),
            'alt_title': ('translated_title', {str}),
            'description': ('synopsis', {str}),
            'duration': ('length', {float_or_none}),
            'age_limit': ('age_rating', {parse_age_limit}),
            'season_number': ('season_number', {int_or_none}),
            'episode_number': ('episode_number', {int_or_none}),
            'thumbnail': ('imagery', 'thumbnail', {url_or_none}),
            'categories': ('genres', ..., {str}),
            'tags': ('tags', ..., {str}),
            'cast': (('main_actor', 'main_actress'), {str}),
            'timestamp': ('insertedAt', {unified_timestamp}),
            'release_year': ('production_year', {int_or_none}),
        })

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **metadata,
        }
|
||||
@ -1,114 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
find_xpath_attr,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
|
||||
class XstreamIE(InfoExtractor):
    """Extractor for videos published through the Xstream frontend Atom feed.

    Accepts both ``xstream:<partner_id>:<id>`` pseudo-URLs and
    ``frontend.xstream.(dk|net)`` feed URLs.
    """

    _VALID_URL = r'''(?x)
                    (?:
                        xstream:|
                        https?://frontend\.xstream\.(?:dk|net)/
                    )
                    (?P<partner_id>[^/]+)
                    (?:
                        :|
                        /feed/video/\?.*?\bid=
                    )
                    (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'http://frontend.xstream.dk/btno/feed/video/?platform=web&id=86588',
        'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
        'info_dict': {
            'id': '86588',
            'ext': 'mov',
            'title': 'Otto Wollertsen',
            'description': 'Vestlendingen Otto Fredrik Wollertsen',
            'timestamp': 1430473209,
            'upload_date': '20150501',
        },
    }, {
        'url': 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=21039',
        'only_matching': True,
    }]

    def _extract_video_info(self, partner_id, video_id):
        """Download the feed entry for *video_id* of *partner_id* and build the info dict.

        Returns a dict with ``id``, ``title``, ``description``, ``timestamp``,
        ``formats`` and ``thumbnails``.
        """
        data = self._download_xml(
            f'http://frontend.xstream.dk/{partner_id}/feed/video/?platform=web&id={video_id}',
            video_id)

        NS_MAP = {
            'atom': 'http://www.w3.org/2005/Atom',
            'xt': 'http://xstream.dk/',
            'media': 'http://search.yahoo.com/mrss/',
        }

        def ns(path):
            # Expand the namespace prefixes used in the feed
            return xpath_with_ns(path, NS_MAP)

        entry = data.find(ns('./atom:entry'))

        title = xpath_text(entry, ns('./atom:title'), 'title')
        description = xpath_text(entry, ns('./atom:summary'), 'description')
        timestamp = parse_iso8601(xpath_text(
            entry, ns('./atom:published'), 'upload date'))

        # An entry without a media:group simply contributes no media formats or thumbnails
        media_group = entry.find(ns('./media:group'))
        if media_group is None:
            media_contents, splashes = [], []
        else:
            media_contents = media_group.findall(ns('./media:content'))
            splashes = media_group.findall(ns('./xt:splash'))

        formats = []
        for media_content in media_contents:
            media_url = media_content.get('url')
            if not media_url:
                continue
            tbr = int_or_none(media_content.get('bitrate'))
            mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
            if mobj:
                formats.append({
                    'url': mobj.group('url'),
                    'play_path': f'mp4:{mobj.group("playpath")}',
                    'app': mobj.group('app'),
                    'ext': 'flv',
                    'tbr': tbr,
                    # The bitrate attribute is optional; '%d' formatting of None would raise
                    'format_id': f'rtmp-{tbr}' if tbr is not None else 'rtmp',
                })
            else:
                formats.append({
                    'url': media_url,
                    'tbr': tbr,
                })

        # The link marked rel="original" is added as a preferred format
        link = find_xpath_attr(entry, ns('./atom:link'), 'rel', 'original')
        if link is not None:
            formats.append({
                'url': link.get('href'),
                'format_id': link.get('rel'),
                'quality': 1,
            })

        thumbnails = [{
            'url': splash.get('url'),
            'width': int_or_none(splash.get('width')),
            'height': int_or_none(splash.get('height')),
        } for splash in splashes]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'formats': formats,
            'thumbnails': thumbnails,
        }

    def _real_extract(self, url):
        partner_id, video_id = self._match_valid_url(url).group('partner_id', 'id')
        return self._extract_video_info(partner_id, video_id)
|
||||
@ -1,269 +0,0 @@
|
||||
import json
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
jwt_decode_hs256,
|
||||
parse_age_limit,
|
||||
str_or_none,
|
||||
try_call,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class Zee5IE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
zee5:|
|
||||
https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
|
||||
(?:
|
||||
(?:tv-shows|kids|web-series|zee5originals)(?:/[^#/?]+){3}
|
||||
|(?:movies|kids|videos|news|music-videos)/(?!kids-shows)[^#/?]+
|
||||
)/(?P<display_id>[^#/?]+)/
|
||||
)
|
||||
(?P<id>[^#/?]+)/?(?:$|[?#])
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.zee5.com/movies/details/adavari-matalaku-ardhale-verule/0-0-movie_1143162669',
|
||||
'info_dict': {
|
||||
'id': '0-0-movie_1143162669',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'adavari-matalaku-ardhale-verule',
|
||||
'title': 'Adavari Matalaku Ardhale Verule',
|
||||
'duration': 9360,
|
||||
'description': str,
|
||||
'alt_title': 'Adavari Matalaku Ardhale Verule',
|
||||
'uploader': 'Zee Entertainment Enterprises Ltd',
|
||||
'release_date': '20070427',
|
||||
'upload_date': '20070427',
|
||||
'timestamp': 1177632000,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'episode_number': 0,
|
||||
'episode': 'Episode 0',
|
||||
'tags': list,
|
||||
},
|
||||
'params': {
|
||||
'format': 'bv',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.zee5.com/kids/kids-shows/bandbudh-aur-budbak/0-6-1899/yoga-se-hoga-bandbudh-aur-budbak/0-1-239839',
|
||||
'info_dict': {
|
||||
'id': '0-1-239839',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'yoga-se-hoga-bandbudh-aur-budbak',
|
||||
'title': 'Yoga Se Hoga-Bandbudh aur Budbak',
|
||||
'duration': 659,
|
||||
'description': str,
|
||||
'alt_title': 'Yoga Se Hoga-Bandbudh aur Budbak',
|
||||
'uploader': 'Zee Entertainment Enterprises Ltd',
|
||||
'release_date': '20150101',
|
||||
'upload_date': '20150101',
|
||||
'timestamp': 1420070400,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'series': 'Bandbudh Aur Budbak',
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'season': 'Season 1',
|
||||
'tags': list,
|
||||
},
|
||||
'params': {
|
||||
'format': 'bv',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.zee5.com/hi/tv-shows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730?country=IN',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.zee5.com/global/hi/tv-shows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.zee5.com/web-series/details/mithya/0-6-4z587408/maine-dekhi-hai-uski-mrityu/0-1-6z587412',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.zee5.com/kids/kids-movies/maya-bommalu/0-0-movie_1040370005',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.zee5.com/news/details/jana-sena-chief-pawan-kalyan-shows-slippers-to-ysrcp-leaders/0-0-newsauto_6ettj4242oo0',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.zee5.com/music-videos/details/adhento-gaani-vunnapaatuga-jersey-nani-shraddha-srinath/0-0-56973',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_DEVICE_ID = str(uuid.uuid4())
|
||||
_USER_TOKEN = None
|
||||
_LOGIN_HINT = 'Use "--username <mobile_number>" to login using otp or "--username token" and "--password <user_token>" to login using user token.'
|
||||
_NETRC_MACHINE = 'zee5'
|
||||
_GEO_COUNTRIES = ['IN']
|
||||
_USER_COUNTRY = None
|
||||
|
||||
def _perform_login(self, username, password):
    """Obtain a user token, validate it and remember the user's country.

    Two modes are supported:
    - *username* is a 10-digit number and no token is stored yet: an OTP is
      requested for that number, read via ``_get_tfa_info`` and verified.
    - *username* is ``token`` (case-insensitive) and *password* decodes as a
      JWT: the password is used as the user token directly.

    Raises:
        ExtractorError: (expected) if the OTP cannot be sent or verified, the
            credentials match neither mode, or the token's ``exp`` has passed.
    """
    if len(username) == 10 and username.isdigit() and self._USER_TOKEN is None:
        self.report_login()
        otp_request_json = self._download_json(
            f'https://b2bapi.zee5.com/device/sendotp_v1.php?phoneno=91{username}',
            None, note='Sending OTP')
        if otp_request_json['code'] != 0:
            raise ExtractorError(otp_request_json['message'], expected=True)
        self.to_screen(otp_request_json['message'])

        otp_code = self._get_tfa_info('OTP')
        otp_verify_json = self._download_json(
            f'https://b2bapi.zee5.com/device/verifyotp_v1.php?phoneno=91{username}&otp={otp_code}&guest_token={self._DEVICE_ID}&platform=web',
            None, note='Verifying OTP', fatal=False)
        if not otp_verify_json:
            raise ExtractorError('Unable to verify OTP.', expected=True)
        self._USER_TOKEN = otp_verify_json.get('token')
        if not self._USER_TOKEN:
            # Report what the *verification* response said, not the earlier
            # "OTP sent" message from the request step
            raise ExtractorError(
                otp_verify_json.get('message') or 'Unable to verify OTP.', expected=True)
    elif username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)):
        self._USER_TOKEN = password
    else:
        raise ExtractorError(self._LOGIN_HINT, expected=True)

    token = jwt_decode_hs256(self._USER_TOKEN)
    # A token without an 'exp' claim is treated as already expired
    if token.get('exp', 0) <= int(time.time()):
        raise ExtractorError('User token has expired', expected=True)
    self._USER_COUNTRY = token.get('current_country')
|
||||
|
||||
def _real_extract(self, url):
    """Extract metadata, formats and subtitles for a single Zee5 video.

    Fetches a platform access token, then queries the single-playback API
    using the stored user token (if logged in) or the guest device id.

    Raises:
        ExtractorError: if the asset's business type marks it as premium.
    """
    video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
    access_token_request = self._download_json(
        'https://launchapi.zee5.com/launch?platform_name=web_app',
        video_id, note='Downloading access token')['platform_token']

    # The playback endpoint takes its credentials in the JSON request body.
    data = {
        'x-access-token': access_token_request['token'],
    }
    if self._USER_TOKEN:
        data['Authorization'] = f'bearer {self._USER_TOKEN}'
    else:
        data['X-Z5-Guest-Token'] = self._DEVICE_ID

    json_data = self._download_json(
        'https://spapi.zee5.com/singlePlayback/getDetails/secure', video_id, query={
            'content_id': video_id,
            'device_id': self._DEVICE_ID,
            'platform_name': 'desktop_web',
            'country': self._USER_COUNTRY or self.get_param('geo_bypass_country') or 'IN',
            'check_parental_control': False,
        }, headers={'content-type': 'application/json'}, data=json.dumps(data).encode())
    asset_data = json_data['assetDetails']
    show_data = json_data.get('showDetails', {})
    if 'premium' in asset_data['business_type']:
        raise ExtractorError('Premium content is DRM protected.', expected=True)

    hls_url = asset_data.get('hls_url')
    formats, m3u8_subs = [], {}
    if hls_url:
        formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
            hls_url, video_id, 'mp4', fatal=False)
    else:
        # With metadata_available=True this call is expected to be able to
        # return instead of raising, so formats must not be extracted from
        # a missing URL afterwards.
        self.raise_login_required(self._LOGIN_HINT, metadata_available=True, method=None)

    subtitles = {}
    # `or []` also covers an explicit null value for the key.
    for sub in asset_data.get('subtitle_url') or []:
        sub_url = sub.get('url')
        if not sub_url:
            continue
        subtitles.setdefault(sub.get('language', 'en'), []).append({
            'url': self._proto_relative_url(sub_url),
        })
    subtitles = self._merge_subtitles(subtitles, m3u8_subs)

    return {
        'id': video_id,
        'display_id': display_id,
        'title': asset_data['title'],
        'formats': formats,
        'subtitles': subtitles,
        'duration': int_or_none(asset_data.get('duration')),
        'description': str_or_none(asset_data.get('description')),
        'alt_title': str_or_none(asset_data.get('original_title')),
        'uploader': str_or_none(asset_data.get('content_owner')),
        'age_limit': parse_age_limit(asset_data.get('age_rating')),
        'release_date': unified_strdate(asset_data.get('release_date')),
        'timestamp': unified_timestamp(asset_data.get('release_date')),
        'thumbnail': url_or_none(asset_data.get('image_url')),
        'series': str_or_none(asset_data.get('tvshow_name')),
        # `season_number` below indexes `seasons` as a list, so read the title
        # from the first entry too; the mapping-style lookup is kept as a
        # fallback in case the API returns a single object.
        'season': try_get(
            show_data,
            (lambda x: x['seasons'][0]['title'], lambda x: x['seasons']['title']),
            str),
        'season_number': int_or_none(try_get(show_data, lambda x: x['seasons'][0]['orderid'])),
        'episode_number': int_or_none(try_get(asset_data, lambda x: x['orderid'])),
        'tags': try_get(asset_data, lambda x: x['tags'], list),
    }
|
||||
|
||||
|
||||
class Zee5SeriesIE(InfoExtractor):
    """Playlist extractor for Zee5 shows; yields every episode of every season."""

    IE_NAME = 'zee5:series'
    _VALID_URL = r'''(?x)
                     (?:
                        zee5:series:|
                        https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
                        (?:tv-shows|web-series|kids|zee5originals)/(?!kids-movies)(?:[^#/?]+/){2}
                     )
                     (?P<id>[^#/?]+)(?:/episodes)?/?(?:$|[?#])
                     '''
    _TESTS = [{
        'url': 'https://www.zee5.com/kids/kids-shows/bandbudh-aur-budbak/0-6-1899',
        'playlist_mincount': 156,
        'info_dict': {
            'id': '0-6-1899',
        },
    }, {
        'url': 'https://www.zee5.com/tv-shows/details/bhabi-ji-ghar-par-hai/0-6-199',
        'playlist_mincount': 1500,
        'info_dict': {
            'id': '0-6-199',
        },
    }, {
        'url': 'https://www.zee5.com/tv-shows/details/agent-raghav-crime-branch/0-6-965',
        'playlist_mincount': 24,
        'info_dict': {
            'id': '0-6-965',
        },
    }, {
        'url': 'https://www.zee5.com/ta/tv-shows/details/nagabhairavi/0-6-3201',
        'playlist_mincount': 3,
        'info_dict': {
            'id': '0-6-3201',
        },
    }, {
        'url': 'https://www.zee5.com/global/hi/tv-shows/details/khwaabon-ki-zamin-par/0-6-270',
        'playlist_mincount': 150,
        'info_dict': {
            'id': '0-6-270',
        },
    }, {
        'url': 'https://www.zee5.com/tv-shows/details/chala-hawa-yeu-dya-ladies-zindabaad/0-6-2943/episodes',
        'only_matching': True,
    }, {
        'url': 'https://www.zee5.com/web-series/details/mithya/0-6-4z587408',
        'only_matching': True,
    }]

    def _entries(self, show_id):
        """Yield a url_result for each episode of the show `show_id`.

        Downloads the show document, then follows each season's paginated
        episode listing via the `next_episode_api` link until it runs out.
        """
        access_token_request = self._download_json(
            'https://launchapi.zee5.com/launch?platform_name=web_app',
            show_id, note='Downloading access token')['platform_token']
        headers = {
            'X-Access-Token': access_token_request['token'],
            'Referer': 'https://www.zee5.com/',
        }
        show_url = f'https://gwapi.zee5.com/content/tvshow/{show_id}?translation=en&country=IN'

        # Counts pages across all seasons; only used for progress notes.
        page_num = 0
        show_json = self._download_json(show_url, video_id=show_id, headers=headers)
        for season in show_json.get('seasons') or []:
            season_id = try_get(season, lambda x: x['id'], str)
            if not season_id:
                # Without an id the listing URL would carry `season_id=None`.
                continue
            next_url = f'https://gwapi.zee5.com/content/tvshow/?season_id={season_id}&type=episode&translation=en&country=IN&on_air=false&asset_subtype=tvshow&page=1&limit=100'
            while next_url:
                page_num += 1
                episodes_json = self._download_json(
                    next_url, video_id=show_id, headers=headers,
                    note=f'Downloading JSON metadata page {page_num}')
                for episode in try_get(episodes_json, lambda x: x['episode'], list) or []:
                    video_id = try_get(episode, lambda x: x['id'])
                    if not video_id:
                        # Skip malformed entries rather than yielding `zee5:None`.
                        continue
                    yield self.url_result(
                        f'zee5:{video_id}',
                        ie=Zee5IE.ie_key(), video_id=video_id)
                # A missing or invalid next link ends pagination for this season.
                next_url = url_or_none(episodes_json.get('next_episode_api'))

    def _real_extract(self, url):
        """Return a playlist of all episodes for the show matched by `url`."""
        show_id = self._match_id(url)
        return self.playlist_result(self._entries(show_id), playlist_id=show_id)
|
||||
Loading…
x
Reference in New Issue
Block a user