mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-06-28 05:35:21 +00:00
Compare commits
19 Commits
1e086fe17e
...
8997319fc7
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8997319fc7 | ||
|
|
dfcbda9aff | ||
|
|
25cd7c1ecb | ||
|
|
28f04e8a5e | ||
|
|
a3e91df30a | ||
|
|
80736b9c90 | ||
|
|
1ae6bff564 | ||
|
|
b37ff4de5b | ||
|
|
3690e91265 | ||
|
|
8cb08028f5 | ||
|
|
1cf39ddf3d | ||
|
|
c2d6659d10 | ||
|
|
26feac3dd1 | ||
|
|
70599e53b7 | ||
|
|
8d127b18f8 | ||
|
|
7d05aa99c6 | ||
|
|
36da6360e1 | ||
|
|
e7e3b7a55c | ||
|
|
dce8234624 |
2
.github/workflows/quick-test.yml
vendored
2
.github/workflows/quick-test.yml
vendored
@ -38,3 +38,5 @@ jobs:
|
|||||||
run: ruff check --output-format github .
|
run: ruff check --output-format github .
|
||||||
- name: Run autopep8
|
- name: Run autopep8
|
||||||
run: autopep8 --diff .
|
run: autopep8 --diff .
|
||||||
|
- name: Check file mode
|
||||||
|
run: git ls-files --format="%(objectmode) %(path)" yt_dlp/ | ( ! grep -v "^100644" )
|
||||||
|
|||||||
@ -1799,9 +1799,6 @@ The following extractors use this feature:
|
|||||||
#### vikichannel
|
#### vikichannel
|
||||||
* `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers`
|
* `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers`
|
||||||
|
|
||||||
#### niconico
|
|
||||||
* `segment_duration`: Segment duration in milliseconds for HLS-DMC formats. Use it at your own risk since this feature **may result in your account termination.**
|
|
||||||
|
|
||||||
#### youtubewebarchive
|
#### youtubewebarchive
|
||||||
* `check_all`: Try to check more at the cost of more requests. One or more of `thumbnails`, `captures`
|
* `check_all`: Try to check more at the cost of more requests. One or more of `thumbnails`, `captures`
|
||||||
|
|
||||||
@ -2153,7 +2150,7 @@ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
|||||||
|
|
||||||
* **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs will be now preferred instead of simply using larger bitrate. Furthermore, you can now specify the sort order using `-S`. This allows for much easier format selection than what is possible by simply using `--format` ([examples](#format-selection-examples))
|
* **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs will be now preferred instead of simply using larger bitrate. Furthermore, you can now specify the sort order using `-S`. This allows for much easier format selection than what is possible by simply using `--format` ([examples](#format-selection-examples))
|
||||||
|
|
||||||
* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that NicoNico livestreams are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.
|
* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.
|
||||||
|
|
||||||
* **YouTube improvements**:
|
* **YouTube improvements**:
|
||||||
* Supports Clips, Stories (`ytstories:<channel UCID>`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefixes (`ytsearch:`, `ytsearchdate:`)**\***, Mixes, and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`)
|
* Supports Clips, Stories (`ytstories:<channel UCID>`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefixes (`ytsearch:`, `ytsearchdate:`)**\***, Mixes, and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`)
|
||||||
|
|||||||
@ -30,7 +30,7 @@ from .hls import HlsFD
|
|||||||
from .http import HttpFD
|
from .http import HttpFD
|
||||||
from .ism import IsmFD
|
from .ism import IsmFD
|
||||||
from .mhtml import MhtmlFD
|
from .mhtml import MhtmlFD
|
||||||
from .niconico import NiconicoDmcFD, NiconicoLiveFD
|
from .niconico import NiconicoLiveFD
|
||||||
from .rtmp import RtmpFD
|
from .rtmp import RtmpFD
|
||||||
from .rtsp import RtspFD
|
from .rtsp import RtspFD
|
||||||
from .websocket import WebSocketFragmentFD
|
from .websocket import WebSocketFragmentFD
|
||||||
@ -50,7 +50,6 @@ PROTOCOL_MAP = {
|
|||||||
'http_dash_segments_generator': DashSegmentsFD,
|
'http_dash_segments_generator': DashSegmentsFD,
|
||||||
'ism': IsmFD,
|
'ism': IsmFD,
|
||||||
'mhtml': MhtmlFD,
|
'mhtml': MhtmlFD,
|
||||||
'niconico_dmc': NiconicoDmcFD,
|
|
||||||
'niconico_live': NiconicoLiveFD,
|
'niconico_live': NiconicoLiveFD,
|
||||||
'fc2_live': FC2LiveFD,
|
'fc2_live': FC2LiveFD,
|
||||||
'websocket_frag': WebSocketFragmentFD,
|
'websocket_frag': WebSocketFragmentFD,
|
||||||
@ -67,7 +66,6 @@ def shorten_protocol_name(proto, simplify=False):
|
|||||||
'rtmp_ffmpeg': 'rtmpF',
|
'rtmp_ffmpeg': 'rtmpF',
|
||||||
'http_dash_segments': 'dash',
|
'http_dash_segments': 'dash',
|
||||||
'http_dash_segments_generator': 'dashG',
|
'http_dash_segments_generator': 'dashG',
|
||||||
'niconico_dmc': 'dmc',
|
|
||||||
'websocket_frag': 'WSfrag',
|
'websocket_frag': 'WSfrag',
|
||||||
}
|
}
|
||||||
if simplify:
|
if simplify:
|
||||||
|
|||||||
@ -2,60 +2,12 @@ import json
|
|||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from . import get_suitable_downloader
|
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
from .external import FFmpegFD
|
from .external import FFmpegFD
|
||||||
from ..networking import Request
|
from ..networking import Request
|
||||||
from ..utils import DownloadError, str_or_none, try_get
|
from ..utils import DownloadError, str_or_none, try_get
|
||||||
|
|
||||||
|
|
||||||
class NiconicoDmcFD(FileDownloader):
|
|
||||||
""" Downloading niconico douga from DMC with heartbeat """
|
|
||||||
|
|
||||||
def real_download(self, filename, info_dict):
|
|
||||||
from ..extractor.niconico import NiconicoIE
|
|
||||||
|
|
||||||
self.to_screen(f'[{self.FD_NAME}] Downloading from DMC')
|
|
||||||
ie = NiconicoIE(self.ydl)
|
|
||||||
info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
|
|
||||||
|
|
||||||
fd = get_suitable_downloader(info_dict, params=self.params)(self.ydl, self.params)
|
|
||||||
|
|
||||||
success = download_complete = False
|
|
||||||
timer = [None]
|
|
||||||
heartbeat_lock = threading.Lock()
|
|
||||||
heartbeat_url = heartbeat_info_dict['url']
|
|
||||||
heartbeat_data = heartbeat_info_dict['data'].encode()
|
|
||||||
heartbeat_interval = heartbeat_info_dict.get('interval', 30)
|
|
||||||
|
|
||||||
request = Request(heartbeat_url, heartbeat_data)
|
|
||||||
|
|
||||||
def heartbeat():
|
|
||||||
try:
|
|
||||||
self.ydl.urlopen(request).read()
|
|
||||||
except Exception:
|
|
||||||
self.to_screen(f'[{self.FD_NAME}] Heartbeat failed')
|
|
||||||
|
|
||||||
with heartbeat_lock:
|
|
||||||
if not download_complete:
|
|
||||||
timer[0] = threading.Timer(heartbeat_interval, heartbeat)
|
|
||||||
timer[0].start()
|
|
||||||
|
|
||||||
heartbeat_info_dict['ping']()
|
|
||||||
self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval))
|
|
||||||
try:
|
|
||||||
heartbeat()
|
|
||||||
if type(fd).__name__ == 'HlsFD':
|
|
||||||
info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0])
|
|
||||||
success = fd.real_download(filename, info_dict)
|
|
||||||
finally:
|
|
||||||
if heartbeat_lock:
|
|
||||||
with heartbeat_lock:
|
|
||||||
timer[0].cancel()
|
|
||||||
download_complete = True
|
|
||||||
return success
|
|
||||||
|
|
||||||
|
|
||||||
class NiconicoLiveFD(FileDownloader):
|
class NiconicoLiveFD(FileDownloader):
|
||||||
""" Downloads niconico live without being stopped """
|
""" Downloads niconico live without being stopped """
|
||||||
|
|
||||||
|
|||||||
@ -1042,6 +1042,7 @@ from .limelight import (
|
|||||||
LimelightMediaIE,
|
LimelightMediaIE,
|
||||||
)
|
)
|
||||||
from .linkedin import (
|
from .linkedin import (
|
||||||
|
LinkedInEventsIE,
|
||||||
LinkedInIE,
|
LinkedInIE,
|
||||||
LinkedInLearningCourseIE,
|
LinkedInLearningCourseIE,
|
||||||
LinkedInLearningIE,
|
LinkedInLearningIE,
|
||||||
|
|||||||
@ -7,6 +7,7 @@ from ..utils import (
|
|||||||
join_nonempty,
|
join_nonempty,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
|
parse_resolution,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urljoin,
|
urljoin,
|
||||||
@ -110,24 +111,23 @@ class BpbIE(InfoExtractor):
|
|||||||
|
|
||||||
return attributes
|
return attributes
|
||||||
|
|
||||||
@staticmethod
|
def _process_source(self, source):
|
||||||
def _process_source(source):
|
|
||||||
url = url_or_none(source['src'])
|
url = url_or_none(source['src'])
|
||||||
if not url:
|
if not url:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
source_type = source.get('type', '')
|
source_type = source.get('type', '')
|
||||||
extension = mimetype2ext(source_type)
|
extension = mimetype2ext(source_type)
|
||||||
is_video = source_type.startswith('video')
|
note = self._search_regex(r'[_-]([a-z]+)\.[\da-z]+(?:$|\?)', url, 'note', default=None)
|
||||||
note = url.rpartition('.')[0].rpartition('_')[2] if is_video else None
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'url': url,
|
'url': url,
|
||||||
'ext': extension,
|
'ext': extension,
|
||||||
'vcodec': None if is_video else 'none',
|
'vcodec': None if source_type.startswith('video') else 'none',
|
||||||
'quality': 10 if note == 'high' else 0,
|
'quality': 10 if note == 'high' else 0,
|
||||||
'format_note': note,
|
'format_note': note,
|
||||||
'format_id': join_nonempty(extension, note),
|
'format_id': join_nonempty(extension, note),
|
||||||
|
**parse_resolution(source.get('label')),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|||||||
@ -9,6 +9,7 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
classproperty,
|
classproperty,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
parse_qs,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
@ -91,11 +92,15 @@ class DacastVODIE(DacastBaseIE):
|
|||||||
# Rotates every so often, but hardcode a fallback in case of JS change/breakage before rotation
|
# Rotates every so often, but hardcode a fallback in case of JS change/breakage before rotation
|
||||||
return self._search_regex(
|
return self._search_regex(
|
||||||
r'\bUSP_SIGNING_SECRET\s*=\s*(["\'])(?P<secret>(?:(?!\1).)+)', player_js,
|
r'\bUSP_SIGNING_SECRET\s*=\s*(["\'])(?P<secret>(?:(?!\1).)+)', player_js,
|
||||||
'usp signing secret', group='secret', fatal=False) or 'odnInCGqhvtyRTtIiddxtuRtawYYICZP'
|
'usp signing secret', group='secret', fatal=False) or 'hGDtqMKYVeFdofrAfFmBcrsakaZELajI'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
|
user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
|
||||||
query = {'contentId': f'{user_id}-vod-{video_id}', 'provider': 'universe'}
|
query = {
|
||||||
|
'contentId': f'{user_id}-vod-{video_id}',
|
||||||
|
'provider': 'universe',
|
||||||
|
**traverse_obj(url, ({parse_qs}, 'uss_token', {'signedKey': -1})),
|
||||||
|
}
|
||||||
info = self._download_json(self._API_INFO_URL, video_id, query=query, fatal=False)
|
info = self._download_json(self._API_INFO_URL, video_id, query=query, fatal=False)
|
||||||
access = self._download_json(
|
access = self._download_json(
|
||||||
'https://playback.dacast.com/content/access', video_id,
|
'https://playback.dacast.com/content/access', video_id,
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
import itertools
|
import itertools
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -9,12 +10,12 @@ from ..utils import (
|
|||||||
int_or_none,
|
int_or_none,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
srt_subtitles_timecode,
|
srt_subtitles_timecode,
|
||||||
traverse_obj,
|
|
||||||
try_get,
|
try_get,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
|
from ..utils.traversal import find_elements, require, traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class LinkedInBaseIE(InfoExtractor):
|
class LinkedInBaseIE(InfoExtractor):
|
||||||
@ -277,3 +278,110 @@ class LinkedInLearningCourseIE(LinkedInLearningBaseIE):
|
|||||||
entries, course_slug,
|
entries, course_slug,
|
||||||
course_data.get('title'),
|
course_data.get('title'),
|
||||||
course_data.get('description'))
|
course_data.get('description'))
|
||||||
|
|
||||||
|
|
||||||
|
class LinkedInEventsIE(LinkedInBaseIE):
|
||||||
|
IE_NAME = 'linkedin:events'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?linkedin\.com/events/(?P<id>[\w-]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.linkedin.com/events/7084656651378536448/comments/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '7084656651378536448',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '#37 Aprende a hacer una entrevista en inglés para tu próximo trabajo remoto',
|
||||||
|
'description': '¡Agarra para anotar que se viene tremendo evento!',
|
||||||
|
'duration': 1765,
|
||||||
|
'timestamp': 1689113772,
|
||||||
|
'upload_date': '20230711',
|
||||||
|
'release_timestamp': 1689174012,
|
||||||
|
'release_date': '20230712',
|
||||||
|
'live_status': 'was_live',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.linkedin.com/events/27-02energyfreedombyenergyclub7295762520814874625/comments/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '27-02energyfreedombyenergyclub7295762520814874625',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '27.02 Energy Freedom by Energy Club',
|
||||||
|
'description': 'md5:1292e6f31df998914c293787a02c3b91',
|
||||||
|
'duration': 6420,
|
||||||
|
'timestamp': 1739445333,
|
||||||
|
'upload_date': '20250213',
|
||||||
|
'release_timestamp': 1740657620,
|
||||||
|
'release_date': '20250227',
|
||||||
|
'live_status': 'was_live',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
if not self._get_cookies('https://www.linkedin.com/').get('li_at'):
|
||||||
|
self.raise_login_required()
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
event_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, event_id)
|
||||||
|
|
||||||
|
base_data = traverse_obj(webpage, (
|
||||||
|
{find_elements(tag='code', attr='style', value='display: none')}, ..., {json.loads}, 'included', ...))
|
||||||
|
meta_data = traverse_obj(base_data, (
|
||||||
|
lambda _, v: v['$type'] == 'com.linkedin.voyager.dash.events.ProfessionalEvent', any)) or {}
|
||||||
|
|
||||||
|
live_status = {
|
||||||
|
'PAST': 'was_live',
|
||||||
|
'ONGOING': 'is_live',
|
||||||
|
'FUTURE': 'is_upcoming',
|
||||||
|
}.get(meta_data.get('lifecycleState'))
|
||||||
|
|
||||||
|
if live_status == 'is_upcoming':
|
||||||
|
player_data = {}
|
||||||
|
if event_time := traverse_obj(meta_data, ('displayEventTime', {str})):
|
||||||
|
message = f'This live event is scheduled for {event_time}'
|
||||||
|
else:
|
||||||
|
message = 'This live event has not yet started'
|
||||||
|
self.raise_no_formats(message, expected=True, video_id=event_id)
|
||||||
|
else:
|
||||||
|
# TODO: Add support for audio-only live events
|
||||||
|
player_data = traverse_obj(base_data, (
|
||||||
|
lambda _, v: v['$type'] == 'com.linkedin.videocontent.VideoPlayMetadata',
|
||||||
|
any, {require('video player data')}))
|
||||||
|
|
||||||
|
formats, subtitles = [], {}
|
||||||
|
for prog_fmts in traverse_obj(player_data, ('progressiveStreams', ..., {dict})):
|
||||||
|
for fmt_url in traverse_obj(prog_fmts, ('streamingLocations', ..., 'url', {url_or_none})):
|
||||||
|
formats.append({
|
||||||
|
'url': fmt_url,
|
||||||
|
**traverse_obj(prog_fmts, {
|
||||||
|
'width': ('width', {int_or_none}),
|
||||||
|
'height': ('height', {int_or_none}),
|
||||||
|
'tbr': ('bitRate', {int_or_none(scale=1000)}),
|
||||||
|
'filesize': ('size', {int_or_none}),
|
||||||
|
'ext': ('mediaType', {mimetype2ext}),
|
||||||
|
}),
|
||||||
|
})
|
||||||
|
|
||||||
|
for m3u8_url in traverse_obj(player_data, (
|
||||||
|
'adaptiveStreams', lambda _, v: v['protocol'] == 'HLS', 'masterPlaylists', ..., 'url', {url_or_none},
|
||||||
|
)):
|
||||||
|
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||||
|
m3u8_url, event_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||||
|
formats.extend(fmts)
|
||||||
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': event_id,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'live_status': live_status,
|
||||||
|
**traverse_obj(meta_data, {
|
||||||
|
'title': ('name', {str}),
|
||||||
|
'description': ('description', 'text', {str}),
|
||||||
|
'timestamp': ('createdAt', {int_or_none(scale=1000)}),
|
||||||
|
# timeRange.start is available when the stream is_upcoming
|
||||||
|
'release_timestamp': ('timeRange', 'start', {int_or_none(scale=1000)}),
|
||||||
|
}),
|
||||||
|
**traverse_obj(player_data, {
|
||||||
|
'duration': ('duration', {int_or_none(scale=1000)}),
|
||||||
|
# liveStreamCreatedAt is only available when the stream is_live or was_live
|
||||||
|
'release_timestamp': ('liveStreamCreatedAt', {int_or_none(scale=1000)}),
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
|||||||
@ -365,13 +365,15 @@ mutation initPlaybackSession(
|
|||||||
'All videos are only available to registered users', method='password')
|
'All videos are only available to registered users', method='password')
|
||||||
|
|
||||||
def _set_device_id(self, username):
|
def _set_device_id(self, username):
|
||||||
if not self._device_id:
|
if self._device_id:
|
||||||
self._device_id = self.cache.load(
|
return
|
||||||
self._NETRC_MACHINE, 'device_ids', default={}).get(username)
|
device_id_cache = self.cache.load(self._NETRC_MACHINE, 'device_ids', default={})
|
||||||
|
self._device_id = device_id_cache.get(username)
|
||||||
if self._device_id:
|
if self._device_id:
|
||||||
return
|
return
|
||||||
self._device_id = str(uuid.uuid4())
|
self._device_id = str(uuid.uuid4())
|
||||||
self.cache.store(self._NETRC_MACHINE, 'device_ids', {username: self._device_id})
|
device_id_cache[username] = self._device_id
|
||||||
|
self.cache.store(self._NETRC_MACHINE, 'device_ids', device_id_cache)
|
||||||
|
|
||||||
def _perform_login(self, username, password):
|
def _perform_login(self, username, password):
|
||||||
try:
|
try:
|
||||||
|
|||||||
@ -16,7 +16,6 @@ from ..utils import (
|
|||||||
determine_ext,
|
determine_ext,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
join_nonempty,
|
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
parse_qs,
|
parse_qs,
|
||||||
@ -24,7 +23,6 @@ from ..utils import (
|
|||||||
qualities,
|
qualities,
|
||||||
remove_start,
|
remove_start,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
traverse_obj,
|
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
@ -34,13 +32,70 @@ from ..utils import (
|
|||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
|
from ..utils.traversal import find_element, traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class NiconicoIE(InfoExtractor):
|
class NiconicoBaseIE(InfoExtractor):
|
||||||
|
_GEO_BYPASS = False
|
||||||
|
_GEO_COUNTRIES = ['JP']
|
||||||
|
_LOGIN_BASE = 'https://account.nicovideo.jp'
|
||||||
|
_NETRC_MACHINE = 'niconico'
|
||||||
|
|
||||||
|
@property
|
||||||
|
def is_logged_in(self):
|
||||||
|
return bool(self._get_cookies('https://www.nicovideo.jp').get('user_session'))
|
||||||
|
|
||||||
|
def _raise_login_error(self, message, expected=True):
|
||||||
|
raise ExtractorError(f'Unable to login: {message}', expected=expected)
|
||||||
|
|
||||||
|
def _perform_login(self, username, password):
|
||||||
|
if self.is_logged_in:
|
||||||
|
return
|
||||||
|
|
||||||
|
self._request_webpage(
|
||||||
|
f'{self._LOGIN_BASE}/login', None, 'Requesting session cookies')
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
f'{self._LOGIN_BASE}/login/redirector', None,
|
||||||
|
'Logging in', 'Unable to log in', headers={
|
||||||
|
'Content-Type': 'application/x-www-form-urlencoded',
|
||||||
|
'Referer': f'{self._LOGIN_BASE}/login',
|
||||||
|
}, data=urlencode_postdata({
|
||||||
|
'mail_tel': username,
|
||||||
|
'password': password,
|
||||||
|
}))
|
||||||
|
|
||||||
|
if self.is_logged_in:
|
||||||
|
return
|
||||||
|
elif err_msg := traverse_obj(webpage, (
|
||||||
|
{find_element(cls='notice error')}, {find_element(cls='notice__text')}, {clean_html},
|
||||||
|
)):
|
||||||
|
self._raise_login_error(err_msg or 'Invalid username or password')
|
||||||
|
elif 'oneTimePw' in webpage:
|
||||||
|
post_url = self._search_regex(
|
||||||
|
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', webpage, 'post url', group='url')
|
||||||
|
mfa, urlh = self._download_webpage_handle(
|
||||||
|
urljoin(self._LOGIN_BASE, post_url), None,
|
||||||
|
'Performing MFA', 'Unable to complete MFA', headers={
|
||||||
|
'Content-Type': 'application/x-www-form-urlencoded',
|
||||||
|
}, data=urlencode_postdata({
|
||||||
|
'otp': self._get_tfa_info('6 digit number shown on app'),
|
||||||
|
}))
|
||||||
|
if self.is_logged_in:
|
||||||
|
return
|
||||||
|
elif 'error-code' in parse_qs(urlh.url):
|
||||||
|
err_msg = traverse_obj(mfa, ({find_element(cls='pageMainMsg')}, {clean_html}))
|
||||||
|
self._raise_login_error(err_msg or 'MFA session expired')
|
||||||
|
elif 'formError' in mfa:
|
||||||
|
err_msg = traverse_obj(mfa, (
|
||||||
|
{find_element(cls='formError')}, {find_element(tag='div')}, {clean_html}))
|
||||||
|
self._raise_login_error(err_msg or 'MFA challenge failed')
|
||||||
|
|
||||||
|
self._raise_login_error('Unexpected login error', expected=False)
|
||||||
|
|
||||||
|
|
||||||
|
class NiconicoIE(NiconicoBaseIE):
|
||||||
IE_NAME = 'niconico'
|
IE_NAME = 'niconico'
|
||||||
IE_DESC = 'ニコニコ動画'
|
IE_DESC = 'ニコニコ動画'
|
||||||
_GEO_COUNTRIES = ['JP']
|
|
||||||
_GEO_BYPASS = False
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
||||||
@ -180,229 +235,6 @@ class NiconicoIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
_VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
_VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||||
_NETRC_MACHINE = 'niconico'
|
|
||||||
_API_HEADERS = {
|
|
||||||
'X-Frontend-ID': '6',
|
|
||||||
'X-Frontend-Version': '0',
|
|
||||||
'X-Niconico-Language': 'en-us',
|
|
||||||
'Referer': 'https://www.nicovideo.jp/',
|
|
||||||
'Origin': 'https://www.nicovideo.jp',
|
|
||||||
}
|
|
||||||
|
|
||||||
def _perform_login(self, username, password):
|
|
||||||
login_ok = True
|
|
||||||
login_form_strs = {
|
|
||||||
'mail_tel': username,
|
|
||||||
'password': password,
|
|
||||||
}
|
|
||||||
self._request_webpage(
|
|
||||||
'https://account.nicovideo.jp/login', None,
|
|
||||||
note='Acquiring Login session')
|
|
||||||
page = self._download_webpage(
|
|
||||||
'https://account.nicovideo.jp/login/redirector?show_button_twitter=1&site=niconico&show_button_facebook=1', None,
|
|
||||||
note='Logging in', errnote='Unable to log in',
|
|
||||||
data=urlencode_postdata(login_form_strs),
|
|
||||||
headers={
|
|
||||||
'Referer': 'https://account.nicovideo.jp/login',
|
|
||||||
'Content-Type': 'application/x-www-form-urlencoded',
|
|
||||||
})
|
|
||||||
if 'oneTimePw' in page:
|
|
||||||
post_url = self._search_regex(
|
|
||||||
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page, 'post url', group='url')
|
|
||||||
page = self._download_webpage(
|
|
||||||
urljoin('https://account.nicovideo.jp', post_url), None,
|
|
||||||
note='Performing MFA', errnote='Unable to complete MFA',
|
|
||||||
data=urlencode_postdata({
|
|
||||||
'otp': self._get_tfa_info('6 digits code'),
|
|
||||||
}), headers={
|
|
||||||
'Content-Type': 'application/x-www-form-urlencoded',
|
|
||||||
})
|
|
||||||
if 'oneTimePw' in page or 'formError' in page:
|
|
||||||
err_msg = self._html_search_regex(
|
|
||||||
r'formError["\']+>(.*?)</div>', page, 'form_error',
|
|
||||||
default='There\'s an error but the message can\'t be parsed.',
|
|
||||||
flags=re.DOTALL)
|
|
||||||
self.report_warning(f'Unable to log in: MFA challenge failed, "{err_msg}"')
|
|
||||||
return False
|
|
||||||
login_ok = 'class="notice error"' not in page
|
|
||||||
if not login_ok:
|
|
||||||
self.report_warning('Unable to log in: bad username or password')
|
|
||||||
return login_ok
|
|
||||||
|
|
||||||
def _get_heartbeat_info(self, info_dict):
|
|
||||||
video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
|
|
||||||
dmc_protocol = info_dict['expected_protocol']
|
|
||||||
|
|
||||||
api_data = (
|
|
||||||
info_dict.get('_api_data')
|
|
||||||
or self._parse_json(
|
|
||||||
self._html_search_regex(
|
|
||||||
'data-api-data="([^"]+)"',
|
|
||||||
self._download_webpage('https://www.nicovideo.jp/watch/' + video_id, video_id),
|
|
||||||
'API data', default='{}'),
|
|
||||||
video_id))
|
|
||||||
|
|
||||||
session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
|
|
||||||
session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])
|
|
||||||
|
|
||||||
def ping():
|
|
||||||
tracking_id = traverse_obj(api_data, ('media', 'delivery', 'trackingId'))
|
|
||||||
if tracking_id:
|
|
||||||
tracking_url = update_url_query('https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', {'t': tracking_id})
|
|
||||||
watch_request_response = self._download_json(
|
|
||||||
tracking_url, video_id,
|
|
||||||
note='Acquiring permission for downloading video', fatal=False,
|
|
||||||
headers=self._API_HEADERS)
|
|
||||||
if traverse_obj(watch_request_response, ('meta', 'status')) != 200:
|
|
||||||
self.report_warning('Failed to acquire permission for playing video. Video download may fail.')
|
|
||||||
|
|
||||||
yesno = lambda x: 'yes' if x else 'no'
|
|
||||||
|
|
||||||
if dmc_protocol == 'http':
|
|
||||||
protocol = 'http'
|
|
||||||
protocol_parameters = {
|
|
||||||
'http_output_download_parameters': {
|
|
||||||
'use_ssl': yesno(session_api_data['urls'][0]['isSsl']),
|
|
||||||
'use_well_known_port': yesno(session_api_data['urls'][0]['isWellKnownPort']),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
elif dmc_protocol == 'hls':
|
|
||||||
protocol = 'm3u8'
|
|
||||||
segment_duration = try_get(self._configuration_arg('segment_duration'), lambda x: int(x[0])) or 6000
|
|
||||||
parsed_token = self._parse_json(session_api_data['token'], video_id)
|
|
||||||
encryption = traverse_obj(api_data, ('media', 'delivery', 'encryption'))
|
|
||||||
protocol_parameters = {
|
|
||||||
'hls_parameters': {
|
|
||||||
'segment_duration': segment_duration,
|
|
||||||
'transfer_preset': '',
|
|
||||||
'use_ssl': yesno(session_api_data['urls'][0]['isSsl']),
|
|
||||||
'use_well_known_port': yesno(session_api_data['urls'][0]['isWellKnownPort']),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
if 'hls_encryption' in parsed_token and encryption:
|
|
||||||
protocol_parameters['hls_parameters']['encryption'] = {
|
|
||||||
parsed_token['hls_encryption']: {
|
|
||||||
'encrypted_key': encryption['encryptedKey'],
|
|
||||||
'key_uri': encryption['keyUri'],
|
|
||||||
},
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
protocol = 'm3u8_native'
|
|
||||||
else:
|
|
||||||
raise ExtractorError(f'Unsupported DMC protocol: {dmc_protocol}')
|
|
||||||
|
|
||||||
session_response = self._download_json(
|
|
||||||
session_api_endpoint['url'], video_id,
|
|
||||||
query={'_format': 'json'},
|
|
||||||
headers={'Content-Type': 'application/json'},
|
|
||||||
note='Downloading JSON metadata for {}'.format(info_dict['format_id']),
|
|
||||||
data=json.dumps({
|
|
||||||
'session': {
|
|
||||||
'client_info': {
|
|
||||||
'player_id': session_api_data.get('playerId'),
|
|
||||||
},
|
|
||||||
'content_auth': {
|
|
||||||
'auth_type': try_get(session_api_data, lambda x: x['authTypes'][session_api_data['protocols'][0]]),
|
|
||||||
'content_key_timeout': session_api_data.get('contentKeyTimeout'),
|
|
||||||
'service_id': 'nicovideo',
|
|
||||||
'service_user_id': session_api_data.get('serviceUserId'),
|
|
||||||
},
|
|
||||||
'content_id': session_api_data.get('contentId'),
|
|
||||||
'content_src_id_sets': [{
|
|
||||||
'content_src_ids': [{
|
|
||||||
'src_id_to_mux': {
|
|
||||||
'audio_src_ids': [audio_src_id],
|
|
||||||
'video_src_ids': [video_src_id],
|
|
||||||
},
|
|
||||||
}],
|
|
||||||
}],
|
|
||||||
'content_type': 'movie',
|
|
||||||
'content_uri': '',
|
|
||||||
'keep_method': {
|
|
||||||
'heartbeat': {
|
|
||||||
'lifetime': session_api_data.get('heartbeatLifetime'),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
'priority': session_api_data['priority'],
|
|
||||||
'protocol': {
|
|
||||||
'name': 'http',
|
|
||||||
'parameters': {
|
|
||||||
'http_parameters': {
|
|
||||||
'parameters': protocol_parameters,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
'recipe_id': session_api_data.get('recipeId'),
|
|
||||||
'session_operation_auth': {
|
|
||||||
'session_operation_auth_by_signature': {
|
|
||||||
'signature': session_api_data.get('signature'),
|
|
||||||
'token': session_api_data.get('token'),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
'timing_constraint': 'unlimited',
|
|
||||||
},
|
|
||||||
}).encode())
|
|
||||||
|
|
||||||
info_dict['url'] = session_response['data']['session']['content_uri']
|
|
||||||
info_dict['protocol'] = protocol
|
|
||||||
|
|
||||||
# get heartbeat info
|
|
||||||
heartbeat_info_dict = {
|
|
||||||
'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
|
|
||||||
'data': json.dumps(session_response['data']),
|
|
||||||
# interval, convert milliseconds to seconds, then halve to make a buffer.
|
|
||||||
'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000),
|
|
||||||
'ping': ping,
|
|
||||||
}
|
|
||||||
|
|
||||||
return info_dict, heartbeat_info_dict
|
|
||||||
|
|
||||||
def _extract_format_for_quality(self, video_id, audio_quality, video_quality, dmc_protocol):
|
|
||||||
|
|
||||||
if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
|
|
||||||
return None
|
|
||||||
|
|
||||||
format_id = '-'.join(
|
|
||||||
[remove_start(s['id'], 'archive_') for s in (video_quality, audio_quality)] + [dmc_protocol])
|
|
||||||
|
|
||||||
vid_qual_label = traverse_obj(video_quality, ('metadata', 'label'))
|
|
||||||
|
|
||||||
return {
|
|
||||||
'url': 'niconico_dmc:{}/{}/{}'.format(video_id, video_quality['id'], audio_quality['id']),
|
|
||||||
'format_id': format_id,
|
|
||||||
'format_note': join_nonempty('DMC', vid_qual_label, dmc_protocol.upper(), delim=' '),
|
|
||||||
'ext': 'mp4', # Session API are used in HTML5, which always serves mp4
|
|
||||||
'acodec': 'aac',
|
|
||||||
'vcodec': 'h264',
|
|
||||||
**traverse_obj(audio_quality, ('metadata', {
|
|
||||||
'abr': ('bitrate', {float_or_none(scale=1000)}),
|
|
||||||
'asr': ('samplingRate', {int_or_none}),
|
|
||||||
})),
|
|
||||||
**traverse_obj(video_quality, ('metadata', {
|
|
||||||
'vbr': ('bitrate', {float_or_none(scale=1000)}),
|
|
||||||
'height': ('resolution', 'height', {int_or_none}),
|
|
||||||
'width': ('resolution', 'width', {int_or_none}),
|
|
||||||
})),
|
|
||||||
'quality': -2 if 'low' in video_quality['id'] else None,
|
|
||||||
'protocol': 'niconico_dmc',
|
|
||||||
'expected_protocol': dmc_protocol, # XXX: This is not a documented field
|
|
||||||
'http_headers': {
|
|
||||||
'Origin': 'https://www.nicovideo.jp',
|
|
||||||
'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
def _yield_dmc_formats(self, api_data, video_id):
|
|
||||||
dmc_data = traverse_obj(api_data, ('media', 'delivery', 'movie'))
|
|
||||||
audios = traverse_obj(dmc_data, ('audios', ..., {dict}))
|
|
||||||
videos = traverse_obj(dmc_data, ('videos', ..., {dict}))
|
|
||||||
protocols = traverse_obj(dmc_data, ('session', 'protocols', ..., {str}))
|
|
||||||
if not all((audios, videos, protocols)):
|
|
||||||
return
|
|
||||||
|
|
||||||
for audio_quality, video_quality, protocol in itertools.product(audios, videos, protocols):
|
|
||||||
if fmt := self._extract_format_for_quality(video_id, audio_quality, video_quality, protocol):
|
|
||||||
yield fmt
|
|
||||||
|
|
||||||
def _yield_dms_formats(self, api_data, video_id):
|
def _yield_dms_formats(self, api_data, video_id):
|
||||||
fmt_filter = lambda _, v: v['isAvailable'] and v['id']
|
fmt_filter = lambda _, v: v['isAvailable'] and v['id']
|
||||||
@ -485,8 +317,8 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'needs_premium': ('isPremium', {bool}),
|
'needs_premium': ('isPremium', {bool}),
|
||||||
'needs_subscription': ('isAdmission', {bool}),
|
'needs_subscription': ('isAdmission', {bool}),
|
||||||
})) or {'needs_auth': True}))
|
})) or {'needs_auth': True}))
|
||||||
formats = [*self._yield_dmc_formats(api_data, video_id),
|
|
||||||
*self._yield_dms_formats(api_data, video_id)]
|
formats = list(self._yield_dms_formats(api_data, video_id))
|
||||||
if not formats:
|
if not formats:
|
||||||
fail_msg = clean_html(self._html_search_regex(
|
fail_msg = clean_html(self._html_search_regex(
|
||||||
r'<p[^>]+\bclass="fail-message"[^>]*>(?P<msg>.+?)</p>',
|
r'<p[^>]+\bclass="fail-message"[^>]*>(?P<msg>.+?)</p>',
|
||||||
@ -921,7 +753,7 @@ class NiconicoUserIE(InfoExtractor):
|
|||||||
return self.playlist_result(self._entries(list_id), list_id)
|
return self.playlist_result(self._entries(list_id), list_id)
|
||||||
|
|
||||||
|
|
||||||
class NiconicoLiveIE(InfoExtractor):
|
class NiconicoLiveIE(NiconicoBaseIE):
|
||||||
IE_NAME = 'niconico:live'
|
IE_NAME = 'niconico:live'
|
||||||
IE_DESC = 'ニコニコ生放送'
|
IE_DESC = 'ニコニコ生放送'
|
||||||
_VALID_URL = r'https?://(?:sp\.)?live2?\.nicovideo\.jp/(?:watch|gate)/(?P<id>lv\d+)'
|
_VALID_URL = r'https?://(?:sp\.)?live2?\.nicovideo\.jp/(?:watch|gate)/(?P<id>lv\d+)'
|
||||||
|
|||||||
@ -321,6 +321,27 @@ class RaiPlayIE(RaiBaseIE):
|
|||||||
'timestamp': 1348495020,
|
'timestamp': 1348495020,
|
||||||
'upload_date': '20120924',
|
'upload_date': '20120924',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# checking program_info gives false positive for DRM
|
||||||
|
'url': 'https://www.raiplay.it/video/2022/10/Ad-ogni-costo---Un-giorno-in-Pretura---Puntata-del-15102022-1dfd1295-ea38-4bac-b51e-f87e2881693b.html',
|
||||||
|
'md5': '572c6f711b7c5f2d670ba419b4ae3b08',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1dfd1295-ea38-4bac-b51e-f87e2881693b',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Ad ogni costo - Un giorno in Pretura - Puntata del 15/10/2022',
|
||||||
|
'alt_title': 'St 2022/23 - Un giorno in pretura - Ad ogni costo',
|
||||||
|
'description': 'md5:4046d97b2687f74f06a8b8270ba5599f',
|
||||||
|
'uploader': 'Rai 3',
|
||||||
|
'duration': 3773.0,
|
||||||
|
'thumbnail': 'https://www.raiplay.it/dl/img/2022/10/12/1665586539957_2048x2048.png',
|
||||||
|
'creators': ['Rai 3'],
|
||||||
|
'series': 'Un giorno in pretura',
|
||||||
|
'season': '2022/23',
|
||||||
|
'episode': 'Ad ogni costo',
|
||||||
|
'timestamp': 1665507240,
|
||||||
|
'upload_date': '20221011',
|
||||||
|
'release_year': 2025,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
|
'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -340,8 +361,7 @@ class RaiPlayIE(RaiBaseIE):
|
|||||||
media = self._download_json(
|
media = self._download_json(
|
||||||
f'{base}.json', video_id, 'Downloading video JSON')
|
f'{base}.json', video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
if not self.get_param('allow_unplayable_formats'):
|
if traverse_obj(media, ('rights_management', 'rights', 'drm')):
|
||||||
if traverse_obj(media, (('program_info', None), 'rights_management', 'rights', 'drm')):
|
|
||||||
self.report_drm(video_id)
|
self.report_drm(video_id)
|
||||||
|
|
||||||
video = media['video']
|
video = media['video']
|
||||||
|
|||||||
@ -388,7 +388,8 @@ class RedditIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
if entries:
|
if entries:
|
||||||
return self.playlist_result(entries, video_id, **info)
|
return self.playlist_result(entries, video_id, **info)
|
||||||
raise ExtractorError('No media found', expected=True)
|
self.raise_no_formats('No media found', expected=True, video_id=video_id)
|
||||||
|
return {**info, 'id': video_id}
|
||||||
|
|
||||||
# Check if media is hosted on reddit:
|
# Check if media is hosted on reddit:
|
||||||
reddit_video = traverse_obj(data, (
|
reddit_video = traverse_obj(data, (
|
||||||
|
|||||||
@ -2,12 +2,13 @@ import json
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .jwplatform import JWPlatformIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
extract_attributes,
|
|
||||||
js_to_json,
|
js_to_json,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
from ..utils.traversal import find_element, traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class TV2DKIE(InfoExtractor):
|
class TV2DKIE(InfoExtractor):
|
||||||
@ -21,35 +22,46 @@ class TV2DKIE(InfoExtractor):
|
|||||||
tv2fyn|
|
tv2fyn|
|
||||||
tv2east|
|
tv2east|
|
||||||
tv2lorry|
|
tv2lorry|
|
||||||
tv2nord
|
tv2nord|
|
||||||
|
tv2kosmopol
|
||||||
)\.dk/
|
)\.dk/
|
||||||
(:[^/]+/)*
|
(?:[^/?#]+/)*
|
||||||
(?P<id>[^/?\#&]+)
|
(?P<id>[^/?\#&]+)
|
||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.tvsyd.dk/nyheder/28-10-2019/1930/1930-28-okt-2019?autoplay=1#player',
|
'url': 'https://www.tvsyd.dk/nyheder/28-10-2019/1930/1930-28-okt-2019?autoplay=1#player',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0_52jmwa0p',
|
'id': 'sPp5z21q',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '19:30 - 28. okt. 2019',
|
'title': '19:30 - 28. okt. 2019',
|
||||||
'timestamp': 1572290248,
|
'description': '',
|
||||||
|
'thumbnail': 'https://cdn.jwplayer.com/v2/media/sPp5z21q/poster.jpg?width=720',
|
||||||
|
'timestamp': 1572287400,
|
||||||
'upload_date': '20191028',
|
'upload_date': '20191028',
|
||||||
'uploader_id': 'tvsyd',
|
|
||||||
'duration': 1347,
|
|
||||||
'view_count': int,
|
|
||||||
},
|
},
|
||||||
'add_ie': ['Kaltura'],
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.tv2lorry.dk/gadekamp/gadekamp-6-hoejhuse-i-koebenhavn',
|
'url': 'https://www.tv2lorry.dk/gadekamp/gadekamp-6-hoejhuse-i-koebenhavn',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1_7iwll9n0',
|
'id': 'oD9cyq0m',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'upload_date': '20211027',
|
|
||||||
'title': 'Gadekamp #6 - Højhuse i København',
|
'title': 'Gadekamp #6 - Højhuse i København',
|
||||||
'uploader_id': 'tv2lorry',
|
'description': '',
|
||||||
'timestamp': 1635345229,
|
'thumbnail': 'https://cdn.jwplayer.com/v2/media/oD9cyq0m/poster.jpg?width=720',
|
||||||
|
'timestamp': 1635348600,
|
||||||
|
'upload_date': '20211027',
|
||||||
},
|
},
|
||||||
'add_ie': ['Kaltura'],
|
}, {
|
||||||
|
'url': 'https://www.tvsyd.dk/haderslev/x-factor-brodre-fulde-af-selvtillid-er-igen-hjemme-hos-mor-vores-diagnoser-har-vaeret-en-fordel',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'x-factor-brodre-fulde-af-selvtillid-er-igen-hjemme-hos-mor-vores-diagnoser-har-vaeret-en-fordel',
|
||||||
|
},
|
||||||
|
'playlist_count': 2,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.tv2ostjylland.dk/aarhus/dom-kan-fa-alvorlige-konsekvenser',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'dom-kan-fa-alvorlige-konsekvenser',
|
||||||
|
},
|
||||||
|
'playlist_count': 3,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.tv2ostjylland.dk/artikel/minister-gaar-ind-i-sag-om-diabetes-teknologi',
|
'url': 'https://www.tv2ostjylland.dk/artikel/minister-gaar-ind-i-sag-om-diabetes-teknologi',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -71,40 +83,22 @@ class TV2DKIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.tv2nord.dk/artikel/dybt-uacceptabelt',
|
'url': 'https://www.tv2nord.dk/artikel/dybt-uacceptabelt',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.tv2kosmopol.dk/metropolen/chaufforer-beordres-til-at-kore-videre-i-ulovlige-busser-med-rode-advarselslamper',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
search_space = traverse_obj(webpage, {find_element(tag='article')}) or webpage
|
||||||
|
|
||||||
entries = []
|
player_ids = traverse_obj(
|
||||||
|
re.findall(r'x-data="(?:video_player|simple_player)\(({[^"]+})', search_space),
|
||||||
|
(..., {js_to_json}, {json.loads}, ('jwpMediaId', 'videoId'), {str}))
|
||||||
|
|
||||||
def add_entry(partner_id, kaltura_id):
|
return self.playlist_from_matches(
|
||||||
entries.append(self.url_result(
|
player_ids, video_id, getter=lambda x: f'jwplatform:{x}', ie=JWPlatformIE)
|
||||||
f'kaltura:{partner_id}:{kaltura_id}', 'Kaltura',
|
|
||||||
video_id=kaltura_id))
|
|
||||||
|
|
||||||
for video_el in re.findall(r'(?s)<[^>]+\bdata-entryid\s*=[^>]*>', webpage):
|
|
||||||
video = extract_attributes(video_el)
|
|
||||||
kaltura_id = video.get('data-entryid')
|
|
||||||
if not kaltura_id:
|
|
||||||
continue
|
|
||||||
partner_id = video.get('data-partnerid')
|
|
||||||
if not partner_id:
|
|
||||||
continue
|
|
||||||
add_entry(partner_id, kaltura_id)
|
|
||||||
if not entries:
|
|
||||||
kaltura_id = self._search_regex(
|
|
||||||
(r'entry_id\s*:\s*["\']([0-9a-z_]+)',
|
|
||||||
r'\\u002FentryId\\u002F(\w+)\\u002F'), webpage, 'kaltura id')
|
|
||||||
partner_id = self._search_regex(
|
|
||||||
(r'\\u002Fp\\u002F(\d+)\\u002F', r'/p/(\d+)/'), webpage,
|
|
||||||
'partner id')
|
|
||||||
add_entry(partner_id, kaltura_id)
|
|
||||||
if len(entries) == 1:
|
|
||||||
return entries[0]
|
|
||||||
return self.playlist_result(entries)
|
|
||||||
|
|
||||||
|
|
||||||
class TV2DKBornholmPlayIE(InfoExtractor):
|
class TV2DKBornholmPlayIE(InfoExtractor):
|
||||||
|
|||||||
@ -1225,8 +1225,8 @@ class TwitchClipsIE(TwitchBaseIE):
|
|||||||
'channel_id': ('broadcaster', 'id', {str}),
|
'channel_id': ('broadcaster', 'id', {str}),
|
||||||
'channel_follower_count': ('broadcaster', 'followers', 'totalCount', {int_or_none}),
|
'channel_follower_count': ('broadcaster', 'followers', 'totalCount', {int_or_none}),
|
||||||
'channel_is_verified': ('broadcaster', 'isPartner', {bool}),
|
'channel_is_verified': ('broadcaster', 'isPartner', {bool}),
|
||||||
'uploader': ('broadcaster', 'displayName', {str}),
|
'uploader': ('curator', 'displayName', {str}),
|
||||||
'uploader_id': ('broadcaster', 'id', {str}),
|
'uploader_id': ('curator', 'id', {str}),
|
||||||
'categories': ('game', 'displayName', {str}, filter, all, filter),
|
'categories': ('game', 'displayName', {str}, filter, all, filter),
|
||||||
}),
|
}),
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1221,20 +1221,10 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
_MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
|
_MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
|
||||||
|
_GRAPHQL_ENDPOINT = '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
|
||||||
@property
|
|
||||||
def _GRAPHQL_ENDPOINT(self):
|
|
||||||
if self.is_logged_in:
|
|
||||||
return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
|
|
||||||
return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
|
|
||||||
|
|
||||||
def _graphql_to_legacy(self, data, twid):
|
def _graphql_to_legacy(self, data, twid):
|
||||||
result = traverse_obj(data, (
|
result = traverse_obj(data, ('tweetResult', 'result', {dict})) or {}
|
||||||
'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
|
|
||||||
lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
|
|
||||||
'tweet_results', 'result', ('tweet', None), {dict},
|
|
||||||
), default={}, get_all=False) if self.is_logged_in else traverse_obj(
|
|
||||||
data, ('tweetResult', 'result', {dict}), default={})
|
|
||||||
|
|
||||||
typename = result.get('__typename')
|
typename = result.get('__typename')
|
||||||
if typename not in ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None):
|
if typename not in ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None):
|
||||||
@ -1278,37 +1268,6 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
|
|
||||||
def _build_graphql_query(self, media_id):
|
def _build_graphql_query(self, media_id):
|
||||||
return {
|
return {
|
||||||
'variables': {
|
|
||||||
'focalTweetId': media_id,
|
|
||||||
'includePromotedContent': True,
|
|
||||||
'with_rux_injections': False,
|
|
||||||
'withBirdwatchNotes': True,
|
|
||||||
'withCommunity': True,
|
|
||||||
'withDownvotePerspective': False,
|
|
||||||
'withQuickPromoteEligibilityTweetFields': True,
|
|
||||||
'withReactionsMetadata': False,
|
|
||||||
'withReactionsPerspective': False,
|
|
||||||
'withSuperFollowsTweetFields': True,
|
|
||||||
'withSuperFollowsUserFields': True,
|
|
||||||
'withV2Timeline': True,
|
|
||||||
'withVoice': True,
|
|
||||||
},
|
|
||||||
'features': {
|
|
||||||
'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
|
|
||||||
'interactive_text_enabled': True,
|
|
||||||
'responsive_web_edit_tweet_api_enabled': True,
|
|
||||||
'responsive_web_enhance_cards_enabled': True,
|
|
||||||
'responsive_web_graphql_timeline_navigation_enabled': False,
|
|
||||||
'responsive_web_text_conversations_enabled': False,
|
|
||||||
'responsive_web_uc_gql_enabled': True,
|
|
||||||
'standardized_nudges_misinfo': True,
|
|
||||||
'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
|
|
||||||
'tweetypie_unmention_optimization_enabled': True,
|
|
||||||
'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
|
|
||||||
'verified_phone_label_enabled': False,
|
|
||||||
'vibe_api_enabled': True,
|
|
||||||
},
|
|
||||||
} if self.is_logged_in else {
|
|
||||||
'variables': {
|
'variables': {
|
||||||
'tweetId': media_id,
|
'tweetId': media_id,
|
||||||
'withCommunity': False,
|
'withCommunity': False,
|
||||||
@ -1717,21 +1676,22 @@ class TwitterSpacesIE(TwitterBaseIE):
|
|||||||
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'
|
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
|
'url': 'https://twitter.com/i/spaces/1OwxWwQOPlNxQ',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1RDxlgyvNXzJL',
|
'id': '1OwxWwQOPlNxQ',
|
||||||
'ext': 'm4a',
|
'ext': 'm4a',
|
||||||
'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
|
'title': 'Everybody in: @mtbarra & @elonmusk discuss the future of EV charging',
|
||||||
'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
|
'description': 'Twitter Space participated by Elon Musk',
|
||||||
'uploader': r're:Lucio Di Gaetano.*?',
|
|
||||||
'uploader_id': 'luciodigaetano',
|
|
||||||
'live_status': 'was_live',
|
'live_status': 'was_live',
|
||||||
'timestamp': 1659877956,
|
'release_date': '20230608',
|
||||||
'upload_date': '20220807',
|
'release_timestamp': 1686256230,
|
||||||
'release_timestamp': 1659904215,
|
'thumbnail': r're:https?://pbs\.twimg\.com/profile_images/.+',
|
||||||
'release_date': '20220807',
|
'timestamp': 1686254250,
|
||||||
|
'upload_date': '20230608',
|
||||||
|
'uploader': 'Mary Barra',
|
||||||
|
'uploader_id': 'mtbarra',
|
||||||
},
|
},
|
||||||
'skip': 'No longer available',
|
'params': {'skip_download': 'm3u8'},
|
||||||
}, {
|
}, {
|
||||||
# post_live/TimedOut but downloadable
|
# post_live/TimedOut but downloadable
|
||||||
'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
|
'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
|
||||||
@ -1743,9 +1703,10 @@ class TwitterSpacesIE(TwitterBaseIE):
|
|||||||
'uploader': 'Google Cloud',
|
'uploader': 'Google Cloud',
|
||||||
'uploader_id': 'googlecloud',
|
'uploader_id': 'googlecloud',
|
||||||
'live_status': 'post_live',
|
'live_status': 'post_live',
|
||||||
|
'thumbnail': r're:https?://pbs\.twimg\.com/profile_images/.+',
|
||||||
'timestamp': 1681409554,
|
'timestamp': 1681409554,
|
||||||
'upload_date': '20230413',
|
'upload_date': '20230413',
|
||||||
'release_timestamp': 1681839000,
|
'release_timestamp': 1681839082,
|
||||||
'release_date': '20230418',
|
'release_date': '20230418',
|
||||||
'protocol': 'm3u8', # ffmpeg is forced
|
'protocol': 'm3u8', # ffmpeg is forced
|
||||||
'container': 'm4a_dash', # audio-only format fixup is applied
|
'container': 'm4a_dash', # audio-only format fixup is applied
|
||||||
@ -1762,6 +1723,9 @@ class TwitterSpacesIE(TwitterBaseIE):
|
|||||||
'uploader': '息根とめる',
|
'uploader': '息根とめる',
|
||||||
'uploader_id': 'tomeru_ikinone',
|
'uploader_id': 'tomeru_ikinone',
|
||||||
'live_status': 'was_live',
|
'live_status': 'was_live',
|
||||||
|
'release_date': '20230601',
|
||||||
|
'release_timestamp': 1685617200,
|
||||||
|
'thumbnail': r're:https?://pbs\.twimg\.com/profile_images/.+',
|
||||||
'timestamp': 1685617198,
|
'timestamp': 1685617198,
|
||||||
'upload_date': '20230601',
|
'upload_date': '20230601',
|
||||||
'protocol': 'm3u8', # ffmpeg is forced
|
'protocol': 'm3u8', # ffmpeg is forced
|
||||||
@ -1779,9 +1743,10 @@ class TwitterSpacesIE(TwitterBaseIE):
|
|||||||
'uploader': 'Candace Owens',
|
'uploader': 'Candace Owens',
|
||||||
'uploader_id': 'RealCandaceO',
|
'uploader_id': 'RealCandaceO',
|
||||||
'live_status': 'was_live',
|
'live_status': 'was_live',
|
||||||
|
'thumbnail': r're:https?://pbs\.twimg\.com/profile_images/.+',
|
||||||
'timestamp': 1723931351,
|
'timestamp': 1723931351,
|
||||||
'upload_date': '20240817',
|
'upload_date': '20240817',
|
||||||
'release_timestamp': 1723932000,
|
'release_timestamp': 1723932056,
|
||||||
'release_date': '20240817',
|
'release_date': '20240817',
|
||||||
'protocol': 'm3u8_native', # not ffmpeg, detected as video space
|
'protocol': 'm3u8_native', # not ffmpeg, detected as video space
|
||||||
},
|
},
|
||||||
@ -1861,18 +1826,21 @@ class TwitterSpacesIE(TwitterBaseIE):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'id': space_id,
|
'id': space_id,
|
||||||
'title': metadata.get('title'),
|
|
||||||
'description': f'Twitter Space participated by {participants}',
|
'description': f'Twitter Space participated by {participants}',
|
||||||
'uploader': traverse_obj(
|
|
||||||
metadata, ('creator_results', 'result', 'legacy', 'name')),
|
|
||||||
'uploader_id': traverse_obj(
|
|
||||||
metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
|
|
||||||
'live_status': live_status,
|
|
||||||
'release_timestamp': try_call(
|
|
||||||
lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
|
|
||||||
'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
|
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'http_headers': headers,
|
'http_headers': headers,
|
||||||
|
'live_status': live_status,
|
||||||
|
**traverse_obj(metadata, {
|
||||||
|
'title': ('title', {str}),
|
||||||
|
# started_at is None when stream is_upcoming so fallback to scheduled_start for --wait-for-video
|
||||||
|
'release_timestamp': (('started_at', 'scheduled_start'), {int_or_none(scale=1000)}, any),
|
||||||
|
'timestamp': ('created_at', {int_or_none(scale=1000)}),
|
||||||
|
}),
|
||||||
|
**traverse_obj(metadata, ('creator_results', 'result', 'legacy', {
|
||||||
|
'uploader': ('name', {str}),
|
||||||
|
'uploader_id': ('screen_name', {str_or_none}),
|
||||||
|
'thumbnail': ('profile_image_url_https', {lambda x: x.replace('_normal', '_400x400')}, {url_or_none}),
|
||||||
|
})),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
0
yt_dlp/extractor/vk.py
Executable file → Normal file
0
yt_dlp/extractor/vk.py
Executable file → Normal file
@ -417,6 +417,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
_NETRC_MACHINE = 'youtube'
|
_NETRC_MACHINE = 'youtube'
|
||||||
|
|
||||||
|
_COOKIE_HOWTO_WIKI_URL = 'https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies'
|
||||||
|
|
||||||
def ucid_or_none(self, ucid):
|
def ucid_or_none(self, ucid):
|
||||||
return self._search_regex(rf'^({self._YT_CHANNEL_UCID_RE})$', ucid, 'UC-id', default=None)
|
return self._search_regex(rf'^({self._YT_CHANNEL_UCID_RE})$', ucid, 'UC-id', default=None)
|
||||||
|
|
||||||
@ -451,17 +453,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
return preferred_lang
|
return preferred_lang
|
||||||
|
|
||||||
def _initialize_consent(self):
|
def _initialize_consent(self):
|
||||||
cookies = self._get_cookies('https://www.youtube.com/')
|
if self._has_auth_cookies:
|
||||||
if cookies.get('__Secure-3PSID'):
|
|
||||||
return
|
return
|
||||||
socs = cookies.get('SOCS')
|
socs = self._youtube_cookies.get('SOCS')
|
||||||
if socs and not socs.value.startswith('CAA'): # not consented
|
if socs and not socs.value.startswith('CAA'): # not consented
|
||||||
return
|
return
|
||||||
self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True) # accept all (required for mixes)
|
self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True) # accept all (required for mixes)
|
||||||
|
|
||||||
def _initialize_pref(self):
|
def _initialize_pref(self):
|
||||||
cookies = self._get_cookies('https://www.youtube.com/')
|
pref_cookie = self._youtube_cookies.get('PREF')
|
||||||
pref_cookie = cookies.get('PREF')
|
|
||||||
pref = {}
|
pref = {}
|
||||||
if pref_cookie:
|
if pref_cookie:
|
||||||
try:
|
try:
|
||||||
@ -472,8 +472,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
|
self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
|
||||||
|
|
||||||
def _initialize_cookie_auth(self):
|
def _initialize_cookie_auth(self):
|
||||||
yt_sapisid, yt_1psapisid, yt_3psapisid = self._get_sid_cookies()
|
self._passed_auth_cookies = False
|
||||||
if yt_sapisid or yt_1psapisid or yt_3psapisid:
|
if self._has_auth_cookies:
|
||||||
|
self._passed_auth_cookies = True
|
||||||
self.write_debug('Found YouTube account cookies')
|
self.write_debug('Found YouTube account cookies')
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
@ -492,8 +493,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def _youtube_login_hint(self):
|
def _youtube_login_hint(self):
|
||||||
return (f'{self._login_hint(method="cookies")}. Also see '
|
return (f'{self._login_hint(method="cookies")}. Also see {self._COOKIE_HOWTO_WIKI_URL} '
|
||||||
'https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies '
|
|
||||||
'for tips on effectively exporting YouTube cookies')
|
'for tips on effectively exporting YouTube cookies')
|
||||||
|
|
||||||
def _check_login_required(self):
|
def _check_login_required(self):
|
||||||
@ -553,12 +553,16 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
return f'{scheme} {"_".join(parts)}'
|
return f'{scheme} {"_".join(parts)}'
|
||||||
|
|
||||||
|
@property
|
||||||
|
def _youtube_cookies(self):
|
||||||
|
return self._get_cookies('https://www.youtube.com')
|
||||||
|
|
||||||
def _get_sid_cookies(self):
|
def _get_sid_cookies(self):
|
||||||
"""
|
"""
|
||||||
Get SAPISID, 1PSAPISID, 3PSAPISID cookie values
|
Get SAPISID, 1PSAPISID, 3PSAPISID cookie values
|
||||||
@returns sapisid, 1psapisid, 3psapisid
|
@returns sapisid, 1psapisid, 3psapisid
|
||||||
"""
|
"""
|
||||||
yt_cookies = self._get_cookies('https://www.youtube.com')
|
yt_cookies = self._youtube_cookies
|
||||||
yt_sapisid = try_call(lambda: yt_cookies['SAPISID'].value)
|
yt_sapisid = try_call(lambda: yt_cookies['SAPISID'].value)
|
||||||
yt_3papisid = try_call(lambda: yt_cookies['__Secure-3PAPISID'].value)
|
yt_3papisid = try_call(lambda: yt_cookies['__Secure-3PAPISID'].value)
|
||||||
yt_1papisid = try_call(lambda: yt_cookies['__Secure-1PAPISID'].value)
|
yt_1papisid = try_call(lambda: yt_cookies['__Secure-1PAPISID'].value)
|
||||||
@ -595,6 +599,31 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
return ' '.join(authorizations)
|
return ' '.join(authorizations)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def is_authenticated(self):
|
||||||
|
return self._has_auth_cookies
|
||||||
|
|
||||||
|
@property
|
||||||
|
def _has_auth_cookies(self):
|
||||||
|
yt_sapisid, yt_1psapisid, yt_3psapisid = self._get_sid_cookies()
|
||||||
|
# YouTube doesn't appear to clear 3PSAPISID when rotating cookies (as of 2025-04-26)
|
||||||
|
# But LOGIN_INFO is cleared and should exist if logged in
|
||||||
|
has_login_info = 'LOGIN_INFO' in self._youtube_cookies
|
||||||
|
return bool(has_login_info and (yt_sapisid or yt_1psapisid or yt_3psapisid))
|
||||||
|
|
||||||
|
def _request_webpage(self, *args, **kwargs):
|
||||||
|
response = super()._request_webpage(*args, **kwargs)
|
||||||
|
|
||||||
|
# Check that we are still logged-in and cookies have not rotated after every request
|
||||||
|
if getattr(self, '_passed_auth_cookies', None) and not self._has_auth_cookies:
|
||||||
|
self.report_warning(
|
||||||
|
'The provided YouTube account cookies are no longer valid. '
|
||||||
|
'They have likely been rotated in the browser as a security measure. '
|
||||||
|
f'For tips on how to effectively export YouTube cookies, refer to {self._COOKIE_HOWTO_WIKI_URL} .',
|
||||||
|
only_once=False)
|
||||||
|
|
||||||
|
return response
|
||||||
|
|
||||||
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
|
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
|
||||||
note='Downloading API JSON', errnote='Unable to download API page',
|
note='Downloading API JSON', errnote='Unable to download API page',
|
||||||
context=None, api_key=None, api_hostname=None, default_client='web'):
|
context=None, api_key=None, api_hostname=None, default_client='web'):
|
||||||
@ -695,10 +724,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))],
|
args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))],
|
||||||
expected_type=str)
|
expected_type=str)
|
||||||
|
|
||||||
@functools.cached_property
|
|
||||||
def is_authenticated(self):
|
|
||||||
return bool(self._get_sid_authorization_header())
|
|
||||||
|
|
||||||
def extract_ytcfg(self, video_id, webpage):
|
def extract_ytcfg(self, video_id, webpage):
|
||||||
if not webpage:
|
if not webpage:
|
||||||
return {}
|
return {}
|
||||||
|
|||||||
@ -1982,7 +1982,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
def _player_js_cache_key(self, player_url):
|
def _player_js_cache_key(self, player_url):
|
||||||
player_id = self._extract_player_info(player_url)
|
player_id = self._extract_player_info(player_url)
|
||||||
player_path = remove_start(urllib.parse.urlparse(player_url).path, f'/s/player/{player_id}/')
|
player_path = remove_start(urllib.parse.urlparse(player_url).path, f'/s/player/{player_id}/')
|
||||||
variant = self._INVERSE_PLAYER_JS_VARIANT_MAP.get(player_path)
|
variant = self._INVERSE_PLAYER_JS_VARIANT_MAP.get(player_path) or next((
|
||||||
|
v for k, v in self._INVERSE_PLAYER_JS_VARIANT_MAP.items()
|
||||||
|
if re.fullmatch(re.escape(k).replace('en_US', r'[a-zA-Z0-9_]+'), player_path)), None)
|
||||||
if not variant:
|
if not variant:
|
||||||
self.write_debug(
|
self.write_debug(
|
||||||
f'Unable to determine player JS variant\n'
|
f'Unable to determine player JS variant\n'
|
||||||
@ -3648,6 +3650,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
reason = f'{remove_end(reason.strip(), ".")}. {self._youtube_login_hint}'
|
reason = f'{remove_end(reason.strip(), ".")}. {self._youtube_login_hint}'
|
||||||
elif get_first(playability_statuses, ('errorScreen', 'playerCaptchaViewModel', {dict})):
|
elif get_first(playability_statuses, ('errorScreen', 'playerCaptchaViewModel', {dict})):
|
||||||
reason += '. YouTube is requiring a captcha challenge before playback'
|
reason += '. YouTube is requiring a captcha challenge before playback'
|
||||||
|
elif "This content isn't available, try again later" in reason:
|
||||||
|
reason = (
|
||||||
|
f'{remove_end(reason.strip(), ".")}. {"Your account" if self.is_authenticated else "The current session"} '
|
||||||
|
f'has been rate-limited by YouTube for up to an hour. It is recommended to use `-t sleep` to add a delay '
|
||||||
|
f'between video requests to avoid exceeding the rate limit. For more information, refer to '
|
||||||
|
f'https://github.com/yt-dlp/yt-dlp/wiki/Extractors#this-content-isnt-available-try-again-later'
|
||||||
|
)
|
||||||
self.raise_no_formats(reason, expected=True)
|
self.raise_no_formats(reason, expected=True)
|
||||||
|
|
||||||
keywords = get_first(video_details, 'keywords', expected_type=list) or []
|
keywords = get_first(video_details, 'keywords', expected_type=list) or []
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user