mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-04-22 16:46:03 +00:00
rewrite code to use json api
This commit is contained in:
parent
75fbe5dc83
commit
9174bc2a81
@ -1,123 +1,53 @@
|
|||||||
import re
|
import secrets
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
unescapeHTML,
|
str_or_none,
|
||||||
|
traverse_obj,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class EggsBaseIE(InfoExtractor):
|
class EggsBaseIE(InfoExtractor):
|
||||||
def _parse_artist_name(self, webpage):
|
_API_HEADERS = {
|
||||||
artist = self._search_regex(
|
'Accept': '*/*',
|
||||||
r'<div[^>]+class=(["\'])artist_name\1[^>]*>([^<]+)</div>',
|
'apVersion': '8.2.00',
|
||||||
webpage, 'artist name', fatal=False, default=None, group=2)
|
'deviceName': 'Android',
|
||||||
if artist:
|
}
|
||||||
return artist.strip()
|
|
||||||
|
|
||||||
og_title = self._html_search_meta(['og:title'], webpage, 'og:title', default=None)
|
@staticmethod
|
||||||
if og_title:
|
def _generate_random_device_id():
|
||||||
artist_match = re.search(r'(?P<artist>[^()]+)(?:\([^)]*\))?のEggsページ', og_title)
|
return secrets.token_hex(8)
|
||||||
if artist_match:
|
|
||||||
return artist_match.group('artist').strip()
|
|
||||||
|
|
||||||
return 'Unknown Artist'
|
def _download_eggs_json(self, url, music_id):
|
||||||
|
headers = self._API_HEADERS.copy()
|
||||||
|
headers['deviceId'] = self._generate_random_device_id()
|
||||||
|
return self._download_json(url, video_id=music_id, headers=headers)
|
||||||
|
|
||||||
def _parse_single_song(self, url, webpage, song_id, default_artist='Unknown Artist'):
|
def _extract_music_info(self, data, song_id):
|
||||||
track_title = self._search_regex(
|
music_info = traverse_obj(data, {
|
||||||
r'<div[^>]+class=(["\'])product_name\1[^>]*>\s*<p>([^<]+)</p>',
|
'id': ('musicId', {str_or_none}, {lambda x: x or song_id}),
|
||||||
webpage, 'track title', fatal=False, default=None, group=2)
|
'title': ('musicTitle', {str}, {lambda x: x or 'Unknown Title'}),
|
||||||
|
'url': ('musicDataPath', {url_or_none}),
|
||||||
|
'uploader': ('artist', 'displayName', {str}, {lambda x: x or 'Unknown Artist'}),
|
||||||
|
'thumbnail': ('imageDataPath', {url_or_none}),
|
||||||
|
'youtube_url': ('youtubeUrl', {url_or_none}),
|
||||||
|
'youtube_id': ('youtubeVideoId', {str_or_none}),
|
||||||
|
'source_type': ('sourceType', {int}),
|
||||||
|
'vcodec': (None, {lambda x: 'none'}),
|
||||||
|
}, get_all=False)
|
||||||
|
|
||||||
if not track_title:
|
if not music_info.get('url') and not (music_info.get('source_type') == 2 and music_info.get('youtube_url')):
|
||||||
page_title = self._search_regex(
|
raise ExtractorError('Audio URL not found (possibly an unsupported sourceType)', expected=True)
|
||||||
r'<title>(?P<title>[^<]+)</title>',
|
|
||||||
webpage, 'page title', fatal=False, default=None, group='title')
|
|
||||||
if page_title:
|
|
||||||
inner_match = re.search(r'「(?P<inner>[^」]+)」', page_title)
|
|
||||||
if inner_match:
|
|
||||||
track_title = inner_match.group('inner').strip()
|
|
||||||
|
|
||||||
if not track_title:
|
return music_info
|
||||||
track_title = 'Unknown Title'
|
|
||||||
|
|
||||||
artist = default_artist
|
|
||||||
if not artist or artist == 'Unknown Artist':
|
|
||||||
artist_regex = r'<span[^>]+class=(["\'])artist_name\1[^>]*>\s*<a[^>]*>([^<]+)</a>'
|
|
||||||
fallback_artist = self._search_regex(
|
|
||||||
artist_regex, webpage, 'artist name',
|
|
||||||
fatal=False, default=None, group=2)
|
|
||||||
if fallback_artist:
|
|
||||||
artist = fallback_artist.strip()
|
|
||||||
|
|
||||||
audio_url = self._search_regex(
|
|
||||||
r'<div[^>]+class=(["\'])[^"\']*player[^"\']*\1[^>]+data-src=(["\'])(?P<audio_url>[^"\']+)\2',
|
|
||||||
webpage, 'audio url', fatal=True, group='audio_url')
|
|
||||||
audio_url = url_or_none(unescapeHTML(audio_url))
|
|
||||||
if not audio_url:
|
|
||||||
raise ExtractorError('Invalid audio URL.', expected=True)
|
|
||||||
|
|
||||||
thumbnail = (
|
|
||||||
self._html_search_meta(['og:image'], webpage, 'thumbnail', default=None)
|
|
||||||
or self._search_regex(
|
|
||||||
r'<span[^>]*>\s*<img[^>]+src=(["\'])(?P<thumb>[^"\']+)\1',
|
|
||||||
webpage, 'thumbnail', fatal=False, default=None, group='thumb')
|
|
||||||
)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': song_id,
|
|
||||||
'url': audio_url,
|
|
||||||
'title': track_title,
|
|
||||||
'uploader': artist,
|
|
||||||
'vcodec': 'none',
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
}
|
|
||||||
|
|
||||||
def _parse_artist_page(self, webpage, artist_id, artist_name):
|
|
||||||
song_blocks = re.findall(r'(?s)<li[^>]+id="songs\d+"[^>]*>.*?</li>', webpage)
|
|
||||||
entries = []
|
|
||||||
|
|
||||||
for block in song_blocks:
|
|
||||||
audio_url = self._search_regex(
|
|
||||||
r'data-src=(["\'])(?P<url>https?://.*?\.(?:mp3|m4a).*?)\1',
|
|
||||||
block, 'audio url', fatal=False, default=None, group='url')
|
|
||||||
audio_url = url_or_none(unescapeHTML(audio_url))
|
|
||||||
if not audio_url:
|
|
||||||
continue
|
|
||||||
|
|
||||||
track_id = self._search_regex(
|
|
||||||
r'data-srcid=(["\'])(?P<id>[^"\'<>]+)\1',
|
|
||||||
block, 'track id', fatal=False, default=None, group='id')
|
|
||||||
if not track_id:
|
|
||||||
continue
|
|
||||||
|
|
||||||
title = self._search_regex(
|
|
||||||
r'data-srcname=(["\'])(?P<title>[^"\']+)\1',
|
|
||||||
block, 'track title', fatal=False, default=None, group='title')
|
|
||||||
if not title:
|
|
||||||
title = 'Unknown Title'
|
|
||||||
|
|
||||||
thumbnail = self._search_regex(
|
|
||||||
r'<img[^>]+src=(["\'])(?P<th>[^"\']+)\1',
|
|
||||||
block, 'thumbnail', fatal=False, default=None, group='th')
|
|
||||||
|
|
||||||
entries.append({
|
|
||||||
'id': track_id,
|
|
||||||
'url': audio_url,
|
|
||||||
'title': title,
|
|
||||||
'uploader': artist_name,
|
|
||||||
'vcodec': 'none',
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
})
|
|
||||||
|
|
||||||
return entries
|
|
||||||
|
|
||||||
|
|
||||||
class EggsIE(EggsBaseIE):
|
class EggsIE(EggsBaseIE):
|
||||||
IE_NAME = 'eggs:single'
|
IE_NAME = 'eggs:single'
|
||||||
_VALID_URL = (
|
_VALID_URL = r'https?://eggs\.mu/artist/[^/]+/song/(?P<song_id>[^/]+)'
|
||||||
r'https?://(?:www\.)?eggs\.mu/artist/[^/]+/song/(?P<song_id>[^/]+)'
|
|
||||||
)
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://eggs.mu/artist/32_sunny_girl/song/0e95fd1d-4d61-4d5b-8b18-6092c551da90',
|
'url': 'https://eggs.mu/artist/32_sunny_girl/song/0e95fd1d-4d61-4d5b-8b18-6092c551da90',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -125,39 +55,105 @@ class EggsIE(EggsBaseIE):
|
|||||||
'ext': 'm4a',
|
'ext': 'm4a',
|
||||||
'title': 'シネマと信号',
|
'title': 'シネマと信号',
|
||||||
'uploader': 'Sunny Girl',
|
'uploader': 'Sunny Girl',
|
||||||
'thumbnail': r're:^https?://.*\.jpg(?:\?.*)?$',
|
'source_type': 1,
|
||||||
|
'thumbnail': r're:https?://.*\.jpg(?:\?.*)?$',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://eggs.mu/artist/KAMO_3pband/song/1d4bc45f-1af6-47a9-8b30-a70cae350b4f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '80cLKA2wnoA',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'KAMO「いい女だから」Audio',
|
||||||
|
'uploader': 'KAMO',
|
||||||
|
'live_status': 'not_live',
|
||||||
|
'channel_id': 'UCsHLBw2__5Q9y55skXPotOg',
|
||||||
|
'channel_follower_count': int,
|
||||||
|
'description': 'md5:d260da711ecbec3e720293dc11401b87',
|
||||||
|
'availability': 'public',
|
||||||
|
'uploader_id': '@KAMO_band',
|
||||||
|
'upload_date': '20240925',
|
||||||
|
'thumbnail': 'https://i.ytimg.com/vi/80cLKA2wnoA/maxresdefault.jpg',
|
||||||
|
'comment_count': int,
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UCsHLBw2__5Q9y55skXPotOg',
|
||||||
|
'view_count': int,
|
||||||
|
'duration': 151,
|
||||||
|
'like_count': int,
|
||||||
|
'channel': 'KAMO',
|
||||||
|
'playable_in_embed': True,
|
||||||
|
'uploader_url': 'https://www.youtube.com/@KAMO_band',
|
||||||
|
'tags': [],
|
||||||
|
'timestamp': 1727271121,
|
||||||
|
'age_limit': 0,
|
||||||
|
'categories': ['People & Blogs'],
|
||||||
|
},
|
||||||
|
'add_ie': ['Youtube'],
|
||||||
|
'params': {'skip_download': 'Youtube'},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = self._match_valid_url(url)
|
song_id = self._match_valid_url(url).group('song_id')
|
||||||
song_id = mobj.group('song_id')
|
json_data = self._download_eggs_json(
|
||||||
webpage = self._download_webpage(url, song_id)
|
f'https://app-front-api.eggs.mu/v1/musics/{song_id}', music_id=song_id)
|
||||||
artist_name = self._parse_artist_name(webpage)
|
music_info = self._extract_music_info(json_data, song_id)
|
||||||
return self._parse_single_song(url, webpage, song_id, artist_name)
|
|
||||||
|
if music_info['source_type'] == 2 and music_info['youtube_url']:
|
||||||
|
return self.url_result(
|
||||||
|
music_info['youtube_url'], ie='Youtube', video_id=music_info['youtube_id'])
|
||||||
|
|
||||||
|
return music_info
|
||||||
|
|
||||||
|
|
||||||
class EggsArtistIE(EggsBaseIE):
|
class EggsArtistIE(EggsBaseIE):
|
||||||
IE_NAME = 'eggs:artist'
|
IE_NAME = 'eggs:artist'
|
||||||
_VALID_URL = (
|
_VALID_URL = r'https?://eggs\.mu/artist/(?P<artist_id>[^/]+)$'
|
||||||
r'https?://(?:www\.)?eggs\.mu/artist/(?P<artist_id>[^/]+)$'
|
|
||||||
)
|
_TESTS = [
|
||||||
_TESTS = [{
|
{
|
||||||
'url': 'https://eggs.mu/artist/32_sunny_girl',
|
'url': 'https://eggs.mu/artist/32_sunny_girl',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '32_sunny_girl',
|
'id': '32_sunny_girl',
|
||||||
'title': 'Sunny Girl',
|
'title': 'Sunny Girl',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 18,
|
||||||
},
|
},
|
||||||
'playlist_count': 18,
|
{
|
||||||
}]
|
'url': 'https://eggs.mu/artist/KAMO_3pband',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'KAMO_3pband',
|
||||||
|
'title': 'KAMO',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 2,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
artist_id = self._match_valid_url(url).group('artist_id')
|
artist_id = self._match_valid_url(url).group('artist_id')
|
||||||
webpage = self._download_webpage(url, artist_id)
|
json_data = self._download_eggs_json(
|
||||||
artist_name = self._parse_artist_name(webpage)
|
f'https://app-front-api.eggs.mu/v1/artists/{artist_id}/musics', music_id=artist_id)
|
||||||
entries = self._parse_artist_page(webpage, artist_id, artist_name)
|
items = traverse_obj(json_data, 'data', default=[])
|
||||||
|
entries = []
|
||||||
|
display_name = None
|
||||||
|
|
||||||
|
for item in items:
|
||||||
|
music_info = self._extract_music_info(item, '')
|
||||||
|
if not music_info['id']:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not display_name:
|
||||||
|
display_name = music_info['uploader']
|
||||||
|
|
||||||
|
if music_info['source_type'] == 2 and music_info['youtube_url']:
|
||||||
|
entries.append(
|
||||||
|
self.url_result(
|
||||||
|
music_info['youtube_url'], ie='Youtube', video_id=music_info['youtube_id']))
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not music_info.get('url'):
|
||||||
|
continue
|
||||||
|
|
||||||
|
entries.append(music_info)
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries,
|
entries,
|
||||||
playlist_id=artist_id,
|
playlist_id=artist_id,
|
||||||
playlist_title=artist_name,
|
playlist_title=display_name or artist_id)
|
||||||
)
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user