import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    url_or_none,
    unescapeHTML,
)


class EggsBaseIE(InfoExtractor):
    """Shared HTML-scraping helpers for the eggs.mu extractors.

    NOTE(review): several patterns below anchor on an element by its class or
    id only. The concrete element names were not recoverable from the source
    this was rebuilt from, so elements are matched generically (``<\\w+``).
    Confirm against live markup before tightening them.
    """

    # Placeholders used when the page yields no usable metadata.
    _UNKNOWN_ARTIST = 'Unknown Artist'
    _UNKNOWN_TITLE = 'Unknown Title'

    # Tried in order; group 2 is the artist name in both patterns.
    _ARTIST_NAME_RES = (
        # The name is the element's own leading text.
        r'<\w+[^>]+class=(["\'])artist_name\1[^>]*>\s*([^\s<][^<]*)',
        # The name is wrapped in one child element.
        r'<\w+[^>]+class=(["\'])artist_name\1[^>]*>\s*<\w+[^>]*>\s*([^\s<][^<]*)',
    )

    @staticmethod
    def _clean_text(value):
        """Return *value* HTML-unescaped and stripped, or None if nothing is left."""
        if not value:
            return None
        return unescapeHTML(value).strip() or None

    def _search_artist_markup(self, webpage):
        """Return the artist name found in an ``artist_name`` element, or None."""
        return self._clean_text(self._search_regex(
            self._ARTIST_NAME_RES, webpage, 'artist name',
            default=None, group=2))

    def _parse_artist_name(self, webpage):
        """Return the artist name for *webpage*.

        Looks at the ``artist_name`` element first, then at the ``og:title``
        meta tag (text before an optional parenthesised part followed by
        'のEggsページ'). Falls back to 'Unknown Artist' when neither yields a name.
        """
        artist = self._search_artist_markup(webpage)
        if artist:
            return artist

        og_title = self._html_search_meta(
            ['og:title'], webpage, 'og:title', default=None)
        if og_title:
            artist_match = re.search(
                r'(?P<artist>[^()]+)(?:\([^)]*\))?のEggsページ', og_title)
            if artist_match:
                artist = artist_match.group('artist').strip()
                if artist:
                    return artist

        return self._UNKNOWN_ARTIST

    def _parse_single_song(self, url, webpage, default_artist='Unknown Artist'):
        """Build the info dict for a single song page.

        url            -- song page URL; the id is taken from its /song/<id> part
        webpage        -- HTML of the song page
        default_artist -- artist name to use; when empty or 'Unknown Artist'
                          the page markup is searched for a name instead

        Raises ExtractorError when no audio URL can be found or it is invalid.
        """
        song_id = self._search_regex(
            r'/song/(?P<id>[^/?#&]+)', url, 'song id',
            default=None, group='id')

        # The title text may sit directly in the product_name element or inside
        # one child element, so the child tag is optional here.
        track_title = self._clean_text(self._search_regex(
            r'<\w+[^>]+class=(["\'])product_name\1[^>]*>\s*(?:<\w+[^>]*>\s*)?([^<]+)',
            webpage, 'track title', default=None, group=2))
        if not track_title:
            page_title = self._search_regex(
                r'<title[^>]*>(?P<title>[^<]+)', webpage, 'page title',
                default=None, group='title')
            if page_title:
                # The page title quotes the track name in corner brackets.
                inner_match = re.search(r'「(?P<inner>[^」]+)」', page_title)
                if inner_match:
                    track_title = self._clean_text(inner_match.group('inner'))
        if not track_title:
            track_title = self._UNKNOWN_TITLE

        artist = default_artist
        if not artist or artist == self._UNKNOWN_ARTIST:
            artist = self._search_artist_markup(webpage) or artist

        # No default given: a missing player element is a hard failure.
        audio_url = self._search_regex(
            r'<\w+[^>]+class=(["\'])[^"\']*player[^"\']*\1[^>]+data-src=(["\'])(?P<audio_url>[^"\']+)\2',
            webpage, 'audio url', group='audio_url')
        audio_url = url_or_none(unescapeHTML(audio_url))
        if not audio_url:
            raise ExtractorError('Invalid audio URL.', expected=True)

        thumbnail = self._html_search_meta(
            ['og:image'], webpage, 'thumbnail', default=None)
        if not thumbnail:
            # NOTE(review): fallback takes the first <img> that directly
            # follows any opening tag; the original wrapper element is unknown.
            thumbnail = self._clean_text(self._search_regex(
                r'<\w+[^>]*>\s*<img[^>]+src=(["\'])(?P<thumb>[^"\']+)\1',
                webpage, 'thumbnail', default=None, group='thumb'))

        return {
            'id': song_id,
            'url': audio_url,
            'title': track_title,
            'uploader': artist,
            'vcodec': 'none',
            'thumbnail': thumbnail,
        }

    def _parse_artist_page(self, webpage, artist_id, artist_name):
        """Return a list of info dicts, one per playable song block in *webpage*.

        A song block is an element whose id is ``songs<digits>``. Blocks
        without a valid audio URL or without a track id are skipped.
        *artist_id* is accepted for interface compatibility and is not used.
        """
        entries = []
        # The closing tag is tied to the opening one via a back-reference, so
        # the block does not depend on a specific element name.
        for block_match in re.finditer(
                r'(?s)<(?P<tag>\w+)[^>]+id="songs\d+"[^>]*>.*?</(?P=tag)>', webpage):
            block = block_match.group(0)

            audio_url = url_or_none(unescapeHTML(self._search_regex(
                r'data-src=(["\'])(?P<url>https?://.*?\.(?:mp3|m4a).*?)\1',
                block, 'audio url', default=None, group='url')))
            if not audio_url:
                continue

            track_id = self._search_regex(
                r'data-srcid=(["\'])(?P<id>[^"\'<>]+)\1',
                block, 'track id', default=None, group='id')
            if not track_id:
                continue

            # Attribute values are HTML-escaped in the page source.
            title = self._clean_text(self._search_regex(
                r'data-srcname=(["\'])(?P<title>[^"\']+)\1',
                block, 'track title', default=None, group='title'))

            thumbnail = self._clean_text(self._search_regex(
                r'<img[^>]+src=(["\'])(?P<th>[^"\']+)\1',
                block, 'thumbnail', default=None, group='th'))

            entries.append({
                'id': track_id,
                'url': audio_url,
                'title': title or self._UNKNOWN_TITLE,
                'uploader': artist_name,
                'vcodec': 'none',
                'thumbnail': thumbnail,
            })
        return entries


class EggsIE(EggsBaseIE):
    """Extractor for a single eggs.mu song page."""

    IE_NAME = 'eggs:single'
    # Ids stop at '?', '#' and '&' so a query string or fragment never ends up
    # in the id.
    _VALID_URL = (
        r'https?://(?:www\.)?eggs\.mu/artist/(?P<artist_id>[^/?#&]+)/song/(?P<song_id>[^/?#&]+)'
    )

    _TESTS = [{
        'url': 'https://eggs.mu/artist/32_sunny_girl/song/0e95fd1d-4d61-4d5b-8b18-6092c551da90',
        'info_dict': {
            'id': '0e95fd1d-4d61-4d5b-8b18-6092c551da90',
            'ext': 'm4a',
            'title': 'シネマと信号',
            'uploader': 'Sunny Girl',
            'thumbnail': r're:^https?://.*\.jpg(?:\?.*)?$',
        },
    }]

    def _real_extract(self, url):
        song_id = self._match_valid_url(url).group('song_id')
        webpage = self._download_webpage(url, song_id)
        artist_name = self._parse_artist_name(webpage)
        return self._parse_single_song(url, webpage, artist_name)


class EggsArtistIE(EggsBaseIE):
    """Extractor for an eggs.mu artist page, returned as a playlist."""

    IE_NAME = 'eggs:artist'
    # The lookahead keeps this pattern from also claiming
    # /artist/<id>/song/<id> URLs, which belong to EggsIE.
    _VALID_URL = (
        r'https?://(?:www\.)?eggs\.mu/artist/(?P<artist_id>[^/?#&]+)(?:/(?!song/)|[?#&]|$)'
    )

    _TESTS = [{
        'url': 'https://eggs.mu/artist/32_sunny_girl',
        'info_dict': {
            'id': '32_sunny_girl',
            'title': 'Sunny Girl',
        },
        'playlist_count': 18,
    }]

    def _real_extract(self, url):
        artist_id = self._match_valid_url(url).group('artist_id')
        webpage = self._download_webpage(url, artist_id)
        artist_name = self._parse_artist_name(webpage)
        entries = self._parse_artist_page(webpage, artist_id, artist_name)
        return self.playlist_result(
            entries, playlist_id=artist_id, playlist_title=artist_name)