mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-07-02 16:28:52 +00:00
[ie/soundcloud] Extract comments (#16938)
Closes #16717 Authored by: 0xvd
This commit is contained in:
parent
6a24c96f7f
commit
785e507ef0
@ -375,6 +375,8 @@ class InfoExtractor:
|
||||
favorite by the video uploader
|
||||
* "is_pinned" - Whether the comment is pinned to
|
||||
the top of the comments
|
||||
* "start_time" - Start time (in seconds) for displaying the comment
|
||||
* "end_time" - End time (in seconds) for displaying the comment
|
||||
age_limit: Age restriction for the video, as an integer (years)
|
||||
webpage_url: The URL to the video webpage, if given to yt-dlp it
|
||||
should allow to get the same result again. (It will be set
|
||||
|
||||
@ -429,12 +429,55 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||
'tags': traverse_obj(info, ('tag_list', {self._TAGS_RE.findall}, ..., ..., filter)),
|
||||
'artists': traverse_obj(info, ('publisher_metadata', 'artist', {str}, filter, all, filter)),
|
||||
'formats': formats if not extract_flat else None,
|
||||
'__post_extractor': self.extract_comments(track_id),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _resolv_url(cls, url):
|
||||
return cls._API_V2_BASE + 'resolve?url=' + url
|
||||
|
||||
def _get_comments(self, track_id):
|
||||
available_filters = ('newest', 'oldest', 'track-timestamp')
|
||||
sort_by = self._configuration_arg('comments_sort_by', default=[None], ie_key='soundcloud')[0]
|
||||
if not sort_by:
|
||||
sort_by = available_filters[0]
|
||||
self.to_screen(
|
||||
f'Defaulting to sort comments by {sort_by}. '
|
||||
f'Configure this with --extractor-args soundcloud:comments_sort_by=FILTER . '
|
||||
f'Available filters: {", ".join(available_filters)}')
|
||||
elif sort_by not in available_filters:
|
||||
raise ExtractorError(f'Invalid comments_sort_by filter: {sort_by}', expected=True)
|
||||
else:
|
||||
self.to_screen(f'Sorting comments by {sort_by}')
|
||||
|
||||
next_url = update_url_query(
|
||||
f'{self._API_V2_BASE}tracks/{track_id}/comments', {
|
||||
'sort': sort_by,
|
||||
'limit': '20',
|
||||
'offset': '0',
|
||||
'threaded': '1',
|
||||
})
|
||||
for page_num in itertools.count(1):
|
||||
page = self._call_api(next_url, track_id, note=f'Downloading comments page {page_num}')
|
||||
|
||||
for comment_dict in traverse_obj(page, ('collection', lambda _, v: isinstance(v['id'], int))):
|
||||
yield traverse_obj(comment_dict, {
|
||||
'id': ('id', {int}, {str_or_none}),
|
||||
'author_id': ('user', 'id', {int}, {str_or_none}),
|
||||
'author': ('user', 'username', {str}),
|
||||
'author_thumbnail': ('user', 'avatar_url', {url_or_none}),
|
||||
'author_url': ('user', 'permalink_url', {url_or_none}),
|
||||
'author_is_verified': ('user', 'verified', {bool}),
|
||||
'timestamp': ('created_at', {unified_timestamp}),
|
||||
'text': ('body', {str}),
|
||||
'start_time': ('timestamp', {float_or_none(scale=1000)}),
|
||||
'end_time': ('timestamp', {float_or_none(scale=1000)}),
|
||||
})
|
||||
|
||||
next_url = page.get('next_href')
|
||||
if not next_url:
|
||||
break
|
||||
|
||||
|
||||
class SoundcloudIE(SoundcloudBaseIE):
|
||||
"""Information extractor for soundcloud.com
|
||||
@ -460,10 +503,9 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
IE_NAME = 'soundcloud'
|
||||
_TESTS = [{
|
||||
'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
|
||||
'md5': 'de9bac153e7427a7333b4b0c1b6a18d2',
|
||||
'info_dict': {
|
||||
'id': '62986583',
|
||||
'ext': 'opus',
|
||||
'ext': 'm4a',
|
||||
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
|
||||
'track': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
|
||||
'description': 'md5:7b6074e00887ad79f59b647c8fb6d5ae',
|
||||
@ -471,7 +513,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'uploader_id': '1571244',
|
||||
'timestamp': 1349920598,
|
||||
'upload_date': '20121011',
|
||||
'duration': 143.216,
|
||||
'duration': 143.206,
|
||||
'license': 'all-rights-reserved',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
@ -481,31 +523,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'uploader_url': 'https://soundcloud.com/ethmusic',
|
||||
'tags': 'count:14',
|
||||
},
|
||||
}, {
|
||||
# Geo-restricted
|
||||
'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
|
||||
'info_dict': {
|
||||
'id': '47127627',
|
||||
'ext': 'opus',
|
||||
'title': 'Goldrushed',
|
||||
'track': 'Goldrushed',
|
||||
'description': 'md5:c0080b79a3710811d60234f94f391a40',
|
||||
'uploader': 'The Royal Concept',
|
||||
'uploader_id': '9615865',
|
||||
'timestamp': 1337635207,
|
||||
'upload_date': '20120521',
|
||||
'duration': 227.103,
|
||||
'license': 'all-rights-reserved',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'uploader_url': 'https://soundcloud.com/the-concept-band',
|
||||
'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)',
|
||||
'genres': ['Alternative'],
|
||||
'artists': ['The Royal Concept'],
|
||||
'tags': [],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# private link
|
||||
'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp',
|
||||
@ -559,10 +577,9 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
}, {
|
||||
# downloadable song
|
||||
'url': 'https://soundcloud.com/the80m/the-following',
|
||||
'md5': 'ecb87d7705d5f53e6c02a63760573c75', # wav: '9ffcddb08c87d74fb5808a3c183a1d04'
|
||||
'info_dict': {
|
||||
'id': '343609555',
|
||||
'ext': 'opus', # wav original available with auth
|
||||
'ext': 'm4a', # wav original available with auth
|
||||
'title': 'The Following',
|
||||
'track': 'The Following',
|
||||
'description': '',
|
||||
@ -571,7 +588,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'uploader_url': 'https://soundcloud.com/the80m',
|
||||
'upload_date': '20170922',
|
||||
'timestamp': 1506120436,
|
||||
'duration': 397.228,
|
||||
'duration': 397.175,
|
||||
'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)',
|
||||
'license': 'all-rights-reserved',
|
||||
'like_count': int,
|
||||
@ -582,15 +599,16 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'artists': ['80M'],
|
||||
'tags': 'count:4',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'expected_warnings': ['Original download format is only available for registered users'],
|
||||
}, {
|
||||
# private link, downloadable format
|
||||
# tags with spaces (e.g. "Uplifting Trance", "Ori Uplift")
|
||||
'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd',
|
||||
'md5': '2e1530d0e9986a833a67cb34fc90ece0', # wav: '64a60b16e617d41d0bef032b7f55441e'
|
||||
'md5': '59ed00579679f3b660dfdb8d2dbf8589', # wav: '64a60b16e617d41d0bef032b7f55441e'
|
||||
'info_dict': {
|
||||
'id': '340344461',
|
||||
'ext': 'opus', # wav original available with auth
|
||||
'ext': 'mp3', # wav original available with auth
|
||||
'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
|
||||
'track': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
|
||||
'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
|
||||
@ -617,7 +635,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'md5': '59c7872bc44e5d99b7211891664760c2',
|
||||
'info_dict': {
|
||||
'id': '309699954',
|
||||
'ext': 'mp3',
|
||||
'ext': 'm4a',
|
||||
'title': 'Sideways (Prod. Mad Real)',
|
||||
'track': 'Sideways (Prod. Mad Real)',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
@ -625,7 +643,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'uploader_id': '2366352',
|
||||
'timestamp': 1488152409,
|
||||
'upload_date': '20170226',
|
||||
'duration': 207.012,
|
||||
'duration': 206.988,
|
||||
'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)',
|
||||
'license': 'all-rights-reserved',
|
||||
'view_count': int,
|
||||
@ -699,6 +717,30 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8', 'format': 'hls_aac_160k'},
|
||||
}, {
|
||||
'url': 'https://soundcloud.com/user615617514/dagames',
|
||||
'info_dict': {
|
||||
'id': '309858375',
|
||||
'ext': 'm4a',
|
||||
'title': 'BENDY AND THE INK MACHINE SONG (Build Our Machine) INSTRUMENTAL by DAGAMES',
|
||||
'description': '',
|
||||
'uploader': 'xXwolffykittyXx',
|
||||
'uploader_id': '157677999',
|
||||
'uploader_url': 'https://soundcloud.com/user615617514',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'duration': 241.601,
|
||||
'thumbnail': 'https://i1.sndcdn.com/artworks-000209893581-orfv6t-original.jpg',
|
||||
'tags': [],
|
||||
'artists': ['BENDY AND THE INK MACHINE SONG (Build Our Machine) INSTRUMENTAL '],
|
||||
'track': 'BENDY AND THE INK MACHINE SONG (Build Our Machine) INSTRUMENTAL by DAGAMES',
|
||||
'timestamp': 1488232827,
|
||||
'upload_date': '20170227',
|
||||
'license': 'all-rights-reserved',
|
||||
},
|
||||
'params': {'get_comments': True, 'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# AAC HQ format available (account with active subscription needed)
|
||||
'url': 'https://soundcloud.com/wandw/the-chainsmokers-ft-daya-dont-let-me-down-ww-remix-1',
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user