[ie/soundcloud] Extract comments (#16938)

Closes #16717
Authored by: 0xvd
This commit is contained in:
0xvd 2026-06-28 03:43:07 +05:30 committed by GitHub
parent 6a24c96f7f
commit 785e507ef0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 79 additions and 35 deletions

View File

@ -375,6 +375,8 @@ class InfoExtractor:
favorite by the video uploader favorite by the video uploader
* "is_pinned" - Whether the comment is pinned to * "is_pinned" - Whether the comment is pinned to
the top of the comments the top of the comments
* "start_time" - Start time (in seconds) for displaying the comment
* "end_time" - End time (in seconds) for displaying the comment
age_limit: Age restriction for the video, as an integer (years) age_limit: Age restriction for the video, as an integer (years)
webpage_url: The URL to the video webpage, if given to yt-dlp it webpage_url: The URL to the video webpage, if given to yt-dlp it
should allow to get the same result again. (It will be set should allow to get the same result again. (It will be set

View File

@ -429,12 +429,55 @@ class SoundcloudBaseIE(InfoExtractor):
'tags': traverse_obj(info, ('tag_list', {self._TAGS_RE.findall}, ..., ..., filter)), 'tags': traverse_obj(info, ('tag_list', {self._TAGS_RE.findall}, ..., ..., filter)),
'artists': traverse_obj(info, ('publisher_metadata', 'artist', {str}, filter, all, filter)), 'artists': traverse_obj(info, ('publisher_metadata', 'artist', {str}, filter, all, filter)),
'formats': formats if not extract_flat else None, 'formats': formats if not extract_flat else None,
'__post_extractor': self.extract_comments(track_id),
} }
@classmethod @classmethod
def _resolv_url(cls, url): def _resolv_url(cls, url):
return cls._API_V2_BASE + 'resolve?url=' + url return cls._API_V2_BASE + 'resolve?url=' + url
def _get_comments(self, track_id):
    """Yield one info dict per comment on the track ``track_id``.

    The sort order is read from the ``comments_sort_by`` extractor argument
    (``ie_key='soundcloud'``). When it is unset, the first entry of the
    supported filters is used and a hint on how to configure it is printed.

    Pages are requested from ``tracks/<track_id>/comments`` and followed
    through the ``next_href`` value of each response until it is missing.

    Raises:
        ExtractorError: if ``comments_sort_by`` is set to an unsupported filter.
    """
    sort_options = ('newest', 'oldest', 'track-timestamp')
    sort_by = self._configuration_arg('comments_sort_by', default=[None], ie_key='soundcloud')[0]

    # Reject an explicitly requested but unknown filter up front
    if sort_by and sort_by not in sort_options:
        raise ExtractorError(f'Invalid comments_sort_by filter: {sort_by}', expected=True)

    if sort_by:
        self.to_screen(f'Sorting comments by {sort_by}')
    else:
        sort_by = sort_options[0]
        self.to_screen(
            f'Defaulting to sort comments by {sort_by}. '
            f'Configure this with --extractor-args soundcloud:comments_sort_by=FILTER . '
            f'Available filters: {", ".join(sort_options)}')

    first_page_query = {
        'sort': sort_by,
        'limit': '20',
        'offset': '0',
        'threaded': '1',
    }
    next_url = update_url_query(f'{self._API_V2_BASE}tracks/{track_id}/comments', first_page_query)

    page_num = 0
    while next_url:
        page_num += 1
        page = self._call_api(next_url, track_id, note=f'Downloading comments page {page_num}')

        # Only entries of 'collection' that carry an integer 'id' are kept
        comments = traverse_obj(page, ('collection', lambda _, v: isinstance(v['id'], int)))
        for comment in comments:
            yield traverse_obj(comment, {
                'id': ('id', {int}, {str_or_none}),
                'author_id': ('user', 'id', {int}, {str_or_none}),
                'author': ('user', 'username', {str}),
                'author_thumbnail': ('user', 'avatar_url', {url_or_none}),
                'author_url': ('user', 'permalink_url', {url_or_none}),
                'author_is_verified': ('user', 'verified', {bool}),
                'timestamp': ('created_at', {unified_timestamp}),
                'text': ('body', {str}),
                # Both fields come from the same API 'timestamp' value,
                # passed through float_or_none(scale=1000)
                # NOTE(review): presumably milliseconds -> seconds; confirm
                'start_time': ('timestamp', {float_or_none(scale=1000)}),
                'end_time': ('timestamp', {float_or_none(scale=1000)}),
            })

        # A missing/empty 'next_href' ends the pagination
        next_url = page.get('next_href')
class SoundcloudIE(SoundcloudBaseIE): class SoundcloudIE(SoundcloudBaseIE):
"""Information extractor for soundcloud.com """Information extractor for soundcloud.com
@ -460,10 +503,9 @@ class SoundcloudIE(SoundcloudBaseIE):
IE_NAME = 'soundcloud' IE_NAME = 'soundcloud'
_TESTS = [{ _TESTS = [{
'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy', 'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
'md5': 'de9bac153e7427a7333b4b0c1b6a18d2',
'info_dict': { 'info_dict': {
'id': '62986583', 'id': '62986583',
'ext': 'opus', 'ext': 'm4a',
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
'track': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', 'track': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
'description': 'md5:7b6074e00887ad79f59b647c8fb6d5ae', 'description': 'md5:7b6074e00887ad79f59b647c8fb6d5ae',
@ -471,7 +513,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'uploader_id': '1571244', 'uploader_id': '1571244',
'timestamp': 1349920598, 'timestamp': 1349920598,
'upload_date': '20121011', 'upload_date': '20121011',
'duration': 143.216, 'duration': 143.206,
'license': 'all-rights-reserved', 'license': 'all-rights-reserved',
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
@ -481,31 +523,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'uploader_url': 'https://soundcloud.com/ethmusic', 'uploader_url': 'https://soundcloud.com/ethmusic',
'tags': 'count:14', 'tags': 'count:14',
}, },
}, { 'params': {'skip_download': 'm3u8'},
# Geo-restricted
'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
'info_dict': {
'id': '47127627',
'ext': 'opus',
'title': 'Goldrushed',
'track': 'Goldrushed',
'description': 'md5:c0080b79a3710811d60234f94f391a40',
'uploader': 'The Royal Concept',
'uploader_id': '9615865',
'timestamp': 1337635207,
'upload_date': '20120521',
'duration': 227.103,
'license': 'all-rights-reserved',
'view_count': int,
'like_count': int,
'comment_count': int,
'repost_count': int,
'uploader_url': 'https://soundcloud.com/the-concept-band',
'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)',
'genres': ['Alternative'],
'artists': ['The Royal Concept'],
'tags': [],
},
}, { }, {
# private link # private link
'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp', 'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp',
@ -559,10 +577,9 @@ class SoundcloudIE(SoundcloudBaseIE):
}, { }, {
# downloadable song # downloadable song
'url': 'https://soundcloud.com/the80m/the-following', 'url': 'https://soundcloud.com/the80m/the-following',
'md5': 'ecb87d7705d5f53e6c02a63760573c75', # wav: '9ffcddb08c87d74fb5808a3c183a1d04'
'info_dict': { 'info_dict': {
'id': '343609555', 'id': '343609555',
'ext': 'opus', # wav original available with auth 'ext': 'm4a', # wav original available with auth
'title': 'The Following', 'title': 'The Following',
'track': 'The Following', 'track': 'The Following',
'description': '', 'description': '',
@ -571,7 +588,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'uploader_url': 'https://soundcloud.com/the80m', 'uploader_url': 'https://soundcloud.com/the80m',
'upload_date': '20170922', 'upload_date': '20170922',
'timestamp': 1506120436, 'timestamp': 1506120436,
'duration': 397.228, 'duration': 397.175,
'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)', 'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)',
'license': 'all-rights-reserved', 'license': 'all-rights-reserved',
'like_count': int, 'like_count': int,
@ -582,15 +599,16 @@ class SoundcloudIE(SoundcloudBaseIE):
'artists': ['80M'], 'artists': ['80M'],
'tags': 'count:4', 'tags': 'count:4',
}, },
'params': {'skip_download': 'm3u8'},
'expected_warnings': ['Original download format is only available for registered users'], 'expected_warnings': ['Original download format is only available for registered users'],
}, { }, {
# private link, downloadable format # private link, downloadable format
# tags with spaces (e.g. "Uplifting Trance", "Ori Uplift") # tags with spaces (e.g. "Uplifting Trance", "Ori Uplift")
'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd', 'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd',
'md5': '2e1530d0e9986a833a67cb34fc90ece0', # wav: '64a60b16e617d41d0bef032b7f55441e' 'md5': '59ed00579679f3b660dfdb8d2dbf8589', # wav: '64a60b16e617d41d0bef032b7f55441e'
'info_dict': { 'info_dict': {
'id': '340344461', 'id': '340344461',
'ext': 'opus', # wav original available with auth 'ext': 'mp3', # wav original available with auth
'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]', 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
'track': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]', 'track': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366', 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
@ -617,7 +635,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'md5': '59c7872bc44e5d99b7211891664760c2', 'md5': '59c7872bc44e5d99b7211891664760c2',
'info_dict': { 'info_dict': {
'id': '309699954', 'id': '309699954',
'ext': 'mp3', 'ext': 'm4a',
'title': 'Sideways (Prod. Mad Real)', 'title': 'Sideways (Prod. Mad Real)',
'track': 'Sideways (Prod. Mad Real)', 'track': 'Sideways (Prod. Mad Real)',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
@ -625,7 +643,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'uploader_id': '2366352', 'uploader_id': '2366352',
'timestamp': 1488152409, 'timestamp': 1488152409,
'upload_date': '20170226', 'upload_date': '20170226',
'duration': 207.012, 'duration': 206.988,
'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)', 'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)',
'license': 'all-rights-reserved', 'license': 'all-rights-reserved',
'view_count': int, 'view_count': int,
@ -699,6 +717,30 @@ class SoundcloudIE(SoundcloudBaseIE):
], ],
}, },
'params': {'skip_download': 'm3u8', 'format': 'hls_aac_160k'}, 'params': {'skip_download': 'm3u8', 'format': 'hls_aac_160k'},
}, {
'url': 'https://soundcloud.com/user615617514/dagames',
'info_dict': {
'id': '309858375',
'ext': 'm4a',
'title': 'BENDY AND THE INK MACHINE SONG (Build Our Machine) INSTRUMENTAL by DAGAMES',
'description': '',
'uploader': 'xXwolffykittyXx',
'uploader_id': '157677999',
'uploader_url': 'https://soundcloud.com/user615617514',
'comment_count': int,
'view_count': int,
'like_count': int,
'repost_count': int,
'duration': 241.601,
'thumbnail': 'https://i1.sndcdn.com/artworks-000209893581-orfv6t-original.jpg',
'tags': [],
'artists': ['BENDY AND THE INK MACHINE SONG (Build Our Machine) INSTRUMENTAL '],
'track': 'BENDY AND THE INK MACHINE SONG (Build Our Machine) INSTRUMENTAL by DAGAMES',
'timestamp': 1488232827,
'upload_date': '20170227',
'license': 'all-rights-reserved',
},
'params': {'get_comments': True, 'skip_download': 'm3u8'},
}, { }, {
# AAC HQ format available (account with active subscription needed) # AAC HQ format available (account with active subscription needed)
'url': 'https://soundcloud.com/wandw/the-chainsmokers-ft-daya-dont-let-me-down-ww-remix-1', 'url': 'https://soundcloud.com/wandw/the-chainsmokers-ft-daya-dont-let-me-down-ww-remix-1',