From 785e507ef06b709f7f68744f29c16a5cdb8942f2 Mon Sep 17 00:00:00 2001 From: 0xvd <199783523+0xvd@users.noreply.github.com> Date: Sun, 28 Jun 2026 03:43:07 +0530 Subject: [PATCH] [ie/soundcloud] Extract comments (#16938) Closes #16717 Authored by: 0xvd --- yt_dlp/extractor/common.py | 2 + yt_dlp/extractor/soundcloud.py | 112 ++++++++++++++++++++++----------- 2 files changed, 79 insertions(+), 35 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 048c542858..8f5c3a3c64 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -375,6 +375,8 @@ class InfoExtractor: favorite by the video uploader * "is_pinned" - Whether the comment is pinned to the top of the comments + * "start_time" - Start time (in seconds) for displaying the comment + * "end_time" - End time (in seconds) for displaying the comment age_limit: Age restriction for the video, as an integer (years) webpage_url: The URL to the video webpage, if given to yt-dlp it should allow to get the same result again. (It will be set diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index 3d95b25403..a0f302f44c 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -429,12 +429,55 @@ class SoundcloudBaseIE(InfoExtractor): 'tags': traverse_obj(info, ('tag_list', {self._TAGS_RE.findall}, ..., ..., filter)), 'artists': traverse_obj(info, ('publisher_metadata', 'artist', {str}, filter, all, filter)), 'formats': formats if not extract_flat else None, + '__post_extractor': self.extract_comments(track_id), } @classmethod def _resolv_url(cls, url): return cls._API_V2_BASE + 'resolve?url=' + url + def _get_comments(self, track_id): + available_filters = ('newest', 'oldest', 'track-timestamp') + sort_by = self._configuration_arg('comments_sort_by', default=[None], ie_key='soundcloud')[0] + if not sort_by: + sort_by = available_filters[0] + self.to_screen( + f'Defaulting to sort comments by {sort_by}. ' + f'Configure this with --extractor-args soundcloud:comments_sort_by=FILTER . ' + f'Available filters: {", ".join(available_filters)}') + elif sort_by not in available_filters: + raise ExtractorError(f'Invalid comments_sort_by filter: {sort_by}', expected=True) + else: + self.to_screen(f'Sorting comments by {sort_by}') + + next_url = update_url_query( + f'{self._API_V2_BASE}tracks/{track_id}/comments', { + 'sort': sort_by, + 'limit': '20', + 'offset': '0', + 'threaded': '1', + }) + for page_num in itertools.count(1): + page = self._call_api(next_url, track_id, note=f'Downloading comments page {page_num}') + + for comment_dict in traverse_obj(page, ('collection', lambda _, v: isinstance(v['id'], int))): + yield traverse_obj(comment_dict, { + 'id': ('id', {int}, {str_or_none}), + 'author_id': ('user', 'id', {int}, {str_or_none}), + 'author': ('user', 'username', {str}), + 'author_thumbnail': ('user', 'avatar_url', {url_or_none}), + 'author_url': ('user', 'permalink_url', {url_or_none}), + 'author_is_verified': ('user', 'verified', {bool}), + 'timestamp': ('created_at', {unified_timestamp}), + 'text': ('body', {str}), + 'start_time': ('timestamp', {float_or_none(scale=1000)}), + 'end_time': ('timestamp', {float_or_none(scale=1000)}), + }) + + next_url = page.get('next_href') + if not next_url: + break + class SoundcloudIE(SoundcloudBaseIE): """Information extractor for soundcloud.com @@ -460,10 +503,9 @@ class SoundcloudIE(SoundcloudBaseIE): IE_NAME = 'soundcloud' _TESTS = [{ 'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy', - 'md5': 'de9bac153e7427a7333b4b0c1b6a18d2', 'info_dict': { 'id': '62986583', - 'ext': 'opus', + 'ext': 'm4a', 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', 'track': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', 'description': 'md5:7b6074e00887ad79f59b647c8fb6d5ae', @@ -471,7 +513,7 @@ class SoundcloudIE(SoundcloudBaseIE): 'uploader_id': '1571244', 'timestamp': 1349920598, 'upload_date': '20121011', - 'duration': 143.216, + 'duration': 143.206, 'license': 'all-rights-reserved', 'view_count': int, 'like_count': int, @@ -481,31 +523,7 @@ class SoundcloudIE(SoundcloudBaseIE): 'uploader_url': 'https://soundcloud.com/ethmusic', 'tags': 'count:14', }, - }, { - # Geo-restricted - 'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep', - 'info_dict': { - 'id': '47127627', - 'ext': 'opus', - 'title': 'Goldrushed', - 'track': 'Goldrushed', - 'description': 'md5:c0080b79a3710811d60234f94f391a40', - 'uploader': 'The Royal Concept', - 'uploader_id': '9615865', - 'timestamp': 1337635207, - 'upload_date': '20120521', - 'duration': 227.103, - 'license': 'all-rights-reserved', - 'view_count': int, - 'like_count': int, - 'comment_count': int, - 'repost_count': int, - 'uploader_url': 'https://soundcloud.com/the-concept-band', - 'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)', - 'genres': ['Alternative'], - 'artists': ['The Royal Concept'], - 'tags': [], - }, + 'params': {'skip_download': 'm3u8'}, }, { # private link 'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp', @@ -559,10 +577,9 @@ class SoundcloudIE(SoundcloudBaseIE): }, { # downloadable song 'url': 'https://soundcloud.com/the80m/the-following', - 'md5': 'ecb87d7705d5f53e6c02a63760573c75', # wav: '9ffcddb08c87d74fb5808a3c183a1d04' 'info_dict': { 'id': '343609555', - 'ext': 'opus', # wav original available with auth + 'ext': 'm4a', # wav original available with auth 'title': 'The Following', 'track': 'The Following', 'description': '', @@ -571,7 +588,7 @@ class SoundcloudIE(SoundcloudBaseIE): 'uploader_url': 'https://soundcloud.com/the80m', 'upload_date': '20170922', 'timestamp': 1506120436, - 'duration': 397.228, + 'duration': 397.175, 'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)', 'license': 'all-rights-reserved', 'like_count': int, @@ -582,15 +599,16 @@ class SoundcloudIE(SoundcloudBaseIE): 'artists': ['80M'], 'tags': 'count:4', }, + 'params': {'skip_download': 'm3u8'}, 'expected_warnings': ['Original download format is only available for registered users'], }, { # private link, downloadable format # tags with spaces (e.g. "Uplifting Trance", "Ori Uplift") 'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd', - 'md5': '2e1530d0e9986a833a67cb34fc90ece0', # wav: '64a60b16e617d41d0bef032b7f55441e' + 'md5': '59ed00579679f3b660dfdb8d2dbf8589', # wav: '64a60b16e617d41d0bef032b7f55441e' 'info_dict': { 'id': '340344461', - 'ext': 'opus', # wav original available with auth + 'ext': 'mp3', # wav original available with auth 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]', 'track': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]', 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366', @@ -617,7 +635,7 @@ class SoundcloudIE(SoundcloudBaseIE): 'md5': '59c7872bc44e5d99b7211891664760c2', 'info_dict': { 'id': '309699954', - 'ext': 'mp3', + 'ext': 'm4a', 'title': 'Sideways (Prod. Mad Real)', 'track': 'Sideways (Prod. Mad Real)', 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', @@ -625,7 +643,7 @@ class SoundcloudIE(SoundcloudBaseIE): 'uploader_id': '2366352', 'timestamp': 1488152409, 'upload_date': '20170226', - 'duration': 207.012, + 'duration': 206.988, 'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)', 'license': 'all-rights-reserved', 'view_count': int, @@ -699,6 +717,30 @@ class SoundcloudIE(SoundcloudBaseIE): ], }, 'params': {'skip_download': 'm3u8', 'format': 'hls_aac_160k'}, + }, { + 'url': 'https://soundcloud.com/user615617514/dagames', + 'info_dict': { + 'id': '309858375', + 'ext': 'm4a', + 'title': 'BENDY AND THE INK MACHINE SONG (Build Our Machine) INSTRUMENTAL by DAGAMES', + 'description': '', + 'uploader': 'xXwolffykittyXx', + 'uploader_id': '157677999', + 'uploader_url': 'https://soundcloud.com/user615617514', + 'comment_count': int, + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'duration': 241.601, + 'thumbnail': 'https://i1.sndcdn.com/artworks-000209893581-orfv6t-original.jpg', + 'tags': [], + 'artists': ['BENDY AND THE INK MACHINE SONG (Build Our Machine) INSTRUMENTAL '], + 'track': 'BENDY AND THE INK MACHINE SONG (Build Our Machine) INSTRUMENTAL by DAGAMES', + 'timestamp': 1488232827, + 'upload_date': '20170227', + 'license': 'all-rights-reserved', + }, + 'params': {'get_comments': True, 'skip_download': 'm3u8'}, }, { # AAC HQ format available (account with active subscription needed) 'url': 'https://soundcloud.com/wandw/the-chainsmokers-ft-daya-dont-let-me-down-ww-remix-1',