mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-07-02 16:28:52 +00:00
[ie/soundcloud] Extract comments (#16938)
Closes #16717 Authored by: 0xvd
This commit is contained in:
parent
6a24c96f7f
commit
785e507ef0
@ -375,6 +375,8 @@ class InfoExtractor:
|
|||||||
favorite by the video uploader
|
favorite by the video uploader
|
||||||
* "is_pinned" - Whether the comment is pinned to
|
* "is_pinned" - Whether the comment is pinned to
|
||||||
the top of the comments
|
the top of the comments
|
||||||
|
* "start_time" - Start time (in seconds) for displaying the comment
|
||||||
|
* "end_time" - End time (in seconds) for displaying the comment
|
||||||
age_limit: Age restriction for the video, as an integer (years)
|
age_limit: Age restriction for the video, as an integer (years)
|
||||||
webpage_url: The URL to the video webpage, if given to yt-dlp it
|
webpage_url: The URL to the video webpage, if given to yt-dlp it
|
||||||
should allow to get the same result again. (It will be set
|
should allow to get the same result again. (It will be set
|
||||||
|
|||||||
@ -429,12 +429,55 @@ class SoundcloudBaseIE(InfoExtractor):
|
|||||||
'tags': traverse_obj(info, ('tag_list', {self._TAGS_RE.findall}, ..., ..., filter)),
|
'tags': traverse_obj(info, ('tag_list', {self._TAGS_RE.findall}, ..., ..., filter)),
|
||||||
'artists': traverse_obj(info, ('publisher_metadata', 'artist', {str}, filter, all, filter)),
|
'artists': traverse_obj(info, ('publisher_metadata', 'artist', {str}, filter, all, filter)),
|
||||||
'formats': formats if not extract_flat else None,
|
'formats': formats if not extract_flat else None,
|
||||||
|
'__post_extractor': self.extract_comments(track_id),
|
||||||
}
|
}
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _resolv_url(cls, url):
|
def _resolv_url(cls, url):
|
||||||
return cls._API_V2_BASE + 'resolve?url=' + url
|
return cls._API_V2_BASE + 'resolve?url=' + url
|
||||||
|
|
||||||
|
def _get_comments(self, track_id):
|
||||||
|
available_filters = ('newest', 'oldest', 'track-timestamp')
|
||||||
|
sort_by = self._configuration_arg('comments_sort_by', default=[None], ie_key='soundcloud')[0]
|
||||||
|
if not sort_by:
|
||||||
|
sort_by = available_filters[0]
|
||||||
|
self.to_screen(
|
||||||
|
f'Defaulting to sort comments by {sort_by}. '
|
||||||
|
f'Configure this with --extractor-args soundcloud:comments_sort_by=FILTER . '
|
||||||
|
f'Available filters: {", ".join(available_filters)}')
|
||||||
|
elif sort_by not in available_filters:
|
||||||
|
raise ExtractorError(f'Invalid comments_sort_by filter: {sort_by}', expected=True)
|
||||||
|
else:
|
||||||
|
self.to_screen(f'Sorting comments by {sort_by}')
|
||||||
|
|
||||||
|
next_url = update_url_query(
|
||||||
|
f'{self._API_V2_BASE}tracks/{track_id}/comments', {
|
||||||
|
'sort': sort_by,
|
||||||
|
'limit': '20',
|
||||||
|
'offset': '0',
|
||||||
|
'threaded': '1',
|
||||||
|
})
|
||||||
|
for page_num in itertools.count(1):
|
||||||
|
page = self._call_api(next_url, track_id, note=f'Downloading comments page {page_num}')
|
||||||
|
|
||||||
|
for comment_dict in traverse_obj(page, ('collection', lambda _, v: isinstance(v['id'], int))):
|
||||||
|
yield traverse_obj(comment_dict, {
|
||||||
|
'id': ('id', {int}, {str_or_none}),
|
||||||
|
'author_id': ('user', 'id', {int}, {str_or_none}),
|
||||||
|
'author': ('user', 'username', {str}),
|
||||||
|
'author_thumbnail': ('user', 'avatar_url', {url_or_none}),
|
||||||
|
'author_url': ('user', 'permalink_url', {url_or_none}),
|
||||||
|
'author_is_verified': ('user', 'verified', {bool}),
|
||||||
|
'timestamp': ('created_at', {unified_timestamp}),
|
||||||
|
'text': ('body', {str}),
|
||||||
|
'start_time': ('timestamp', {float_or_none(scale=1000)}),
|
||||||
|
'end_time': ('timestamp', {float_or_none(scale=1000)}),
|
||||||
|
})
|
||||||
|
|
||||||
|
next_url = page.get('next_href')
|
||||||
|
if not next_url:
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
class SoundcloudIE(SoundcloudBaseIE):
|
class SoundcloudIE(SoundcloudBaseIE):
|
||||||
"""Information extractor for soundcloud.com
|
"""Information extractor for soundcloud.com
|
||||||
@ -460,10 +503,9 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||||||
IE_NAME = 'soundcloud'
|
IE_NAME = 'soundcloud'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
|
'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
|
||||||
'md5': 'de9bac153e7427a7333b4b0c1b6a18d2',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '62986583',
|
'id': '62986583',
|
||||||
'ext': 'opus',
|
'ext': 'm4a',
|
||||||
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
|
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
|
||||||
'track': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
|
'track': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
|
||||||
'description': 'md5:7b6074e00887ad79f59b647c8fb6d5ae',
|
'description': 'md5:7b6074e00887ad79f59b647c8fb6d5ae',
|
||||||
@ -471,7 +513,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||||||
'uploader_id': '1571244',
|
'uploader_id': '1571244',
|
||||||
'timestamp': 1349920598,
|
'timestamp': 1349920598,
|
||||||
'upload_date': '20121011',
|
'upload_date': '20121011',
|
||||||
'duration': 143.216,
|
'duration': 143.206,
|
||||||
'license': 'all-rights-reserved',
|
'license': 'all-rights-reserved',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
@ -481,31 +523,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||||||
'uploader_url': 'https://soundcloud.com/ethmusic',
|
'uploader_url': 'https://soundcloud.com/ethmusic',
|
||||||
'tags': 'count:14',
|
'tags': 'count:14',
|
||||||
},
|
},
|
||||||
}, {
|
'params': {'skip_download': 'm3u8'},
|
||||||
# Geo-restricted
|
|
||||||
'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '47127627',
|
|
||||||
'ext': 'opus',
|
|
||||||
'title': 'Goldrushed',
|
|
||||||
'track': 'Goldrushed',
|
|
||||||
'description': 'md5:c0080b79a3710811d60234f94f391a40',
|
|
||||||
'uploader': 'The Royal Concept',
|
|
||||||
'uploader_id': '9615865',
|
|
||||||
'timestamp': 1337635207,
|
|
||||||
'upload_date': '20120521',
|
|
||||||
'duration': 227.103,
|
|
||||||
'license': 'all-rights-reserved',
|
|
||||||
'view_count': int,
|
|
||||||
'like_count': int,
|
|
||||||
'comment_count': int,
|
|
||||||
'repost_count': int,
|
|
||||||
'uploader_url': 'https://soundcloud.com/the-concept-band',
|
|
||||||
'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)',
|
|
||||||
'genres': ['Alternative'],
|
|
||||||
'artists': ['The Royal Concept'],
|
|
||||||
'tags': [],
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
# private link
|
# private link
|
||||||
'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp',
|
'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp',
|
||||||
@ -559,10 +577,9 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
# downloadable song
|
# downloadable song
|
||||||
'url': 'https://soundcloud.com/the80m/the-following',
|
'url': 'https://soundcloud.com/the80m/the-following',
|
||||||
'md5': 'ecb87d7705d5f53e6c02a63760573c75', # wav: '9ffcddb08c87d74fb5808a3c183a1d04'
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '343609555',
|
'id': '343609555',
|
||||||
'ext': 'opus', # wav original available with auth
|
'ext': 'm4a', # wav original available with auth
|
||||||
'title': 'The Following',
|
'title': 'The Following',
|
||||||
'track': 'The Following',
|
'track': 'The Following',
|
||||||
'description': '',
|
'description': '',
|
||||||
@ -571,7 +588,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||||||
'uploader_url': 'https://soundcloud.com/the80m',
|
'uploader_url': 'https://soundcloud.com/the80m',
|
||||||
'upload_date': '20170922',
|
'upload_date': '20170922',
|
||||||
'timestamp': 1506120436,
|
'timestamp': 1506120436,
|
||||||
'duration': 397.228,
|
'duration': 397.175,
|
||||||
'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)',
|
'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)',
|
||||||
'license': 'all-rights-reserved',
|
'license': 'all-rights-reserved',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
@ -582,15 +599,16 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||||||
'artists': ['80M'],
|
'artists': ['80M'],
|
||||||
'tags': 'count:4',
|
'tags': 'count:4',
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
'expected_warnings': ['Original download format is only available for registered users'],
|
'expected_warnings': ['Original download format is only available for registered users'],
|
||||||
}, {
|
}, {
|
||||||
# private link, downloadable format
|
# private link, downloadable format
|
||||||
# tags with spaces (e.g. "Uplifting Trance", "Ori Uplift")
|
# tags with spaces (e.g. "Uplifting Trance", "Ori Uplift")
|
||||||
'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd',
|
'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd',
|
||||||
'md5': '2e1530d0e9986a833a67cb34fc90ece0', # wav: '64a60b16e617d41d0bef032b7f55441e'
|
'md5': '59ed00579679f3b660dfdb8d2dbf8589', # wav: '64a60b16e617d41d0bef032b7f55441e'
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '340344461',
|
'id': '340344461',
|
||||||
'ext': 'opus', # wav original available with auth
|
'ext': 'mp3', # wav original available with auth
|
||||||
'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
|
'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
|
||||||
'track': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
|
'track': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
|
||||||
'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
|
'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
|
||||||
@ -617,7 +635,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||||||
'md5': '59c7872bc44e5d99b7211891664760c2',
|
'md5': '59c7872bc44e5d99b7211891664760c2',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '309699954',
|
'id': '309699954',
|
||||||
'ext': 'mp3',
|
'ext': 'm4a',
|
||||||
'title': 'Sideways (Prod. Mad Real)',
|
'title': 'Sideways (Prod. Mad Real)',
|
||||||
'track': 'Sideways (Prod. Mad Real)',
|
'track': 'Sideways (Prod. Mad Real)',
|
||||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||||
@ -625,7 +643,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||||||
'uploader_id': '2366352',
|
'uploader_id': '2366352',
|
||||||
'timestamp': 1488152409,
|
'timestamp': 1488152409,
|
||||||
'upload_date': '20170226',
|
'upload_date': '20170226',
|
||||||
'duration': 207.012,
|
'duration': 206.988,
|
||||||
'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)',
|
'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)',
|
||||||
'license': 'all-rights-reserved',
|
'license': 'all-rights-reserved',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
@ -699,6 +717,30 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||||||
],
|
],
|
||||||
},
|
},
|
||||||
'params': {'skip_download': 'm3u8', 'format': 'hls_aac_160k'},
|
'params': {'skip_download': 'm3u8', 'format': 'hls_aac_160k'},
|
||||||
|
}, {
|
||||||
|
'url': 'https://soundcloud.com/user615617514/dagames',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '309858375',
|
||||||
|
'ext': 'm4a',
|
||||||
|
'title': 'BENDY AND THE INK MACHINE SONG (Build Our Machine) INSTRUMENTAL by DAGAMES',
|
||||||
|
'description': '',
|
||||||
|
'uploader': 'xXwolffykittyXx',
|
||||||
|
'uploader_id': '157677999',
|
||||||
|
'uploader_url': 'https://soundcloud.com/user615617514',
|
||||||
|
'comment_count': int,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
'duration': 241.601,
|
||||||
|
'thumbnail': 'https://i1.sndcdn.com/artworks-000209893581-orfv6t-original.jpg',
|
||||||
|
'tags': [],
|
||||||
|
'artists': ['BENDY AND THE INK MACHINE SONG (Build Our Machine) INSTRUMENTAL '],
|
||||||
|
'track': 'BENDY AND THE INK MACHINE SONG (Build Our Machine) INSTRUMENTAL by DAGAMES',
|
||||||
|
'timestamp': 1488232827,
|
||||||
|
'upload_date': '20170227',
|
||||||
|
'license': 'all-rights-reserved',
|
||||||
|
},
|
||||||
|
'params': {'get_comments': True, 'skip_download': 'm3u8'},
|
||||||
}, {
|
}, {
|
||||||
# AAC HQ format available (account with active subscription needed)
|
# AAC HQ format available (account with active subscription needed)
|
||||||
'url': 'https://soundcloud.com/wandw/the-chainsmokers-ft-daya-dont-let-me-down-ww-remix-1',
|
'url': 'https://soundcloud.com/wandw/the-chainsmokers-ft-daya-dont-let-me-down-ww-remix-1',
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user