[ie/bitchute] Fix extractor

Authored by: bashonly
This commit is contained in:
bashonly 2025-05-02 20:56:35 -05:00
parent b77e5a553a
commit 55ea84864f
No known key found for this signature in database
GPG Key ID: 783F096F253D15B0

View File

@ -1,4 +1,5 @@
import functools import functools
import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@ -7,20 +8,19 @@ from ..utils import (
ExtractorError, ExtractorError,
OnDemandPagedList, OnDemandPagedList,
clean_html, clean_html,
extract_attributes, determine_ext,
get_element_by_class, get_element_by_class,
get_element_by_id,
get_element_html_by_class,
get_elements_html_by_class, get_elements_html_by_class,
int_or_none, int_or_none,
orderedSet, orderedSet,
parse_count, parse_count,
parse_duration, parse_duration,
traverse_obj, parse_iso8601,
unified_strdate, url_or_none,
urlencode_postdata, urlencode_postdata,
urljoin, urljoin,
) )
from ..utils.traversal import traverse_obj
class BitChuteIE(InfoExtractor): class BitChuteIE(InfoExtractor):
@ -34,12 +34,17 @@ class BitChuteIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'This is the first video on #BitChute !', 'title': 'This is the first video on #BitChute !',
'description': 'md5:a0337e7b1fe39e32336974af8173a034', 'description': 'md5:a0337e7b1fe39e32336974af8173a034',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:https?://.+/.+\.jpg$',
'uploader': 'BitChute', 'uploader': 'BitChute',
'upload_date': '20170103', 'upload_date': '20170103',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/', 'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
'channel': 'BitChute', 'channel': 'BitChute',
'channel_url': 'https://www.bitchute.com/channel/bitchute/', 'channel_url': 'https://www.bitchute.com/channel/bitchute/',
'uploader_id': 'I5NgtHZn9vPj',
'channel_id': '1VBwRfyNcKdX',
'view_count': int,
'duration': 16.0,
'timestamp': 1483425443,
}, },
}, { }, {
# test case: video with different channel and uploader # test case: video with different channel and uploader
@ -49,13 +54,18 @@ class BitChuteIE(InfoExtractor):
'id': 'Yti_j9A-UZ4', 'id': 'Yti_j9A-UZ4',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Israel at War | Full Measure', 'title': 'Israel at War | Full Measure',
'description': 'md5:38cf7bc6f42da1a877835539111c69ef', 'description': 'md5:e60198b89971966d6030d22b3268f08f',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:https?://.+/.+\.jpg$',
'uploader': 'sharylattkisson', 'uploader': 'sharylattkisson',
'upload_date': '20231106', 'upload_date': '20231106',
'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/', 'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/',
'channel': 'Full Measure with Sharyl Attkisson', 'channel': 'Full Measure with Sharyl Attkisson',
'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/', 'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/',
'uploader_id': '9K0kUWA9zmd9',
'channel_id': 'NpdxoCRv3ZLb',
'view_count': int,
'duration': 554.0,
'timestamp': 1699296106,
}, },
}, { }, {
# video not downloadable in browser, but we can recover it # video not downloadable in browser, but we can recover it
@ -66,13 +76,19 @@ class BitChuteIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'filesize': 71537926, 'filesize': 71537926,
'title': 'STYXHEXENHAMMER666 - Election Fraud, Clinton 2020, EU Armies, and Gun Control', 'title': 'STYXHEXENHAMMER666 - Election Fraud, Clinton 2020, EU Armies, and Gun Control',
'description': 'md5:228ee93bd840a24938f536aeac9cf749', 'description': 'md5:2029c7c212ccd4b040f52bb2d036ef4e',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:https?://.+/.+\.jpg$',
'uploader': 'BitChute', 'uploader': 'BitChute',
'upload_date': '20181113', 'upload_date': '20181113',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/', 'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
'channel': 'BitChute', 'channel': 'BitChute',
'channel_url': 'https://www.bitchute.com/channel/bitchute/', 'channel_url': 'https://www.bitchute.com/channel/bitchute/',
'uploader_id': 'I5NgtHZn9vPj',
'channel_id': '1VBwRfyNcKdX',
'view_count': int,
'duration': 1701.0,
'tags': ['bitchute'],
'timestamp': 1542130287,
}, },
'params': {'check_formats': None}, 'params': {'check_formats': None},
}, { }, {
@ -82,6 +98,18 @@ class BitChuteIE(InfoExtractor):
'id': 'WEnQU7XGcTdl', 'id': 'WEnQU7XGcTdl',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Impartial Truth - Ein Letzter Appell an die Vernunft', 'title': 'Impartial Truth - Ein Letzter Appell an die Vernunft',
'description': 'md5:e7e8390ab79d2c84f3f5d068ed333535',
'uploader': 'Freier_Mann',
'uploader_id': 'OBhKX1Ss0jyL',
'uploader_url': 'https://www.bitchute.com/profile/OBhKX1Ss0jyL/',
'channel': 'Der Freie',
'channel_id': 'dV8xxWKIxSVU',
'channel_url': 'https://www.bitchute.com/channel/freier_mann/',
'view_count': int,
'duration': 4806.0,
'thumbnail': r're:https?://.+/.+\.jpg$',
'timestamp': 1609918804,
'upload_date': '20210106',
}, },
'params': {'skip_download': True}, 'params': {'skip_download': True},
'skip': 'Georestricted in DE', 'skip': 'Georestricted in DE',
@ -97,11 +125,6 @@ class BitChuteIE(InfoExtractor):
}] }]
_GEO_BYPASS = False _GEO_BYPASS = False
_HEADERS = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
'Referer': 'https://www.bitchute.com/',
}
def _check_format(self, video_url, video_id): def _check_format(self, video_url, video_id):
urls = orderedSet( urls = orderedSet(
re.sub(r'(^https?://)(seed\d+)(?=\.bitchute\.com)', fr'\g<1>{host}', video_url) re.sub(r'(^https?://)(seed\d+)(?=\.bitchute\.com)', fr'\g<1>{host}', video_url)
@ -112,7 +135,7 @@ class BitChuteIE(InfoExtractor):
for url in urls: for url in urls:
try: try:
response = self._request_webpage( response = self._request_webpage(
HEADRequest(url), video_id=video_id, note=f'Checking {url}', headers=self._HEADERS) HEADRequest(url), video_id=video_id, note=f'Checking {url}')
except ExtractorError as e: except ExtractorError as e:
self.to_screen(f'{video_id}: URL is invalid, skipping: {e.cause}') self.to_screen(f'{video_id}: URL is invalid, skipping: {e.cause}')
continue continue
@ -121,54 +144,69 @@ class BitChuteIE(InfoExtractor):
'filesize': int_or_none(response.headers.get('Content-Length')), 'filesize': int_or_none(response.headers.get('Content-Length')),
} }
def _raise_if_restricted(self, webpage): def _call_api(self, endpoint, data, display_id, fatal=True):
page_title = clean_html(get_element_by_class('page-title', webpage)) or '' note = endpoint.rpartition('/')[2]
if re.fullmatch(r'(?:Channel|Video) Restricted', page_title): # TODO: Add error handling for geo-restriction
reason = clean_html(get_element_by_id('page-detail', webpage)) or page_title return self._download_json(
self.raise_geo_restricted(reason) f'https://api.bitchute.com/api/beta/{endpoint}', display_id,
f'Downloading {note} API JSON', f'Unable to download {note} API JSON',
@staticmethod fatal=fatal, data=json.dumps(data).encode(),
def _make_url(html): headers={
path = extract_attributes(get_element_html_by_class('spa', html) or '').get('href') 'Accept': 'application/json',
return urljoin('https://www.bitchute.com', path) 'Content-Type': 'application/json',
})
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage( data = {'video_id': video_id}
f'https://old.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS) media_url = self._call_api('video/media', data, video_id)['media_url']
self._raise_if_restricted(webpage)
publish_date = clean_html(get_element_by_class('video-publish-date', webpage))
entries = self._parse_html5_media_entries(url, webpage, video_id)
formats = [] formats = []
for format_ in traverse_obj(entries, (0, 'formats', ...)): if determine_ext(media_url) == 'm3u8':
formats.extend(
self._extract_m3u8_formats(media_url, video_id, 'mp4', m3u8_id='hls', live=True))
else:
if self.get_param('check_formats') is not False: if self.get_param('check_formats') is not False:
format_.update(self._check_format(format_.pop('url'), video_id) or {}) if fmt := self._check_format(media_url, video_id):
if 'url' not in format_: formats.append(fmt)
continue else:
formats.append(format_) formats.append({'url': media_url})
if not formats: if not formats:
self.raise_no_formats( self.raise_no_formats(
'Video is unavailable. Please make sure this video is playable in the browser ' 'Video is unavailable. Please make sure this video is playable in the browser '
'before reporting this issue.', expected=True, video_id=video_id) 'before reporting this issue.', expected=True, video_id=video_id)
details = get_element_by_class('details', webpage) or '' video = self._call_api('video', data, video_id, fatal=False)
uploader_html = get_element_html_by_class('creator', details) or '' channel = None
channel_html = get_element_html_by_class('name', details) or '' if channel_id := traverse_obj(video, ('channel', 'channel_id', {str})):
channel = self._call_api('channel', {'channel_id': channel_id}, video_id, fatal=False)
return { return {
**traverse_obj(video, {
'title': ('video_name', {str}),
'description': ('description', {str}),
'thumbnail': ('thumbnail_url', {url_or_none}),
'channel': ('channel', 'channel_name', {str}),
'channel_id': ('channel', 'channel_id', {str}),
'channel_url': ('channel', 'channel_url', {urljoin('https://www.bitchute.com/')}),
'uploader_id': ('profile_id', {str}),
'uploader_url': ('profile_id', {lambda x: f'https://www.bitchute.com/profile/{x}/' if x else None}),
'timestamp': ('date_published', {parse_iso8601}),
'duration': ('duration', {parse_duration}),
'tags': ('hashtags', ..., {str}, filter, all, filter),
'view_count': ('view_count', {int_or_none}),
'is_live': ('state_id', {lambda x: x == 'live'}),
}),
**traverse_obj(channel, {
'channel': ('channel_name', {str}),
'channel_id': ('channel_id', {str}),
'channel_url': ('url_slug', {lambda x: f'https://www.bitchute.com/channel/{x}/' if x else None}),
'uploader': ('profile_name', {str}),
'uploader_id': ('profile_id', {str}),
'uploader_url': ('profile_id', {lambda x: f'https://www.bitchute.com/profile/{x}/' if x else None}),
}),
'id': video_id, 'id': video_id,
'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
'description': self._og_search_description(webpage, default=None),
'thumbnail': self._og_search_thumbnail(webpage),
'uploader': clean_html(uploader_html),
'uploader_url': self._make_url(uploader_html),
'channel': clean_html(channel_html),
'channel_url': self._make_url(channel_html),
'upload_date': unified_strdate(self._search_regex(
r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False)),
'formats': formats, 'formats': formats,
} }
@ -190,7 +228,7 @@ class BitChuteChannelIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'This is the first video on #BitChute !', 'title': 'This is the first video on #BitChute !',
'description': 'md5:a0337e7b1fe39e32336974af8173a034', 'description': 'md5:a0337e7b1fe39e32336974af8173a034',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:https?://.+/.+\.jpg$',
'uploader': 'BitChute', 'uploader': 'BitChute',
'upload_date': '20170103', 'upload_date': '20170103',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/', 'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
@ -198,6 +236,9 @@ class BitChuteChannelIE(InfoExtractor):
'channel_url': 'https://www.bitchute.com/channel/bitchute/', 'channel_url': 'https://www.bitchute.com/channel/bitchute/',
'duration': 16, 'duration': 16,
'view_count': int, 'view_count': int,
'uploader_id': 'I5NgtHZn9vPj',
'channel_id': '1VBwRfyNcKdX',
'timestamp': 1483425443,
}, },
}, },
], ],
@ -213,6 +254,7 @@ class BitChuteChannelIE(InfoExtractor):
'title': 'Bruce MacDonald and "The Light of Darkness"', 'title': 'Bruce MacDonald and "The Light of Darkness"',
'description': 'md5:747724ef404eebdfc04277714f81863e', 'description': 'md5:747724ef404eebdfc04277714f81863e',
}, },
'skip': '404 Not Found',
}, { }, {
'url': 'https://old.bitchute.com/playlist/wV9Imujxasw9/', 'url': 'https://old.bitchute.com/playlist/wV9Imujxasw9/',
'only_matching': True, 'only_matching': True,