From b9f04eb20c2783da1a6c96c006f91be47d5847cf Mon Sep 17 00:00:00 2001
From: Simon Sawicki
Date: Tue, 26 Nov 2024 00:58:57 +0100
Subject: [PATCH] [ie] Handle fragmented formats in `_remove_duplicate_formats`

Fragmented formats frequently share the same `url`, so deduplicating
them by `url` alone drops distinct formats. Compare them by the set of
their resolved fragment URLs instead. Fragment entries are dicts and
therefore cannot be hashed directly; each entry is reduced to its `url`,
or to its `path` joined onto `fragment_base_url`.

---
Note: the hunk below was revised during review; the post-image blob id on
the `index` line has not been recomputed.

 yt_dlp/extractor/common.py | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 28a3adf936..d9236c8239 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1854,12 +1854,31 @@ class InfoExtractor:
 
     @staticmethod
     def _remove_duplicate_formats(formats):
-        format_urls = set()
+        """Deduplicate `formats` in place, keeping the first occurrence.
+
+        Formats whose `fragments` is a non-empty, non-callable iterable are
+        compared by the set of their fragment URLs; all others by their `url`.
+        """
+        seen_urls = set()
+        seen_fragments = set()
         unique_formats = []
         for f in formats:
-            if f['url'] not in format_urls:
-                format_urls.add(f['url'])
+            fragments = f.get('fragments')
+            if fragments and not callable(fragments):
+                # Fragment entries are dicts and thus unhashable,
+                # so compare formats by their resolved fragment URLs
+                base_url = f.get('fragment_base_url')
+                fragment_urls = frozenset(
+                    fragment.get('url') or urljoin(base_url, fragment.get('path'))
+                    for fragment in fragments)
+                if fragment_urls not in seen_fragments:
+                    seen_fragments.add(fragment_urls)
+                    unique_formats.append(f)
+
+            elif f['url'] not in seen_urls:
+                seen_urls.add(f['url'])
                 unique_formats.append(f)
+
         formats[:] = unique_formats
 
     def _is_valid_url(self, url, video_id, item='video', headers={}):