From 07154c793065bca816793186590d8d6461e07478 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 22 Oct 2019 17:53:47 +0100 Subject: [PATCH] [facebook] extract subtitles(closes #22777) --- youtube_dl/extractor/ceskatelevize.py | 2 ++ youtube_dl/extractor/facebook.py | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 1ec58f7d8..7cb4efb74 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -147,6 +147,8 @@ class CeskaTelevizeIE(InfoExtractor): is_live = item.get('type') == 'LIVE' formats = [] for format_id, stream_url in item.get('streamUrls', {}).items(): + if 'drmOnly=true' in stream_url: + continue if 'playerType=flash' in stream_url: stream_formats = self._extract_m3u8_formats( stream_url, playlist_id, 'mp4', 'm3u8_native', diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index a56f85c21..c723726b7 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -379,6 +379,7 @@ class FacebookIE(InfoExtractor): if not video_data: raise ExtractorError('Cannot parse data') + subtitles = {} formats = [] for f in video_data: format_id = f['stream_type'] @@ -402,6 +403,9 @@ class FacebookIE(InfoExtractor): if dash_manifest: formats.extend(self._parse_mpd_formats( compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest)))) + subtitles_src = f[0].get('subtitles_src') + if subtitles_src: + subtitles.setdefault('en', []).append({'url': subtitles_src}) if not formats: raise ExtractorError('Cannot find video formats') @@ -447,6 +451,7 @@ class FacebookIE(InfoExtractor): 'timestamp': timestamp, 'thumbnail': thumbnail, 'view_count': view_count, + 'subtitles': subtitles, } return webpage, info_dict