From 3f7e8750d4658702b00237370da1a1c680fdb548 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 4 Feb 2016 20:16:47 +0100 Subject: [PATCH] [arte.tv:+7] Fix extraction (fixes #8427) --- youtube_dl/extractor/arte.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index b9e07f0ef..6ed855a57 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -13,6 +13,7 @@ from ..utils import ( unified_strdate, get_element_by_attribute, int_or_none, + NO_DEFAULT, qualities, ) @@ -93,9 +94,18 @@ class ArteTVPlus7IE(InfoExtractor): json_url = self._html_search_regex( patterns, webpage, 'json vp url', default=None) if not json_url: - iframe_url = self._html_search_regex( - r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1', - webpage, 'iframe url', group='url') + def find_iframe_url(webpage, default=NO_DEFAULT): + return self._html_search_regex( + r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1', + webpage, 'iframe url', group='url', default=default) + + iframe_url = find_iframe_url(webpage, None) + if not iframe_url: + embed_url = self._html_search_regex( + r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url') + player = self._download_json( + embed_url, video_id, 'Downloading player page') + iframe_url = find_iframe_url(player['html']) json_url = compat_parse_qs( compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0] return self._extract_from_json_url(json_url, video_id, lang)