[francetv] Add an extractor for francetvinfo.fr (closes #1317)

It uses the same system as Pluzz, so create a base class for both extractors.
master
Jaime Marquínez Ferrándiz 11 years ago
parent df3e61003a
commit 648d25d43d

@ -29,7 +29,10 @@ from .escapist import EscapistIE
from .exfm import ExfmIE from .exfm import ExfmIE
from .facebook import FacebookIE from .facebook import FacebookIE
from .flickr import FlickrIE from .flickr import FlickrIE
from .francetv import PluzzIE from .francetv import (
PluzzIE,
FranceTvInfoIE,
)
from .freesound import FreesoundIE from .freesound import FreesoundIE
from .funnyordie import FunnyOrDieIE from .funnyordie import FunnyOrDieIE
from .gamespot import GameSpotIE from .gamespot import GameSpotIE

@ -8,7 +8,29 @@ from ..utils import (
) )
class PluzzIE(InfoExtractor): class FranceTVBaseInfoExtractor(InfoExtractor):
def _extract_video(self, video_id):
    """Return the info dict for the video identified by *video_id*.

    Downloads the XML description from the francetvinfo.fr
    ``getInfosOeuvre.php`` web service, parses it and builds the result
    from its ``videos/video/url``, ``image``, ``titre`` and ``synopsis``
    elements.

    The returned dict has the keys ``id``, ``ext``, ``url``, ``title``,
    ``thumbnail`` and ``description``.
    """
    config_url = (
        'http://www.francetvinfo.fr/appftv/webservices/video/'
        'getInfosOeuvre.php?id-diffusion='
        + video_id)
    raw_config = self._download_webpage(
        config_url, video_id, 'Downloading XML config')
    doc = xml.etree.ElementTree.fromstring(raw_config.encode('utf-8'))

    # The service advertises an f4m manifest; rewrite it to the
    # corresponding m3u8 playlist, which lives under /i/ instead of /z/.
    f4m_url = doc.find('videos/video/url').text
    stream_url = f4m_url.replace(
        'manifest.f4m', 'index_2_av.m3u8').replace('/z/', '/i/')

    image_path = doc.find('image').text
    return {
        'id': video_id,
        'ext': 'mp4',
        'url': stream_url,
        'title': doc.find('titre').text,
        # The image path is resolved against the Pluzz host for every
        # extractor that shares this helper.
        'thumbnail': compat_urlparse.urljoin(
            'http://pluzz.francetv.fr', image_path),
        'description': doc.find('synopsis').text,
    }
class PluzzIE(FranceTVBaseInfoExtractor):
IE_NAME = u'pluzz.francetv.fr' IE_NAME = u'pluzz.francetv.fr'
_VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html' _VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'
@ -29,22 +51,27 @@ class PluzzIE(InfoExtractor):
webpage = self._download_webpage(url, title) webpage = self._download_webpage(url, title)
video_id = self._search_regex( video_id = self._search_regex(
r'data-diffusion="(\d+)"', webpage, 'ID') r'data-diffusion="(\d+)"', webpage, 'ID')
return self._extract_video(video_id)
xml_desc = self._download_webpage(
'http://www.pluzz.fr/appftv/webservices/video/'
'getInfosOeuvre.php?id-diffusion='
+ video_id, title, 'Downloading XML config')
info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))
manifest_url = info.find('videos/video/url').text class FranceTvInfoIE(FranceTVBaseInfoExtractor):
video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8') IE_NAME = u'francetvinfo.fr'
video_url = video_url.replace('/z/', '/i/') _VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+).html'
thumbnail_path = info.find('image').text
return {'id': video_id, _TEST = {
'ext': 'mp4', u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
'url': video_url, u'file': u'84981923.mp4',
'title': info.find('titre').text, u'info_dict': {
'thumbnail': compat_urlparse.urljoin(url, thumbnail_path), u'title': u'Soir 3',
'description': info.find('synopsis').text, },
} u'params': {
u'skip_download': True,
},
}
def _real_extract(self, url):
    """Extract the video embedded in the page at *url*.

    The ``title`` group of ``_VALID_URL`` is passed along when downloading
    the page; the digits following ``id-video=`` in the page are then
    handed to ``_extract_video``, whose result is returned unchanged.
    """
    slug = re.match(self._VALID_URL, url).group('title')
    html = self._download_webpage(url, slug)
    # Non-greedy digit run terminated by the closing double quote.
    return self._extract_video(
        self._search_regex(r'id-video=(\d+?)"', html, u'video id'))

Loading…
Cancel
Save