Use the new '_download_xml' helper in more extractors

master
Jaime Marquínez Ferrándiz 11 years ago
parent 6e47b51eef
commit e26f871228

@ -1,5 +1,4 @@
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
@ -28,9 +27,8 @@ class AnitubeIE(InfoExtractor):
key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
webpage, u'key') webpage, u'key')
webpage_config = self._download_webpage('http://www.anitube.se/nuevo/econfig.php?key=%s' % key, config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
key) key)
config_xml = xml.etree.ElementTree.fromstring(webpage_config.encode('utf-8'))
video_title = config_xml.find('title').text video_title = config_xml.find('title').text

@ -1,7 +1,6 @@
# encoding: utf-8 # encoding: utf-8
import re import re
import json import json
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -78,8 +77,7 @@ class ArteTvIE(InfoExtractor):
"""Extract from videos.arte.tv""" """Extract from videos.arte.tv"""
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/') ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml') ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
ref_xml = self._download_webpage(ref_xml_url, video_id, note=u'Downloading metadata') ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata')
ref_xml_doc = xml.etree.ElementTree.fromstring(ref_xml)
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang) config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
config_xml_url = config_node.attrib['ref'] config_xml_url = config_node.attrib['ref']
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration') config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
@ -109,9 +107,8 @@ class ArteTvIE(InfoExtractor):
"""Extract form http://liveweb.arte.tv/""" """Extract form http://liveweb.arte.tv/"""
webpage = self._download_webpage(url, name) webpage = self._download_webpage(url, name)
video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id') video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id, config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
video_id, u'Downloading information') video_id, u'Downloading information')
config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
event_doc = config_doc.find('event') event_doc = config_doc.find('event')
url_node = event_doc.find('video').find('urlHd') url_node = event_doc.find('video').find('urlHd')
if url_node is None: if url_node is None:

@ -1,6 +1,5 @@
# encoding: utf-8 # encoding: utf-8
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import unified_strdate from ..utils import unified_strdate
@ -31,11 +30,10 @@ class CanalplusIE(InfoExtractor):
webpage = self._download_webpage(url, mobj.group('path')) webpage = self._download_webpage(url, mobj.group('path'))
video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id') video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
info_url = self._VIDEO_INFO_TEMPLATE % video_id info_url = self._VIDEO_INFO_TEMPLATE % video_id
info_page = self._download_webpage(info_url,video_id, doc = self._download_xml(info_url,video_id,
u'Downloading video info') u'Downloading video info')
self.report_extraction(video_id) self.report_extraction(video_id)
doc = xml.etree.ElementTree.fromstring(info_page.encode('utf-8'))
video_info = [video for video in doc if video.find('ID').text == video_id][0] video_info = [video for video in doc if video.find('ID').text == video_id][0]
infos = video_info.find('INFOS') infos = video_info.find('INFOS')
media = video_info.find('MEDIA') media = video_info.find('MEDIA')

@ -1,6 +1,5 @@
import re import re
import time import time
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
@ -25,9 +24,8 @@ class ClipfishIE(InfoExtractor):
info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' % info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
(video_id, int(time.time()))) (video_id, int(time.time())))
info_xml = self._download_webpage( doc = self._download_xml(
info_url, video_id, note=u'Downloading info page') info_url, video_id, note=u'Downloading info page')
doc = xml.etree.ElementTree.fromstring(info_xml)
title = doc.find('title').text title = doc.find('title').text
video_url = doc.find('filename').text video_url = doc.find('filename').text
thumbnail = doc.find('imageurl').text thumbnail = doc.find('imageurl').text

@ -1,5 +1,4 @@
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import determine_ext from ..utils import determine_ext
@ -33,8 +32,7 @@ class CNNIE(InfoExtractor):
path = mobj.group('path') path = mobj.group('path')
page_title = mobj.group('title') page_title = mobj.group('title')
info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
info_xml = self._download_webpage(info_url, page_title) info = self._download_xml(info_url, page_title)
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
formats = [] formats = []
for f in info.findall('files/file'): for f in info.findall('files/file'):

@ -1,5 +1,4 @@
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from .mtv import MTVIE, _media_xml_tag from .mtv import MTVIE, _media_xml_tag
@ -158,13 +157,12 @@ class ComedyCentralShowsIE(InfoExtractor):
uri = mMovieParams[0][1] uri = mMovieParams[0][1]
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri}) indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
indexXml = self._download_webpage(indexUrl, epTitle, idoc = self._download_xml(indexUrl, epTitle,
u'Downloading show index', u'Downloading show index',
u'unable to download episode index') u'unable to download episode index')
results = [] results = []
idoc = xml.etree.ElementTree.fromstring(indexXml)
itemEls = idoc.findall('.//item') itemEls = idoc.findall('.//item')
for partNum,itemEl in enumerate(itemEls): for partNum,itemEl in enumerate(itemEls):
mediaId = itemEl.findall('./guid')[0].text mediaId = itemEl.findall('./guid')[0].text
@ -175,10 +173,9 @@ class ComedyCentralShowsIE(InfoExtractor):
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' + configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
compat_urllib_parse.urlencode({'uri': mediaId})) compat_urllib_parse.urlencode({'uri': mediaId}))
configXml = self._download_webpage(configUrl, epTitle, cdoc = self._download_xml(configUrl, epTitle,
u'Downloading configuration for %s' % shortMediaId) u'Downloading configuration for %s' % shortMediaId)
cdoc = xml.etree.ElementTree.fromstring(configXml)
turls = [] turls = []
for rendition in cdoc.findall('.//rendition'): for rendition in cdoc.findall('.//rendition'):
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text) finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)

@ -1,6 +1,5 @@
# encoding: utf-8 # encoding: utf-8
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -32,14 +31,12 @@ class DaumIE(InfoExtractor):
full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"', full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
webpage, u'full id') webpage, u'full id')
query = compat_urllib_parse.urlencode({'vid': full_id}) query = compat_urllib_parse.urlencode({'vid': full_id})
info_xml = self._download_webpage( info = self._download_xml(
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id, 'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
u'Downloading video info') u'Downloading video info')
urls_xml = self._download_webpage( urls = self._download_xml(
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query, 'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
video_id, u'Downloading video formats info') video_id, u'Downloading video formats info')
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
self.to_screen(u'%s: Getting video urls' % video_id) self.to_screen(u'%s: Getting video urls' % video_id)
formats = [] formats = []
@ -49,10 +46,9 @@ class DaumIE(InfoExtractor):
'vid': full_id, 'vid': full_id,
'profile': profile, 'profile': profile,
}) })
url_xml = self._download_webpage( url_doc = self._download_xml(
'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query, 'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
video_id, note=False) video_id, note=False)
url_doc = xml.etree.ElementTree.fromstring(url_xml.encode('utf-8'))
format_url = url_doc.find('result/url').text format_url = url_doc.find('result/url').text
formats.append({ formats.append({
'url': format_url, 'url': format_url,

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -30,8 +29,7 @@ class DreiSatIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
details_xml = self._download_webpage(details_url, video_id, note=u'Downloading video details') details_doc = self._download_xml(details_url, video_id, note=u'Downloading video details')
details_doc = xml.etree.ElementTree.fromstring(details_xml.encode('utf-8'))
thumbnail_els = details_doc.findall('.//teaserimage') thumbnail_els = details_doc.findall('.//teaserimage')
thumbnails = [{ thumbnails = [{

@ -1,5 +1,4 @@
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import determine_ext from ..utils import determine_ext
@ -21,9 +20,8 @@ class EbaumsWorldIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
config_xml = self._download_webpage( config = self._download_xml(
'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id) 'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
video_url = config.find('file').text video_url = config.find('file').text
return { return {

@ -1,6 +1,5 @@
# encoding: utf-8 # encoding: utf-8
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -28,9 +27,8 @@ class FazIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage, config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage,
u'config xml url') u'config xml url')
config_xml = self._download_webpage(config_xml_url, video_id, config = self._download_xml(config_xml_url, video_id,
u'Downloading config xml') u'Downloading config xml')
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
encodings = config.find('ENCODINGS') encodings = config.find('ENCODINGS')
formats = [] formats = []

@ -1,6 +1,5 @@
# encoding: utf-8 # encoding: utf-8
import re import re
import xml.etree.ElementTree
import json import json
from .common import InfoExtractor from .common import InfoExtractor
@ -11,11 +10,10 @@ from ..utils import (
class FranceTVBaseInfoExtractor(InfoExtractor): class FranceTVBaseInfoExtractor(InfoExtractor):
def _extract_video(self, video_id): def _extract_video(self, video_id):
xml_desc = self._download_webpage( info = self._download_xml(
'http://www.francetvinfo.fr/appftv/webservices/video/' 'http://www.francetvinfo.fr/appftv/webservices/video/'
'getInfosOeuvre.php?id-diffusion=' 'getInfosOeuvre.php?id-diffusion='
+ video_id, video_id, 'Downloading XML config') + video_id, video_id, 'Downloading XML config')
info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))
manifest_url = info.find('videos/video/url').text manifest_url = info.find('videos/video/url').text
video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8') video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')

@ -1,5 +1,4 @@
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -43,9 +42,8 @@ class InternetVideoArchiveIE(InfoExtractor):
video_id = query_dic['publishedid'][0] video_id = query_dic['publishedid'][0]
url = self._build_url(query) url = self._build_url(query)
flashconfiguration_xml = self._download_webpage(url, video_id, flashconfiguration = self._download_xml(url, video_id,
u'Downloading flash configuration') u'Downloading flash configuration')
flashconfiguration = xml.etree.ElementTree.fromstring(flashconfiguration_xml.encode('utf-8'))
file_url = flashconfiguration.find('file').text file_url = flashconfiguration.find('file').text
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx') file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
# Replace some of the parameters in the query to get the best quality # Replace some of the parameters in the query to get the best quality
@ -53,9 +51,8 @@ class InternetVideoArchiveIE(InfoExtractor):
file_url = re.sub(r'(?<=\?)(.+)$', file_url = re.sub(r'(?<=\?)(.+)$',
lambda m: self._clean_query(m.group()), lambda m: self._clean_query(m.group()),
file_url) file_url)
info_xml = self._download_webpage(file_url, video_id, info = self._download_xml(file_url, video_id,
u'Downloading video info') u'Downloading video info')
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
item = info.find('channel/item') item = info.find('channel/item')
def _bp(p): def _bp(p):

@ -2,7 +2,6 @@
import json import json
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
@ -32,12 +31,9 @@ class JeuxVideoIE(InfoExtractor):
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml', r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
xml_link, u'video ID') xml_link, u'video ID')
xml_config = self._download_webpage( config = self._download_xml(
xml_link, title, u'Downloading XML config') xml_link, title, u'Downloading XML config')
config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8')) info_json = config.find('format.json').text
info_json = self._search_regex(
r'(?sm)<format\.json>(.*?)</format\.json>',
xml_config, u'JSON information')
info = json.loads(info_json)['versions'][0] info = json.loads(info_json)['versions'][0]
video_url = 'http://video720.jeuxvideo.com/' + info['file'] video_url = 'http://video720.jeuxvideo.com/' + info['file']

@ -1,7 +1,6 @@
import json import json
import os import os
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -94,10 +93,9 @@ class JustinTVIE(InfoExtractor):
archive_id = m.group(1) archive_id = m.group(1)
api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
chapter_info_xml = self._download_webpage(api, chapter_id, doc = self._download_xml(api, chapter_id,
note=u'Downloading chapter information', note=u'Downloading chapter information',
errnote=u'Chapter information download failed') errnote=u'Chapter information download failed')
doc = xml.etree.ElementTree.fromstring(chapter_info_xml)
for a in doc.findall('.//archive'): for a in doc.findall('.//archive'):
if archive_id == a.find('./id').text: if archive_id == a.find('./id').text:
break break

@ -1,6 +1,5 @@
import re import re
import json import json
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -80,8 +79,7 @@ class LivestreamOriginalIE(InfoExtractor):
user = mobj.group('user') user = mobj.group('user')
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id) api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
api_response = self._download_webpage(api_url, video_id) info = self._download_xml(api_url, video_id)
info = xml.etree.ElementTree.fromstring(api_response.encode('utf-8'))
item = info.find('channel').find('item') item = info.find('channel').find('item')
ns = {'media': 'http://search.yahoo.com/mrss'} ns = {'media': 'http://search.yahoo.com/mrss'}
thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url'] thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']

@ -109,9 +109,8 @@ class MTVIE(InfoExtractor):
def _get_videos_info(self, uri): def _get_videos_info(self, uri):
video_id = self._id_from_uri(uri) video_id = self._id_from_uri(uri)
data = compat_urllib_parse.urlencode({'uri': uri}) data = compat_urllib_parse.urlencode({'uri': uri})
infoXml = self._download_webpage(self._FEED_URL +'?' + data, video_id, idoc = self._download_xml(self._FEED_URL +'?' + data, video_id,
u'Downloading info') u'Downloading info')
idoc = xml.etree.ElementTree.fromstring(infoXml.encode('utf-8'))
return [self._get_video_info(item) for item in idoc.findall('.//item')] return [self._get_video_info(item) for item in idoc.findall('.//item')]
def _real_extract(self, url): def _real_extract(self, url):

@ -1,5 +1,4 @@
import os.path import os.path
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -33,8 +32,7 @@ class MySpassIE(InfoExtractor):
# get metadata # get metadata
metadata_url = META_DATA_URL_TEMPLATE % video_id metadata_url = META_DATA_URL_TEMPLATE % video_id
metadata_text = self._download_webpage(metadata_url, video_id) metadata = self._download_xml(metadata_url, video_id)
metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))
# extract values from metadata # extract values from metadata
url_flv_el = metadata.find('url_flv') url_flv_el = metadata.find('url_flv')

@ -1,6 +1,5 @@
# encoding: utf-8 # encoding: utf-8
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -38,14 +37,12 @@ class NaverIE(InfoExtractor):
'protocol': 'p2p', 'protocol': 'p2p',
'inKey': key, 'inKey': key,
}) })
info_xml = self._download_webpage( info = self._download_xml(
'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query, 'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
video_id, u'Downloading video info') video_id, u'Downloading video info')
urls_xml = self._download_webpage( urls = self._download_xml(
'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls, 'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
video_id, u'Downloading video formats info') video_id, u'Downloading video formats info')
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
formats = [] formats = []
for format_el in urls.findall('EncodingOptions/EncodingOption'): for format_el in urls.findall('EncodingOptions/EncodingOption'):

@ -1,5 +1,4 @@
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import find_xpath_attr, compat_str from ..utils import find_xpath_attr, compat_str
@ -21,8 +20,8 @@ class NBCNewsIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
info_xml = self._download_webpage('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id) all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')).find('video') info = all_info.find('video')
return {'id': video_id, return {'id': video_id,
'title': info.find('headline').text, 'title': info.find('headline').text,

@ -1,6 +1,5 @@
import re import re
import json import json
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -26,9 +25,8 @@ class NHLBaseInfoExtractor(InfoExtractor):
'path': initial_video_url.replace('.mp4', '_sd.mp4'), 'path': initial_video_url.replace('.mp4', '_sd.mp4'),
}) })
path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
path_response = self._download_webpage(path_url, video_id, path_doc = self._download_xml(path_url, video_id,
u'Downloading final video url') u'Downloading final video url')
path_doc = xml.etree.ElementTree.fromstring(path_response)
video_url = path_doc.find('path').text video_url = path_doc.find('path').text
join = compat_urlparse.urljoin join = compat_urlparse.urljoin

@ -2,7 +2,6 @@
import re import re
import socket import socket
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -81,7 +80,7 @@ class NiconicoIE(InfoExtractor):
# the cookies in order to be able to download the info webpage # the cookies in order to be able to download the info webpage
self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id) self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)
video_info_webpage = self._download_webpage( video_info = self._download_xml(
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id, 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
note=u'Downloading video info page') note=u'Downloading video info page')
@ -92,7 +91,6 @@ class NiconicoIE(InfoExtractor):
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0] video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
# Start extracting information # Start extracting information
video_info = xml.etree.ElementTree.fromstring(video_info_webpage)
video_title = video_info.find('.//title').text video_title = video_info.find('.//title').text
video_extension = video_info.find('.//movie_type').text video_extension = video_info.find('.//movie_type').text
video_format = video_extension.upper() video_format = video_extension.upper()
@ -107,13 +105,11 @@ class NiconicoIE(InfoExtractor):
video_uploader = video_uploader_id video_uploader = video_uploader_id
url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
try: try:
user_info_webpage = self._download_webpage( user_info = self._download_xml(
url, video_id, note=u'Downloading user information') url, video_id, note=u'Downloading user information')
video_uploader = user_info.find('.//nickname').text
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err)) self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
else:
user_info = xml.etree.ElementTree.fromstring(user_info_webpage)
video_uploader = user_info.find('.//nickname').text
return { return {
'id': video_id, 'id': video_id,

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -35,12 +34,11 @@ class SinaIE(InfoExtractor):
def _extract_video(self, video_id): def _extract_video(self, video_id):
data = compat_urllib_parse.urlencode({'vid': video_id}) data = compat_urllib_parse.urlencode({'vid': video_id})
url_page = self._download_webpage('http://v.iask.com/v_play.php?%s' % data, url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
video_id, u'Downloading video url') video_id, u'Downloading video url')
image_page = self._download_webpage( image_page = self._download_webpage(
'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data, 'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
video_id, u'Downloading thumbnail info') video_id, u'Downloading thumbnail info')
url_doc = xml.etree.ElementTree.fromstring(url_page.encode('utf-8'))
return {'id': video_id, return {'id': video_id,
'url': url_doc.find('./durl/url').text, 'url': url_doc.find('./durl/url').text,

@ -1,5 +1,4 @@
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
@ -33,12 +32,10 @@ class SpiegelIE(InfoExtractor):
r'<div class="module-title">(.*?)</div>', webpage, u'title') r'<div class="module-title">(.*?)</div>', webpage, u'title')
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml' xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
xml_code = self._download_webpage( idoc = self._download_xml(
xml_url, video_id, xml_url, video_id,
note=u'Downloading XML', errnote=u'Failed to download XML') note=u'Downloading XML', errnote=u'Failed to download XML')
idoc = xml.etree.ElementTree.fromstring(xml_code)
formats = [ formats = [
{ {
'format_id': n.tag.rpartition('type')[2], 'format_id': n.tag.rpartition('type')[2],

@ -1,5 +1,4 @@
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -32,8 +31,7 @@ class TeamcocoIE(InfoExtractor):
self.report_extraction(video_id) self.report_extraction(video_id)
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
data_xml = self._download_webpage(data_url, video_id, 'Downloading data webpage') data = self._download_xml(data_url, video_id, 'Downloading data webpage')
data = xml.etree.ElementTree.fromstring(data_xml.encode('utf-8'))
qualities = ['500k', '480p', '1000k', '720p', '1080p'] qualities = ['500k', '480p', '1000k', '720p', '1080p']

@ -1,6 +1,5 @@
# coding: utf-8 # coding: utf-8
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -40,11 +39,9 @@ class TouTvIE(InfoExtractor):
r'"idMedia":\s*"([^"]+)"', webpage, u'media ID') r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')
streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
streams_webpage = self._download_webpage( streams_doc = self._download_xml(
streams_url, video_id, note=u'Downloading stream list') streams_url, video_id, note=u'Downloading stream list')
streams_doc = xml.etree.ElementTree.fromstring(
streams_webpage.encode('utf-8'))
video_url = next(n.text video_url = next(n.text
for n in streams_doc.findall('.//choice/url') for n in streams_doc.findall('.//choice/url')
if u'//ad.doubleclick' not in n.text) if u'//ad.doubleclick' not in n.text)

@ -1,6 +1,5 @@
import json import json
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
@ -36,12 +35,10 @@ class TriluliluIE(InfoExtractor):
format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/' format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
u'video-formats2' % log) u'video-formats2' % log)
format_str = self._download_webpage( format_doc = self._download_xml(
format_url, video_id, format_url, video_id,
note=u'Downloading formats', note=u'Downloading formats',
errnote=u'Error while downloading formats') errnote=u'Error while downloading formats')
format_doc = xml.etree.ElementTree.fromstring(format_str)
video_url_template = ( video_url_template = (
u'http://fs%(server)s.trilulilu.ro/stream.php?type=video' u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'

@ -1,5 +1,4 @@
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -27,9 +26,8 @@ class VideofyMeIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
config_xml = self._download_webpage('http://sunshine.videofy.me/?videoId=%s' % video_id, config = self._download_xml('http://sunshine.videofy.me/?videoId=%s' % video_id,
video_id) video_id)
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
video = config.find('video') video = config.find('video')
sources = video.find('sources') sources = video.find('sources')
url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key) url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)

@ -11,7 +11,6 @@ import socket
import string import string
import struct import struct
import traceback import traceback
import xml.etree.ElementTree
import zlib import zlib
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
@ -1144,8 +1143,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'asrs': 1, 'asrs': 1,
}) })
list_url = caption_url + '&' + list_params list_url = caption_url + '&' + list_params
list_page = self._download_webpage(list_url, video_id) caption_list = self._download_xml(list_url, video_id)
caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
original_lang_node = caption_list.find('track') original_lang_node = caption_list.find('track')
if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' : if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
self._downloader.report_warning(u'Video doesn\'t have automatic captions') self._downloader.report_warning(u'Video doesn\'t have automatic captions')

Loading…
Cancel
Save