Merge pull request #1413 from tewe/master

Add Ustream channel support
master
Jaime Marquínez Ferrándiz 11 years ago
commit 74ac9bdd82

@ -8,7 +8,7 @@ import json
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE
from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE
from youtube_dl.utils import *
from helper import FakeYDL
@ -34,5 +34,13 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], u'Vimeo Tributes')
self.assertTrue(len(result['entries']) > 24)
def test_ustream_channel(self):
    # Live-network check: a Ustream channel URL must resolve to a playlist
    # carrying the channel's numeric id and its recorded videos.
    channel_url = 'http://www.ustream.tv/channel/young-americans-for-liberty'
    extractor = UstreamChannelIE(FakeYDL())
    playlist = extractor.extract(channel_url)
    self.assertIsPlaylist(playlist)
    self.assertEqual(playlist['id'], u'5124905')
    # Lower bound only: the assertion stays true if more videos are listed.
    entry_count = len(playlist['entries'])
    self.assertTrue(entry_count >= 11)
if __name__ == '__main__':
    # Allow running this test module directly as a script.
    unittest.main()

@ -97,7 +97,7 @@ from .tudou import TudouIE
from .tumblr import TumblrIE
from .tutv import TutvIE
from .unistra import UnistraIE
from .ustream import UstreamIE
from .ustream import UstreamIE, UstreamChannelIE
from .vbox7 import Vbox7IE
from .veehd import VeeHDIE
from .veoh import VeohIE

@ -1,6 +1,11 @@
import json
import re
from .common import InfoExtractor
from ..utils import (
compat_urlparse,
compat_html_parser,
)
class UstreamIE(InfoExtractor):
@ -43,3 +48,64 @@ class UstreamIE(InfoExtractor):
'thumbnail': thumbnail,
}
return info
# More robust than regular expressions
class ChannelParser(compat_html_parser.HTMLParser):
    """Find the numeric Ustream channel id in a channel page.

    The id is read from a tag of the form::

        <meta name="ustream:channel_id" content="1234">

    After the page has been fed, ``channel_id`` holds the id as a string of
    digits, or ``None`` when no such tag with an all-digit ``content`` was
    seen. If several matching tags are present, the last one wins.
    """

    # Class-level default so the attribute exists before any tag is parsed.
    channel_id = None

    def handle_starttag(self, tag, attrs):
        """Record the channel id if this is the ustream:channel_id meta tag.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by the
        HTML parser for the start tag.
        """
        if tag != 'meta':
            return
        values = dict(attrs)
        if values.get('name') != 'ustream:channel_id':
            return
        # A valueless attribute (``<meta ... content>``) is reported with a
        # value of None, which the ``get`` default does not cover; fall back
        # to '' so isdigit() is never called on None.
        value = values.get('content') or ''
        if value.isdigit():
            self.channel_id = value
class SocialstreamParser(compat_html_parser.HTMLParser):
    """Collect video ids from socialstream HTML fragments.

    Videos appear as list items of the form::

        <li class="content123 video" data-content-id="123" data-length="1452"
            data-href="/recorded/123" data-og-url="/recorded/123">

    Every all-digit ``data-content-id`` found on an ``<li>`` tag is appended,
    in document order, to ``content_ids``. The same instance can be fed
    several fragments; the ids accumulate.
    """

    def __init__(self):
        # Initialise the underlying parser before adding our own state.
        compat_html_parser.HTMLParser.__init__(self)
        self.content_ids = []

    def handle_starttag(self, tag, attrs):
        """Append the content id carried by an ``<li>`` start tag, if any.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by the
        HTML parser for the start tag.
        """
        if tag != 'li':
            return
        for (attr, value) in attrs:
            # ``value`` is None for a valueless attribute
            # (``<li data-content-id>``); test it before calling isdigit().
            if attr == 'data-content-id' and value and value.isdigit():
                self.content_ids.append(value)
class UstreamChannelIE(InfoExtractor):
    """Extract the recorded videos of a Ustream channel as a playlist."""
    _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
    IE_NAME = u'ustream:channel'

    def _real_extract(self, url):
        """Return a playlist result listing the channel's recorded videos.

        The channel page is downloaded to find the numeric channel id, then
        the paginated socialstream JSON endpoint is followed page by page to
        collect the video ids. Each video is returned as a URL entry handled
        by the 'Ustream' extractor; the playlist id is the channel id.

        Raises ExtractorError when the channel id cannot be found on the page.
        """
        # Imported here so this block does not rely on the module-level
        # import list being extended.
        from ..utils import ExtractorError

        m = re.match(self._VALID_URL, url)
        slug = m.group('slug')

        channel_parser = ChannelParser()
        channel_parser.feed(self._download_webpage(url, slug))
        channel_parser.close()
        channel_id = channel_parser.channel_id
        if channel_id is None:
            # Without this check the code would go on to request
            # '/ajax/socialstream/videos/None/1.json'.
            raise ExtractorError(
                u'Unable to find the channel id for %s' % slug)

        stream_parser = SocialstreamParser()
        BASE = 'http://www.ustream.tv'
        next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
        # Remember visited pages so a repeated 'nextUrl' cannot make the
        # pagination loop run forever.
        seen_urls = set()
        while next_url and next_url not in seen_urls:
            seen_urls.add(next_url)
            page_url = compat_urlparse.urljoin(BASE, next_url)
            reply = json.loads(self._download_webpage(page_url, channel_id))
            stream_parser.feed(reply['data'])
            # A missing 'nextUrl' is treated like an empty one: last page.
            next_url = reply.get('nextUrl')
        stream_parser.close()
        video_ids = stream_parser.content_ids

        urls = ['http://www.ustream.tv/recorded/' + vid for vid in video_ids]
        url_entries = [self.url_result(eurl, 'Ustream') for eurl in urls]
        return self.playlist_result(url_entries, channel_id)

Loading…
Cancel
Save