From 0a688bc0b28c970e9af965b3fa0c7927507eeb97 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Sat, 30 Nov 2013 14:56:51 +0100
Subject: [PATCH] [youtube] Add support for downloading top lists (fixes #1868)

It needs to know the channel and the title of the list, because the ids change every time you browse the channels and are attached to a 'VISITOR_INFO1_LIVE' cookie.
---
 test/test_youtube_lists.py       |  8 ++++++++
 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/youtube.py  | 35 ++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+)

diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index 95f07d129..33db09f43 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -15,6 +15,7 @@ from youtube_dl.extractor import (
     YoutubeIE,
     YoutubeChannelIE,
     YoutubeShowIE,
+    YoutubeTopListIE,
 )
 
 
@@ -116,5 +117,12 @@ class TestYoutubeLists(unittest.TestCase):
         original_video = entries[0]
         self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
 
+    def test_youtube_toplist(self):
+        dl = FakeYDL()
+        ie = YoutubeTopListIE(dl)
+        result = ie.extract('yttoplist:music:Top Tracks')
+        entries = result['entries']
+        self.assertTrue(len(entries) >= 9)
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 664639b53..0abf86e44 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -194,6 +194,7 @@ from .youtube import (
     YoutubeWatchLaterIE,
     YoutubeFavouritesIE,
     YoutubeHistoryIE,
+    YoutubeTopListIE,
 )
 from .zdf import ZDFIE
 
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 765b4a9bf..a1a4d896d 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1576,6 +1576,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         if len(playlist_id) == 13: # 'RD' + 11 characters for the video id
             # Mixes require a custom extraction process
             return self._extract_mix(playlist_id)
+        if playlist_id.startswith('TL'):
+            raise ExtractorError(u'For downloading YouTube.com top lists, use '
+                u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
 
         # Extract the video ids from the playlist pages
         ids = []
@@ -1598,6 +1601,38 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         return self.playlist_result(url_results, playlist_id, playlist_title)
 
 
+class YoutubeTopListIE(YoutubePlaylistIE):
+    IE_NAME = u'youtube:toplist'
+    IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"'
+        u' (Example: "yttoplist:music:Top Tracks")')
+    _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        channel = mobj.group('chann')
+        title = mobj.group('title')
+        query = compat_urllib_parse.urlencode({'title': title})
+        playlist_re = 'href="([^"]+?%s[^"]+?)"' % re.escape(query)
+        channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title)
+        link = self._html_search_regex(playlist_re, channel_page, u'list')
+        url = compat_urlparse.urljoin('https://www.youtube.com/', link)
+
+        video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
+        ids = []
+        # sometimes the webpage doesn't contain the videos
+        # retry until we get them
+        for i in itertools.count(0):
+            msg = u'Downloading Youtube mix'
+            if i > 0:
+                msg += ', retry #%d' % i
+            webpage = self._download_webpage(url, title, msg)
+            ids = orderedSet(re.findall(video_re, webpage))
+            if ids:
+                break
+        url_results = self._ids_to_results(ids)
+        return self.playlist_result(url_results, playlist_title=title)
+
+
 class YoutubeChannelIE(InfoExtractor):
     IE_DESC = u'YouTube.com channels'
     _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"