Allow iterators for playlist result entries

master
Philipp Hagemeister 10 years ago
parent 158f8cadc0
commit b82f815f37

@@ -7,6 +7,7 @@ import collections
import datetime import datetime
import errno import errno
import io import io
import itertools
import json import json
import locale import locale
import os import os
@@ -654,21 +655,28 @@ class YoutubeDL(object):
if playlistend == -1: if playlistend == -1:
playlistend = None playlistend = None
if isinstance(ie_result['entries'], list): ie_entries = ie_result['entries']
n_all_entries = len(ie_result['entries']) if isinstance(ie_entries, list):
entries = ie_result['entries'][playliststart:playlistend] n_all_entries = len(ie_entries)
entries = ie_entries[playliststart:playlistend]
n_entries = len(entries) n_entries = len(entries)
self.to_screen( self.to_screen(
"[%s] playlist %s: Collected %d video ids (downloading %d of them)" % "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
(ie_result['extractor'], playlist, n_all_entries, n_entries)) (ie_result['extractor'], playlist, n_all_entries, n_entries))
else: elif isinstance(ie_entries, PagedList):
assert isinstance(ie_result['entries'], PagedList) entries = ie_entries.getslice(
entries = ie_result['entries'].getslice(
playliststart, playlistend) playliststart, playlistend)
n_entries = len(entries) n_entries = len(entries)
self.to_screen( self.to_screen(
"[%s] playlist %s: Downloading %d videos" % "[%s] playlist %s: Downloading %d videos" %
(ie_result['extractor'], playlist, n_entries)) (ie_result['extractor'], playlist, n_entries))
else: # iterable
entries = list(itertools.islice(
ie_entries, playliststart, playlistend))
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Downloading %d videos" %
(ie_result['extractor'], playlist, n_entries))
for i, entry in enumerate(entries, 1): for i, entry in enumerate(entries, 1):
self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries)) self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))

@@ -158,8 +158,8 @@ class InfoExtractor(object):
_type "playlist" indicates multiple videos. _type "playlist" indicates multiple videos.
There must be a key "entries", which is a list or a PagedList object, each There must be a key "entries", which is a list, an iterable, or a PagedList
element of which is a valid dictionary under this specfication. object, each element of which is a valid dictionary by this specification.
Additionally, playlists can have "title" and "id" attributes with the same Additionally, playlists can have "title" and "id" attributes with the same
semantics as videos (see above). semantics as videos (see above).

@@ -1262,8 +1262,12 @@ class YoutubeChannelIE(InfoExtractor):
# The videos are contained in a single page # The videos are contained in a single page
# the ajax pages can't be used, they are empty # the ajax pages can't be used, they are empty
video_ids = self.extract_videos_from_page(channel_page) video_ids = self.extract_videos_from_page(channel_page)
else: entries = [
# Download all channel pages using the json-based channel_ajax query self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in video_ids]
return self.playlist_result(entries, channel_id)
def _entries():
for pagenum in itertools.count(1): for pagenum in itertools.count(1):
url = self._MORE_PAGES_URL % (pagenum, channel_id) url = self._MORE_PAGES_URL % (pagenum, channel_id)
page = self._download_json( page = self._download_json(
@@ -1271,16 +1275,14 @@ class YoutubeChannelIE(InfoExtractor):
transform_source=uppercase_escape) transform_source=uppercase_escape)
ids_in_page = self.extract_videos_from_page(page['content_html']) ids_in_page = self.extract_videos_from_page(page['content_html'])
video_ids.extend(ids_in_page) for video_id in ids_in_page:
yield self.url_result(
video_id, 'Youtube', video_id=video_id)
if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']: if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
break break
self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) return self.playlist_result(_entries(), channel_id)
url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in video_ids]
return self.playlist_result(url_entries, channel_id)
class YoutubeUserIE(InfoExtractor): class YoutubeUserIE(InfoExtractor):

Loading…
Cancel
Save