Add the 'webpage_url' field to info_dict

The url for the video page, it must allow to reproduce the result.
It's automatically set by YoutubeDL if it's missing.
master
Jaime Marquínez Ferrándiz 11 years ago
parent b6c45014ae
commit 9103bbc5cd

@ -148,6 +148,9 @@ def generator(test_case):
# Check for the presence of mandatory fields
for key in ('id', 'url', 'title', 'ext'):
self.assertTrue(key in info_dict.keys() and info_dict[key])
# Check for mandatory fields that are automatically set by YoutubeDL
for key in ['webpage_url', 'extractor']:
self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
finally:
try_rm_tcs_files()

@ -354,8 +354,11 @@ class YoutubeDL(object):
'_type': 'compat_list',
'entries': ie_result,
}
if 'extractor' not in ie_result:
ie_result['extractor'] = ie.IE_NAME
self.add_extra_info(ie_result,
{
'extractor': ie.IE_NAME,
'webpage_url': url
})
return self.process_ie_result(ie_result, download, extra_info)
except ExtractorError as de: # An error we somewhat expected
self.report_error(compat_str(de), de.format_traceback())
@ -417,6 +420,7 @@ class YoutubeDL(object):
'playlist': playlist,
'playlist_index': i + playliststart,
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
}
entry_result = self.process_ie_result(entry,
download=download,
@ -427,7 +431,10 @@ class YoutubeDL(object):
elif result_type == 'compat_list':
def _fixup(r):
self.add_extra_info(r,
{'extractor': ie_result['extractor']})
{
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
})
return r
ie_result['entries'] = [
self.process_ie_result(_fixup(r), download, extra_info)

@ -71,6 +71,9 @@ class InfoExtractor(object):
("3D" or "DASH video")
* width Width of the video, if known
* height Height of the video, if known
webpage_url: The url to the video webpage, if given to youtube-dl it
should allow to get the same result again. (It will be set
by YoutubeDL if it's missing)
Unless mentioned otherwise, the fields should be Unicode strings.

@ -20,7 +20,7 @@ class VimeoIE(InfoExtractor):
"""Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
_NETRC_MACHINE = 'vimeo'
IE_NAME = u'vimeo'
_TESTS = [
@ -128,11 +128,9 @@ class VimeoIE(InfoExtractor):
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group('id')
if not mobj.group('proto'):
url = 'https://' + url
elif mobj.group('pro'):
if mobj.group('pro') or mobj.group('player'):
url = 'http://player.vimeo.com/video/' + video_id
elif mobj.group('direct_link'):
else:
url = 'https://vimeo.com/' + video_id
# Retrieve video webpage to extract further information
@ -234,7 +232,7 @@ class VimeoIE(InfoExtractor):
if len(formats) == 0:
raise ExtractorError(u'No known codec found')
return [{
return {
'id': video_id,
'uploader': video_uploader,
'uploader_id': video_uploader_id,
@ -243,7 +241,8 @@ class VimeoIE(InfoExtractor):
'thumbnail': video_thumbnail,
'description': video_description,
'formats': formats,
}]
'webpage_url': url,
}
class VimeoChannelIE(InfoExtractor):

@ -1485,7 +1485,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'subtitles': video_subtitles,
'duration': video_duration,
'age_limit': 18 if age_gate else 0,
'annotations': video_annotations
'annotations': video_annotations,
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
})
return results

Loading…
Cancel
Save