From 245023a86145f7074dacdab4c735dea268d766ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 30 Aug 2016 23:51:18 +0700 Subject: [PATCH] [pyvideo] Fix extraction (Closes #10468) --- youtube_dl/extractor/pyvideo.py | 96 +++++++++++++++++++-------------- 1 file changed, 55 insertions(+), 41 deletions(-) diff --git a/youtube_dl/extractor/pyvideo.py b/youtube_dl/extractor/pyvideo.py index cc0416cb8..08ec09183 100644 --- a/youtube_dl/extractor/pyvideo.py +++ b/youtube_dl/extractor/pyvideo.py @@ -1,59 +1,73 @@ from __future__ import unicode_literals import re -import os from .common import InfoExtractor +from ..compat import compat_str +from ..utils import int_or_none class PyvideoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/video/(?P\d+)/(.*)' - - _TESTS = [ - { - 'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes', - 'md5': '520915673e53a5c5d487c36e0c4d85b5', - 'info_dict': { - 'id': '24_4WWkSmNo', - 'ext': 'webm', - 'title': 'Become a logging expert in 30 minutes', - 'description': 'md5:9665350d466c67fb5b1598de379021f7', - 'upload_date': '20130320', - 'uploader': 'Next Day Video', - 'uploader_id': 'NextDayVideo', - }, - 'add_ie': ['Youtube'], + _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/(?P[^/]+)/(?P[^/?#&.]+)' + + _TESTS = [{ + 'url': 'http://pyvideo.org/pycon-us-2013/become-a-logging-expert-in-30-minutes.html', + 'info_dict': { + 'id': 'become-a-logging-expert-in-30-minutes', }, - { - 'url': 'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v', - 'md5': '5fe1c7e0a8aa5570330784c847ff6d12', - 'info_dict': { - 'id': '2542', - 'ext': 'm4v', - 'title': 'Gloriajw-SpotifyWithErikBernhardsson182', - }, + 'playlist_count': 2, + }, { + 'url': 'http://pyvideo.org/pygotham-2012/gloriajw-spotifywitherikbernhardsson182m4v.html', + 'md5': '5fe1c7e0a8aa5570330784c847ff6d12', + 'info_dict': { + 'id': '2542', + 'ext': 'm4v', + 'title': 'Gloriajw-SpotifyWithErikBernhardsson182.m4v', }, - ] + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) + category = mobj.group('category') video_id = mobj.group('id') - webpage = self._download_webpage(url, video_id) + entries = [] - m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage) - if m_youtube is not None: - return self.url_result(m_youtube.group(1), 'Youtube') + data = self._download_json( + 'https://raw.githubusercontent.com/pyvideo/data/master/%s/videos/%s.json' + % (category, video_id), video_id, fatal=False) - title = self._html_search_regex( - r'