Merge remote-tracking branch 'origin/master'

9 years ago · 5bfd430f81
parent 73fac4e911 8fb474fb17
commit 5bfd430f81
16 changed files with 163 additions and 48 deletions
--- a/1
+++ b/1
@ -110,3 +110,4 @@ Shaya Goldberg
 Paul Hartmann
 Frans de Jonge
 Robin de Rooij
 Ryan Schmidt
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@ -138,7 +138,7 @@ class TestDailymotionSubtitles(BaseTestSubtitles):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
-        self.assertEqual(len(subtitles.keys()), 5)
+        self.assertTrue(len(subtitles.keys()) >= 6)
    def test_list_subtitles(self):
        self.DL.expect_warning('Automatic Captions not supported by this server')
@ -247,7 +247,7 @@ class TestVimeoSubtitles(BaseTestSubtitles):
    def test_subtitles(self):
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4')
+        self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
    def test_subtitles_lang(self):
        self.DL.params['writesubtitles'] = True
@ -334,7 +334,7 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), set(['cs']))
-        self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4')
+        self.assertTrue(len(subtitles['cs']) > 20000)
    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -189,6 +189,7 @@ from .hellporno import HellPornoIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
 from .historicfilms import HistoricFilmsIE
 from .history import HistoryIE
 from .hitbox import HitboxIE, HitboxLiveIE
 from .hornbunny import HornBunnyIE
 from .hostingbulk import HostingBulkIE
--- a/youtube_dl/extractor/bambuser.py
+++ b/youtube_dl/extractor/bambuser.py
@ -50,7 +50,7 @@ class BambuserIE(InfoExtractor):
            'duration': int(info['length']),
            'view_count': int(info['views_total']),
            'uploader': info['username'],
-            'uploader_id': info['uid'],
+            'uploader_id': info['owner']['uid'],
        }
--- a/youtube_dl/extractor/bbccouk.py
+++ b/youtube_dl/extractor/bbccouk.py
@ -273,7 +273,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
                    formats, subtitles = self._download_media_selector(programme_id)
                return programme_id, title, description, duration, formats, subtitles
        except ExtractorError as ee:
-            if not isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
+            if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
                raise
        # fallback to legacy playlist
--- a/youtube_dl/extractor/beeg.py
+++ b/youtube_dl/extractor/beeg.py
@ -9,7 +9,7 @@ class BeegIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
    _TEST = {
        'url': 'http://beeg.com/5416503',
-        'md5': '634526ae978711f6b748fe0dd6c11f57',
+        'md5': '1bff67111adb785c51d1b42959ec10e5',
        'info_dict': {
            'id': '5416503',
            'ext': 'mp4',
--- a/youtube_dl/extractor/camdemy.py
+++ b/youtube_dl/extractor/camdemy.py
@ -16,7 +16,7 @@ from ..utils import (
 class CamdemyIE(InfoExtractor):
-    _VALID_URL = r'http://www.camdemy.com/media/(?P<id>\d+)'
+    _VALID_URL = r'http://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
    _TESTS = [{
        # single file
        'url': 'http://www.camdemy.com/media/5181/',
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -665,7 +665,7 @@ class InfoExtractor(object):
        return RATING_TABLE.get(rating.lower(), None)
    def _family_friendly_search(self, html):
-        # See http://schema.org/VideoObj
+        # See http://schema.org/VideoObject
        family_friendly = self._html_search_meta('isFamilyFriendly', html)
        if not family_friendly:
--- a/youtube_dl/extractor/drtuber.py
+++ b/youtube_dl/extractor/drtuber.py
@ -15,7 +15,7 @@ class DrTuberIE(InfoExtractor):
            'id': '1740434',
            'display_id': 'hot-perky-blonde-naked-golf',
            'ext': 'mp4',
-            'title': 'Hot Perky Blonde Naked Golf',
+            'title': 'hot perky blonde naked golf',
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
@ -36,7 +36,8 @@ class DrTuberIE(InfoExtractor):
            r'<source src="([^"]+)"', webpage, 'video URL')
        title = self._html_search_regex(
-            r'<title>([^<]+)\s*-\s*Free', webpage, 'title')
+            [r'class="hd_title" style="[^"]+">([^<]+)</h1>', r'<title>([^<]+) - \d+'],
            webpage, 'title')
        thumbnail = self._html_search_regex(
            r'poster="([^"]+)"',
--- a/youtube_dl/extractor/firsttv.py
+++ b/youtube_dl/extractor/firsttv.py
@ -1,52 +1,71 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import int_or_none
 class FirstTVIE(InfoExtractor):
-    IE_NAME = 'firsttv'
+    IE_NAME = '1tv'
-    IE_DESC = 'Видеоархив - Первый канал'
+    IE_DESC = 'Первый канал'
-    _VALID_URL = r'http://(?:www\.)?1tv\.ru/videoarchive/(?P<id>\d+)'
+    _VALID_URL = r'http://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.1tv.ru/videoarchive/73390',
-        'md5': '3de6390cf0cca4a5eae1d1d83895e5ad',
+        'md5': '777f525feeec4806130f4f764bc18a4f',
        'info_dict': {
            'id': '73390',
            'ext': 'mp4',
            'title': 'Олимпийские канатные дороги',
-            'description': 'md5:cc730d2bf4215463e37fff6a1e277b13',
+            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
-            'thumbnail': 'http://img1.1tv.ru/imgsize640x360/PR20140210114657.JPG',
+            'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
            'duration': 149,
            'like_count': int,
            'dislike_count': int,
        },
        'skip': 'Only works from Russia',
    }, {
        'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930',
        'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
        'info_dict': {
            'id': '35930',
            'ext': 'mp4',
            'title': 'Наедине со всеми. Людмила Сенчина',
            'description': 'md5:89553aed1d641416001fe8d450f06cb9',
            'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
            'duration': 2694,
        },
        'skip': 'Only works from Russia',
-    }
+    }]
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        video_id = self._match_id(url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id, 'Downloading page')
        video_url = self._html_search_regex(
-            r'''(?s)jwplayer\('flashvideoportal_1'\)\.setup\({.*?'file': '([^']+)'.*?}\);''', webpage, 'video URL')
+            r'''(?s)(?:jwplayer\('flashvideoportal_1'\)\.setup\({|var\s+playlistObj\s*=).*?'file'\s*:\s*'([^']+)'.*?}\);''',
            webpage, 'video URL')
        title = self._html_search_regex(
-            r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', webpage, 'title')
+            [r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
             r"'title'\s*:\s*'([^']+)'"], webpage, 'title')
        description = self._html_search_regex(
-            r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>', webpage, 'description', fatal=False)
+            r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>',
            webpage, 'description', default=None) or self._html_search_meta(
                'description', webpage, 'description')
        thumbnail = self._og_search_thumbnail(webpage)
-        duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False)
+        duration = self._og_search_property(
            'video:duration', webpage,
            'video duration', fatal=False)
-        like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]',
+        like_count = self._html_search_regex(
-                                             webpage, 'like count', fatal=False)
+            r'title="Понравилось".*?/></label> \[(\d+)\]',
-        dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]',
+            webpage, 'like count', default=None)
-                                                webpage, 'dislike count', fatal=False)
+        dislike_count = self._html_search_regex(
            r'title="Не понравилось".*?/></label> \[(\d+)\]',
            webpage, 'dislike count', default=None)
        return {
            'id': video_id,
--- a/youtube_dl/extractor/history.py
+++ b/youtube_dl/extractor/history.py
@ -0,0 +1,31 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import smuggle_url
 class HistoryIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?history\.com/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
    _TESTS = [{
        'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
        'md5': '6fe632d033c92aa10b8d4a9be047a7c5',
        'info_dict': {
            'id': 'bLx5Dv5Aka1G',
            'ext': 'mp4',
            'title': "Bet You Didn't Know: Valentine's Day",
            'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
        },
        'add_ie': ['ThePlatform'],
    }]
    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        video_url = self._search_regex(
            r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
            webpage, 'video url')
        return self.url_result(smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}}))
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@ -1,7 +1,6 @@
 from __future__ import unicode_literals
 import re
 import json
 from .common import InfoExtractor
 from ..compat import (
@ -52,9 +51,9 @@ class NBCIE(InfoExtractor):
 class NBCNewsIE(InfoExtractor):
-    _VALID_URL = r'''(?x)https?://www\.nbcnews\.com/
+    _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
-        ((video/.+?/(?P<id>\d+))|
+        (?:video/.+?/(?P<id>\d+)|
-        (feature/[^/]+/(?P<title>.+)))
+        (?:feature|nightly-news)/[^/]+/(?P<title>.+))
        '''
    _TESTS = [
@ -89,6 +88,16 @@ class NBCNewsIE(InfoExtractor):
                'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
            },
        },
        {
            'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
            'md5': 'b5dda8cddd8650baa0dcb616dd2cf60d',
            'info_dict': {
                'id': 'sekXqyTVnmN3',
                'ext': 'mp4',
                'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
                'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
            },
        },
    ]
    def _real_extract(self, url):
@ -107,13 +116,13 @@ class NBCNewsIE(InfoExtractor):
                'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
            }
        else:
-            # "feature" pages use theplatform.com
+            # "feature" and "nightly-news" pages use theplatform.com
            title = mobj.group('title')
            webpage = self._download_webpage(url, title)
            bootstrap_json = self._search_regex(
-                r'var bootstrapJson = ({.+})\s*$', webpage, 'bootstrap json',
+                r'var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
-                flags=re.MULTILINE)
+                webpage, 'bootstrap json', flags=re.MULTILINE)
-            bootstrap = json.loads(bootstrap_json)
+            bootstrap = self._parse_json(bootstrap_json, video_id)
            info = bootstrap['results'][0]['video']
            mpxid = info['mpxId']
--- a/youtube_dl/extractor/streamcz.py
+++ b/youtube_dl/extractor/streamcz.py
@ -1,14 +1,30 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 import hashlib
 import time
 from .common import InfoExtractor
 from ..compat import (
    compat_urllib_request,
 )
 from ..utils import (
    int_or_none,
 )
 def _get_api_key(api_path):
    if api_path.endswith('?'):
        api_path = api_path[:-1]
    api_key = 'fb5f58a820353bd7095de526253c14fd'
    a = '{0:}{1:}{2:}'.format(api_key, api_path, int(round(time.time() / 24 / 3600)))
    return hashlib.md5(a.encode('ascii')).hexdigest()
 class StreamCZIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<id>[0-9]+)'
    _API_URL = 'http://www.stream.cz/API'
    _TESTS = [{
        'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
@ -36,8 +52,11 @@ class StreamCZIE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)
-        data = self._download_json(
+        api_path = '/episode/%s' % video_id
-            'http://www.stream.cz/API/episode/%s' % video_id, video_id)
+
        req = compat_urllib_request.Request(self._API_URL + api_path)
        req.add_header('Api-Password', _get_api_key(api_path))
        data = self._download_json(req, video_id)
        formats = []
        for quality, video in enumerate(data['video_qualities']):
--- a/youtube_dl/extractor/sunporno.py
+++ b/youtube_dl/extractor/sunporno.py
@ -52,7 +52,7 @@ class SunPornoIE(InfoExtractor):
        formats = []
        quality = qualities(['mp4', 'flv'])
-        for video_url in re.findall(r'<source src="([^"]+)"', webpage):
+        for video_url in re.findall(r'<(?:source|video) src="([^"]+)"', webpage):
            video_ext = determine_ext(video_url)
            formats.append({
                'url': video_url,
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@ -2,6 +2,11 @@ from __future__ import unicode_literals
 import re
 import json
 import time
 import hmac
 import binascii
 import hashlib
 from .subtitles import SubtitlesInfoExtractor
 from ..compat import (
@ -11,6 +16,7 @@ from ..utils import (
    determine_ext,
    ExtractorError,
    xpath_with_ns,
    unsmuggle_url,
 )
 _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
@ -18,7 +24,7 @@ _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language
 class ThePlatformIE(SubtitlesInfoExtractor):
    _VALID_URL = r'''(?x)
-        (?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/
+        (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
           (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
         |theplatform:)(?P<id>[^/\?&]+)'''
@ -38,9 +44,33 @@ class ThePlatformIE(SubtitlesInfoExtractor):
        },
    }
    @staticmethod
    def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
        flags = '10' if include_qs else '00'
        expiration_date = '%x' % (int(time.time()) + life)
        def str_to_hex(str):
            return binascii.b2a_hex(str.encode('ascii')).decode('ascii')
        def hex_to_str(hex):
            return binascii.a2b_hex(hex)
        relative_path = url.split('http://link.theplatform.com/s/')[1].split('?')[0]
        clear_text = hex_to_str(flags + expiration_date + str_to_hex(relative_path))
        checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
        sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
        return '%s&sig=%s' % (url, sig)
    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        mobj = re.match(self._VALID_URL, url)
        provider_id = mobj.group('provider_id')
        video_id = mobj.group('id')
        if not provider_id:
            provider_id = 'dJ5BDC'
        if mobj.group('config'):
            config_url = url + '&form=json'
            config_url = config_url.replace('swf/', 'config/')
@ -48,8 +78,12 @@ class ThePlatformIE(SubtitlesInfoExtractor):
            config = self._download_json(config_url, video_id, 'Downloading config')
            smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
        else:
-            smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
+            smil_url = ('http://link.theplatform.com/s/{0}/{1}/meta.smil?'
-                        'format=smil&mbr=true'.format(video_id))
+                        'format=smil&mbr=true'.format(provider_id, video_id))
        sig = smuggled_data.get('sig')
        if sig:
            smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
        meta = self._download_xml(smil_url, video_id)
        try:
@ -62,7 +96,7 @@ class ThePlatformIE(SubtitlesInfoExtractor):
        else:
            raise ExtractorError(error_msg, expected=True)
-        info_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id)
+        info_url = 'http://link.theplatform.com/s/{0}/{1}?format=preview'.format(provider_id, video_id)
        info_json = self._download_webpage(info_url, video_id)
        info = json.loads(info_json)
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@ -138,7 +138,7 @@ class FFmpegPostProcessor(PostProcessor):
        if self._downloader.params.get('verbose', False):
            self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
-        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        stdout, stderr = p.communicate()
        if p.returncode != 0:
            stderr = stderr.decode('utf-8', 'replace')
@ -178,8 +178,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
                encodeArgument('-show_streams'),
                encodeFilename(self._ffmpeg_filename_argument(path), True)]
            if self._downloader.params.get('verbose', False):
-                self._downloader.to_screen('[debug] ffprobe command line: %s' % shell_quote(cmd))
+                self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd)))
-            handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
+            handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE)
            output = handle.communicate()[0]
            if handle.wait() != 0:
                return None