From b1c3a49fffb7109125a2ad215f412f1198e3dffd Mon Sep 17 00:00:00 2001 From: "Ching Yi, Chan" Date: Sun, 12 Oct 2014 08:32:26 +0800 Subject: [PATCH 1/8] apply ratelimit to f4m --- youtube_dl/downloader/f4m.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index b3be16ff1..54dd6ac3f 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -230,6 +230,7 @@ class F4mFD(FileDownloader): 'continuedl': True, 'quiet': True, 'noprogress': True, + 'ratelimit': self.params.get('ratelimit', None), 'test': self.params.get('test', False), }) From 1557ed153c42176335f2ef069661ef2c98f16ce4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 13 Dec 2014 12:39:58 +0100 Subject: [PATCH 2/8] [test_unicode_literals] Import from test.helper --- test/test_unicode_literals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_unicode_literals.py b/test/test_unicode_literals.py index d3cba869b..19813e034 100644 --- a/test/test_unicode_literals.py +++ b/test/test_unicode_literals.py @@ -20,7 +20,7 @@ IGNORED_FILES = [ ] -from helper import assertRegexpMatches +from test.helper import assertRegexpMatches class TestUnicodeLiterals(unittest.TestCase): From 8085fc15ccb161d96eee607bf21dfdb36b890b20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 13 Dec 2014 18:42:29 +0600 Subject: [PATCH 3/8] [adultswim] Improve segment duration extraction --- youtube_dl/extractor/adultswim.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index 39e4ca296..502a9c25a 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -7,6 +7,8 @@ import json from .common import InfoExtractor from ..utils import ( ExtractorError, + xpath_text, + float_or_none, ) @@ -128,7 +130,8 @@ class AdultSwimIE(InfoExtractor): 
segment_url, segment_title, 'Downloading segment information', 'Unable to download segment information') - segment_duration = idoc.find('.//trt').text.strip() + segment_duration = float_or_none( + xpath_text(idoc, './/trt', 'segment duration').strip()) formats = [] file_els = idoc.findall('.//files/file') From 04c95441871775dfdaa0ea3eeca0a5d23f270f2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 13 Dec 2014 18:47:34 +0600 Subject: [PATCH 4/8] [bbccouk] Fix vpid warning --- youtube_dl/extractor/bbccouk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py index 01c02d360..2d2f742ae 100644 --- a/youtube_dl/extractor/bbccouk.py +++ b/youtube_dl/extractor/bbccouk.py @@ -209,7 +209,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor): webpage = self._download_webpage(url, group_id, 'Downloading video page') programme_id = self._search_regex( - r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False) + r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None) if programme_id: player = self._download_json( 'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id, From 1fa174692aae06406c9524fec392a131f10c68fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 14 Dec 2014 02:00:54 +0600 Subject: [PATCH 5/8] [bandcamp:album] Make path optional (Closes #4461) --- youtube_dl/extractor/bandcamp.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 9fb770cb1..bcb2821cd 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -106,7 +106,7 @@ class BandcampIE(InfoExtractor): class BandcampAlbumIE(InfoExtractor): IE_NAME = 'Bandcamp:album' - _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+))' + _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+))?' 
_TESTS = [{ 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', @@ -141,6 +141,12 @@ class BandcampAlbumIE(InfoExtractor): 'title': 'Hierophany of the Open Grave', }, 'playlist_mincount': 9, + }, { + 'url': 'http://dotscale.bandcamp.com', + 'info_dict': { + 'title': 'Loom', + }, + 'playlist_mincount': 7, }] def _real_extract(self, url): From 4a0132c570e29075e377c8b70d7f61018d1e4479 Mon Sep 17 00:00:00 2001 From: Mathias Rav <rav@cs.au.dk> Date: Sat, 13 Dec 2014 21:58:52 +0100 Subject: [PATCH 6/8] [Restudy] Add new extractor for restudy.dk --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/restudy.py | 41 ++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 youtube_dl/extractor/restudy.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 119ec2044..3ae7a8a52 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -316,6 +316,7 @@ from .radiofrance import RadioFranceIE from .rai import RaiIE from .rbmaradio import RBMARadioIE from .redtube import RedTubeIE +from .restudy import RestudyIE from .reverbnation import ReverbNationIE from .ringtv import RingTVIE from .ro220 import Ro220IE diff --git a/youtube_dl/extractor/restudy.py b/youtube_dl/extractor/restudy.py new file mode 100644 index 000000000..56a6c0f93 --- /dev/null +++ b/youtube_dl/extractor/restudy.py @@ -0,0 +1,41 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class RestudyIE(InfoExtractor): + _VALID_URL = r'https://www.restudy.dk/video/play/id/(?P<id>[0-9]+)' + _TEST = { + 'url': 'https://www.restudy.dk/video/play/id/1637', + # MD5 sum of first 10241 bytes of the video file, as reported by + # head -c 10241 Leiden-frosteffekt-1637.mp4 | md5sum + 'md5': '4e755c4287f292a1fe5363834a683818', + 'info_dict': { + 'id': '1637', + 'ext': 'mp4', + 'title': 'Leiden-frosteffekt', + } + } + + def _real_extract(self, url): + video_id = 
self._match_id(url) + webpage = self._download_webpage(url, video_id) + xml_url = ( + 'https://www.restudy.dk/awsmedia/SmilDirectory/video_%s.xml' + % video_id) + xml = self._download_webpage(xml_url, video_id) + + base = self._search_regex( + r'<meta base="([^"]+)', xml, 'meta base') + # TODO: Provide multiple video qualities instead of forcing highest + filename = self._search_regex( + r'<video src="mp4:([^"]+_high\.mp4)', xml, 'filename') + url = '%s%s' % (base, filename) + title = self._og_search_title(webpage) + return { + 'id': video_id, + 'title': title, + 'url': url, + 'protocol': 'rtmp', + } From ac265bef1e685706c0be89237117e2df5f796ecb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 14 Dec 2014 03:41:00 +0600 Subject: [PATCH 7/8] [restudy] Simplify and extract all formats --- youtube_dl/extractor/restudy.py | 37 ++++++++++++++++----------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/restudy.py b/youtube_dl/extractor/restudy.py index 56a6c0f93..b17c2bfc0 100644 --- a/youtube_dl/extractor/restudy.py +++ b/youtube_dl/extractor/restudy.py @@ -5,37 +5,36 @@ from .common import InfoExtractor class RestudyIE(InfoExtractor): - _VALID_URL = r'https://www.restudy.dk/video/play/id/(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?restudy\.dk/video/play/id/(?P<id>[0-9]+)' _TEST = { 'url': 'https://www.restudy.dk/video/play/id/1637', - # MD5 sum of first 10241 bytes of the video file, as reported by - # head -c 10241 Leiden-frosteffekt-1637.mp4 | md5sum - 'md5': '4e755c4287f292a1fe5363834a683818', 'info_dict': { 'id': '1637', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Leiden-frosteffekt', + 'description': 'Denne video er et eksperiment med flydende kvælstof.', + }, + 'params': { + # rtmp download + 'skip_download': True, } } def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - xml_url = ( - 
'https://www.restudy.dk/awsmedia/SmilDirectory/video_%s.xml' - % video_id) - xml = self._download_webpage(xml_url, video_id) - - base = self._search_regex( - r'<meta base="([^"]+)', xml, 'meta base') - # TODO: Provide multiple video qualities instead of forcing highest - filename = self._search_regex( - r'<video src="mp4:([^"]+_high\.mp4)', xml, 'filename') - url = '%s%s' % (base, filename) - title = self._og_search_title(webpage) + + title = self._og_search_title(webpage).strip() + description = self._og_search_description(webpage).strip() + + formats = self._extract_smil_formats( + 'https://www.restudy.dk/awsmedia/SmilDirectory/video_%s.xml' % video_id, + video_id) + return { 'id': video_id, 'title': title, - 'url': url, - 'protocol': 'rtmp', + 'description': description, + 'formats': formats, } From 2564300e55ec30754cbc416aaa134cbb0da1a5f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 14 Dec 2014 03:42:42 +0600 Subject: [PATCH 8/8] Credit @Mortal for restudy (#4463) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index bfa00f91b..5d45e02fc 100644 --- a/AUTHORS +++ b/AUTHORS @@ -92,3 +92,4 @@ Tithen-Firion Zack Fernandes cryptonaut Adrian Kretz +Mathias Rav