From 03339b7b5bb8f563eace8826512e2f7a4baba415 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 26 Jun 2015 18:25:43 +0100 Subject: [PATCH 1/9] [snagfilms] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/snagfilms.py | 67 +++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 youtube_dl/extractor/snagfilms.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index dc1a302e6..3b906b880 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -492,6 +492,7 @@ from .smotri import ( SmotriUserIE, SmotriBroadcastIE, ) +from .snagfilms import SnagFilmsIE from .snotr import SnotrIE from .sohu import SohuIE from .soompi import ( diff --git a/youtube_dl/extractor/snagfilms.py b/youtube_dl/extractor/snagfilms.py new file mode 100644 index 000000000..33832855f --- /dev/null +++ b/youtube_dl/extractor/snagfilms.py @@ -0,0 +1,67 @@ +from .common import InfoExtractor +from ..utils import js_to_json +from re import DOTALL + +class SnagFilmsIE(InfoExtractor): + _VALID_URL = r'(?:https?://)?(?:www.)?snagfilms\.com/films/title/(?P.+?)(?:/|$)' + _TEST = { + 'url': 'http://www.snagfilms.com/films/title/lost_for_life', + 'info_dict': + { + 'id': '0000014c-de2f-d5d6-abcf-ffef58af0017', + 'display_id': 'lost_for_life', + 'ext': 'mp4', + 'title': 'Lost for Life', + 'duration': 4489, + 'description': 'In the United States, more than 2500 individuals are serving life-without-parole sentences for crimes they committed when they were seventeen years old or younger. Children as young as thirteen are among the thousands serving these sentences. Directed by Joshua Rofé (who spent four intensive years on the project), LOST FOR LIFE tells the stories of these individuals, their families and the families of juvenile murder victims. This searingly powerful documentary tackles this contentious issue from multiple perspectives and explores the complexity of the lives of those affected. What is justice when a kid kills? Can a horrific act place a life beyond redemption? Could you forgive?
', + 'categories': ['Documentary','Crime','Award Winning','Festivals'] + } + } + + def _real_extract(self, url): + display_id = self._search_regex( + self._VALID_URL, + url, + 'display_id', + group='display_id' + ) + webpage = self._download_webpage(url, display_id) + + json_data = self._parse_json(self._html_search_regex( + r'"data":{"film":(?P{.*?}})}', + webpage, + 'data', + group='data' + ), display_id) + title = json_data['title'] + video_id = json_data['id'] + duration = int(json_data['duration']) + description = json_data['synopsis'] + categories = [category['title'] for category in json_data['categories']] + + embed_webpage = self._download_webpage('http://www.snagfilms.com/embed/player?filmId=' + video_id, video_id) + sources = self._parse_json(js_to_json(self._html_search_regex( + r'sources: (?P\[.*?\])', + embed_webpage, + 'sources', + group='sources', + flags=DOTALL + )), video_id) + + formats = [] + for source in sources: + if source['type'] == 'm3u8': + formats.extend(self._extract_m3u8_formats(source['file'], video_id)) + else: + formats.append({'url': source['file'],'ext': source['type'], 'resolution': source['label']}) + self._sort_formats(formats) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'duration': duration, + 'description': description, + 'categories': categories, + 'formats': formats, + } From 7e0480ae0e7a9d0b3c73366b8d9ecf86226ad5a8 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 26 Jun 2015 21:50:27 +0100 Subject: [PATCH 2/9] convert tabs to 4 spaces identation --- youtube_dl/extractor/snagfilms.py | 114 +++++++++++++++--------------- 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/youtube_dl/extractor/snagfilms.py b/youtube_dl/extractor/snagfilms.py index 33832855f..212fefef7 100644 --- a/youtube_dl/extractor/snagfilms.py +++ b/youtube_dl/extractor/snagfilms.py @@ -3,65 +3,65 @@ from ..utils import js_to_json from re import DOTALL class SnagFilmsIE(InfoExtractor): - _VALID_URL = r'(?:https?://)?(?:www.)?snagfilms\.com/films/title/(?P.+?)(?:/|$)' - _TEST = { - 'url': 'http://www.snagfilms.com/films/title/lost_for_life', - 'info_dict': - { - 'id': '0000014c-de2f-d5d6-abcf-ffef58af0017', - 'display_id': 'lost_for_life', - 'ext': 'mp4', - 'title': 'Lost for Life', - 'duration': 4489, - 'description': 'In the United States, more than 2500 individuals are serving life-without-parole sentences for crimes they committed when they were seventeen years old or younger. Children as young as thirteen are among the thousands serving these sentences. Directed by Joshua Rofé (who spent four intensive years on the project), LOST FOR LIFE tells the stories of these individuals, their families and the families of juvenile murder victims. This searingly powerful documentary tackles this contentious issue from multiple perspectives and explores the complexity of the lives of those affected. What is justice when a kid kills? Can a horrific act place a life beyond redemption? Could you forgive?
', - 'categories': ['Documentary','Crime','Award Winning','Festivals'] - } - } + _VALID_URL = r'(?:https?://)?(?:www.)?snagfilms\.com/films/title/(?P.+?)(?:/|$)' + _TEST = { + 'url': 'http://www.snagfilms.com/films/title/lost_for_life', + 'info_dict': + { + 'id': '0000014c-de2f-d5d6-abcf-ffef58af0017', + 'display_id': 'lost_for_life', + 'ext': 'mp4', + 'title': 'Lost for Life', + 'duration': 4489, + 'description': 'In the United States, more than 2500 individuals are serving life-without-parole sentences for crimes they committed when they were seventeen years old or younger. Children as young as thirteen are among the thousands serving these sentences. Directed by Joshua Rofé (who spent four intensive years on the project), LOST FOR LIFE tells the stories of these individuals, their families and the families of juvenile murder victims. This searingly powerful documentary tackles this contentious issue from multiple perspectives and explores the complexity of the lives of those affected. What is justice when a kid kills? Can a horrific act place a life beyond redemption? Could you forgive?
', + 'categories': ['Documentary','Crime','Award Winning','Festivals'] + } + } - def _real_extract(self, url): - display_id = self._search_regex( - self._VALID_URL, - url, - 'display_id', - group='display_id' - ) - webpage = self._download_webpage(url, display_id) + def _real_extract(self, url): + display_id = self._search_regex( + self._VALID_URL, + url, + 'display_id', + group='display_id' + ) + webpage = self._download_webpage(url, display_id) - json_data = self._parse_json(self._html_search_regex( - r'"data":{"film":(?P{.*?}})}', - webpage, - 'data', - group='data' - ), display_id) - title = json_data['title'] - video_id = json_data['id'] - duration = int(json_data['duration']) - description = json_data['synopsis'] - categories = [category['title'] for category in json_data['categories']] + json_data = self._parse_json(self._html_search_regex( + r'"data":{"film":(?P{.*?}})}', + webpage, + 'data', + group='data' + ), display_id) + title = json_data['title'] + video_id = json_data['id'] + duration = int(json_data['duration']) + description = json_data['synopsis'] + categories = [category['title'] for category in json_data['categories']] - embed_webpage = self._download_webpage('http://www.snagfilms.com/embed/player?filmId=' + video_id, video_id) - sources = self._parse_json(js_to_json(self._html_search_regex( - r'sources: (?P\[.*?\])', - embed_webpage, - 'sources', - group='sources', - flags=DOTALL - )), video_id) + embed_webpage = self._download_webpage('http://www.snagfilms.com/embed/player?filmId=' + video_id, video_id) + sources = self._parse_json(js_to_json(self._html_search_regex( + r'sources: (?P\[.*?\])', + embed_webpage, + 'sources', + group='sources', + flags=DOTALL + )), video_id) - formats = [] - for source in sources: - if source['type'] == 'm3u8': - formats.extend(self._extract_m3u8_formats(source['file'], video_id)) - else: - formats.append({'url': source['file'],'ext': source['type'], 'resolution': source['label']}) - self._sort_formats(formats) + formats = [] + for source in sources: + if source['type'] == 'm3u8': + formats.extend(self._extract_m3u8_formats(source['file'], video_id)) + else: + formats.append({'url': source['file'],'ext': source['type'], 'resolution': source['label']}) + self._sort_formats(formats) - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'duration': duration, - 'description': description, - 'categories': categories, - 'formats': formats, - } + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'duration': duration, + 'description': description, + 'categories': categories, + 'formats': formats, + } From fd40bdc0be8984bf6043e70796c1d465a0499d03 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 26 Jun 2015 21:56:15 +0100 Subject: [PATCH 3/9] remove unnecessary symbolic name for group --- youtube_dl/extractor/snagfilms.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/snagfilms.py b/youtube_dl/extractor/snagfilms.py index 212fefef7..60354b9af 100644 --- a/youtube_dl/extractor/snagfilms.py +++ b/youtube_dl/extractor/snagfilms.py @@ -31,7 +31,6 @@ class SnagFilmsIE(InfoExtractor): r'"data":{"film":(?P{.*?}})}', webpage, 'data', - group='data' ), display_id) title = json_data['title'] video_id = json_data['id'] @@ -44,7 +43,6 @@ class SnagFilmsIE(InfoExtractor): r'sources: (?P\[.*?\])', embed_webpage, 'sources', - group='sources', flags=DOTALL )), video_id) From 7d7d4690259f343385a240efcef8d157fc99c72d Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 27 Jun 2015 00:13:14 +0100 Subject: [PATCH 4/9] add support for embed links --- youtube_dl/extractor/snagfilms.py | 48 ++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/snagfilms.py b/youtube_dl/extractor/snagfilms.py index 60354b9af..cd345474e 100644 --- a/youtube_dl/extractor/snagfilms.py +++ b/youtube_dl/extractor/snagfilms.py @@ -1,10 +1,10 @@ +from re import match,DOTALL from .common import InfoExtractor from ..utils import js_to_json -from re import DOTALL class SnagFilmsIE(InfoExtractor): - _VALID_URL = r'(?:https?://)?(?:www.)?snagfilms\.com/films/title/(?P.+?)(?:/|$)' - _TEST = { + _VALID_URL = r'(?:https?://)?(?:www.|embed.)?snagfilms\.com/(?:films/title/(?P.+?)|embed/player\?.*filmId=(?P.+?))(?:&|/|$)' + _TESTS = [{ 'url': 'http://www.snagfilms.com/films/title/lost_for_life', 'info_dict': { @@ -16,29 +16,48 @@ class SnagFilmsIE(InfoExtractor): 'description': 'In the United States, more than 2500 individuals are serving life-without-parole sentences for crimes they committed when they were seventeen years old or younger. Children as young as thirteen are among the thousands serving these sentences. Directed by Joshua Rofé (who spent four intensive years on the project), LOST FOR LIFE tells the stories of these individuals, their families and the families of juvenile murder victims. This searingly powerful documentary tackles this contentious issue from multiple perspectives and explores the complexity of the lives of those affected. What is justice when a kid kills? Can a horrific act place a life beyond redemption? Could you forgive?
', 'categories': ['Documentary','Crime','Award Winning','Festivals'] } - } + },{ + 'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831', + 'info_dict': + { + 'id': '74849a00-85a9-11e1-9660-123139220831', + 'display_id': 'while_we_watch', + 'ext': 'mp4', + 'title': '#whilewewatch', + 'duration': 2311, + 'description': 'A gripping portrait of the Occupy Wall Street media revolution, #WHILEWEWATCH is the first definitive film to emerge from Zuccotti Park—with full access and cooperation from masterminds who made #OccupyWallStreet a reality. The #OccupyWallStreet media team had no fear of a critical city government, big corporations, hostile police or a lagging mainstream media to tell their story. Through rain, snow, grueling days and sleeping on concrete, they pump out exhilarating ideas to the world. With little money, they rely on Twitter, texting, Wi-Fi, posters, Tumblr, live streams, YouTube, Facebook, dramatic marches, drumbeats and chants. As the film unfolds, we witness the burgeoning power of social media.
', + 'categories': ['Documentary','Politics'] + } + }] def _real_extract(self, url): - display_id = self._search_regex( - self._VALID_URL, - url, - 'display_id', - group='display_id' - ) - webpage = self._download_webpage(url, display_id) + display_id, video_id = match(self._VALID_URL,url).groups() + if display_id is None: + embed_webpage = self._download_webpage('http://www.snagfilms.com/embed/player?filmId=' + video_id, video_id) + + display_id = self._html_search_regex( + r"snagfilms\.com/films/title/(?P.+?)(?:/|')", + embed_webpage, + 'display_id' + ) + webpage = self._download_webpage('http://www.snagfilms.com/films/title/' + display_id, display_id) json_data = self._parse_json(self._html_search_regex( r'"data":{"film":(?P{.*?}})}', webpage, - 'data', + 'data' ), display_id) + title = json_data['title'] - video_id = json_data['id'] duration = int(json_data['duration']) description = json_data['synopsis'] categories = [category['title'] for category in json_data['categories']] + thumbnail = json_data['image'] + + if video_id is None: + video_id = json_data['id'] + embed_webpage = self._download_webpage('http://www.snagfilms.com/embed/player?filmId=' + video_id, video_id) - embed_webpage = self._download_webpage('http://www.snagfilms.com/embed/player?filmId=' + video_id, video_id) sources = self._parse_json(js_to_json(self._html_search_regex( r'sources: (?P\[.*?\])', embed_webpage, @@ -61,5 +80,6 @@ class SnagFilmsIE(InfoExtractor): 'duration': duration, 'description': description, 'categories': categories, + 'thumbnail': thumbnail, 'formats': formats, } From f39eb98bab497d8e06b9f243a8240509326678f0 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 27 Jun 2015 10:55:25 +0100 Subject: [PATCH 5/9] download all pages before start extracting info --- youtube_dl/extractor/snagfilms.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/snagfilms.py b/youtube_dl/extractor/snagfilms.py index cd345474e..74cd2698d 100644 --- a/youtube_dl/extractor/snagfilms.py +++ b/youtube_dl/extractor/snagfilms.py @@ -34,7 +34,6 @@ class SnagFilmsIE(InfoExtractor): display_id, video_id = match(self._VALID_URL,url).groups() if display_id is None: embed_webpage = self._download_webpage('http://www.snagfilms.com/embed/player?filmId=' + video_id, video_id) - display_id = self._html_search_regex( r"snagfilms\.com/films/title/(?P.+?)(?:/|')", embed_webpage, @@ -48,16 +47,16 @@ class SnagFilmsIE(InfoExtractor): 'data' ), display_id) + if video_id is None: + video_id = json_data['id'] + embed_webpage = self._download_webpage('http://www.snagfilms.com/embed/player?filmId=' + video_id, video_id) + title = json_data['title'] duration = int(json_data['duration']) description = json_data['synopsis'] categories = [category['title'] for category in json_data['categories']] thumbnail = json_data['image'] - if video_id is None: - video_id = json_data['id'] - embed_webpage = self._download_webpage('http://www.snagfilms.com/embed/player?filmId=' + video_id, video_id) - sources = self._parse_json(js_to_json(self._html_search_regex( r'sources: (?P\[.*?\])', embed_webpage, From 654fd03c73fa0e4407a71c07d821b45321c3cdb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 27 Jun 2015 18:20:42 +0600 Subject: [PATCH 6/9] [snagfilms] Improve and simplify --- youtube_dl/extractor/__init__.py | 5 +- youtube_dl/extractor/snagfilms.py | 173 +++++++++++++++++++----------- 2 files changed, 115 insertions(+), 63 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 59068a8b8..7e74a971d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -493,7 +493,10 @@ from .smotri import ( SmotriUserIE, SmotriBroadcastIE, ) -from .snagfilms import SnagFilmsIE +from .snagfilms import ( + SnagFilmsIE, + SnagFilmsEmbedIE, +) from .snotr import SnotrIE from .sohu import SohuIE from .soompi import ( diff --git a/youtube_dl/extractor/snagfilms.py b/youtube_dl/extractor/snagfilms.py index 74cd2698d..6e103bd49 100644 --- a/youtube_dl/extractor/snagfilms.py +++ b/youtube_dl/extractor/snagfilms.py @@ -1,84 +1,133 @@ -from re import match,DOTALL +from __future__ import unicode_literals + +import re + from .common import InfoExtractor -from ..utils import js_to_json +from ..utils import ( + clean_html, + determine_ext, + int_or_none, + js_to_json, + parse_duration, +) -class SnagFilmsIE(InfoExtractor): - _VALID_URL = r'(?:https?://)?(?:www.|embed.)?snagfilms\.com/(?:films/title/(?P.+?)|embed/player\?.*filmId=(?P.+?))(?:&|/|$)' + +class SnagFilmsEmbedIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:www|embed)\.)?snagfilms\.com/embed/player\?.*\bfilmId=(?P[\da-f-]{36})' _TESTS = [{ - 'url': 'http://www.snagfilms.com/films/title/lost_for_life', - 'info_dict': - { - 'id': '0000014c-de2f-d5d6-abcf-ffef58af0017', - 'display_id': 'lost_for_life', - 'ext': 'mp4', - 'title': 'Lost for Life', - 'duration': 4489, - 'description': 'In the United States, more than 2500 individuals are serving life-without-parole sentences for crimes they committed when they were seventeen years old or younger. Children as young as thirteen are among the thousands serving these sentences. Directed by Joshua Rofé (who spent four intensive years on the project), LOST FOR LIFE tells the stories of these individuals, their families and the families of juvenile murder victims. This searingly powerful documentary tackles this contentious issue from multiple perspectives and explores the complexity of the lives of those affected. What is justice when a kid kills? Can a horrific act place a life beyond redemption? Could you forgive?
', - 'categories': ['Documentary','Crime','Award Winning','Festivals'] - } - },{ - 'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831', - 'info_dict': - { + 'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500', + 'md5': '2924e9215c6eff7a55ed35b72276bd93', + 'info_dict': { 'id': '74849a00-85a9-11e1-9660-123139220831', - 'display_id': 'while_we_watch', 'ext': 'mp4', 'title': '#whilewewatch', - 'duration': 2311, - 'description': 'A gripping portrait of the Occupy Wall Street media revolution, #WHILEWEWATCH is the first definitive film to emerge from Zuccotti Park—with full access and cooperation from masterminds who made #OccupyWallStreet a reality. The #OccupyWallStreet media team had no fear of a critical city government, big corporations, hostile police or a lagging mainstream media to tell their story. Through rain, snow, grueling days and sleeping on concrete, they pump out exhilarating ideas to the world. With little money, they rely on Twitter, texting, Wi-Fi, posters, Tumblr, live streams, YouTube, Facebook, dramatic marches, drumbeats and chants. As the film unfolds, we witness the burgeoning power of social media.
', - 'categories': ['Documentary','Politics'] } + }, { + 'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017', + 'only_matching': True, }] def _real_extract(self, url): - display_id, video_id = match(self._VALID_URL,url).groups() - if display_id is None: - embed_webpage = self._download_webpage('http://www.snagfilms.com/embed/player?filmId=' + video_id, video_id) - display_id = self._html_search_regex( - r"snagfilms\.com/films/title/(?P.+?)(?:/|')", - embed_webpage, - 'display_id' - ) - webpage = self._download_webpage('http://www.snagfilms.com/films/title/' + display_id, display_id) - - json_data = self._parse_json(self._html_search_regex( - r'"data":{"film":(?P{.*?}})}', - webpage, - 'data' - ), display_id) - - if video_id is None: - video_id = json_data['id'] - embed_webpage = self._download_webpage('http://www.snagfilms.com/embed/player?filmId=' + video_id, video_id) - - title = json_data['title'] - duration = int(json_data['duration']) - description = json_data['synopsis'] - categories = [category['title'] for category in json_data['categories']] - thumbnail = json_data['image'] - - sources = self._parse_json(js_to_json(self._html_search_regex( - r'sources: (?P\[.*?\])', - embed_webpage, - 'sources', - flags=DOTALL - )), video_id) + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) formats = [] - for source in sources: - if source['type'] == 'm3u8': - formats.extend(self._extract_m3u8_formats(source['file'], video_id)) + for source in self._parse_json(js_to_json(self._search_regex( + r'(?s)sources:\s*(\[.+?\]),', webpage, 'json')), video_id): + file_ = source.get('file') + if not file_: + continue + type_ = source.get('type') + format_id = source.get('label') + ext = determine_ext(file_) + if any(_ == 'm3u8' for _ in (type_, ext)): + formats.extend(self._extract_m3u8_formats( + file_, video_id, 'mp4', m3u8_id='hls')) else: - formats.append({'url': source['file'],'ext': source['type'], 'resolution': source['label']}) + bitrate = int_or_none(self._search_regex( + r'(\d+)kbps', file_, 'bitrate', default=None)) + height = int_or_none(self._search_regex( + r'^(\d+)[pP]$', format_id, 'height', default=None)) + formats.append({ + 'url': file_, + 'format_id': format_id, + 'tbr': bitrate, + 'height': height, + }) self._sort_formats(formats) + title = self._search_regex( + [r"title\s*:\s*'([^']+)'", r'([^<]+)'], + webpage, 'title') + return { 'id': video_id, + 'title': title, + 'formats': formats, + } + + +class SnagFilmsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?snagfilms\.com/films/title/(?P[^/]+)' + _TEST = { + 'url': 'http://www.snagfilms.com/films/title/lost_for_life', + 'md5': '19844f897b35af219773fd63bdec2942', + 'info_dict': { + 'id': '0000014c-de2f-d5d6-abcf-ffef58af0017', + 'display_id': 'lost_for_life', + 'ext': 'mp4', + 'title': 'Lost for Life', + 'description': 'md5:fbdacc8bb6b455e464aaf98bc02e1c82', + 'thumbnail': 're:^https?://.*\.jpg', + 'duration': 4489, + 'categories': ['Documentary', 'Crime', 'Award Winning', 'Festivals'] + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + film_id = self._search_regex(r'filmId=([\da-f-]{36})"', webpage, 'film id') + + snag = self._parse_json( + self._search_regex( + 'Snag\.page\.data\s*=\s*(\[.+?\]);', webpage, 'snag'), + display_id) + + for item in snag: + if item.get('data', {}).get('film', {}).get('id') == film_id: + data = item['data']['film'] + title = data['title'] + description = clean_html(data.get('synopsis')) + thumbnail = data.get('image') + duration = int_or_none(data.get('duration') or data.get('runtime')) + categories = [ + category['title'] for category in data.get('categories', []) + if category.get('title')] + break + else: + title = self._search_regex( + r'itemprop="title">([^<]+)<', webpage, 'title') + description = self._html_search_regex( + r'(?s)
(.+?)
', + webpage, 'description', default=None) or self._og_search_description(webpage) + thumbnail = self._og_search_thumbnail(webpage) + duration = parse_duration(self._search_regex( + r'([^<]+)<', + webpage, 'duration', fatal=False)) + categories = re.findall(r'([^<]+)', webpage) + + return { + '_type': 'url_transparent', + 'url': 'http://embed.snagfilms.com/embed/player?filmId=%s' % film_id, + 'id': film_id, 'display_id': display_id, 'title': title, - 'duration': duration, 'description': description, - 'categories': categories, 'thumbnail': thumbnail, - 'formats': formats, + 'duration': duration, + 'categories': categories, } From 7c197ad96dd0f36177eda66777c93502228fc36b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 27 Jun 2015 18:25:50 +0600 Subject: [PATCH 7/9] [snagfilms] Add routine for generic embeds extractions --- youtube_dl/extractor/snagfilms.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/snagfilms.py b/youtube_dl/extractor/snagfilms.py index 6e103bd49..cb52eb72b 100644 --- a/youtube_dl/extractor/snagfilms.py +++ b/youtube_dl/extractor/snagfilms.py @@ -27,6 +27,13 @@ class SnagFilmsEmbedIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r']+?src=(["\'])(?P(?:https?:)?//(?:embed\.)?snagfilms\.com/embed/player.+?)\1', webpage) + if mobj: + return mobj.group('url') + def _real_extract(self, url): video_id = self._match_id(url) From eedd20ef9637660d5585b1b7d221d7f2d31630ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 27 Jun 2015 18:26:14 +0600 Subject: [PATCH 8/9] [extractor/generic] Add support for snagfilms embeds --- youtube_dl/extractor/generic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 42e4e7035..5b1da47e3 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -47,6 +47,7 @@ from .xhamster import XHamsterEmbedIE from .vimeo import VimeoIE from .dailymotion import DailymotionCloudIE from .onionstudios import OnionStudiosIE +from .snagfilms import SnagFilmsEmbedIE class GenericIE(InfoExtractor): @@ -1550,6 +1551,11 @@ class GenericIE(InfoExtractor): if onionstudios_url: return self.url_result(onionstudios_url) + # Look for SnagFilms embeds + snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage) + if snagfilms_url: + return self.url_result(snagfilms_url) + # Look for AdobeTVVideo embeds mobj = re.search( r']+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]', From b8c1cc1a51c16682725cf382f0e498a390c62a2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 27 Jun 2015 18:28:10 +0600 Subject: [PATCH 9/9] [extractor/generic] Add test for snagfilms embeds --- youtube_dl/extractor/generic.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 5b1da47e3..32e41d13e 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -850,6 +850,15 @@ class GenericIE(InfoExtractor): 'uploader_id': 'clickhole', } }, + # SnagFilms embed + { + 'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html', + 'info_dict': { + 'id': '74849a00-85a9-11e1-9660-123139220831', + 'ext': 'mp4', + 'title': '#whilewewatch', + } + }, # AdobeTVVideo embed { 'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',