From 67500bf939d7db66cdfb9f742fdedca1b83b8309 Mon Sep 17 00:00:00 2001 From: xavier Date: Thu, 23 Oct 2014 16:55:39 -0500 Subject: [PATCH 1/5] Initial version of audiomack.py --- youtube_dl/extractor/audiomack.py | 43 +++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 youtube_dl/extractor/audiomack.py diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py new file mode 100644 index 000000000..c5214f401 --- /dev/null +++ b/youtube_dl/extractor/audiomack.py @@ -0,0 +1,43 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +import datetime +import time +import urllib.request +import json + + +class AudiomackIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P[\w/-]+)' + _TEST = { + 'url': 'https://www.audiomack.com/song/crewneckkramer/story-i-tell', + 'info_dict': { + 'id': 'story-i-tell', + 'ext': 'mp3', + 'title': 'story-i-tell' + } + } + + def _real_extract(self, url): + # TODO more code goes here, for example ... + #webpage = self._download_webpage(url, video_id) + #title = self._html_search_regex(r'

<h1>(.*?)</h1>

', webpage, 'title') + + assert("/song/" in url) + songurl = url[url.index("/song/")+5:] + title = songurl[songurl.rindex("/")+1:] + video_id = title + t = int(time.mktime(datetime.datetime.now().timetuple())) + s = "http://www.audiomack.com/api/music/url/song"+songurl+"?_="+str(t) + f = urllib.request.urlopen(s) + j = f.read(1000).decode("utf-8") + data = json.loads(j) + + return { + 'id': video_id, + 'title': title, + 'url' : data["url"], + 'ext' : 'mp3' + # TODO more properties (see youtube_dl/extractor/common.py) + } From 5c565ac9e7fed4f6a7123332cbaf35fc44a5a921 Mon Sep 17 00:00:00 2001 From: xavier Date: Thu, 23 Oct 2014 16:58:11 -0500 Subject: [PATCH 2/5] Added init.py initializer --- youtube_dl/extractor/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 070f9ff19..29f32cdef 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -20,6 +20,7 @@ from .arte import ( ArteTVDDCIE, ArteTVEmbedIE, ) +from .audiomack import AudiomackIE from .auengine import AUEngineIE from .bambuser import BambuserIE, BambuserChannelIE from .bandcamp import BandcampIE, BandcampAlbumIE From 9e9bc793f3abddc4824cfcb13f569163fb0a4ba7 Mon Sep 17 00:00:00 2001 From: xavier Date: Thu, 23 Oct 2014 23:54:59 -0500 Subject: [PATCH 3/5] Finished audiomack extractor --- youtube_dl/extractor/audiomack.py | 82 ++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py index c5214f401..2ececa998 100644 --- a/youtube_dl/extractor/audiomack.py +++ b/youtube_dl/extractor/audiomack.py @@ -1,43 +1,67 @@ +# Xavier Beynon 2014 # coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor +from .soundcloud import SoundcloudIE import datetime import time -import urllib.request -import json class AudiomackIE(InfoExtractor): _VALID_URL = 
r'https?://(?:www\.)?audiomack\.com/song/(?P[\w/-]+)' - _TEST = { - 'url': 'https://www.audiomack.com/song/crewneckkramer/story-i-tell', - 'info_dict': { - 'id': 'story-i-tell', - 'ext': 'mp3', - 'title': 'story-i-tell' + IE_NAME = 'audiomack' + _TESTS = [ + #hosted on audiomack + { + 'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary', + 'file': 'Roosh Williams - Extraordinary.mp3', + 'info_dict': + { + 'ext': 'mp3', + 'title': 'Roosh Williams - Extraordinary' + } + }, + #hosted on soundcloud via audiomack + { + 'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare', + 'file': '172419696.mp3', + 'info_dict': + { + 'ext': 'mp3', + 'title': 'Young Thug ft Lil Wayne - Take Kare', + "upload_date": "20141016", + "description": "New track produced by London On Da Track called “Take Kare\"\n\nhttp://instagram.com/theyoungthugworld\nhttps://www.facebook.com/ThuggerThuggerCashMoney\n", + "uploader": "Young Thug World" + } } - } + ] def _real_extract(self, url): - # TODO more code goes here, for example ... - #webpage = self._download_webpage(url, video_id) - #title = self._html_search_regex(r'

<h1>(.*?)</h1>

', webpage, 'title') - - assert("/song/" in url) - songurl = url[url.index("/song/")+5:] - title = songurl[songurl.rindex("/")+1:] - video_id = title - t = int(time.mktime(datetime.datetime.now().timetuple())) - s = "http://www.audiomack.com/api/music/url/song"+songurl+"?_="+str(t) - f = urllib.request.urlopen(s) - j = f.read(1000).decode("utf-8") - data = json.loads(j) + #id is what follows /song/ in url, usually the uploader name + title + id = url[url.index("/song/")+5:] - return { - 'id': video_id, - 'title': title, - 'url' : data["url"], - 'ext' : 'mp3' - # TODO more properties (see youtube_dl/extractor/common.py) - } + #Call the api, which gives us a json doc with the real url inside + rightnow = int(time.mktime(datetime.datetime.now().timetuple())) + apiresponse = self._download_json("http://www.audiomack.com/api/music/url/song"+id+"?_="+str(rightnow), id) + if not url in apiresponse: + raise Exception("Unable to deduce api url of song") + realurl = apiresponse["url"] + + #Audiomack wraps a lot of soundcloud tracks in their branded wrapper + # - if so, pass the work off to the soundcloud extractor + if SoundcloudIE.suitable(realurl): + sc = SoundcloudIE(downloader=self._downloader) + return sc._real_extract(realurl) + else: + #Pull out metadata + page = self._download_webpage(url, id) + artist = self._html_search_regex(r'(.*)', page, "artist") + songtitle = self._html_search_regex(r'

.*(.*)

', page, "title") + title = artist+" - "+songtitle + return { + 'id': title, # ignore id, which is not useful in song name + 'title': title, + 'url': realurl, + 'ext': 'mp3' + } From fdfefa1b9c86ef58d943a26a157dc234c2df14d9 Mon Sep 17 00:00:00 2001 From: xavier Date: Fri, 24 Oct 2014 21:07:01 -0500 Subject: [PATCH 4/5] Made changes per phihag --- youtube_dl/__init__.py | 1 + youtube_dl/extractor/audiomack.py | 17 +++++++++-------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 7f2b4dfcc..78cdf14df 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -79,6 +79,7 @@ __authors__ = ( 'Carlos Ramos', '5moufl', 'lenaten', + 'Xavier Beynon' ) __license__ = 'Public Domain' diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py index 2ececa998..bdcc51235 100644 --- a/youtube_dl/extractor/audiomack.py +++ b/youtube_dl/extractor/audiomack.py @@ -1,9 +1,9 @@ -# Xavier Beynon 2014 # coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor from .soundcloud import SoundcloudIE +from ..utils import ExtractorError import datetime import time @@ -15,9 +15,9 @@ class AudiomackIE(InfoExtractor): #hosted on audiomack { 'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary', - 'file': 'Roosh Williams - Extraordinary.mp3', 'info_dict': { + 'id' : 'roosh-williams/extraordinary', 'ext': 'mp3', 'title': 'Roosh Williams - Extraordinary' } @@ -39,13 +39,14 @@ class AudiomackIE(InfoExtractor): def _real_extract(self, url): #id is what follows /song/ in url, usually the uploader name + title - id = url[url.index("/song/")+5:] + id = self._match_id(url) #Call the api, which gives us a json doc with the real url inside - rightnow = int(time.mktime(datetime.datetime.now().timetuple())) - apiresponse = self._download_json("http://www.audiomack.com/api/music/url/song"+id+"?_="+str(rightnow), id) - if not url in apiresponse: - raise 
Exception("Unable to deduce api url of song") + rightnow = int(time.time()) + apiresponse = self._download_json("http://www.audiomack.com/api/music/url/song/"+id+"?_="+str(rightnow), id) + + if "url" not in apiresponse: + raise ExtractorError("Unable to deduce api url of song") realurl = apiresponse["url"] #Audiomack wraps a lot of soundcloud tracks in their branded wrapper @@ -60,7 +61,7 @@ class AudiomackIE(InfoExtractor): songtitle = self._html_search_regex(r'

.*(.*)

', page, "title") title = artist+" - "+songtitle return { - 'id': title, # ignore id, which is not useful in song name + 'id': id, # ignore id, which is not useful in song name 'title': title, 'url': realurl, 'ext': 'mp3' From d36cae46d877ebcc656f23a41b53e31731d2b77e Mon Sep 17 00:00:00 2001 From: xavier Date: Fri, 24 Oct 2014 21:11:46 -0500 Subject: [PATCH 5/5] Not directly calling soundcloud extractor anymore --- youtube_dl/extractor/audiomack.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py index bdcc51235..2f32253af 100644 --- a/youtube_dl/extractor/audiomack.py +++ b/youtube_dl/extractor/audiomack.py @@ -52,8 +52,7 @@ class AudiomackIE(InfoExtractor): #Audiomack wraps a lot of soundcloud tracks in their branded wrapper # - if so, pass the work off to the soundcloud extractor if SoundcloudIE.suitable(realurl): - sc = SoundcloudIE(downloader=self._downloader) - return sc._real_extract(realurl) + return {'_type': 'url', 'url': realurl, 'ie_key': 'Soundcloud'} else: #Pull out metadata page = self._download_webpage(url, id)