Browse Source

Add various anime sites (Closes #4554)

master
Philipp Hagemeister 8 years ago
parent
commit
b68ff25917
  1. 1
      AUTHORS
  2. 19
      youtube_dl/extractor/__init__.py
  3. 76
      youtube_dl/extractor/gogoanime.py
  4. 149
      youtube_dl/extractor/play44.py
  5. 74
      youtube_dl/extractor/soulanime.py
  6. 36
      youtube_dl/extractor/videofun.py

1
AUTHORS

@ -98,3 +98,4 @@ Will Glynn
Max Reimann
Cédric Luthi
Thijs Vermeir
Joel Leclerc

19
youtube_dl/extractor/__init__.py

@ -164,6 +164,10 @@ from .globo import GloboIE
from .godtube import GodTubeIE
from .goldenmoustache import GoldenMoustacheIE
from .golem import GolemIE
from .gogoanime import (
GoGoAnimeIE,
GoGoAnimeSearchIE
)
from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE
from .gorillavid import GorillaVidIE
@ -313,6 +317,16 @@ from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
from .planetaplay import PlanetaPlayIE
from .played import PlayedIE
from .play44 import (
Play44IE,
ByZooIE,
Video44IE,
VideoWingIE,
PlayPandaIE,
VideoZooIE,
PlayBBIE,
EasyVideoIE
)
from .playfm import PlayFMIE
from .playvid import PlayvidIE
from .podomatic import PodomaticIE
@ -373,6 +387,10 @@ from .smotri import (
from .snotr import SnotrIE
from .sockshare import SockshareIE
from .sohu import SohuIE
from .soulanime import (
SoulAnimeWatchingIE,
SoulAnimeSeriesIE
)
from .soundcloud import (
SoundcloudIE,
SoundcloudSetIE,
@ -467,6 +485,7 @@ from .viddler import ViddlerIE
from .videobam import VideoBamIE
from .videodetective import VideoDetectiveIE
from .videolecturesnet import VideoLecturesNetIE
from .videofun import VideoFunIE
from .videofyme import VideofyMeIE
from .videomega import VideoMegaIE
from .videopremium import VideoPremiumIE

76
youtube_dl/extractor/gogoanime.py

@ -0,0 +1,76 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
compat_urllib_parse,
get_element_by_attribute,
unescapeHTML
)
class GoGoAnimeIE(InfoExtractor):
    """Extractor for individual post pages on www.gogoanime.com.

    The page itself hosts no media: the videos are embedded through
    ``<iframe>`` elements inside the "postcontent" element.  This extractor
    collects the iframe targets and hands them over to whichever extractor
    matches them, either as a playlist or as a single URL result.
    """
    IE_NAME = 'gogoanime'
    IE_DESC = 'GoGoAnime'
    # The dots of the host name are escaped so that only the literal host
    # "www.gogoanime.com" matches (an unescaped "." matches any character).
    _VALID_URL = r'http://www\.gogoanime\.com/(?P<id>[A-Za-z0-9-]+)'
    _TEST = {
        'url': 'http://www.gogoanime.com/mahou-shoujo-madoka-magica-movie-1',
        'info_dict': {
            'id': 'mahou-shoujo-madoka-magica-movie-1'
        },
        'playlist_count': 3
    }

    def _real_extract(self, url):
        """Return a playlist or a single URL result for the given post.

        Raises:
            ExtractorError: if the site reports the page as missing, if the
                "postcontent" element cannot be located, or if a
                single-video page contains no usable iframe.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        if 'Oops! Page Not Found</font>' in page:
            raise ExtractorError('Video does not exist', expected=True)

        content = get_element_by_attribute("class", "postcontent", page)
        if content is None:
            # Guard against handing None to re.findall (TypeError) when the
            # expected element is absent from the page.
            raise ExtractorError('Unable to find the post content')

        vids = re.findall(r'<iframe[^>]*?src=[\'"](h[^\'"]+)[\'"]', content)
        # Iframes whose source mentions "videofun" are skipped; the remaining
        # targets are URL-unquoted and HTML-unescaped.
        vids = [
            unescapeHTML(compat_urllib_parse.unquote(x))
            for x in vids if 'videofun' not in x]

        # Pages whose post content starts with "<p><iframe ...></iframe><br />"
        # are returned as a playlist of all collected iframe targets.
        if re.search(r'<div class="postcontent">[^<]*<p><iframe src=[\'"][^>]+></iframe><br />', page):
            return self.playlist_result([self.url_result(vid) for vid in vids], video_id)

        if not vids:
            # Previously vids[0] raised a bare IndexError here.
            raise ExtractorError('No embedded video found', expected=True)

        title = self._html_search_regex(
            r'<div class="postdesc">[^<]*<h1>([^<]+)</h1>', page, 'title')

        return {
            '_type': 'url',
            'id': video_id,
            'url': vids[0],
            'title': title,
        }
class GoGoAnimeSearchIE(InfoExtractor):
    """Turn a www.gogoanime.com search-results page into a playlist.

    The search term (the ``s`` query parameter) doubles as the playlist id;
    every link found in a "postlist" entry becomes one playlist entry.
    """
    IE_NAME = 'gogoanime:search'
    IE_DESC = 'GoGoAnime Search'
    _VALID_URL = r'http://www\.gogoanime\.com/.*\?s=(?P<id>[^&]*)'
    _TEST = {
        'url': 'http://www.gogoanime.com/?s=bokusatsu',
        'info_dict': {
            'id': 'bokusatsu'
        },
        'playlist_count': 6
    }

    def _real_extract(self, url):
        """Download the results page and wrap each linked post as a URL result."""
        query = self._match_id(url)
        results_page = self._download_webpage(url, query)

        entries = []
        # The pattern has a single group, so findall yields plain URL strings.
        for post_url in re.findall(
                r'<div class="postlist">[^<]*<p[^>]*>[^<]*<a href="(?P<url>[^"]+)"',
                results_page):
            entries.append(self.url_result(post_url))

        return self.playlist_result(entries, query)

149
youtube_dl/extractor/play44.py

@ -0,0 +1,149 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse
)
class Play44IE(InfoExtractor):
    """Extractor for play44.net embed pages.

    Also serves as the base class for the other embed hosts in this module,
    which only override ``_VALID_URL`` and ``_TESTS`` and reuse
    ``_real_extract`` unchanged.
    """
    _VALID_URL = r'http://[w.]*play44\.net/embed\.php[^/]*/(?P<id>.+)'
    _TESTS = [{
        'url': 'http://play44.net/embed.php?w=600&h=438&vid=M/mahou-shoujo-madoka-magica-07.flv',
        'md5': 'e37e99d665f503dd2db952f7c4dba9e6',
        'info_dict': {
            'id': 'mahou-shoujo-madoka-magica-07',
            'ext': 'flv',
            'title': 'mahou-shoujo-madoka-magica-07',
        }
    }]

    def _real_extract(self, url):
        """Return the info dict for the media file referenced by the embed page.

        The media URL is read from the ``_url = "...";`` assignment in the
        page and URL-unquoted.  Both the id and the title of the result are
        the file name of that URL without its extension; the id matched by
        ``_VALID_URL`` is only passed to the download call.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        video_url = compat_urllib_parse.unquote(self._html_search_regex(
            r'_url = "(https?://[^"]+?)";', page, 'url'))

        # The trailing dot is escaped so that it matches the literal "."
        # before the file extension; unescaped, it matched any character and
        # could swallow the last character of a dot-less file name.
        title = self._search_regex(r'.*/(?P<title>[^.]*)\.', video_url, 'title')

        return {
            'id': title,
            'url': video_url,
            'title': title,
        }
class ByZooIE(Play44IE):
    """Extractor for byzoo.org embed pages.

    Only the URL pattern and the test data are defined here; extraction is
    inherited from Play44IE.
    """
    # Same URL shape as the Play44IE pattern, with the byzoo.org host.
    _VALID_URL = r'http://[w.]*byzoo\.org/embed\.php[^/]*/(?P<id>.+)'
    _TESTS = [{
        'url': 'http://byzoo.org/embed.php?w=600&h=438&vid=at/nw/mahou_shoujo_madoka_magica_movie_3_-_part1.mp4',
        'md5': '455c83dabe2cd9fd74a87612b01fe017',
        'info_dict': {
            'id': 'mahou_shoujo_madoka_magica_movie_3_-_part1',
            'ext': 'mp4',
            'title': 'mahou_shoujo_madoka_magica_movie_3_-_part1',
        }
    }]
class Video44IE(Play44IE):
    """Extractor for video44.net player pages.

    Only the URL pattern and the test data are defined here; extraction is
    inherited from Play44IE.
    """
    # The id is the complete value of the "file" query parameter.  The
    # previous group, "[^&].", captured exactly two characters of it.
    _VALID_URL = r'http://[w.]*video44\.net/.*file=(?P<id>[^&]+).*'
    _TESTS = [{
        'url': 'http://www.video44.net/gogo/?w=600&h=438&file=chaoshead-12.flv&sv=1',
        'md5': '43eaec6d0beb10e8d42459b9f108aff3',
        'info_dict': {
            'id': 'chaoshead-12',
            'ext': 'mp4',
            'title': 'chaoshead-12',
        }
    }]
class VideoWingIE(Play44IE):
    """Extractor for videowing embed pages (any top-level domain).

    Only the URL pattern and the test data are defined here; extraction is
    inherited from Play44IE.
    """
    # Verbose-mode pattern (whitespace inside it is ignored).  Two URL forms
    # are accepted: a "video=" query parameter, or an "embed/<id>" path.
    _VALID_URL = r'''(?x)
http://[w.]*videowing\.[^/]*/
(?:
.*video=/*
|embed/
)
(?P<id>[^&?.]+)
'''
    _TESTS = [{
        # "video=" query parameter form
        'url': 'http://videowing.me/embed?w=718&h=438&video=ongoing/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
        'md5': '4ed320e353ed26c742c4f12a9c210b60',
        'info_dict': {
            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
            'ext': 'mp4',
            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
        }
    }, {
        # "embed/<id>" path form
        'url': 'http://videowing.me/embed/a8d6a39522df066bd734a69f2334497e?w=600&h=438',
        'md5': '33fdd71581357018c226f95c5cedcfd7',
        'info_dict': {
            'id': 'mahoushoujomadokamagicamovie1part1',
            'ext': 'flv',
            'title': 'mahoushoujomadokamagicamovie1part1',
        }
    }]
class PlayPandaIE(Play44IE):
    """Extractor for playpanda embed pages (any top-level domain).

    Only the URL pattern and the test data are defined here; extraction is
    inherited from Play44IE.
    """
    # The id is the complete value of the "vid" query parameter (leading
    # slashes skipped).  The previous group, "[^&].", captured exactly two
    # characters of it.
    _VALID_URL = r'http://[w.]*playpanda\.[^/]*/.*vid=/*(?P<id>[^&]+).*'
    _TESTS = [{
        'url': 'http://playpanda.net/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
        'md5': '4ed320e353ed26c742c4f12a9c210b60',
        'info_dict': {
            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
            'ext': 'mp4',
            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
            # NOTE(review): Play44IE._real_extract, as written in this file,
            # returns only 'id', 'url' and 'title' - confirm whether this
            # 'description' expectation can be satisfied.
            'description': 'boku_wa_tomodachi_ga_sukunai_-_05'
        }
    }]
class VideoZooIE(Play44IE):
    """Extractor for videozoo embed pages (any top-level domain).

    Only the URL pattern and the test data are defined here; extraction is
    inherited from Play44IE.
    """
    # The id is the complete value of the "vid" query parameter (leading
    # slashes skipped).  The previous group, "[^&].", captured exactly two
    # characters of it.
    _VALID_URL = r'http://[w.]*videozoo\.[^/]*/.*vid=/*(?P<id>[^&]+).*'
    _TESTS = [{
        'url': 'http://videozoo.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
        'md5': '4ed320e353ed26c742c4f12a9c210b60',
        'info_dict': {
            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
            'ext': 'mp4',
            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
        }
    }]
class PlayBBIE(Play44IE):
    """Extractor for playbb embed pages (any top-level domain).

    Only the URL pattern and the test data are defined here; extraction is
    inherited from Play44IE.
    """
    # The id is the complete value of the "vid" query parameter (leading
    # slashes skipped).  The previous group, "[^&].", captured exactly two
    # characters of it.
    _VALID_URL = r'http://[w.]*playbb\.[^/]*/.*vid=/*(?P<id>[^&]+).*'
    _TESTS = [{
        'url': 'http://playbb.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
        'md5': '4ed320e353ed26c742c4f12a9c210b60',
        'info_dict': {
            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
            'ext': 'mp4',
            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
        }
    }]
class EasyVideoIE(Play44IE):
    """Extractor for easyvideo player pages (any top-level domain).

    Only the URL pattern and the test data are defined here; extraction is
    inherited from Play44IE.
    """
    # The id is the "file" query parameter up to the first "&" or ".",
    # with leading slashes skipped.
    _VALID_URL = r'http://[w.]*easyvideo\.[^/]*/.*file=/*(?P<id>[^&.]+)'
    _TESTS = [{
        'url': 'http://easyvideo.me/gogo/?w=718&h=438&file=bokuwatomodachigasukunai-04.flv&sv=1',
        'md5': '26178b57629b7650106d72b191137176',
        'info_dict': {
            'id': 'bokuwatomodachigasukunai-04',
            'ext': 'mp4',
            'title': 'bokuwatomodachigasukunai-04',
        },
        'skip': 'Blocked in Germany',
    }]

74
youtube_dl/extractor/soulanime.py

@ -0,0 +1,74 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class SoulAnimeWatchingIE(InfoExtractor):
    """Extractor for single-episode "watching" pages on soul-anime.<domain>."""
    IE_NAME = "soulanime:watching"
    IE_DESC = "SoulAnime video"
    _TEST = {
        'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/',
        'md5': '05fae04abf72298098b528e98abf4298',
        'info_dict': {
            'id': 'seirei-tsukai-no-blade-dance-episode-9',
            'ext': 'mp4',
            'title': 'seirei-tsukai-no-blade-dance-episode-9',
            'description': 'seirei-tsukai-no-blade-dance-episode-9'
        }
    }
    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)'

    def _real_extract(self, url):
        """Return the info dict for the episode's download link.

        The link is taken from the first anchor inside the "download" div and
        prefixed with the site root of the domain the page was requested
        from.  The file extension is derived from the Content-Type header of
        the media response; when that header carries no subtype, 'ext' is
        left out of the result.  The id from the URL is reused as both title
        and description.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        domain = mobj.group('domain')

        page = self._download_webpage(url, video_id)

        video_url_encoded = self._html_search_regex(
            r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url')
        video_url = "http://www.soul-anime." + domain + video_url_encoded

        vid = self._request_webpage(video_url, video_id)
        # gettype() only exists on Python 2's header object; reading the raw
        # header through get() works on both Python 2 and Python 3.
        content_type = vid.info().get('Content-Type') or ''
        # Drop parameters such as "; charset=..." and normalise the case.
        mime_type = content_type.split(';')[0].strip().lower()
        ext = mime_type.partition('/')[2]

        info = {
            'id': video_id,
            'url': video_url,
            'title': video_id,
            'description': video_id
        }
        if ext:
            info['ext'] = ext
        return info
class SoulAnimeSeriesIE(InfoExtractor):
    """Turn a soul-anime.<domain> series page into a playlist of episodes."""
    IE_NAME = "soulanime:series"
    IE_DESC = "SoulAnime Series"
    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)'
    # Episodes are listed as <option> elements whose value is a "/watch..." path.
    _EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>'
    _TEST = {
        'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/',
        'info_dict': {
            'id': 'black-rock-shooter-tv'
        },
        'playlist_count': 8
    }

    def _real_extract(self, url):
        """Collect every episode path on the series page as a URL result."""
        url_match = re.match(self._VALID_URL, url)
        series_id = url_match.group('id')
        # Episode paths are site-relative; prefix them with the same domain
        # the series page was requested from.
        site_root = "http://www.soul-anime." + url_match.group('domain')

        page = self._download_webpage(url, series_id, "Downloading series page")

        entries = []
        for episode_path in re.findall(self._EPISODE_REGEX, page):
            entries.append(self.url_result(site_root + episode_path))

        return self.playlist_result(entries, series_id)

36
youtube_dl/extractor/videofun.py

@ -0,0 +1,36 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse
)
class VideoFunIE(InfoExtractor):
    """Extractor for videofun.me embed pages."""
    _VALID_URL = r'http://[w.]*videofun\.me/embed/(?P<id>[0-9a-f]+)'
    _TEST = {
        'url': 'http://videofun.me/embed/8267659be070860af600fee7deadbcdb?w=600&h=438',
        'md5': 'e37e99d665f503dd2db952f7c4dba9e6',
        'info_dict': {
            'id': 'Mahou-Shoujo-Madoka-Magica-07',
            'ext': 'flv',
            'title': 'Mahou-Shoujo-Madoka-Magica-07',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the gateway URL found in the embed page.

        Both the id and the title of the result are the file name of the
        unquoted media URL without its extension; the hexadecimal id from the
        embed URL is only passed to the download call.
        """
        embed_id = self._match_id(url)
        embed_page = self._download_webpage(
            url, embed_id, 'Downloading video page')

        # The page carries the media location URL-quoted inside a
        # 'url: "..."' entry; unquote it right after extracting it.
        media_url = compat_urllib_parse.unquote(self._html_search_regex(
            r'url: "(http://gateway\.videofun\.me[^"]+)"', embed_page, 'video url'))

        name = self._html_search_regex(r'.*/([^.]*)\.', media_url, 'title')

        return {
            'id': name,
            'url': media_url,
            'title': name,
        }
Loading…
Cancel
Save