Merge branch 'oskar456-playtvak'

master
Sergey M․ 9 years ago
commit d90e4bdb74

@@ -449,6 +449,7 @@ from .planetaplay import PlanetaPlayIE
from .pladform import PladformIE
from .played import PlayedIE
from .playfm import PlayFMIE
from .playtvak import PlaytvakIE
from .playvid import PlayvidIE
from .playwire import PlaywireIE
from .podomatic import PodomaticIE

@@ -0,0 +1,166 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urlparse,
compat_urllib_parse,
)
from ..utils import (
ExtractorError,
int_or_none,
parse_iso8601,
qualities,
)
class PlaytvakIE(InfoExtractor):
    """Extractor for videos and live streams on playtvak.cz and sister sites.

    ``_VALID_URL`` accepts any (sub)domain of playtvak.cz, idnes.cz,
    lidovky.cz and metro.cz whose query string carries the video id in a
    ``c`` or ``idvideo`` parameter.
    """

    IE_DESC = 'Playtvak.cz, iDNES.cz and Lidovky.cz'
    _VALID_URL = r'https?://(?:.+?\.)?(?:playtvak|idnes|lidovky|metro)\.cz/.*\?(?:c|idvideo)=(?P<id>[^&]+)'
    _TESTS = [{
        'url': 'http://www.playtvak.cz/vyzente-vosy-a-srsne-ze-zahrady-dn5-/hodinovy-manzel.aspx?c=A150730_150323_hodinovy-manzel_kuko',
        'md5': '4525ae312c324b4be2f4603cc78ceb4a',
        'info_dict': {
            'id': 'A150730_150323_hodinovy-manzel_kuko',
            'ext': 'mp4',
            'title': 'Vyžeňte vosy a sršně ze zahrady',
            'description': 'md5:f93d398691044d303bc4a3de62f3e976',
            'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
            'duration': 279,
            'timestamp': 1438732860,
            'upload_date': '20150805',
            'is_live': False,
        }
    }, {
        # live video test
        'url': 'http://slowtv.playtvak.cz/planespotting-0pr-/planespotting.aspx?c=A150624_164934_planespotting_cat',
        'info_dict': {
            'id': 'A150624_164934_planespotting_cat',
            'ext': 'flv',
            'title': 're:^Přímý přenos iDNES.cz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 'Sledujte provoz na ranveji Letiště Václava Havla v Praze',
            'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
            'is_live': True,
        },
        'params': {
            'skip_download': True,  # requires rtmpdump
        },
    }, {
        # idnes.cz
        'url': 'http://zpravy.idnes.cz/pes-zavreny-v-aute-rozbijeni-okynek-v-aute-fj5-/domaci.aspx?c=A150809_104116_domaci_pku',
        'md5': '819832ba33cd7016e58a6658577fe289',
        'info_dict': {
            'id': 'A150809_104116_domaci_pku',
            'ext': 'mp4',
            'title': 'Zavřeli jsme mraženou pizzu do auta. Upekla se',
            'description': 'md5:01e73f02329e2e5760bd5eed4d42e3c2',
            'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
            'duration': 39,
            'timestamp': 1438969140,
            'upload_date': '20150807',
            'is_live': False,
        }
    }, {
        # lidovky.cz
        'url': 'http://www.lidovky.cz/dalsi-demonstrace-v-praze-o-migraci-duq-/video.aspx?c=A150808_214044_ln-video_ELE',
        'md5': 'c7209ac4ba9d234d4ad5bab7485bcee8',
        'info_dict': {
            'id': 'A150808_214044_ln-video_ELE',
            'ext': 'mp4',
            'title': 'Táhni! Demonstrace proti imigrantům budila emoce',
            'description': 'md5:97c81d589a9491fbfa323c9fa3cca72c',
            'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
            'timestamp': 1439052180,
            'upload_date': '20150808',
            'is_live': False,
        }
    }, {
        'url': 'http://www.playtvak.cz/embed.aspx?idvideo=V150729_141549_play-porad_kuko',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video or live stream at *url*.

        The page embeds a ``Misc.videoFLV({data: "..."})`` call whose
        ``data`` URL points at a metadata document; the first item of type
        ``video`` or ``stream`` in that document supplies the title,
        thumbnail and the list of downloadable formats.

        Raises:
            ExtractorError: if the metadata has no ``video``/``stream`` item.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        info_url = self._html_search_regex(
            r'Misc\.videoFLV\(\s*{\s*data\s*:\s*"([^"]+)"', webpage, 'info url')

        parsed_url = compat_urlparse.urlparse(info_url)

        # Override two query parameters of the metadata URL while keeping
        # the rest.  NOTE(review): 'reklama' is Czech for "advertisement",
        # so '0' presumably turns ads off, and 'type=js' presumably selects
        # the JavaScript-wrapped JSON flavour -- confirm against the site.
        qs = compat_urlparse.parse_qs(parsed_url.query)
        qs.update({
            'reklama': ['0'],
            'type': ['js'],
        })

        # parse_qs yields lists of values, hence doseq=True when re-encoding.
        info_url = compat_urlparse.urlunparse(
            parsed_url._replace(query=compat_urllib_parse.urlencode(qs, True)))

        # The response is not bare JSON: keep only the text between the
        # first '{' and the last '}' before handing it to the JSON parser.
        json_info = self._download_json(
            info_url, video_id,
            transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])

        # Pick the first playable entry (on-demand video or live stream).
        item = None
        for i in json_info['items']:
            if i.get('type') == 'video' or i.get('type') == 'stream':
                item = i
                break
        if not item:
            raise ExtractorError('No suitable stream found')

        quality = qualities(['low', 'middle', 'high'])

        formats = []
        for fmt in item['video']:
            video_url = fmt.get('file')
            if not video_url:
                continue

            # Use .get() throughout so that one malformed entry is skipped
            # (a missing format falls into the "not supported" branch below)
            # instead of aborting the whole extraction with a KeyError.
            format_ = fmt.get('format')
            quality_name = fmt.get('quality')
            preference = None
            if format_ in ['mp4', 'webm']:
                ext = format_
            elif format_ == 'rtmp':
                ext = 'flv'
            elif format_ == 'apple':
                ext = 'mp4'
                # Some streams have mp3 audio which does not play
                # well with ffmpeg filter aac_adtstoasc
                preference = -1
            elif format_ == 'adobe':  # f4m manifest fails with 404 in 80% of requests
                continue
            else:  # Other formats not supported yet
                continue

            if quality_name:
                format_id = '%s_%s' % (format_, quality_name)
            else:
                format_id = format_

            formats.append({
                'url': video_url,
                'ext': ext,
                'format_id': format_id,
                'quality': quality(quality_name),
                'preference': preference,
            })
        self._sort_formats(formats)

        title = item['title']
        is_live = item['type'] == 'stream'
        if is_live:
            title = self._live_title(title)

        # Duration and publication time are only read for on-demand videos.
        timestamp = None
        duration = None
        if not is_live:
            duration = int_or_none(item.get('length'))
            timestamp = item.get('published')
            if timestamp:
                # The last five characters are cut off before parsing.
                # NOTE(review): presumably a trailing suffix parse_iso8601
                # cannot handle -- confirm against a live response.
                timestamp = parse_iso8601(timestamp[:-5])

        return {
            'id': video_id,
            'title': title,
            'description': self._og_search_description(webpage),
            'thumbnail': item.get('image'),
            'duration': duration,
            'timestamp': timestamp,
            'is_live': is_live,
            'formats': formats,
        }
Loading…
Cancel
Save