[extractor/generic] Add support for mediaset embeds

master
Sergey M․ 7 years ago
parent ca04de463d
commit 5d29af3d15
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

@ -89,6 +89,7 @@ from .limelight import LimelightBaseIE
from .anvato import AnvatoIE from .anvato import AnvatoIE
from .washingtonpost import WashingtonPostIE from .washingtonpost import WashingtonPostIE
from .wistia import WistiaIE from .wistia import WistiaIE
from .mediaset import MediasetIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -2648,6 +2649,12 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key()) wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
# Look for Mediaset embeds
mediaset_urls = MediasetIE._extract_urls(webpage)
if mediaset_urls:
return self.playlist_from_matches(
mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
# Looking for http://schema.org/VideoObject # Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld( json_ld = self._search_json_ld(
webpage, video_id, default={}, expected_type='VideoObject') webpage, video_id, default={}, expected_type='VideoObject')

@ -59,6 +59,14 @@ class MediasetIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
@staticmethod
def _extract_urls(webpage):
return [
mobj.group('url')
for mobj in re.finditer(
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>https?://(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=\d+.*?)\1',
webpage)]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)

Loading…
Cancel
Save