[rudo] remove extractor(closes #18430)(closes #18474)

Covered by generic extractor
5 years ago · 0441d6266c
parent 82f68e4a01
commit 0441d6266c
3 changed files with 12 additions and 61 deletions
--- a/youtube_dl/extractor/biobiochiletv.py
+++ b/youtube_dl/extractor/biobiochiletv.py
@ -6,7 +6,6 @@ from ..utils import (
    ExtractorError,
    remove_end,
 )
 from .rudo import RudoIE
 class BioBioChileTVIE(InfoExtractor):
@ -41,11 +40,15 @@ class BioBioChileTVIE(InfoExtractor):
    }, {
        'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml',
        'info_dict': {
-            'id': 'edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos',
+            'id': 'b4xd0LK3SK',
            'ext': 'mp4',
-            'uploader': '(none)',
+            # TODO: fix url_transparent information overriding
-            'upload_date': '20160708',
+            # 'uploader': 'Juan Pablo Echenique',
-            'title': 'Edecanes del Congreso: Figuras decorativas que le cuestan muy caro a los chilenos',
+            'title': 'Comentario Oscar Cáceres',
        },
        'params': {
            # empty m3u8 manifest
            'skip_download': True,
        },
    }, {
        'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
@ -60,7 +63,9 @@ class BioBioChileTVIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)
-        rudo_url = RudoIE._extract_url(webpage)
+        rudo_url = self._search_regex(
            r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
            webpage, 'embed URL', None, group='url')
        if not rudo_url:
            raise ExtractorError('No videos found')
@ -68,7 +73,7 @@ class BioBioChileTVIE(InfoExtractor):
        thumbnail = self._og_search_thumbnail(webpage)
        uploader = self._html_search_regex(
-            r'<a[^>]+href=["\']https?://(?:busca|www)\.biobiochile\.cl/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
+            r'<a[^>]+href=["\'](?:https?://(?:busca|www)\.biobiochile\.cl)?/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
            webpage, 'uploader', fatal=False)
        return {
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -967,7 +967,6 @@ from .rts import RTSIE
 from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
 from .rtvnh import RTVNHIE
 from .rtvs import RTVSIE
 from .rudo import RudoIE
 from .ruhd import RUHDIE
 from .rutube import (
    RutubeIE,
--- a/youtube_dl/extractor/rudo.py
+++ b/youtube_dl/extractor/rudo.py
@ -1,53 +0,0 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    js_to_json,
    get_element_by_class,
    unified_strdate,
 )
 class RudoIE(InfoExtractor):
    _VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)'
    _TEST = {
        'url': 'http://rudo.video/vod/oTzw0MGnyG',
        'md5': '2a03a5b32dd90a04c83b6d391cf7b415',
        'info_dict': {
            'id': 'oTzw0MGnyG',
            'ext': 'mp4',
            'title': 'Comentario Tomás Mosciatti',
            'upload_date': '20160617',
        },
    }
    @classmethod
    def _extract_url(cls, webpage):
        mobj = re.search(
            r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
            webpage)
        if mobj:
            return mobj.group('url')
    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id, encoding='iso-8859-1')
        jwplayer_data = self._parse_json(self._search_regex(
            r'(?s)playerInstance\.setup\(({.+?})\)', webpage, 'jwplayer data'), video_id,
            transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s)))
        info_dict = self._parse_jwplayer_data(
            jwplayer_data, video_id, require_title=False, m3u8_id='hls', mpd_id='dash')
        info_dict.update({
            'title': self._og_search_title(webpage),
            'upload_date': unified_strdate(get_element_by_class('date', webpage)),
        })
        return info_dict