youtube-dl/youtube_dl/extractor/svt.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    determine_ext,
    dict_get,
    int_or_none,
    str_or_none,
    strip_or_none,
    try_get,
)


class SVTBaseIE(InfoExtractor):
    """Base class holding the video-info parsing shared by the SVT extractors."""

    # Passed to raise_geo_restricted() when a video turns out to be geo blocked.
    _GEO_COUNTRIES = ['SE']

    def _extract_video(self, video_info, video_id):
        """Build an info dict from an SVT video description.

        video_info is the decoded JSON object describing one video; its
        'videoReferences' list is required.  video_id becomes the 'id' of the
        result and is handed to the manifest extraction helpers.

        Returns a dict with 'id', 'title', 'formats', 'subtitles', 'duration',
        'age_limit', 'series', 'season_number', 'episode', 'episode_number'
        and 'is_live'.  'title' may be None; callers fill it in.

        Calls raise_geo_restricted() when no formats were found and the
        'rights' object flags the video as 'geoBlockedSweden'.
        """
        is_live = dict_get(video_info, ('live', 'simulcast'), default=False)
        m3u8_protocol = 'm3u8' if is_live else 'm3u8_native'

        formats = []
        for vr in video_info['videoReferences']:
            # A single malformed reference must not abort the extraction:
            # the remaining references may still yield usable formats.
            if not isinstance(vr, dict):
                continue
            vurl = vr.get('url')
            if not vurl:
                continue
            player_type = vr.get('playerType') or vr.get('format')
            ext = determine_ext(vurl)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    vurl, video_id,
                    ext='mp4', entry_protocol=m3u8_protocol,
                    m3u8_id=player_type, fatal=False))
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    vurl + '?hdcore=3.3.0', video_id,
                    f4m_id=player_type, fatal=False))
            elif ext == 'mpd':
                # Only the 'dashhbbtv' DASH manifest is extracted; every
                # other mpd reference is deliberately ignored.
                if player_type == 'dashhbbtv':
                    formats.extend(self._extract_mpd_formats(
                        vurl, video_id, mpd_id=player_type, fatal=False))
            else:
                formats.append({
                    'format_id': player_type,
                    'url': vurl,
                })

        # 'rights' may be absent or an explicit JSON null; .get('rights', {})
        # only covers the former and would crash on the latter.
        rights = video_info.get('rights')
        if not isinstance(rights, dict):
            rights = {}
        if not formats and rights.get('geoBlockedSweden'):
            self.raise_geo_restricted(
                'This video is only available in Sweden',
                countries=self._GEO_COUNTRIES)
        self._sort_formats(formats)

        subtitles = {}
        subtitle_references = dict_get(video_info, ('subtitles', 'subtitleReferences'))
        if isinstance(subtitle_references, list):
            for sr in subtitle_references:
                if not isinstance(sr, dict):
                    continue
                subtitle_url = sr.get('url')
                if not subtitle_url:
                    continue
                if determine_ext(subtitle_url) == 'm3u8':
                    # TODO(yan12125): handle WebVTT in m3u8 manifests
                    continue
                # Fall back to Swedish for a missing *or* null/empty language
                # so that the subtitles dict never gets a None key.
                subtitle_lang = sr.get('language') or 'sv'
                subtitles.setdefault(subtitle_lang, []).append({'url': subtitle_url})

        # skip_false_values=False: an explicit False flag must still be seen
        # so that it maps to age_limit 0 instead of leaving it unknown.
        adult = dict_get(
            video_info, ('inappropriateForChildren', 'blockedForChildren'),
            skip_false_values=False)
        age_limit = None
        if adult is not None:
            age_limit = 18 if adult else 0

        return {
            'id': video_id,
            'title': video_info.get('title'),
            'formats': formats,
            'subtitles': subtitles,
            'duration': int_or_none(dict_get(video_info, ('materialLength', 'contentDuration'))),
            'age_limit': age_limit,
            'series': video_info.get('programTitle'),
            'season_number': int_or_none(video_info.get('season')),
            'episode': video_info.get('episodeTitle'),
            'episode_number': int_or_none(video_info.get('episodeNumber')),
            'is_live': is_live,
        }


class SVTIE(SVTBaseIE):
    """Extractor for svt.se widget (``/wd?widgetId=...&articleId=...``) URLs."""

    _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.svt.se/wd?widgetId=23991&sectionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false',
        'md5': '33e9a5d8f646523ce0868ecfb0eed77d',
        'info_dict': {
            'id': '2900353',
            'ext': 'mp4',
            'title': 'Stjärnorna skojar till det - under SVT-intervjun',
            'duration': 27,
            'age_limit': 0,
        },
    }

    @staticmethod
    def _extract_url(webpage):
        """Return the first widget URL found in an iframe src or href attribute of webpage, or None."""
        embed_re = r'(?:<iframe src|href)="(?P<url>%s[^"]*)"' % SVTIE._VALID_URL
        found = re.search(embed_re, webpage)
        return found.group('url') if found else None

    def _real_extract(self, url):
        """Download the widget JSON and build the info dict for its video."""
        widget_id, article_id = re.match(
            self._VALID_URL, url).group('widget_id', 'id')

        api_url = 'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id)
        info = self._download_json(api_url, article_id)

        result = self._extract_video(info['video'], article_id)
        # The widget context carries the title; it replaces whatever
        # _extract_video found in the video object itself.
        result['title'] = info['context']['title']
        return result


class SVTPlayBaseIE(SVTBaseIE):
    # Matches a JavaScript assignment of the form ``root["__svtplay"] = {...};``
    # followed by a newline.  Either quote style and any number of leading
    # underscores before ``svtplay`` are accepted; the object literal is
    # captured (non-greedily) in the named group ``json``.
    _SVTPLAY_RE = r'root\s*\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P<json>{.+?})\s*;\s*\n'


class SVTPlayIE(SVTPlayBaseIE):
    """Extractor for svtplay.se / oppetarkiv.se video pages and ``svt:<id>`` URLs."""

    IE_DESC = 'SVT Play and Öppet arkiv'
    _VALID_URL = r'''(?x)
                    (?:
                        svt:(?P<svt_id>[^/?#&]+)|
                        https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
                    )
                    '''
    _TESTS = [{
        'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
        'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
        'info_dict': {
            'id': '5996901',
            'ext': 'mp4',
            'title': 'Flygplan till Haile Selassie',
            'duration': 3527,
            'thumbnail': r're:^https?://.*[\.-]jpg$',
            'age_limit': 0,
            'subtitles': {
                'sv': [{
                    'ext': 'wsrt',
                }]
            },
        },
    }, {
        # geo restricted to Sweden
        'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
        'only_matching': True,
    }, {
        'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg',
        'only_matching': True,
    }, {
        'url': 'https://www.svtplay.se/kanaler/svt1',
        'only_matching': True,
    }, {
        'url': 'svt:1376446-003A',
        'only_matching': True,
    }, {
        'url': 'svt:14278044',
        'only_matching': True,
    }]

    def _adjust_title(self, info):
        """Pass the title of a live stream through _live_title(), in place."""
        if info['is_live']:
            info['title'] = self._live_title(info['title'])

    def _extract_by_video_id(self, video_id, webpage=None):
        """Extract a video through the videoplayer API.

        video_id is the SVT id to query.  webpage, when given, is only used
        as a source for a fallback title.

        The title is taken, in order, from the API data, the episode or
        series name, the og:title of webpage (with a trailing ``| ...``
        suffix removed) and finally video_id itself.
        """
        data = self._download_json(
            'https://api.svt.se/videoplayer-api/video/%s' % video_id,
            video_id, headers=self.geo_verification_headers())
        info_dict = self._extract_video(data, video_id)
        if not info_dict.get('title'):
            title = dict_get(info_dict, ('episode', 'series'))
            if not title and webpage:
                # default=None keeps this lookup optional: a page without
                # og:title must fall through to the video_id fallback
                # below instead of raising.
                og_title = self._og_search_title(webpage, default=None)
                if og_title:
                    title = re.sub(r'\s*\|\s*.+?$', '', og_title)
            info_dict['title'] = title or video_id
        self._adjust_title(info_dict)
        return info_dict

    def _real_extract(self, url):
        """Extract a video from an ``svt:<id>`` URL or an SVT Play web page.

        For web pages the embedded page data is tried first; when it holds
        no video object, an SVT id is looked up in that data or in the page
        markup and extraction continues through the videoplayer API.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id, svt_id = mobj.group('id', 'svt_id')

        if svt_id:
            return self._extract_by_video_id(svt_id)

        webpage = self._download_webpage(url, video_id)

        data = self._parse_json(
            self._search_regex(
                self._SVTPLAY_RE, webpage, 'embedded data', default='{}',
                group='json'),
            video_id, fatal=False)

        thumbnail = self._og_search_thumbnail(webpage)

        if data:
            video_info = try_get(
                data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'],
                dict)
            if video_info:
                info_dict = self._extract_video(video_info, video_id)
                info_dict.update({
                    'title': data['context']['dispatcher']['stores']['MetaStore']['title'],
                    'thumbnail': thumbnail,
                })
                self._adjust_title(info_dict)
                return info_dict

            svt_id = try_get(
                data, lambda x: x['statistics']['dataLake']['content']['id'],
                compat_str)

        if not svt_id:
            svt_id = self._search_regex(
                (r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
                 r'["\']videoSvtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
                 r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"',
                 r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'),
                webpage, 'video id')

        info_dict = self._extract_by_video_id(svt_id, webpage)
        # The API response is not used for a thumbnail, so carry over the
        # one already found in the page; it used to be dropped on this path.
        info_dict['thumbnail'] = thumbnail
        return info_dict


class SVTSeriesIE(SVTPlayBaseIE):
    """Playlist extractor for svtplay.se series pages, optionally limited to one ``tab=`` season."""

    _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)(?:.+?\btab=(?P<season_slug>[^&#]+))?'
    _TESTS = [{
        'url': 'https://www.svtplay.se/rederiet',
        'info_dict': {
            'id': '14445680',
            'title': 'Rederiet',
            'description': 'md5:d9fdfff17f5d8f73468176ecd2836039',
        },
        'playlist_mincount': 318,
    }, {
        'url': 'https://www.svtplay.se/rederiet?tab=season-2-14445680',
        'info_dict': {
            'id': 'season-2-14445680',
            'title': 'Rederiet - Säsong 2',
            'description': 'md5:d9fdfff17f5d8f73468176ecd2836039',
        },
        'playlist_mincount': 12,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs already claimed by SVTIE or SVTPlayIE, else apply the regular check."""
        if SVTIE.suitable(url) or SVTPlayIE.suitable(url):
            return False
        return super(SVTSeriesIE, cls).suitable(url)

    def _real_extract(self, url):
        """Query the contento GraphQL API and return the episodes as a playlist."""
        mobj = re.match(self._VALID_URL, url)
        series_slug = mobj.group('id')
        season_id = mobj.group('season_slug')

        graphql_query = '''{
  listablesBySlug(slugs: ["%s"]) {
    associatedContent(include: [productionPeriod, season]) {
      items {
        item {
          ... on Episode {
            videoSvtId
          }
        }
      }
      id
      name
    }
    id
    longDescription
    name
    shortDescription
  }
}''' % series_slug
        response = self._download_json(
            'https://api.svt.se/contento/graphql', series_slug,
            'Downloading series page', query={'query': graphql_query})
        series = response['data']['listablesBySlug'][0]

        season_name = None
        entries = []
        for season in series['associatedContent']:
            if not isinstance(season, dict):
                continue
            if season_id:
                # A season was requested: ignore every other one and
                # remember the display name of the matching season.
                if season.get('id') != season_id:
                    continue
                season_name = season.get('name')
            season_items = season.get('items')
            if not isinstance(season_items, list):
                continue
            for season_item in season_items:
                episode = season_item.get('item') or {}
                content_id = episode.get('videoSvtId')
                if content_id and isinstance(content_id, compat_str):
                    entries.append(self.url_result(
                        'svt:' + content_id, SVTPlayIE.ie_key(), content_id))

        series_name = series.get('name')
        season_label = season_name or season_id
        if series_name and season_label:
            playlist_title = '%s - %s' % (series_name, season_label)
        elif season_id:
            playlist_title = season_id
        else:
            playlist_title = series_name

        playlist_id = season_id or series.get('id')
        description = dict_get(series, ('longDescription', 'shortDescription'))
        return self.playlist_result(
            entries, playlist_id, playlist_title, description)


class SVTPageIE(InfoExtractor):
    """Playlist extractor for svt.se article pages that embed video clips or episodes."""

    _VALID_URL = r'https?://(?:www\.)?svt\.se/(?P<path>(?:[^/]+/)*(?P<id>[^/?&#]+))'
    _TESTS = [{
        'url': 'https://www.svt.se/sport/ishockey/bakom-masken-lehners-kamp-mot-mental-ohalsa',
        'info_dict': {
            'id': '25298267',
            'title': 'Bakom masken – Lehners kamp mot mental ohälsa',
        },
        'playlist_count': 4,
    }, {
        'url': 'https://www.svt.se/nyheter/utrikes/svenska-andrea-ar-en-mil-fran-branderna-i-kalifornien',
        'info_dict': {
            'id': '24243746',
            'title': 'Svenska Andrea redo att fly sitt hem i Kalifornien',
        },
        'playlist_count': 2,
    }, {
        # only programTitle
        'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
        'info_dict': {
            'id': '8439V2K',
            'ext': 'mp4',
            'title': 'Stjärnorna skojar till det - under SVT-intervjun',
            'duration': 27,
            'age_limit': 0,
        },
    }, {
        'url': 'https://www.svt.se/nyheter/lokalt/vast/svt-testar-tar-nagon-upp-skrapet-1',
        'only_matching': True,
    }, {
        'url': 'https://www.svt.se/vader/manadskronikor/maj2018',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs already claimed by SVTIE, else apply the regular check."""
        if SVTIE.suitable(url):
            return False
        return super(SVTPageIE, cls).suitable(url)

    def _video_entry(self, content):
        """Return a url_result for a VIDEOCLIP/VIDEOEPISODE content object, or None for other types."""
        if content.get('_type') not in ('VIDEOCLIP', 'VIDEOEPISODE'):
            return None
        video_id = compat_str(content['image']['svtId'])
        return self.url_result(
            'svt:' + video_id, SVTPlayIE.ie_key(), video_id)

    def _real_extract(self, url):
        """Fetch the article through the nss-api and collect its videos into a playlist."""
        mobj = re.match(self._VALID_URL, url)
        path = mobj.group('path')
        display_id = mobj.group('id')

        page = self._download_json(
            'https://api.svt.se/nss-api/page/' + path, display_id,
            query={'q': 'articles'})
        article = page['articles']['content'][0]

        entries = []

        # Videos attached directly to the article come first ...
        for media in article.get('media', []):
            entry = self._video_entry(media)
            if entry is not None:
                entries.append(entry)

        # ... followed by those embedded in the structured body.
        for body_part in article.get('structuredBody', []):
            entry = self._video_entry(body_part.get('content') or {})
            if entry is not None:
                entries.append(entry)

        playlist_id = str_or_none(article.get('id'))
        playlist_title = strip_or_none(article.get('title'))
        return self.playlist_result(entries, playlist_id, playlist_title)
-												[svtplay] Correct test case

											
										
										
											9 years ago
+								# coding: utf-8
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											9 years ago
+								from __future__ import unicode_literals
-												[oppetarkiv] Merge with svtplay

											
										
										
											9 years ago
+								import re
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											9 years ago
+								from .common import InfoExtractor
-												[svt] fix series extraction(closes #22297)

											
										
										
											4 years ago
+								from ..compat import compat_str
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											9 years ago
+								from ..utils import (
 								    determine_ext,
-												[svt] Fix extraction for SVTPlay (closes #9809)

											
										
										
											8 years ago
+								    dict_get,
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											8 years ago
+								    int_or_none,
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											4 years ago
+								    str_or_none,
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											6 years ago
+								    strip_or_none,
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											8 years ago
+								    try_get,
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											9 years ago
+								)
-												[svtplay] Generalize svt extractors and add svt.se extractor

											
										
										
											9 years ago
+								class SVTBaseIE(InfoExtractor):
-												Improve geo bypass mechanism
* Rename options to preffixly match with --geo-verification-proxy
* Introduce _GEO_COUNTRIES for extractors
* Implement faking IP right away for sites with known geo restriction

											
										
										
											7 years ago
+								    _GEO_COUNTRIES = ['SE']
-												[svt] PEP 8

											
										
										
											7 years ago
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											8 years ago
+								    def _extract_video(self, video_info, video_id):
-												[svt] Add support for TV channel live streams (Closes #15279)

											
										
										
											6 years ago
+								        is_live = dict_get(video_info, ('live', 'simulcast'), default=False)
 								        m3u8_protocol = 'm3u8' if is_live else 'm3u8_native'
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											9 years ago
+								        formats = []
 								        for vr in video_info['videoReferences']:
-												[svt] Fix DASH formats extraction

											
										
										
											8 years ago
+								            player_type = vr.get('playerType') or vr.get('format')
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											9 years ago
+								            vurl = vr['url']
-												[oppetarkiv] Merge with svtplay

											
										
										
											9 years ago
+								            ext = determine_ext(vurl)
 								            if ext == 'm3u8':
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											9 years ago
+								                formats.extend(self._extract_m3u8_formats(
 								                    vurl, video_id,
-												[svt] Add support for TV channel live streams (Closes #15279)

											
										
										
											6 years ago
+								                    ext='mp4', entry_protocol=m3u8_protocol,
-												[svt] extract dashhbbtv formats(#8867)

											
										
										
											8 years ago
+								                    m3u8_id=player_type, fatal=False))
-												[oppetarkiv] Merge with svtplay

											
										
										
											9 years ago
+								            elif ext == 'f4m':
 								                formats.extend(self._extract_f4m_formats(
 								                    vurl + '?hdcore=3.3.0', video_id,
-												[svt] extract dashhbbtv formats(#8867)

											
										
										
											8 years ago
+								                    f4m_id=player_type, fatal=False))
 								            elif ext == 'mpd':
 								                if player_type == 'dashhbbtv':
 								                    formats.extend(self._extract_mpd_formats(
 								                        vurl, video_id, mpd_id=player_type, fatal=False))
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											9 years ago
+								            else:
 								                formats.append({
-												[svt] extract dashhbbtv formats(#8867)

											
										
										
											8 years ago
+								                    'format_id': player_type,
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											9 years ago
+								                    'url': vurl,
 								                })
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											8 years ago
+								        if not formats and video_info.get('rights', {}).get('geoBlockedSweden'):
-												[svt] Improve geo restriction detection and use geo bypass mechanism

											
										
										
											7 years ago
+								            self.raise_geo_restricted(
-												Improve geo bypass mechanism
* Rename options to preffixly match with --geo-verification-proxy
* Introduce _GEO_COUNTRIES for extractors
* Implement faking IP right away for sites with known geo restriction

											
										
										
											7 years ago
+								                'This video is only available in Sweden',
 								                countries=self._GEO_COUNTRIES)
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											9 years ago
+								        self._sort_formats(formats)
-												[SVTPlay] Add subtitle support

											
										
										
											8 years ago
+								        subtitles = {}
-												[svt] Fix extraction for SVTPlay (closes #9809)

											
										
										
											8 years ago
+								        subtitle_references = dict_get(video_info, ('subtitles', 'subtitleReferences'))
-												[svt] Improve subtitles extraction and add test (Closes #8265)

											
										
										
											8 years ago
+								        if isinstance(subtitle_references, list):
 								            for sr in subtitle_references:
 								                subtitle_url = sr.get('url')
-												[svt] Fix extraction for SVTPlay (closes #9809)

											
										
										
											8 years ago
+								                subtitle_lang = sr.get('language', 'sv')
-												[svt] Improve subtitles extraction and add test (Closes #8265)

											
										
										
											8 years ago
+								                if subtitle_url:
-												[svt] Fix extraction for SVTPlay (closes #9809)

											
										
										
											8 years ago
+								                    if determine_ext(subtitle_url) == 'm3u8':
 								                        # TODO(yan12125): handle WebVTT in m3u8 manifests
 								                        continue
 								                    subtitles.setdefault(subtitle_lang, []).append({'url': subtitle_url})
-												[SVTPlay] Add subtitle support

											
										
										
											8 years ago
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											8 years ago
+								        title = video_info.get('title')
 								        series = video_info.get('programTitle')
 								        season_number = int_or_none(video_info.get('season'))
 								        episode = video_info.get('episodeTitle')
 								        episode_number = int_or_none(video_info.get('episodeNumber'))
 								        duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
 								        age_limit = None
 								        adult = dict_get(
 								            video_info, ('inappropriateForChildren', 'blockedForChildren'),
 								            skip_false_values=False)
 								        if adult is not None:
 								            age_limit = 18 if adult else 0
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											9 years ago
 								        return {
 								            'id': video_id,
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											8 years ago
+								            'title': title,
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											9 years ago
+								            'formats': formats,
-												[SVTPlay] Add subtitle support

											
										
										
											8 years ago
+								            'subtitles': subtitles,
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											9 years ago
+								            'duration': duration,
-												[oppetarkiv] Merge with svtplay

											
										
										
											9 years ago
+								            'age_limit': age_limit,
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											8 years ago
+								            'series': series,
 								            'season_number': season_number,
 								            'episode': episode,
 								            'episode_number': episode_number,
-												[svt] Add support for TV channel live streams (Closes #15279)

											
										
										
											6 years ago
+								            'is_live': is_live,
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											9 years ago
+								        }
-												[svtplay] Generalize svt extractors and add svt.se extractor

											
										
										
											9 years ago
 								class SVTIE(SVTBaseIE):
 								    _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)'
 								    _TEST = {
 								        'url': 'http://www.svt.se/wd?widgetId=23991&sectionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false',
-												[svt] Fix extraction for SVTPlay (closes #9809)

											
										
										
											8 years ago
+								        'md5': '33e9a5d8f646523ce0868ecfb0eed77d',
-												[svtplay] Generalize svt extractors and add svt.se extractor

											
										
										
											9 years ago
+								        'info_dict': {
 								            'id': '2900353',
-												[svt] Fix extraction for SVTPlay (closes #9809)

											
										
										
											8 years ago
+								            'ext': 'mp4',
 								            'title': 'Stjärnorna skojar till det - under SVT-intervjun',
-												[svtplay] Generalize svt extractors and add svt.se extractor

											
										
										
											9 years ago
+								            'duration': 27,
 								            'age_limit': 0,
 								        },
 								    }
-												[extractor/generic] Add support for svt embeds (Closes #5622)

											
										
										
											9 years ago
+								    @staticmethod
 								    def _extract_url(webpage):
 								        mobj = re.search(
 								            r'(?:<iframe src|href)="(?P<url>%s[^"]*)"' % SVTIE._VALID_URL, webpage)
 								        if mobj:
 								            return mobj.group('url')
-												[svtplay] Generalize svt extractors and add svt.se extractor

											
										
										
											9 years ago
+								    def _real_extract(self, url):
 								        mobj = re.match(self._VALID_URL, url)
 								        widget_id = mobj.group('widget_id')
 								        article_id = mobj.group('id')
-												[svt] Fix extraction for SVTPlay (closes #9809)

											
										
										
											8 years ago
 								        info = self._download_json(
-												[svtplay] Generalize svt extractors and add svt.se extractor

											
										
										
											9 years ago
+								            'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id),
 								            article_id)
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											8 years ago
+								        info_dict = self._extract_video(info['video'], article_id)
-												[svt] Fix extraction for SVTPlay (closes #9809)

											
										
										
											8 years ago
+								        info_dict['title'] = info['context']['title']
 								        return info_dict
-												[svtplay] Generalize svt extractors and add svt.se extractor

											
										
										
											9 years ago
-												[svtplay] Share svtplay regex

											
										
										
											6 years ago
+								class SVTPlayBaseIE(SVTBaseIE):
 								    _SVTPLAY_RE = r'root\s*\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P<json>{.+?})\s*;\s*\n'
 								class SVTPlayIE(SVTPlayBaseIE):
-												[svtplay] Generalize svt extractors and add svt.se extractor

											
										
										
											9 years ago
+								    IE_DESC = 'SVT Play and Öppet arkiv'
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											6 years ago
+								    _VALID_URL = r'''(?x)
 								                    (?:
 								                        svt:(?P<svt_id>[^/?#&]+)|
 								                        https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
 								                    )
 								                    '''
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											8 years ago
+								    _TESTS = [{
-												[svt] Improve subtitles extraction and add test (Closes #8265)

											
										
										
											8 years ago
+								        'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
 								        'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
-												[svtplay] Generalize svt extractors and add svt.se extractor

											
										
										
											9 years ago
+								        'info_dict': {
-												[svt] Improve subtitles extraction and add test (Closes #8265)

											
										
										
											8 years ago
+								            'id': '5996901',
 								            'ext': 'mp4',
 								            'title': 'Flygplan till Haile Selassie',
 								            'duration': 3527,
-												Fix "invalid escape sequences" error on Python 3.6

											
										
										
											7 years ago
+								            'thumbnail': r're:^https?://.*[\.-]jpg$',
-												[svtplay] Generalize svt extractors and add svt.se extractor

											
										
										
											9 years ago
+								            'age_limit': 0,
-												[svt] Improve subtitles extraction and add test (Closes #8265)

											
										
										
											8 years ago
+								            'subtitles': {
 								                'sv': [{
 								                    'ext': 'wsrt',
 								                }]
 								            },
-												[svtplay] Generalize svt extractors and add svt.se extractor

											
										
										
											9 years ago
+								        },
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											8 years ago
+								    }, {
 								        # geo restricted to Sweden
 								        'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
 								        'only_matching': True,
-												[svtplay] Extend _VALID_URL (#9900)

											
										
										
											8 years ago
+								    }, {
 								        'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg',
 								        'only_matching': True,
-												[svt] Add support for TV channel live streams (Closes #15279)

											
										
										
											6 years ago
+								    }, {
 								        'url': 'https://www.svtplay.se/kanaler/svt1',
 								        'only_matching': True,
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											6 years ago
+								    }, {
 								        'url': 'svt:1376446-003A',
 								        'only_matching': True,
 								    }, {
 								        'url': 'svt:14278044',
 								        'only_matching': True,
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											8 years ago
+								    }]
-												[svt] Fix extraction for SVTPlay (closes #9809)

											
										
										
											8 years ago
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											6 years ago
+								    def _adjust_title(self, info):
 								        if info['is_live']:
 								            info['title'] = self._live_title(info['title'])
 								    def _extract_by_video_id(self, video_id, webpage=None):
 								        data = self._download_json(
-												[svtplay] Update API URL (closes #21075)

											
										
										
											5 years ago
+								            'https://api.svt.se/videoplayer-api/video/%s' % video_id,
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											6 years ago
+								            video_id, headers=self.geo_verification_headers())
 								        info_dict = self._extract_video(data, video_id)
 								        if not info_dict.get('title'):
 								            title = dict_get(info_dict, ('episode', 'series'))
 								            if not title and webpage:
 								                title = re.sub(
 								                    r'\s*\|\s*.+?$', '', self._og_search_title(webpage))
 								            if not title:
 								                title = video_id
 								            info_dict['title'] = title
 								        self._adjust_title(info_dict)
 								        return info_dict
-												[svtplay] Generalize svt extractors and add svt.se extractor

											
										
										
											9 years ago
+								    def _real_extract(self, url):
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											6 years ago
+								        mobj = re.match(self._VALID_URL, url)
 								        video_id, svt_id = mobj.group('id', 'svt_id')
 								        if svt_id:
 								            return self._extract_by_video_id(svt_id)
-												[svt] Fix extraction for SVTPlay (closes #9809)

											
										
										
											8 years ago
 								        webpage = self._download_webpage(url, video_id)
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											8 years ago
+								        data = self._parse_json(
 								            self._search_regex(
-												[svtplay] Share svtplay regex

											
										
										
											6 years ago
+								                self._SVTPLAY_RE, webpage, 'embedded data', default='{}',
 								                group='json'),
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											8 years ago
+								            video_id, fatal=False)
-												[svt] Fix extraction for SVTPlay (closes #9809)

											
										
										
											8 years ago
 								        thumbnail = self._og_search_thumbnail(webpage)
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											8 years ago
+								        if data:
 								            video_info = try_get(
 								                data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'],
 								                dict)
 								            if video_info:
 								                info_dict = self._extract_video(video_info, video_id)
 								                info_dict.update({
 								                    'title': data['context']['dispatcher']['stores']['MetaStore']['title'],
 								                    'thumbnail': thumbnail,
 								                })
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											6 years ago
+								                self._adjust_title(info_dict)
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											8 years ago
+								                return info_dict
-												[svtplay] Fix svt id extraction (closes #26425, closes #26428, closes #26438)

											
										
										
											4 years ago
+								            svt_id = try_get(
 								                data, lambda x: x['statistics']['dataLake']['content']['id'],
 								                compat_str)
 								        if not svt_id:
 								            svt_id = self._search_regex(
 								                (r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
-												[svtplay] Fix id extraction (closes #26576)

											
										
										
											4 years ago
+								                 r'["\']videoSvtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
 								                 r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"',
 								                 r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'),
-												[svtplay] Fix svt id extraction (closes #26425, closes #26428, closes #26438)

											
										
										
											4 years ago
+								                webpage, 'video id')
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											8 years ago
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											6 years ago
+								        return self._extract_by_video_id(svt_id, webpage)
-												[svtplay:series] Add extractor

Related to #11130

											
										
										
											6 years ago
-												[svtplay] Share svtplay regex

											
										
										
											6 years ago
class SVTSeriesIE(SVTPlayBaseIE):
    """Playlist extractor for svtplay.se series pages.

    A ``tab=<season id>`` query parameter, when present in the URL, restricts
    the playlist to the season whose ``id`` equals that value.
    """
    _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)(?:.+?\btab=(?P<season_slug>[^&#]+))?'
    _TESTS = [{
        'url': 'https://www.svtplay.se/rederiet',
        'info_dict': {
            'id': '14445680',
            'title': 'Rederiet',
            'description': 'md5:d9fdfff17f5d8f73468176ecd2836039',
        },
        'playlist_mincount': 318,
    }, {
        'url': 'https://www.svtplay.se/rederiet?tab=season-2-14445680',
        'info_dict': {
            'id': 'season-2-14445680',
            'title': 'Rederiet - Säsong 2',
            'description': 'md5:d9fdfff17f5d8f73468176ecd2836039',
        },
        'playlist_mincount': 12,
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for URLs that SVTIE or SVTPlayIE already accept."""
        if SVTIE.suitable(url) or SVTPlayIE.suitable(url):
            return False
        return super(SVTSeriesIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist of ``svt:<id>`` entries for a series or season.

        The series is looked up by slug through the contento GraphQL API.
        Every episode with a string ``videoSvtId`` becomes a ``url_result``
        delegated to SVTPlayIE. The playlist id is the season id when one was
        requested, otherwise the series id.
        """
        series_slug, season_id = re.match(self._VALID_URL, url).groups()

        series = self._download_json(
            'https://api.svt.se/contento/graphql', series_slug,
            'Downloading series page', query={
                'query': '''{
  listablesBySlug(slugs: ["%s"]) {
    associatedContent(include: [productionPeriod, season]) {
      items {
        item {
          ... on Episode {
            videoSvtId
          }
        }
      }
      id
      name
    }
    id
    longDescription
    name
    shortDescription
  }
}''' % series_slug,
            })['data']['listablesBySlug'][0]

        season_name = None

        entries = []
        for season in series['associatedContent']:
            if not isinstance(season, dict):
                continue
            if season_id:
                if season.get('id') != season_id:
                    continue
                season_name = season.get('name')
            items = season.get('items')
            if not isinstance(items, list):
                continue
            for item in items:
                # try_get tolerates null / non-dict entries at either level
                # and only accepts a string id, so one malformed item cannot
                # abort the whole playlist.
                content_id = try_get(
                    item, lambda x: x['item']['videoSvtId'], compat_str)
                if not content_id:
                    continue
                entries.append(self.url_result(
                    'svt:' + content_id, SVTPlayIE.ie_key(), content_id))

        title = series.get('name')
        # Fall back to the raw season id when the season carried no name.
        season_name = season_name or season_id

        if title and season_name:
            title = '%s - %s' % (title, season_name)
        elif season_id:
            title = season_id

        return self.playlist_result(
            entries, season_id or series.get('id'), title,
            dict_get(series, ('longDescription', 'shortDescription')))
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											6 years ago
 								class SVTPageIE(InfoExtractor):
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											4 years ago
+								    _VALID_URL = r'https?://(?:www\.)?svt\.se/(?P<path>(?:[^/]+/)*(?P<id>[^/?&#]+))'
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											6 years ago
+								    _TESTS = [{
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											4 years ago
+								        'url': 'https://www.svt.se/sport/ishockey/bakom-masken-lehners-kamp-mot-mental-ohalsa',
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											6 years ago
+								        'info_dict': {
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											4 years ago
+								            'id': '25298267',
 								            'title': 'Bakom masken – Lehners kamp mot mental ohälsa',
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											6 years ago
+								        },
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											4 years ago
+								        'playlist_count': 4,
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											6 years ago
+								    }, {
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											4 years ago
+								        'url': 'https://www.svt.se/nyheter/utrikes/svenska-andrea-ar-en-mil-fran-branderna-i-kalifornien',
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											6 years ago
+								        'info_dict': {
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											4 years ago
+								            'id': '24243746',
 								            'title': 'Svenska Andrea redo att fly sitt hem i Kalifornien',
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											6 years ago
+								        },
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											4 years ago
+								        'playlist_count': 2,
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											6 years ago
+								    }, {
 								        # only programTitle
 								        'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
 								        'info_dict': {
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											4 years ago
+								            'id': '8439V2K',
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											6 years ago
+								            'ext': 'mp4',
 								            'title': 'Stjärnorna skojar till det - under SVT-intervjun',
 								            'duration': 27,
 								            'age_limit': 0,
 								        },
 								    }, {
 								        'url': 'https://www.svt.se/nyheter/lokalt/vast/svt-testar-tar-nagon-upp-skrapet-1',
 								        'only_matching': True,
 								    }, {
 								        'url': 'https://www.svt.se/vader/manadskronikor/maj2018',
 								        'only_matching': True,
 								    }]
 								    @classmethod
 								    def suitable(cls, url):
 								        return False if SVTIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
 								    def _real_extract(self, url):
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											4 years ago
+								        path, display_id = re.match(self._VALID_URL, url).groups()
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											6 years ago
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											4 years ago
+								        article = self._download_json(
 								            'https://api.svt.se/nss-api/page/' + path, display_id,
 								            query={'q': 'articles'})['articles']['content'][0]
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											6 years ago
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											4 years ago
+								        entries = []
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											6 years ago
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											4 years ago
+								        def _process_content(content):
 								            if content.get('_type') in ('VIDEOCLIP', 'VIDEOEPISODE'):
 								                video_id = compat_str(content['image']['svtId'])
 								                entries.append(self.url_result(
 								                    'svt:' + video_id, SVTPlayIE.ie_key(), video_id))
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											6 years ago
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											4 years ago
+								        for media in article.get('media', []):
 								            _process_content(media)
 								        for obj in article.get('structuredBody', []):
 								            _process_content(obj.get('content') or {})
 								        return self.playlist_result(
 								            entries, str_or_none(article.get('id')),
 								            strip_or_none(article.get('title')))