You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
youtube-dl/youtube_dl/extractor/videomore.py

308 lines
10 KiB
Python

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
orderedSet,
parse_duration,
str_or_none,
unified_strdate,
url_or_none,
xpath_element,
xpath_text,
)
class VideomoreIE(InfoExtractor):
IE_NAME = 'videomore'
_VALID_URL = r'''(?x)
videomore:(?P<sid>\d+)$|
https?://(?:player\.)?videomore\.ru/
(?:
(?:
embed|
[^/]+/[^/]+
)/|
[^/]*\?.*?\btrack_id=
)
(?P<id>\d+)
(?:[/?#&]|\.(?:xml|json)|$)
'''
_TESTS = [{
'url': 'http://videomore.ru/kino_v_detalayah/5_sezon/367617',
'md5': '44455a346edc0d509ac5b5a5b531dc35',
'info_dict': {
'id': '367617',
'ext': 'flv',
'title': 'Кино в деталях 5 сезон В гостях Алексей Чумаков и Юлия Ковальчук',
'series': 'Кино в деталях',
'episode': 'В гостях Алексей Чумаков и Юлия Ковальчук',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 2910,
'view_count': int,
'comment_count': int,
'age_limit': 16,
},
}, {
'url': 'http://videomore.ru/embed/259974',
'info_dict': {
'id': '259974',
'ext': 'flv',
'title': 'Молодежка 2 сезон 40 серия',
'series': 'Молодежка',
'episode': '40 серия',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 2809,
'view_count': int,
'comment_count': int,
'age_limit': 16,
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://videomore.ru/molodezhka/sezon_promo/341073',
'info_dict': {
'id': '341073',
'ext': 'flv',
'title': 'Промо Команда проиграла из-за Бакина?',
'episode': 'Команда проиграла из-за Бакина?',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 29,
'age_limit': 16,
'view_count': int,
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://videomore.ru/elki_3?track_id=364623',
'only_matching': True,
}, {
'url': 'http://videomore.ru/embed/364623',
'only_matching': True,
}, {
'url': 'http://videomore.ru/video/tracks/364623.xml',
'only_matching': True,
}, {
'url': 'http://videomore.ru/video/tracks/364623.json',
'only_matching': True,
}, {
'url': 'http://videomore.ru/video/tracks/158031/quotes/33248',
'only_matching': True,
}, {
'url': 'videomore:367617',
'only_matching': True,
}, {
'url': 'https://player.videomore.ru/?partner_id=97&track_id=736234&autoplay=0&userToken=',
'only_matching': True,
}]
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<object[^>]+data=(["\'])https?://videomore\.ru/player\.swf\?.*config=(?P<url>https?://videomore\.ru/(?:[^/]+/)+\d+\.xml).*\1',
webpage)
if not mobj:
mobj = re.search(
r'<iframe[^>]+src=([\'"])(?P<url>https?://videomore\.ru/embed/\d+)',
webpage)
if mobj:
return mobj.group('url')
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('sid') or mobj.group('id')
video = self._download_xml(
'http://videomore.ru/video/tracks/%s.xml' % video_id,
video_id, 'Downloading video XML')
item = xpath_element(video, './/playlist/item', fatal=True)
title = xpath_text(
item, ('./title', './episode_name'), 'title', fatal=True)
video_url = xpath_text(item, './video_url', 'video url', fatal=True)
formats = self._extract_f4m_formats(video_url, video_id, f4m_id='hds')
self._sort_formats(formats)
thumbnail = xpath_text(item, './thumbnail_url')
duration = int_or_none(xpath_text(item, './duration'))
view_count = int_or_none(xpath_text(item, './views'))
comment_count = int_or_none(xpath_text(item, './count_comments'))
age_limit = int_or_none(xpath_text(item, './min_age'))
series = xpath_text(item, './project_name')
episode = xpath_text(item, './episode_name')
return {
'id': video_id,
'title': title,
'series': series,
'episode': episode,
'thumbnail': thumbnail,
'duration': duration,
'view_count': view_count,
'comment_count': comment_count,
'age_limit': age_limit,
'formats': formats,
}
class VideomoreVideoIE(InfoExtractor):
IE_NAME = 'videomore:video'
_VALID_URL = r'https?://videomore\.ru/(?:(?:[^/]+/){2})?(?P<id>[^/?#&]+)(?:/*|[?#&].*?)$'
_TESTS = [{
# single video with og:video:iframe
'url': 'http://videomore.ru/elki_3',
'info_dict': {
'id': '364623',
'ext': 'flv',
'title': 'Ёлки 3',
'description': '',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 5579,
'age_limit': 6,
'view_count': int,
},
'params': {
'skip_download': True,
},
}, {
# season single series with og:video:iframe
'url': 'http://videomore.ru/poslednii_ment/1_sezon/14_seriya',
'only_matching': True,
}, {
'url': 'http://videomore.ru/sejchas_v_seti/serii_221-240/226_vypusk',
'only_matching': True,
}, {
# single video without og:video:iframe
'url': 'http://videomore.ru/marin_i_ego_druzya',
'info_dict': {
'id': '359073',
'ext': 'flv',
'title': '1 серия. Здравствуй, Аквавилль!',
'description': 'md5:c6003179538b5d353e7bcd5b1372b2d7',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 754,
'age_limit': 6,
'view_count': int,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://videomore.ru/molodezhka/6_sezon/29_seriya?utm_so',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
return False if VideomoreIE.suitable(url) else super(VideomoreVideoIE, cls).suitable(url)
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_url = self._og_search_property(
'video:iframe', webpage, 'video url', default=None)
if not video_url:
video_id = self._search_regex(
(r'config\s*:\s*["\']https?://videomore\.ru/video/tracks/(\d+)\.xml',
r'track-id=["\'](\d+)',
r'xcnt_product_id\s*=\s*(\d+)'), webpage, 'video id')
video_url = 'videomore:%s' % video_id
else:
video_id = None
return self.url_result(
video_url, ie=VideomoreIE.ie_key(), video_id=video_id)
class VideomoreSeasonIE(InfoExtractor):
IE_NAME = 'videomore:season'
_VALID_URL = r'https?://videomore\.ru/(?!embed)(?P<id>[^/]+/[^/?#&]+)(?:/*|[?#&].*?)$'
_TESTS = [{
'url': 'http://videomore.ru/molodezhka/sezon_promo',
'info_dict': {
'id': 'molodezhka/sezon_promo',
'title': 'Молодежка Промо',
},
'playlist_mincount': 12,
}, {
'url': 'http://videomore.ru/molodezhka/sezon_promo?utm_so',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
return (False if (VideomoreIE.suitable(url) or VideomoreVideoIE.suitable(url))
else super(VideomoreSeasonIE, cls).suitable(url))
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
title = self._og_search_title(webpage)
data = self._parse_json(
self._html_search_regex(
r'\bclass=["\']seasons-tracks["\'][^>]+\bdata-custom-data=(["\'])(?P<value>{.+?})\1',
webpage, 'data', default='{}', group='value'),
display_id, fatal=False)
entries = []
if data:
episodes = data.get('episodes')
if isinstance(episodes, list):
for ep in episodes:
if not isinstance(ep, dict):
continue
ep_id = int_or_none(ep.get('id'))
ep_url = url_or_none(ep.get('url'))
if ep_id:
e = {
'url': 'videomore:%s' % ep_id,
'id': compat_str(ep_id),
}
elif ep_url:
e = {'url': ep_url}
else:
continue
e.update({
'_type': 'url',
'ie_key': VideomoreIE.ie_key(),
'title': str_or_none(ep.get('title')),
'thumbnail': url_or_none(ep.get('image')),
'duration': parse_duration(ep.get('duration')),
'episode_number': int_or_none(ep.get('number')),
'upload_date': unified_strdate(ep.get('date')),
})
entries.append(e)
if not entries:
entries = [
self.url_result(
'videomore:%s' % video_id, ie=VideomoreIE.ie_key(),
video_id=video_id)
for video_id in orderedSet(re.findall(
r':(?:id|key)=["\'](\d+)["\']', webpage))]
if not entries:
entries = [
self.url_result(item) for item in re.findall(
r'<a[^>]+href="((?:https?:)?//videomore\.ru/%s/[^/]+)"[^>]+class="widget-item-desc"'
% display_id, webpage)]
return self.playlist_result(entries, display_id, title)