From 2f2665ff4b057cc0004cb4f5ba2a4b2486f48bb8 Mon Sep 17 00:00:00 2001 From: devbreath Date: Wed, 7 Oct 2020 17:21:45 +0300 Subject: [PATCH] fixed extract for Yandex Efir Fixed extraction for Yandex Efir (a single program and a program on a channel). Yandex Efir does not support the JSON request. The JSON data is now passed as a parameter to the Yandex stream player call. The extractor now finds the call of the Yandex stream player, parses its parameters and reads the information about the program from them. --- youtube_dl/extractor/yandexvideo.py | 48 ++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/yandexvideo.py b/youtube_dl/extractor/yandexvideo.py index 46529be05..933079307 100644 --- a/youtube_dl/extractor/yandexvideo.py +++ b/youtube_dl/extractor/yandexvideo.py @@ -7,17 +7,19 @@ from ..utils import ( int_or_none, url_or_none, ) +import json class YandexVideoIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: - yandex\.ru(?:/portal/(?:video|efir))?/?\?.*?stream_id=| + yandex\.ru(?:/portal/(?:video|efir)|/efir|/video)?/?\?.*?(stream_id=|stream_channel=)| frontend\.vh\.yandex\.ru/player/ ) - (?P<id>[\da-f]+) + (?P<id>[\d\w]+) ''' + _TESTS = [{ 'url': 'https://yandex.ru/portal/video?stream_id=4dbb262b4fe5cf15a215de4f34eee34d', 'md5': '33955d7ae052f15853dc41f35f17581c', @@ -52,17 +54,47 @@ class YandexVideoIE(InfoExtractor): # DASH with DRM 'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8', 'only_matching': True, + }, { + # Efir for program + 'url': 'https://yandex.ru/efir?stream_id=vMmiz2NPPDFE&from_block=player_context_menu_yavideo', + 'only_matching': True, + }, { + # efir for channel + 'url': 'https://yandex.ru/efir?stream_channel=1335', + 'only_matching': True, }] + def split_params(self, txt: str) -> []: + braket = 0 + line = '' + items = [] + for c in txt: + if c == '{': + braket += 1 + elif c == '}': + braket -= 1 + if c == ',' and braket == 0: + items.append(line) + line = '' + else: + line += c + if line != '': + 
items.append(line) + return items + + def _real_extract(self, url): video_id = self._match_id(url) - content = self._download_json( - 'https://frontend.vh.yandex.ru/v22/player/%s.json' % video_id, - video_id, query={ - 'stream_options': 'hires', - 'disable_trackings': 1, - })['content'] + html = self._download_webpage(url, video_id) + + # search call of Yandex stream player + func_params = self._search_regex(r'\s*Ya\.streamModules\.player\.play\(\s*([\w\W\\R\"]*)\,\s*true\);\s*', + html, 'Yandex player', flags=re.DOTALL | re.MULTILINE) + params = self.split_params(func_params) + json_text = params[1] + content = json.loads(json_text) + content = content.get('program') content_url = url_or_none(content.get('content_url')) or url_or_none( content['streams'][0]['url'])