@ -7,17 +7,19 @@ from ..utils import (
int_or_none ,
url_or_none ,
)
import json
class YandexVideoIE ( InfoExtractor ) :
_VALID_URL = r ''' (?x)
https ? : / /
( ? :
yandex \. ru ( ? : / portal / ( ? : video | efir ) ) ? / ? \? . * ? stream_id = |
yandex \. ru ( ? : / portal / ( ? : video | efir ) | / efir | / video ) ? / ? \? . * ? ( stream_id = | stream_channel = ) |
frontend \. vh \. yandex \. ru / player /
)
( ? P < id > [ \d a- f ] + )
( ? P < id > [ \d \w ] + )
'''
_TESTS = [ {
' url ' : ' https://yandex.ru/portal/video?stream_id=4dbb262b4fe5cf15a215de4f34eee34d ' ,
' md5 ' : ' 33955d7ae052f15853dc41f35f17581c ' ,
@ -52,17 +54,47 @@ class YandexVideoIE(InfoExtractor):
# DASH with DRM
' url ' : ' https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8 ' ,
' only_matching ' : True ,
} , {
# Efir for program
' url ' : ' https://yandex.ru/efir?stream_id=vMmiz2NPPDFE&from_block=player_context_menu_yavideo ' ,
' only_matching ' : True ,
} , {
# efir for channel
' url ' : ' https://yandex.ru/efir?stream_channel=1335 ' ,
' only_matching ' : True ,
} ]
def split_params ( self , txt : str ) - > [ ] :
braket = 0
line = ' '
items = [ ]
for c in txt :
if c == ' { ' :
braket + = 1
elif c == ' } ' :
braket - = 1
if c == ' , ' and braket == 0 :
items . append ( line )
line = ' '
else :
line + = c
if line != ' ' :
items . append ( line )
return items
def _real_extract ( self , url ) :
video_id = self . _match_id ( url )
content = self . _download_json (
' https://frontend.vh.yandex.ru/v22/player/ %s .json ' % video_id ,
video_id , query = {
' stream_options ' : ' hires ' ,
' disable_trackings ' : 1 ,
} ) [ ' content ' ]
html = self . _download_webpage ( url , video_id )
# search call of Yandex stream player
func_params = self . _search_regex ( r ' \ s*Ya \ .streamModules \ .player \ .play \ ( \ s*([ \ w \ W \\ R \ " ]*) \ , \ s*true \ ); \ s* ' ,
html , ' Yandex player ' , flags = re . DOTALL | re . MULTILINE )
params = self . split_params ( func_params )
json_text = params [ 1 ]
content = json . loads ( json_text )
content = content . get ( ' program ' )
content_url = url_or_none ( content . get ( ' content_url ' ) ) or url_or_none (
content [ ' streams ' ] [ 0 ] [ ' url ' ] )