# coding: utf-8
from __future__ import unicode_literals
import re
import hashlib
from . common import InfoExtractor
from . . compat import compat_str
from . . utils import (
ExtractorError ,
int_or_none ,
float_or_none ,
try_get ,
)
class YandexMusicBaseIE(InfoExtractor):
    """Shared base class for the Yandex.Music extractors.

    Wraps the inherited download helpers so that error payloads and the
    anti-automation CAPTCHA page are reported as ExtractorError.
    """

    @staticmethod
    def _handle_error(response):
        """Raise ExtractorError if a decoded JSON response reports a failure.

        Only dict responses are inspected; anything else (lists, or a false
        value from a non-fatal download) is passed over silently.
        """
        if isinstance(response, dict):
            error = response.get(' error ')
            if error:
                raise ExtractorError(error, expected=True)
            if response.get(' type ') == ' captcha ' or ' captcha ' in response:
                YandexMusicBaseIE._raise_captcha()

    @staticmethod
    def _raise_captcha():
        """Raise an expected ExtractorError explaining the CAPTCHA block."""
        raise ExtractorError(
            ' YandexMusic has considered youtube-dl requests automated and '
            ' asks you to solve a CAPTCHA. You can either wait for some '
            ' time until unblocked and optionally use --sleep-interval '
            ' in future or alternatively you can go to https://music.yandex.ru/ '
            ' solve CAPTCHA, then export cookies and pass cookie file to '
            ' youtube-dl with --cookies ',
            expected=True)

    def _download_webpage_handle(self, *args, **kwargs):
        """Download a page and fail with a CAPTCHA error if the block page is served.

        All arguments are forwarded to the parent implementation and its
        result is returned unchanged.
        """
        result = super(YandexMusicBaseIE, self)._download_webpage_handle(*args, **kwargs)
        # InfoExtractor._download_webpage_handle is documented to return a
        # (page content, URL handle) tuple, or False when a non-fatal download
        # fails.  The marker must be searched for in the page text itself:
        # `in` on the tuple would only test element equality (never a
        # substring match) and `in` on False raises TypeError.
        content = result[0] if isinstance(result, (tuple, list)) and result else result
        if isinstance(content, compat_str) and ' Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические. ' in content:
            self._raise_captcha()
        return result

    def _download_json(self, *args, **kwargs):
        """Download and decode JSON, then check it for error/CAPTCHA payloads."""
        response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs)
        self._handle_error(response)
        return response
class YandexMusicTrackIE(YandexMusicBaseIE):
    """Extractor for a single Yandex.Music track URL (album id + track id)."""

    IE_NAME = ' yandexmusic:track '
    IE_DESC = ' Яндекс.Музыка - Трек '
    _VALID_URL = r' https?://music \ .yandex \ .(?:ru|kz|ua|by)/album/(?P<album_id> \ d+)/track/(?P<id> \ d+) '

    _TESTS = [{
        ' url ': ' http://music.yandex.ru/album/540508/track/4878838 ',
        ' md5 ': ' dec8b661f12027ceaba33318787fff76 ',
        ' info_dict ': {
            ' id ': ' 4878838 ',
            ' ext ': ' mp3 ',
            ' title ': ' Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1 ',
            ' duration ': 193.04,
            ' track ': ' Gypsy Eyes 1 ',
            ' album ': ' Gypsy Soul ',
            ' album_artist ': ' Carlo Ambrosio ',
            ' artist ': ' Carlo Ambrosio & Fabio Di Bari ',
            ' release_year ': 2009,
        },
    }, {
        # multiple disks
        ' url ': ' http://music.yandex.ru/album/3840501/track/705105 ',
        ' md5 ': ' 82a54e9e787301dd45aba093cf6e58c0 ',
        ' info_dict ': {
            ' id ': ' 705105 ',
            ' ext ': ' mp3 ',
            ' title ': ' Hooverphonic - Sometimes ',
            ' duration ': 239.27,
            ' track ': ' Sometimes ',
            ' album ': ' The Best of Hooverphonic ',
            ' album_artist ': ' Hooverphonic ',
            ' artist ': ' Hooverphonic ',
            ' release_year ': 2016,
            ' genre ': ' pop ',
            ' disc_number ': 2,
            ' track_number ': 9,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for one track.

        Performs three JSON requests (track metadata, download descriptor,
        download location), derives the signed media URL from the location
        data and fills in album/artist metadata when it is present.
        """
        mobj = re.match(self._VALID_URL, url)
        album_id, track_id = mobj.group(' album_id '), mobj.group(' id ')

        track = self._download_json(
            ' http://music.yandex.ru/handlers/track.jsx?track= %s : %s ' % (track_id, album_id),
            track_id, ' Downloading track JSON ')[' track ']
        track_title = track[' title ']

        download_data = self._download_json(
            ' https://music.yandex.ru/api/v2.1/handlers/track/ %s : %s /web-album_track-track-track-main/download/m ' % (track_id, album_id),
            track_id, ' Downloading track location url JSON ',
            headers={' X-Retpath-Y ': url})

        fd_data = self._download_json(
            download_data[' src '], track_id,
            ' Downloading track location JSON ',
            query={' format ': ' json '})
        # The URL key is the MD5 hex digest of a fixed prefix, the path with
        # its first character dropped, and the `s` field of the location data.
        key = hashlib.md5((' XGRlBW9FXlekgbPrRHuSiA ' + fd_data[' path '][1:] + fd_data[' s ']).encode(' utf-8 ')).hexdigest()
        f_url = ' http:// %s /get-mp3/ %s / %s ?track-id= %s ' % (fd_data[' host '], key, fd_data[' ts '] + fd_data[' path '], track[' realId '])

        thumbnail = None
        # The cover is optional metadata: try_get yields None instead of
        # raising when `albums` is missing, empty, None or holds a non-dict
        # first entry (the previous chained indexing crashed on those).
        cover_uri = try_get(
            track, lambda x: x[' albums '][0][' coverUri '], compat_str)
        if cover_uri:
            thumbnail = cover_uri.replace(' %% ', ' orig ')
            if not thumbnail.startswith(' http '):
                thumbnail = ' http:// ' + thumbnail

        track_info = {
            ' id ': track_id,
            ' ext ': ' mp3 ',
            ' url ': f_url,
            ' duration ': float_or_none(track.get(' durationMs '), 1000),
            ' thumbnail ': thumbnail,
            ' track ': track_title,
            ' acodec ': download_data.get(' codec '),
            ' abr ': int_or_none(download_data.get(' bitrate ')),
        }

        def extract_artist_name(artist):
            """Return the artist name followed by its `decomposed` parts, if any."""
            decomposed = artist.get(' decomposed ')
            if not isinstance(decomposed, list):
                return artist[' name ']
            parts = [artist[' name ']]
            for element in decomposed:
                # Elements are either dicts carrying a name or plain strings.
                if isinstance(element, dict) and element.get(' name '):
                    parts.append(element[' name '])
                elif isinstance(element, compat_str):
                    parts.append(element)
            return ' '.join(parts)

        def extract_artist(artist_list):
            """Join the names of all named artists; return None when there are none."""
            if artist_list and isinstance(artist_list, list):
                # Non-dict entries are skipped rather than crashing on .get().
                artists_names = [
                    extract_artist_name(a) for a in artist_list
                    if isinstance(a, dict) and a.get(' name ')]
                if artists_names:
                    return ' , '.join(artists_names)

        albums = track.get(' albums ')
        if albums and isinstance(albums, list):
            # Only the first album entry is used for album-level metadata.
            album = albums[0]
            if isinstance(album, dict):
                year = album.get(' year ')
                disc_number = int_or_none(try_get(
                    album, lambda x: x[' trackPosition '][' volume ']))
                track_number = int_or_none(try_get(
                    album, lambda x: x[' trackPosition '][' index ']))
                track_info.update({
                    ' album ': album.get(' title '),
                    ' album_artist ': extract_artist(album.get(' artists ')),
                    ' release_year ': int_or_none(year),
                    ' genre ': album.get(' genre '),
                    ' disc_number ': disc_number,
                    ' track_number ': track_number,
                })

        track_artist = extract_artist(track.get(' artists '))
        if track_artist:
            track_info.update({
                ' artist ': track_artist,
                ' title ': ' %s - %s ' % (track_artist, track_title),
            })
        else:
            # No usable artist: fall back to the bare track title.
            track_info[' title '] = track_title

        return track_info
class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
    """Base for extractors that turn a list of track dicts into playlist entries."""

    def _build_playlist(self, tracks):
        """Return one url_result per track that has a non-empty `albums` list.

        Each entry points at the track page built from the first album's id
        and the track id; tracks without usable album data are left out.
        """
        entries = []
        for item in tracks:
            if not (item.get(' albums ') and isinstance(item.get(' albums '), list)):
                continue
            page_url = ' http://music.yandex.ru/album/ %s /track/ %s ' % (
                item[' albums '][0][' id '], item[' id '])
            entries.append(self.url_result(page_url))
        return entries
class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
    """Extractor for a Yandex.Music album URL; yields the album as a playlist."""

    IE_NAME = ' yandexmusic:album '
    IE_DESC = ' Яндекс.Музыка - Альбом '
    _VALID_URL = r' https?://music \ .yandex \ .(?:ru|kz|ua|by)/album/(?P<id> \ d+)/?( \ ?|$) '

    _TESTS = [{
        ' url ': ' http://music.yandex.ru/album/540508 ',
        ' info_dict ': {
            ' id ': ' 540508 ',
            ' title ': ' Carlo Ambrosio - Gypsy Soul (2009) ',
        },
        ' playlist_count ': 50,
    }, {
        ' url ': ' https://music.yandex.ru/album/3840501 ',
        ' info_dict ': {
            ' id ': ' 3840501 ',
            ' title ': ' Hooverphonic - The Best of Hooverphonic (2016) ',
        },
        ' playlist_count ': 33,
    }]

    def _real_extract(self, url):
        """Fetch the album JSON and return a playlist of all its tracks.

        The playlist title is built from the first artist's name and the
        album title, with the year appended when the album has one.
        """
        album_id = self._match_id(url)

        album_json_url = ' http://music.yandex.ru/handlers/album.jsx?album= %s ' % album_id
        album = self._download_json(
            album_json_url, album_id, ' Downloading album JSON ')

        # Flatten the per-volume track lists into one list, keeping order.
        all_tracks = []
        for volume in album[' volumes ']:
            all_tracks.extend(volume)
        entries = self._build_playlist(all_tracks)

        first_artist_name = album[' artists '][0][' name ']
        playlist_title = ' %s - %s ' % (first_artist_name, album[' title '])
        release_year = album.get(' year ')
        if release_year:
            playlist_title = playlist_title + ' ( %s ) ' % release_year

        return self.playlist_result(
            entries, compat_str(album[' id ']), playlist_title)
class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
    """Extractor for a Yandex.Music user playlist URL."""

    IE_NAME = ' yandexmusic:playlist '
    IE_DESC = ' Яндекс.Музыка - Плейлист '
    _VALID_URL = r' https?://music \ .yandex \ .(?P<tld>ru|kz|ua|by)/users/(?P<user>[^/]+)/playlists/(?P<id> \ d+) '

    _TESTS = [{
        ' url ': ' http://music.yandex.ru/users/music.partners/playlists/1245 ',
        ' info_dict ': {
            ' id ': ' 1245 ',
            ' title ': ' Что слушают Enter Shikari ',
            ' description ': ' md5:3b9f27b0efbe53f2ee1e844d07155cc9 ',
        },
        ' playlist_count ': 6,
    }, {
        # playlist exceeding the limit of 150 tracks shipped with webpage (see
        # https://github.com/ytdl-org/youtube-dl/issues/6666)
        ' url ': ' https://music.yandex.ru/users/music-blog/playlists/1190 ',
        ' info_dict ': {
            ' id ': ' 1190 ',
            ' title ': ' Музыка 1990-х ',
            ' description ': ' От «[Smells Like Teen Spirit](/album/5576245/track/53404)» до «[Freestyler](/album/2395430/track/4712292)». Самые популярные песни из 1990-х в одном плейлисте. '
        },
        ' playlist_mincount ': 124,
    }]

    def _real_extract(self, url):
        """Fetch the playlist JSON and return a playlist of its tracks.

        When the payload lists more track ids than track objects, the missing
        tracks are requested separately; that second request is best-effort.
        """
        mobj = re.match(self._VALID_URL, url)
        tld = mobj.group(' tld ')
        user = mobj.group(' user ')
        playlist_id = mobj.group(' id ')

        # This request is mandatory: its result is subscripted right away, so
        # it must not be made with fatal=False (a failed non-fatal download
        # returns a false value and the subscript would raise TypeError
        # instead of a proper extractor error).
        # NOTE(review): the progress note below says "missing tracks" although
        # this request fetches the playlist itself - consider rewording.
        playlist = self._download_json(
            ' https://music.yandex. %s /handlers/playlist.jsx ' % tld,
            playlist_id, ' Downloading missing tracks JSON ',
            headers={
                ' Referer ': url,
                ' X-Requested-With ': ' XMLHttpRequest ',
                ' X-Retpath-Y ': url,
            },
            query={
                ' owner ': user,
                ' kinds ': playlist_id,
                ' light ': ' true ',
                ' lang ': tld,
                ' external-domain ': ' music.yandex. %s ' % tld,
                ' overembed ': ' false ',
            })[' playlist ']

        tracks = playlist[' tracks ']
        track_ids = [compat_str(track_id) for track_id in playlist[' trackIds ']]

        # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
        # missing tracks should be retrieved manually.
        if len(tracks) < len(track_ids):
            present_track_ids = set(
                compat_str(track[' id '])
                for track in tracks if track.get(' id '))
            missing_track_ids = [
                track_id for track_id in track_ids
                if track_id not in present_track_ids]
            # Best-effort: on failure the playlist is returned with only the
            # tracks that were shipped in the first response.
            missing_tracks = self._download_json(
                ' https://music.yandex. %s /handlers/track-entries.jsx ' % tld,
                playlist_id, ' Downloading missing tracks JSON ',
                fatal=False,
                headers={
                    ' Referer ': url,
                    ' X-Requested-With ': ' XMLHttpRequest ',
                },
                query={
                    ' entries ': ' , '.join(missing_track_ids),
                    ' lang ': tld,
                    ' external-domain ': ' music.yandex. %s ' % tld,
                    ' overembed ': ' false ',
                    ' strict ': ' true ',
                })
            # Only a list of track objects can be merged in; any other
            # payload shape is ignored.
            if missing_tracks and isinstance(missing_tracks, list):
                tracks.extend(missing_tracks)

        return self.playlist_result(
            self._build_playlist(tracks),
            compat_str(playlist_id),
            playlist.get(' title '), playlist.get(' description '))