@ -1,6 +1,7 @@
# coding: utf-8
# coding: utf-8
from __future__ import unicode_literals
from __future__ import unicode_literals
import json
import re
import re
from . theplatform import ThePlatformIE
from . theplatform import ThePlatformIE
@ -20,6 +21,13 @@ class AENetworksBaseIE(ThePlatformIE):
_THEPLATFORM_KEY = ' crazyjava '
_THEPLATFORM_KEY = ' crazyjava '
_THEPLATFORM_SECRET = ' s3cr3t '
_THEPLATFORM_SECRET = ' s3cr3t '
def _parse_theplatform_metadata ( self , info ) :
metadata = super ( ) . _parse_theplatform_metadata ( info )
metadata [ ' season_number ' ] = int ( info . get ( ' AETN$season ' ) )
metadata [ ' episode_number ' ] = int ( info . get ( ' AETN$episode ' ) )
metadata [ ' series ' ] = info . get ( ' AETN$seriesNameGlobal ' )
return metadata
def _extract_aen_smil ( self , smil_url , video_id , auth = None ) :
def _extract_aen_smil ( self , smil_url , video_id , auth = None ) :
query = { ' mbr ' : ' true ' }
query = { ' mbr ' : ' true ' }
if auth :
if auth :
@ -63,7 +71,7 @@ class AENetworksIE(AENetworksBaseIE):
IE_DESC = ' A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault '
IE_DESC = ' A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault '
_VALID_URL = r ''' (?x)
_VALID_URL = r ''' (?x)
https ? : / /
https ? : / /
( ? : www \. ) ?
( ? : ( ? P < subdomain > www | play ) \. ) ?
( ? P < domain >
( ? P < domain >
( ? : history ( ? : vault ) ? | aetv | mylifetime | lifetimemovieclub ) \. com |
( ? : history ( ? : vault ) ? | aetv | mylifetime | lifetimemovieclub ) \. com |
fyi \. tv
fyi \. tv
@ -75,11 +83,26 @@ class AENetworksIE(AENetworksBaseIE):
collections / [ ^ / ] + / ( ? P < collection_display_id > [ ^ / ] + )
collections / [ ^ / ] + / ( ? P < collection_display_id > [ ^ / ] + )
)
)
'''
'''
_GRAPHQL_QUERY = """
fragment video on Video {
publicUrl
}
query getUserVideo ( $ videoId : ID ! ) {
video ( id : $ videoId ) {
. . . video
}
}
"""
_TESTS = [ {
_TESTS = [ {
' url ' : ' http://www.history.com/shows/mountain-men/season-1/episode-1 ' ,
' url ' : ' http://www.history.com/shows/mountain-men/season-1/episode-1 ' ,
' info_dict ' : {
' info_dict ' : {
' id ' : ' 22253814 ' ,
' id ' : ' 22253814 ' ,
' ext ' : ' mp4 ' ,
' ext ' : ' mp4 ' ,
' series ' : ' Mountain Men ' ,
' season_number ' : 1 ,
' episode_number ' : 1 ,
' title ' : ' Winter is Coming ' ,
' title ' : ' Winter is Coming ' ,
' description ' : ' md5:641f424b7a19d8e24f26dea22cf59d74 ' ,
' description ' : ' md5:641f424b7a19d8e24f26dea22cf59d74 ' ,
' timestamp ' : 1338306241 ,
' timestamp ' : 1338306241 ,
@ -91,19 +114,60 @@ class AENetworksIE(AENetworksBaseIE):
' skip_download ' : True ,
' skip_download ' : True ,
} ,
} ,
' add_ie ' : [ ' ThePlatform ' ] ,
' add_ie ' : [ ' ThePlatform ' ] ,
} , {
' url ' : ' http://play.history.com/shows/mountain-men/season-1/episode-1 ' ,
' info_dict ' : {
' id ' : ' 22253814 ' ,
' ext ' : ' mp4 ' ,
' series ' : ' Mountain Men ' ,
' season_number ' : 1 ,
' episode_number ' : 1 ,
' title ' : ' Winter Is Coming ' ,
' description ' : ' md5:a40e370925074260b1c8a633c632c63a ' ,
' timestamp ' : 1338306241 ,
' upload_date ' : ' 20120529 ' ,
' uploader ' : ' AENE-NEW ' ,
} ,
' params ' : {
# m3u8 download
' skip_download ' : True ,
} ,
' add_ie ' : [ ' ThePlatform ' ] ,
} , {
} , {
' url ' : ' http://www.history.com/shows/ancient-aliens/season-1 ' ,
' url ' : ' http://www.history.com/shows/ancient-aliens/season-1 ' ,
' info_dict ' : {
' info_dict ' : {
' id ' : ' 71889446852 ' ,
' id ' : ' 71889446852 ' ,
' title ' : ' Ancient Aliens '
} ,
} ,
' playlist_mincount ' : 5 ,
' playlist_mincount ' : 5 ,
} , {
} , {
' url ' : ' http://www.mylifetime.com/shows/atlanta-plastic ' ,
' url ' : ' http://www.mylifetime.com/shows/ marrying-millions ' ,
' info_dict ' : {
' info_dict ' : {
' id ' : ' SERIES4317 ' ,
' id ' : ' SERIES 6093 ' ,
' title ' : ' Atlanta Plastic ' ,
' title ' : ' Marrying Millions ' ,
} ,
} ,
' playlist_mincount ' : 2 ,
' playlist_mincount ' : 1 ,
} , {
' url ' : ' http://www.mylifetime.com/shows/marrying-millions/season-1 ' ,
' info_dict ' : {
' id ' : ' 269343782619 ' ,
' title ' : ' Marrying Millions ' ,
} ,
' playlist_mincount ' : 10 ,
} , {
' url ' : ' https://play.mylifetime.com/shows/marrying-millions/season-1 ' ,
' info_dict ' : {
' id ' : ' SERIES6093 ' ,
' title ' : ' Marrying Millions ' ,
} ,
' playlist_mincount ' : 10 ,
} , {
' url ' : ' https://play.mylifetime.com/shows/marrying-millions ' ,
' info_dict ' : {
' id ' : ' SERIES6093 ' ,
' title ' : ' Marrying Millions ' ,
} ,
' playlist_mincount ' : 11 ,
} , {
} , {
' url ' : ' http://www.aetv.com/shows/duck-dynasty/season-9/episode-1 ' ,
' url ' : ' http://www.aetv.com/shows/duck-dynasty/season-9/episode-1 ' ,
' only_matching ' : True
' only_matching ' : True
@ -137,56 +201,92 @@ class AENetworksIE(AENetworksBaseIE):
' fyi.tv ' : ' FYI ' ,
' fyi.tv ' : ' FYI ' ,
}
}
def _real_extract ( self , url ) :
def _extract_playlist ( self , url , webpage , display_id , subdomain , url_parts ) :
domain , show_path , movie_display_id , special_display_id , collection_display_id = re . match ( self . _VALID_URL , url ) . groups ( )
# The "play" is pretty distinct from the normal sites, however, it contains all the data we need in a JSON blob.
display_id = show_path or movie_display_id or special_display_id or collection_display_id
if subdomain == ' play ' :
webpage = self . _download_webpage ( url , display_id , headers = self . geo_verification_headers ( ) )
series_id = self . _search_regex ( r ' showid/(SERIES[0-9]+) ' , webpage , ' series id ' )
if show_path :
season_num = int_or_none ( self . _search_regex ( r ' /season-([0-9]+)/? ' , url , ' season number ' , fatal = False , default = None ) )
url_parts = show_path . split ( ' / ' )
show_data = self . _parse_json (
self . _search_regex ( r ' (?s)<script[^>]+id= " __NEXT_DATA__ " [^>]*>(.+?)</script ' , webpage , ' show data ' ) ,
series_id , fatal = True )
if show_data :
apolloState = show_data . get ( ' props ' , { } ) . get ( ' apolloState ' , { } )
entries = [ ]
for key , episode in apolloState . items ( ) :
if not key . startswith ( ' Episode: ' ) or series_id != episode . get ( ' seriesId ' ) :
continue
# If a season number was specified in the URL, filter out any episodes that don't match.
if season_num and season_num != episode . get ( ' tvSeasonNumber ' ) :
continue
episode_url = compat_urlparse . urljoin ( url , episode . get ( ' canonical ' ) )
entries . append ( self . url_result ( episode_url , ' AENetworks ' , episode . get ( ' id ' ) , episode . get ( ' title ' ) ) )
series_name = apolloState . get ( ' Series: %s ' % series_id , { } ) . get ( ' title ' )
return self . playlist_result ( entries , series_id , series_name )
else :
series_title = self . _html_search_meta ( ' aetn:SeriesTitle ' , webpage )
url_parts_len = len ( url_parts )
url_parts_len = len ( url_parts )
if url_parts_len == 1 :
if url_parts_len == 1 :
entries = [ ]
entries = [ ]
for season_url_path in re . findall ( r ' (?s)<li[^>]+data-href= " (/shows/ %s /season- \ d+) " ' % url_parts [ 0 ] , webpage ) :
for season_url_path in re . findall ( r ' (?s)< a[^>]+ href=" (/shows/ %s /season- \ d+) " ' % url_parts [ 0 ] , webpage ) :
entries . append ( self . url_result (
entries . append ( self . url_result (
compat_urlparse . urljoin ( url , season_url_path ) , ' AENetworks ' ) )
compat_urlparse . urljoin ( url , season_url_path ) , ' AENetworks ' ) )
if entries :
if entries :
return self . playlist_result (
return self . playlist_result (
entries , self . _html_search_meta ( ' aetn:SeriesId ' , webpage ) ,
entries , self . _html_search_meta ( ' aetn:SeriesId ' , webpage ) , series_title )
self . _html_search_meta ( ' aetn:SeriesTitle ' , webpage ) )
raise ExtractorError ( ' Failed to extract seasons for show: %s ' % url_parts [ 0 ] )
else :
# single season
url_parts_len = 2
if url_parts_len == 2 :
if url_parts_len == 2 :
entries = [ ]
entries = [ ]
for episode_item in re . findall ( r ' (?s)<[^>]+class= " [^ " ]*(?:episode|program)-item[^ " ]* " [^>]*> ' , webpage ) :
for episode_item in re . findall ( r ' (?s)<[^>]+data-episodetype[^>]*> ' , webpage ) :
episode_attributes = extract_attributes ( episode_item )
episode_attributes = extract_attributes ( episode_item )
episode_url = compat_urlparse . urljoin (
episode_url = compat_urlparse . urljoin (
url , episode_attributes [ ' data-canonical ' ] )
url , episode_attributes [ ' data-canonical ' ] )
entries. append ( self . url_result (
video_id = episode_attributes . get ( ' data-videoid ' ) or episode_attributes . get ( ' data-video-id ' )
episode_url , ' AENetworks ' ,
episode_title = episode_attributes . get ( ' aria-label ' )
episode_attributes . get ( ' data-videoid ' ) or episode_attributes . get ( ' data-video-id ' ) ) )
entries . append ( self . url_result ( episode_url , ' AENetworks ' , video_id , episode_title ) )
return self . playlist_result (
return self . playlist_result (
entries , self . _html_search_meta ( ' aetn:SeasonId ' , webpage ) )
entries , self . _html_search_meta ( ' aetn:SeasonId ' , webpage ) , series_title )
raise ExtractorError ( ' Failed to extract playlist ' , video_id = display_id )
video_id = self . _html_search_meta ( ' aetn:VideoID ' , webpage )
media_url = self . _search_regex (
def _real_extract ( self , url ) :
[ r " media_url \ s*= \ s* ' (?P<url>[^ ' ]+) ' " ,
subdomain , domain , show_path , movie_display_id , special_display_id , collection_display_id = re . match ( self . _VALID_URL , url ) . groups ( )
r ' data-media-url=(?P<url>(?:https?:)?//[^ \ s>]+) ' ,
display_id = show_path or movie_display_id or special_display_id or collection_display_id
r ' data-media-url=([ " \' ])(?P<url>(?:(?! \ 1).)+?) \ 1 ' ] ,
webpage = self . _download_webpage ( url , display_id , headers = self . geo_verification_headers ( ) )
webpage , ' video url ' , group = ' url ' )
if show_path :
url_parts = show_path . split ( ' / ' )
# If there's only the show name and/or season number then we'll need to extract a playlist.
if len ( url_parts ) < 3 :
return self . _extract_playlist ( url , webpage , display_id , subdomain , url_parts )
requestor_id = self . _DOMAIN_TO_REQUESTOR_ID [ domain ]
video_id = self . _html_search_meta ( [ ' videoId ' , ' aetn:VideoID ' ] , webpage )
# Make a GraphQL query to get the episode URL as they no longer directly embed it in the response webpage.
video_data = self . _download_json (
' https://yoga.appsvcs.aetnd.com/graphql?brand= %s &mode=live&platform=web ' % ( requestor_id . lower ( ) ) , video_id ,
data = json . dumps (
{
' operationName ' : ' getUserVideo ' ,
' variables ' : { ' videoId ' : video_id } ,
' query ' : self . _GRAPHQL_QUERY ,
} ) . encode ( ' utf-8 ' ) ,
headers = { ' Content-Type ' : ' application/json ' } )
media_url = video_data . get ( ' data ' , { } ) . get ( ' video ' , { } ) . get ( ' publicUrl ' )
if not media_url :
raise ExtractorError ( ' Failed to extract media URL ' , video_id = video_id )
theplatform_metadata = self . _download_theplatform_metadata ( self . _search_regex (
theplatform_metadata = self . _download_theplatform_metadata ( self . _search_regex (
r ' https?://link \ .theplatform \ .com/s/([^?]+) ' , media_url , ' theplatform_path ' ) , video_id )
r ' https?://link \ .theplatform \ .com/s/([^?]+) ' , media_url , ' theplatform_path ' ) , video_id )
info = self . _parse_theplatform_metadata ( theplatform_metadata )
info = self . _parse_theplatform_metadata ( theplatform_metadata )
auth = None
auth = None
if theplatform_metadata . get ( ' AETN$isBehindWall ' ) :
if theplatform_metadata . get ( ' AETN$isBehindWall ' ) :
requestor_id = self . _DOMAIN_TO_REQUESTOR_ID [ domain ]
resource = self . _get_mvpd_resource (
resource = self . _get_mvpd_resource (
requestor_id , theplatform_metadata [ ' title ' ] ,
requestor_id , theplatform_metadata [ ' title ' ] ,
theplatform_metadata . get ( ' AETN$PPL_pplProgramId ' ) or theplatform_metadata . get ( ' AETN$PPL_pplProgramId_OLD ' ) ,
theplatform_metadata . get ( ' AETN$PPL_pplProgramId ' ) or theplatform_metadata . get ( ' AETN$PPL_pplProgramId_OLD ' ) ,
theplatform_metadata [ ' ratings ' ] [ 0 ] [ ' rating ' ] )
theplatform_metadata [ ' ratings ' ] [ 0 ] [ ' rating ' ] )
auth = self . _extract_mvpd_auth (
auth = self . _extract_mvpd_auth (
url , video_id , requestor_id , resource )
url , video_id , requestor_id , resource )
info . update ( self . _search_json_ld ( webpage , video_id , fatal = False ) )
# JSON-LD data isn't present on the play subdomain webpages.
if subdomain != ' play ' :
info . update ( self . _search_json_ld ( webpage , video_id , fatal = False ) )
info . update ( self . _extract_aen_smil ( media_url , video_id , auth ) )
info . update ( self . _extract_aen_smil ( media_url , video_id , auth ) )
return info
return info