Merge pull request #130 from blackjack4494/master

Release 2020.11.07
release 2020.11.07
Tom-Oliver Heidel 4 years ago committed by GitHub
commit dca6e6bf9e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -20,7 +20,7 @@ jobs:
- name: Set up Python - name: Set up Python
uses: actions/setup-python@v2 uses: actions/setup-python@v2
with: with:
python-version: '3.x' python-version: '3.8'
- name: Install packages - name: Install packages
run: sudo apt-get -y install zip pandoc man run: sudo apt-get -y install zip pandoc man
- name: Bump version - name: Bump version

@ -1,15 +1,15 @@
[![Build Status](https://travis-ci.com/blackjack4494/yt-dlc.svg?branch=master)](https://travis-ci.com/blackjack4494/yt-dlc) [![Build Status](https://travis-ci.com/blackjack4494/yt-dlc.svg?branch=master)](https://travis-ci.com/blackjack4494/yt-dlc)
[![PyPi](https://img.shields.io/pypi/v/youtube-dlc.svg)](https://pypi.org/project/youtube-dlc) [![PyPi](https://img.shields.io/pypi/v/youtube-dlc.svg)](https://pypi.org/project/youtube-dlc)
[![Downloads](https://pepy.tech/badge/youtube-dlc)](https://pepy.tech/project/youtube-dlc)
[![Gitter chat](https://img.shields.io/gitter/room/youtube-dlc/community)](https://gitter.im/youtube-dlc) [![Gitter chat](https://img.shields.io/gitter/room/youtube-dlc/community)](https://gitter.im/youtube-dlc)
[![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/blackjack4494/youtube-dlc/blob/master/LICENSE) [![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/blackjack4494/yt-dlc/blob/master/LICENSE)
youtube-dlc - download videos from youtube.com or other video platforms. youtube-dlc - download videos from youtube.com or other video platforms.
youtube-dlc is a fork of youtube-dl with the intention of getting features tested by the community merged in the tool faster, since youtube-dl's development seems to be slowing down. (https://web.archive.org/web/20201014194602/https://github.com/ytdl-org/youtube-dl/issues/26462) youtube-dlc is a fork of youtube-dl with the intention of getting features tested by the community merged in the tool faster, since youtube-dl's development seems to be slowing down. (https://web.archive.org/web/20201014194602/https://github.com/ytdl-org/youtube-dl/issues/26462)
- [INSTALLATION](#installation) - [INSTALLATION](#installation)
- [UPDATE](#update)
- [DESCRIPTION](#description) - [DESCRIPTION](#description)
- [OPTIONS](#options) - [OPTIONS](#options)
- [Network Options:](#network-options) - [Network Options:](#network-options)
@ -44,6 +44,10 @@ You may want to use `python3` instead of `python`
python -m pip install --upgrade youtube-dlc python -m pip install --upgrade youtube-dlc
If you want to install the current master branch
python -m pip install git+https://github.com/blackjack4494/yt-dlc
**UNIX** (Linux, macOS, etc.) **UNIX** (Linux, macOS, etc.)
Using wget: Using wget:

@ -66,7 +66,7 @@ setup(
description=DESCRIPTION, description=DESCRIPTION,
long_description=LONG_DESCRIPTION, long_description=LONG_DESCRIPTION,
# long_description_content_type="text/markdown", # long_description_content_type="text/markdown",
url="https://github.com/blackjack4494/youtube-dlc", url="https://github.com/blackjack4494/yt-dlc",
packages=find_packages(exclude=("youtube_dl","test",)), packages=find_packages(exclude=("youtube_dl","test",)),
#packages=[ #packages=[
# 'youtube_dlc', # 'youtube_dlc',

@ -364,11 +364,12 @@ class FileDownloader(object):
else '%.2f' % sleep_interval)) else '%.2f' % sleep_interval))
time.sleep(sleep_interval) time.sleep(sleep_interval)
else: else:
sleep_interval_sub = self.params.get('sleep_interval_subtitles') if self.params.get('sleep_interval_subtitles') > 0:
self.to_screen( sleep_interval_sub = self.params.get('sleep_interval_subtitles')
'[download] Sleeping %s seconds...' % ( self.to_screen(
int(sleep_interval_sub))) '[download] Sleeping %s seconds...' % (
time.sleep(sleep_interval_sub) sleep_interval_sub))
time.sleep(sleep_interval_sub)
return self.real_download(filename, info_dict) return self.real_download(filename, info_dict)
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):

@ -1544,4 +1544,5 @@ from .zattoo import (
) )
from .zdf import ZDFIE, ZDFChannelIE from .zdf import ZDFIE, ZDFChannelIE
from .zingmp3 import ZingMp3IE from .zingmp3 import ZingMp3IE
from .zoom import ZoomIE
from .zype import ZypeIE from .zype import ZypeIE

@ -36,6 +36,9 @@ class LA7IE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
if not url.startswith('http'):
url = '%s//%s' % (self.http_scheme(), url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
player_data = self._search_regex( player_data = self._search_regex(

@ -12,6 +12,7 @@ from ..utils import (
parse_duration, parse_duration,
remove_end, remove_end,
try_get, try_get,
urljoin,
) )
@ -93,6 +94,14 @@ class MailRuIE(InfoExtractor):
{ {
'url': 'https://my.mail.ru//list//sinyutin10/video/_myvideo/4.html', 'url': 'https://my.mail.ru//list//sinyutin10/video/_myvideo/4.html',
'only_matching': True, 'only_matching': True,
},
{
'url': 'https://my.mail.ru/mail/cloud-strife/video/embed/Games/2009',
'only_matching': True,
},
{
'url': 'https://videoapi.my.mail.ru/videos/embed/mail/cloud-strife/Games/2009.html',
'only_matching': True,
} }
] ]
@ -110,7 +119,7 @@ class MailRuIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
page_config = self._parse_json(self._search_regex([ page_config = self._parse_json(self._search_regex([
r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>', r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>',
r'(?s)"video":\s*(\{.+?\}),'], r'(?s)"video":\s*({.+?}),'],
webpage, 'page config', default='{}'), video_id, fatal=False) webpage, 'page config', default='{}'), video_id, fatal=False)
if page_config: if page_config:
meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl') or page_config.get('metadataUrl') meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl') or page_config.get('metadataUrl')
@ -121,7 +130,7 @@ class MailRuIE(InfoExtractor):
# fix meta_url if missing the host address # fix meta_url if missing the host address
if re.match(r'^\/\+\/', meta_url): if re.match(r'^\/\+\/', meta_url):
meta_url = 'https://my.mail.ru' + meta_url meta_url = urljoin('https://my.mail.ru', meta_url)
if meta_url: if meta_url:
video_data = self._download_json( video_data = self._download_json(

@ -13,6 +13,7 @@ class SkyItaliaBaseIE(InfoExtractor):
'high': [854, 480], 'high': [854, 480],
'hd': [1280, 720] 'hd': [1280, 720]
} }
_GEO_BYPASS = False
def _extract_video_id(self, url): def _extract_video_id(self, url):
webpage = self._download_webpage(url, 'skyitalia') webpage = self._download_webpage(url, 'skyitalia')
@ -43,6 +44,9 @@ class SkyItaliaBaseIE(InfoExtractor):
'height': r[1] 'height': r[1]
}) })
if not formats and video_data.get('geob') == 1:
self.raise_geo_restricted(countries=['IT'])
self._sort_formats(formats) self._sort_formats(formats)
title = video_data.get('title') title = video_data.get('title')
thumb = video_data.get('thumb') thumb = video_data.get('thumb')

@ -308,19 +308,17 @@ class VikiIE(VikiBaseIE):
'url': thumbnail.get('url'), 'url': thumbnail.get('url'),
}) })
stream_ids = [] new_video = self._download_json(
for f in formats: 'https://www.viki.com/api/videos/%s' % video_id, video_id,
s_id = f.get('stream_id') 'Downloading new video JSON to get subtitles', headers={'x-viki-app-ver': '2.2.5.1428709186'}, expected_status=[200, 400, 404])
if s_id is not None:
stream_ids.append(s_id)
subtitles = {} subtitles = {}
for subtitle_lang, _ in video.get('subtitle_completions', {}).items(): for sub in new_video.get('streamSubtitles').get('dash'):
subtitles[subtitle_lang] = [{ subtitles[sub.get('srclang')] = [{
'ext': subtitles_format, 'ext': 'vtt',
'url': self._prepare_call( 'url': sub.get('src'),
'videos/%s/subtitles/%s.%s?stream_id=%s' % (video_id, subtitle_lang, subtitles_format, stream_ids[0])), 'completion': sub.get('percentage'),
} for subtitles_format in ('srt', 'vtt')] }]
result = { result = {
'id': video_id, 'id': video_id,

@ -11,7 +11,6 @@ from ..compat import compat_str
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
merge_dicts, merge_dicts,
remove_start,
try_get, try_get,
urlencode_postdata, urlencode_postdata,
) )
@ -19,10 +18,10 @@ from ..utils import (
class VLiveIE(NaverBaseIE): class VLiveIE(NaverBaseIE):
IE_NAME = 'vlive' IE_NAME = 'vlive'
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|post)/(?P<id>(?:\d-)?[0-9]+)'
_NETRC_MACHINE = 'vlive' _NETRC_MACHINE = 'vlive'
_TESTS = [{ _TESTS = [{
'url': 'http://www.vlive.tv/video/1326', 'url': 'https://www.vlive.tv/video/1326',
'md5': 'cc7314812855ce56de70a06a27314983', 'md5': 'cc7314812855ce56de70a06a27314983',
'info_dict': { 'info_dict': {
'id': '1326', 'id': '1326',
@ -32,8 +31,21 @@ class VLiveIE(NaverBaseIE):
'view_count': int, 'view_count': int,
'uploader_id': 'muploader_a', 'uploader_id': 'muploader_a',
}, },
}, { },
'url': 'http://www.vlive.tv/video/16937', {
'url': 'https://vlive.tv/post/1-18244258',
'md5': 'cc7314812855ce56de70a06a27314983',
'info_dict': {
'id': '1326',
'ext': 'mp4',
'title': "[V LIVE] Girl's Day's Broadcast",
'creator': "Girl's Day",
'view_count': int,
'uploader_id': 'muploader_a',
},
},
{
'url': 'https://www.vlive.tv/video/16937',
'info_dict': { 'info_dict': {
'id': '16937', 'id': '16937',
'ext': 'mp4', 'ext': 'mp4',
@ -96,50 +108,69 @@ class VLiveIE(NaverBaseIE):
raise ExtractorError('Unable to log in', expected=True) raise ExtractorError('Unable to log in', expected=True)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) # url may match on a post or a video url with a post_id potentially matching a video_id
working_id = self._match_id(url)
webpage = self._download_webpage( webpage = self._download_webpage(url, working_id)
'https://www.vlive.tv/video/%s' % video_id, video_id)
PARAMS_RE = r'window\.__PRELOADED_STATE__\s*=\s*({.*});?\s*</script>'
VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)' PARAMS_FIELD = 'params'
VIDEO_PARAMS_FIELD = 'video params'
params = self._search_regex(
params = self._parse_json(self._search_regex( PARAMS_RE, webpage, PARAMS_FIELD, default='', flags=re.DOTALL)
VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD, default=''), video_id, params = self._parse_json(params, working_id, fatal=False)
transform_source=lambda s: '[' + s + ']', fatal=False)
video_params = try_get(params, lambda x: x["postDetail"]["post"]["officialVideo"], dict)
if not params or len(params) < 7:
params = self._search_regex( if video_params is None:
VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD) error = try_get(params, lambda x: x["postDetail"]["error"], dict)
params = [p.strip(r'"') for p in re.split(r'\s*,\s*', params)] error_data = try_get(error, lambda x: x["data"], dict)
error_video = try_get(error_data, lambda x: x["officialVideo"], dict)
status, long_video_id, key = params[2], params[5], params[6] error_msg = try_get(error, lambda x: x["message"], compat_str)
status = remove_start(status, 'PRODUCT_') product_type = try_get(error_data,
[lambda x: x["officialVideo"]["productType"],
if status in ('LIVE_ON_AIR', 'BIG_EVENT_ON_AIR'): lambda x: x["board"]["boardType"]],
return self._live(video_id, webpage) compat_str)
elif status in ('VOD_ON_AIR', 'BIG_EVENT_INTRO'):
return self._replay(video_id, webpage, long_video_id, key) if error_video is not None:
if product_type in ('VLIVE_PLUS', 'VLIVE+'):
if status == 'LIVE_END': self.raise_login_required('This video is only available with V LIVE+.')
raise ExtractorError('Uploading for replay. Please wait...', elif error_msg is not None:
expected=True) raise ExtractorError('V LIVE reported the following error: %s' % error_msg)
elif status == 'COMING_SOON': else:
raise ExtractorError('Coming soon!', expected=True) raise ExtractorError('Failed to extract video parameters.')
elif status == 'CANCELED': elif 'post' in url:
raise ExtractorError('We are sorry, ' raise ExtractorError('Url does not appear to be a video post.', expected=True)
'but the live broadcast has been canceled.', else:
expected=True) raise ExtractorError('Failed to extract video parameters.')
elif status == 'ONLY_APP':
raise ExtractorError('Unsupported video type', expected=True) video_id = working_id if 'video' in url else str(video_params["videoSeq"])
video_type = video_params["type"]
if video_type in ('VOD'):
encoding_status = video_params["encodingStatus"]
if encoding_status == 'COMPLETE':
return self._replay(video_id, webpage, params, video_params)
else:
raise ExtractorError('VOD encoding not yet complete. Please try again later.',
expected=True)
elif video_type in ('LIVE'):
video_status = video_params["status"]
if video_status in ('RESERVED'):
raise ExtractorError('Coming soon!', expected=True)
elif video_status in ('ENDED', 'END'):
raise ExtractorError('Uploading for replay. Please wait...', expected=True)
else:
return self._live(video_id, webpage, params)
else: else:
raise ExtractorError('Unknown status %s' % status) raise ExtractorError('Unknown video type %s' % video_type)
def _get_common_fields(self, webpage): def _get_common_fields(self, webpage, params):
title = self._og_search_title(webpage) title = self._og_search_title(webpage)
creator = self._html_search_regex( description = self._html_search_meta(
r'<div[^>]+class="info_area"[^>]*>\s*(?:<em[^>]*>.*?</em\s*>\s*)?<a\s+[^>]*>([^<]+)', ['og:description', 'description', 'twitter:description'],
webpage, 'creator', fatal=False) webpage, 'description', default=None)
creator = (try_get(params, lambda x: x["channel"]["channel"]["channelName"], compat_str)
or self._search_regex(r'on (.*) channel', description or '', 'creator', fatal=False))
thumbnail = self._og_search_thumbnail(webpage) thumbnail = self._og_search_thumbnail(webpage)
return { return {
'title': title, 'title': title,
@ -147,24 +178,21 @@ class VLiveIE(NaverBaseIE):
'thumbnail': thumbnail, 'thumbnail': thumbnail,
} }
def _live(self, video_id, webpage): def _live(self, video_id, webpage, params):
init_page = self._download_init_page(video_id) LIVE_INFO_ENDPOINT = 'https://www.vlive.tv/globalv-web/vam-web/old/v3/live/%s/playInfo' % video_id
play_info = self._download_json(LIVE_INFO_ENDPOINT, video_id,
headers={"referer": "https://www.vlive.tv"})
live_params = self._search_regex( streams = try_get(play_info, lambda x: x["result"]["streamList"], list) or []
r'"liveStreamInfo"\s*:\s*(".*"),',
init_page, 'live stream info')
live_params = self._parse_json(live_params, video_id)
live_params = self._parse_json(live_params, video_id)
formats = [] formats = []
for vid in live_params.get('resolutions', []): for stream in streams:
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
vid['cdnUrl'], video_id, 'mp4', stream['serviceUrl'], video_id, 'mp4',
m3u8_id=vid.get('name'),
fatal=False, live=True)) fatal=False, live=True))
self._sort_formats(formats) self._sort_formats(formats)
info = self._get_common_fields(webpage) info = self._get_common_fields(webpage, params)
info.update({ info.update({
'title': self._live_title(info['title']), 'title': self._live_title(info['title']),
'id': video_id, 'id': video_id,
@ -173,44 +201,37 @@ class VLiveIE(NaverBaseIE):
}) })
return info return info
def _replay(self, video_id, webpage, long_video_id, key): def _replay(self, video_id, webpage, params, video_params):
if '' in (long_video_id, key): long_video_id = video_params["vodId"]
init_page = self._download_init_page(video_id)
video_info = self._parse_json(self._search_regex( VOD_KEY_ENDPOINT = 'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/%s/inkey' % video_id
(r'(?s)oVideoStatus\s*=\s*({.+?})\s*</script', key_json = self._download_json(VOD_KEY_ENDPOINT, video_id,
r'(?s)oVideoStatus\s*=\s*({.+})'), init_page, 'video info'), headers={"referer": "https://www.vlive.tv"})
video_id) key = key_json["inkey"]
if video_info.get('status') == 'NEED_CHANNEL_PLUS':
self.raise_login_required(
'This video is only available for CH+ subscribers')
long_video_id, key = video_info['vid'], video_info['inkey']
return merge_dicts( return merge_dicts(
self._get_common_fields(webpage), self._get_common_fields(webpage, params),
self._extract_video_info(video_id, long_video_id, key)) self._extract_video_info(video_id, long_video_id, key))
def _download_init_page(self, video_id):
return self._download_webpage(
'https://www.vlive.tv/video/init/view',
video_id, note='Downloading live webpage',
data=urlencode_postdata({'videoSeq': video_id}),
headers={
'Referer': 'https://www.vlive.tv/video/%s' % video_id,
'Content-Type': 'application/x-www-form-urlencoded'
})
class VLiveChannelIE(InfoExtractor): class VLiveChannelIE(InfoExtractor):
IE_NAME = 'vlive:channel' IE_NAME = 'vlive:channel'
_VALID_URL = r'https?://channels\.vlive\.tv/(?P<id>[0-9A-Z]+)' _VALID_URL = r'https?://(?:(?:www|m)\.)?(?:channels\.vlive\.tv/|vlive\.tv/channels?/)(?P<id>[0-9A-Z]+)'
_TEST = { _TESTS = [{
'url': 'http://channels.vlive.tv/FCD4B', 'url': 'https://channels.vlive.tv/FCD4B',
'info_dict': {
'id': 'FCD4B',
'title': 'MAMAMOO',
},
'playlist_mincount': 110
}, {
'url': 'https://www.vlive.tv/channel/FCD4B',
'info_dict': { 'info_dict': {
'id': 'FCD4B', 'id': 'FCD4B',
'title': 'MAMAMOO', 'title': 'MAMAMOO',
}, },
'playlist_mincount': 110 'playlist_mincount': 110
} }]
_APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b' _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
def _real_extract(self, url): def _real_extract(self, url):

@ -279,6 +279,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle( return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
*args, **compat_kwargs(kwargs)) *args, **compat_kwargs(kwargs))
def _get_yt_initial_data(self, video_id, webpage):
config = self._search_regex(
(r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
webpage, 'ytInitialData', default=None)
if config:
return self._parse_json(
uppercase_escape(config), video_id, fatal=False)
def _real_initialize(self): def _real_initialize(self):
if self._downloader is None: if self._downloader is None:
return return
@ -1390,6 +1399,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# https://github.com/ytdl-org/youtube-dl/pull/7599) # https://github.com/ytdl-org/youtube-dl/pull/7599)
r';ytplayer\.config\s*=\s*({.+?});ytplayer', r';ytplayer\.config\s*=\s*({.+?});ytplayer',
r';ytplayer\.config\s*=\s*({.+?});', r';ytplayer\.config\s*=\s*({.+?});',
r'ytInitialPlayerResponse\s*=\s*({.+?});var meta'
) )
config = self._search_regex( config = self._search_regex(
patterns, webpage, 'ytplayer.config', default=None) patterns, webpage, 'ytplayer.config', default=None)
@ -1397,15 +1407,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return self._parse_json( return self._parse_json(
uppercase_escape(config), video_id, fatal=False) uppercase_escape(config), video_id, fatal=False)
def _get_yt_initial_data(self, video_id, webpage):
config = self._search_regex(
(r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
webpage, 'ytInitialData', default=None)
if config:
return self._parse_json(
uppercase_escape(config), video_id, fatal=False)
def _get_music_metadata_from_yt_initial(self, yt_initial): def _get_music_metadata_from_yt_initial(self, yt_initial):
music_metadata = [] music_metadata = []
key_map = { key_map = {
@ -1454,10 +1455,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self._downloader.report_warning(err_msg) self._downloader.report_warning(err_msg)
return {} return {}
try: try:
args = player_config['args'] if "args" in player_config and "ttsurl" in player_config["args"]:
caption_url = args.get('ttsurl') args = player_config['args']
if caption_url: caption_url = args['ttsurl']
timestamp = args['timestamp'] timestamp = args['timestamp']
# We get the available subtitles # We get the available subtitles
list_params = compat_urllib_parse_urlencode({ list_params = compat_urllib_parse_urlencode({
'type': 'list', 'type': 'list',
@ -1513,40 +1515,50 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return captions return captions
# New captions format as of 22.06.2017 # New captions format as of 22.06.2017
player_response = args.get('player_response') if "args" in player_config:
if player_response and isinstance(player_response, compat_str): player_response = player_config["args"].get('player_response')
player_response = self._parse_json( else:
player_response, video_id, fatal=False) # New player system (ytInitialPlayerResponse) as of October 2020
if player_response: player_response = player_config
renderer = player_response['captions']['playerCaptionsTracklistRenderer']
caption_tracks = renderer['captionTracks'] if player_response:
for caption_track in caption_tracks: if isinstance(player_response, compat_str):
if 'kind' not in caption_track: player_response = self._parse_json(
# not an automatic transcription player_response, video_id, fatal=False)
continue
base_url = caption_track['baseUrl'] renderer = player_response['captions']['playerCaptionsTracklistRenderer']
sub_lang_list = [] caption_tracks = renderer['captionTracks']
for lang in renderer['translationLanguages']: for caption_track in caption_tracks:
lang_code = lang.get('languageCode') if 'kind' not in caption_track:
if lang_code: # not an automatic transcription
sub_lang_list.append(lang_code) continue
return make_captions(base_url, sub_lang_list) base_url = caption_track['baseUrl']
sub_lang_list = []
self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id) for lang in renderer['translationLanguages']:
return {} lang_code = lang.get('languageCode')
# Some videos don't provide ttsurl but rather caption_tracks and if lang_code:
# caption_translation_languages (e.g. 20LmZk1hakA) sub_lang_list.append(lang_code)
# Does not used anymore as of 22.06.2017 return make_captions(base_url, sub_lang_list)
caption_tracks = args['caption_tracks']
caption_translation_languages = args['caption_translation_languages'] self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id)
caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0] return {}
sub_lang_list = []
for lang in caption_translation_languages.split(','): if "args" in player_config:
lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang)) args = player_config["args"]
sub_lang = lang_qs.get('lc', [None])[0]
if sub_lang: # Some videos don't provide ttsurl but rather caption_tracks and
sub_lang_list.append(sub_lang) # caption_translation_languages (e.g. 20LmZk1hakA)
return make_captions(caption_url, sub_lang_list) # Does not used anymore as of 22.06.2017
caption_tracks = args['caption_tracks']
caption_translation_languages = args['caption_translation_languages']
caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
sub_lang_list = []
for lang in caption_translation_languages.split(','):
lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
sub_lang = lang_qs.get('lc', [None])[0]
if sub_lang:
sub_lang_list.append(sub_lang)
return make_captions(caption_url, sub_lang_list)
# An extractor error can be raise by the download process if there are # An extractor error can be raise by the download process if there are
# no automatic captions but there are subtitles # no automatic captions but there are subtitles
except (KeyError, IndexError, ExtractorError): except (KeyError, IndexError, ExtractorError):
@ -1822,21 +1834,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Try looking directly into the video webpage # Try looking directly into the video webpage
ytplayer_config = self._get_ytplayer_config(video_id, video_webpage) ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
if ytplayer_config: if ytplayer_config:
args = ytplayer_config['args'] args = ytplayer_config.get("args")
if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'): if args is not None:
# Convert to the same format returned by compat_parse_qs if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
video_info = dict((k, [v]) for k, v in args.items()) # Convert to the same format returned by compat_parse_qs
add_dash_mpd(video_info) video_info = dict((k, [v]) for k, v in args.items())
# Rental video is not rented but preview is available (e.g. add_dash_mpd(video_info)
# https://www.youtube.com/watch?v=yYr8q0y5Jfg, # Rental video is not rented but preview is available (e.g.
# https://github.com/ytdl-org/youtube-dl/issues/10532) # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
if not video_info and args.get('ypc_vid'): # https://github.com/ytdl-org/youtube-dl/issues/10532)
return self.url_result( if not video_info and args.get('ypc_vid'):
args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid']) return self.url_result(
if args.get('livestream') == '1' or args.get('live_playback') == 1: args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
is_live = True if args.get('livestream') == '1' or args.get('live_playback') == 1:
if not player_response: is_live = True
player_response = extract_player_response(args.get('player_response'), video_id) if not player_response:
player_response = extract_player_response(args.get('player_response'), video_id)
elif not player_response:
player_response = ytplayer_config
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
add_dash_mpd_pr(player_response) add_dash_mpd_pr(player_response)
else: else:
@ -1866,8 +1881,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
age_gate = False age_gate = False
# Try looking directly into the video webpage # Try looking directly into the video webpage
ytplayer_config = self._get_ytplayer_config(video_id, video_webpage) ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
if ytplayer_config: args = ytplayer_config.get("args")
args = ytplayer_config['args'] if args is not None:
if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'): if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
# Convert to the same format returned by compat_parse_qs # Convert to the same format returned by compat_parse_qs
video_info = dict((k, [v]) for k, v in args.items()) video_info = dict((k, [v]) for k, v in args.items())
@ -1882,6 +1897,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
is_live = True is_live = True
if not player_response: if not player_response:
player_response = extract_player_response(args.get('player_response'), video_id) player_response = extract_player_response(args.get('player_response'), video_id)
elif not player_response:
player_response = ytplayer_config
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
add_dash_mpd_pr(player_response) add_dash_mpd_pr(player_response)
@ -2614,6 +2631,12 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
_VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?' _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
_VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})' _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
IE_NAME = 'youtube:playlist' IE_NAME = 'youtube:playlist'
_YTM_PLAYLIST_PREFIX = 'RDCLAK5uy_'
_YTM_CHANNEL_INFO = {
'uploader': 'Youtube Music',
'uploader_id': 'music', # or "UC-9-kyTW8ZkZNDHQJ6FgpwQ"
'uploader_url': 'https://www.youtube.com/music'
}
_TESTS = [{ _TESTS = [{
'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
'info_dict': { 'info_dict': {
@ -2811,10 +2834,21 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
return zip(ids_in_page, titles_in_page) return zip(ids_in_page, titles_in_page)
def _extract_mix_ids_from_yt_initial(self, yt_initial):
ids = []
playlist_contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['contents'], list)
if playlist_contents:
for item in playlist_contents:
videoId = try_get(item, lambda x: x['playlistPanelVideoRenderer']['videoId'], compat_str)
if videoId:
ids.append(videoId)
return ids
def _extract_mix(self, playlist_id): def _extract_mix(self, playlist_id):
# The mixes are generated from a single video # The mixes are generated from a single video
# the id of the playlist is just 'RD' + video_id # the id of the playlist is just 'RD' + video_id
ids = [] ids = []
yt_initial = None
last_id = playlist_id[-11:] last_id = playlist_id[-11:]
for n in itertools.count(1): for n in itertools.count(1):
url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id) url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
@ -2824,6 +2858,13 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
r'''(?xs)data-video-username=".*?".*? r'''(?xs)data-video-username=".*?".*?
href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id), href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
webpage)) webpage))
# if no ids in html of page, try using embedded json
if (len(new_ids) == 0):
yt_initial = self._get_yt_initial_data(playlist_id, webpage)
if yt_initial:
new_ids = self._extract_mix_ids_from_yt_initial(yt_initial)
# Fetch new pages until all the videos are repeated, it seems that # Fetch new pages until all the videos are repeated, it seems that
# there are always 51 unique videos. # there are always 51 unique videos.
new_ids = [_id for _id in new_ids if _id not in ids] new_ids = [_id for _id in new_ids if _id not in ids]
@ -2841,6 +2882,9 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
or search_title('title')) or search_title('title'))
title = clean_html(title_span) title = clean_html(title_span)
if not title:
title = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['title'], compat_str)
return self.playlist_result(url_results, playlist_id, title) return self.playlist_result(url_results, playlist_id, title)
def _extract_playlist(self, playlist_id): def _extract_playlist(self, playlist_id):
@ -2902,6 +2946,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'uploader_url': uploader_url, 'uploader_url': uploader_url,
}) })
if playlist_id.startswith(self._YTM_PLAYLIST_PREFIX):
playlist.update(self._YTM_CHANNEL_INFO)
return has_videos, playlist return has_videos, playlist
@ -2932,8 +2978,10 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
return video return video
if playlist_id.startswith(('RD', 'UL', 'PU')): if playlist_id.startswith(('RD', 'UL', 'PU')):
# Mixes require a custom extraction process if not playlist_id.startswith(self._YTM_PLAYLIST_PREFIX):
return self._extract_mix(playlist_id) # Mixes require a custom extraction process,
# Youtube Music playlists act like normal playlists (with randomized order)
return self._extract_mix(playlist_id)
has_videos, playlist = self._extract_playlist(playlist_id) has_videos, playlist = self._extract_playlist(playlist_id)
if has_videos or not video_id: if has_videos or not video_id:

@ -0,0 +1,82 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
url_or_none,
parse_filesize,
urlencode_postdata
)
class ZoomIE(InfoExtractor):
IE_NAME = 'zoom'
_VALID_URL = r'https://(?:.*).?zoom.us/rec(?:ording)?/play/(?P<id>[A-Za-z0-9\-_]+)'
_TEST = {
'url': 'https://zoom.us/recording/play/SILVuCL4bFtRwWTtOCFQQxAsBQsJljFtm9e4Z_bvo-A8B-nzUSYZRNuPl3qW5IGK',
'info_dict': {
'md5': '031a5b379f1547a8b29c5c4c837dccf2',
'title': "GAZ Transformational Tuesdays W/ Landon & Stapes",
'id': "SILVuCL4bFtRwWTtOCFQQxAsBQsJljFtm9e4Z_bvo-A8B-nzUSYZRNuPl3qW5IGK",
'ext': "mp4"
}
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
password_protected = self._search_regex(r'<form[^>]+?id="(password_form)"', webpage, 'password field', fatal=False, default=None)
if password_protected is not None:
self._verify_video_password(url, display_id, webpage)
webpage = self._download_webpage(url, display_id)
video_url = self._search_regex(r"viewMp4Url: \'(.*)\'", webpage, 'video url')
title = self._html_search_regex([r"topic: \"(.*)\",", r"<title>(.*) - Zoom</title>"], webpage, 'title')
viewResolvtionsWidth = self._search_regex(r"viewResolvtionsWidth: (\d*)", webpage, 'res width', fatal=False)
viewResolvtionsHeight = self._search_regex(r"viewResolvtionsHeight: (\d*)", webpage, 'res height', fatal=False)
fileSize = parse_filesize(self._search_regex(r"fileSize: \'(.+)\'", webpage, 'fileSize', fatal=False))
urlprefix = url.split("zoom.us")[0] + "zoom.us/"
formats = []
formats.append({
'url': url_or_none(video_url),
'width': int_or_none(viewResolvtionsWidth),
'height': int_or_none(viewResolvtionsHeight),
'http_headers': {'Accept': 'video/webm,video/ogg,video/*;q=0.9,application/ogg;q=0.7,audio/*;q=0.6,*/*;q=0.5',
'Referer': urlprefix},
'ext': "mp4",
'filesize_approx': int_or_none(fileSize)
})
self._sort_formats(formats)
return {
'id': display_id,
'title': title,
'formats': formats
}
def _verify_video_password(self, url, video_id, webpage):
password = self._downloader.params.get('videopassword')
if password is None:
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
meetId = self._search_regex(r'<input[^>]+?id="meetId" value="([^\"]+)"', webpage, 'meetId')
data = urlencode_postdata({
'id': meetId,
'passwd': password,
'action': "viewdetailedpage",
'recaptcha': ""
})
validation_url = url.split("zoom.us")[0] + "zoom.us/rec/validate_meet_passwd"
validation_response = self._download_json(
validation_url, video_id,
note='Validating Password...',
errnote='Wrong password?',
data=data)
if validation_response['errorCode'] != 0:
raise ExtractorError('Login failed, %s said: %r' % (self.IE_NAME, validation_response['errorMessage']))

@ -582,7 +582,7 @@ def parseOpts(overrideArguments=None):
'along with --min-sleep-interval.')) 'along with --min-sleep-interval.'))
workarounds.add_option( workarounds.add_option(
'--sleep-subtitles', '--sleep-subtitles',
dest='sleep_interval_subtitles', action='store_true', default=0, dest='sleep_interval_subtitles', default=0, type=int,
help='Enforce sleep interval on subtitles as well') help='Enforce sleep interval on subtitles as well')
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')

@ -412,7 +412,9 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
for lang, sub_info in subtitles.items(): for lang, sub_info in subtitles.items():
sub_ext = sub_info['ext'] sub_ext = sub_info['ext']
if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt': if sub_ext == 'json':
self._downloader.to_screen('[ffmpeg] JSON subtitles cannot be embedded')
elif ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
sub_langs.append(lang) sub_langs.append(lang)
sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext)) sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext))
else: else:
@ -643,13 +645,18 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
self._downloader.to_screen( self._downloader.to_screen(
'[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext) '[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext)
continue continue
elif ext == 'json':
self._downloader.to_screen(
'[ffmpeg] You have requested to convert json subtitles into another format, '
'which is currently not possible')
continue
old_file = subtitles_filename(filename, lang, ext, info.get('ext')) old_file = subtitles_filename(filename, lang, ext, info.get('ext'))
sub_filenames.append(old_file) sub_filenames.append(old_file)
new_file = subtitles_filename(filename, lang, new_ext, info.get('ext')) new_file = subtitles_filename(filename, lang, new_ext, info.get('ext'))
if ext in ('dfxp', 'ttml', 'tt'): if ext in ('dfxp', 'ttml', 'tt'):
self._downloader.report_warning( self._downloader.report_warning(
'You have requested to convert dfxp (TTML) subtitles into another format, ' '[ffmpeg] You have requested to convert dfxp (TTML) subtitles into another format, '
'which results in style information loss') 'which results in style information loss')
dfxp_file = old_file dfxp_file = old_file

Loading…
Cancel
Save