Compare commits
201 Commits
2020.10.26
...
master
Author | SHA1 | Date |
---|---|---|
Tom-Oliver Heidel | f9401f2a91 | 3 years ago |
Heidel | a915526e08 | 3 years ago |
Tom-Oliver Heidel | db6926618f | 3 years ago |
Tom-Oliver Heidel | 19a6fa72eb | 3 years ago |
Tom-Oliver Heidel | 98e248faa4 | 4 years ago |
Unknown | 40ec740f7b | 4 years ago |
Tom-Oliver Heidel | 8662875551 | 4 years ago |
nixxo | 4f618e64f5 | 4 years ago |
Remita Amine | 12300fa45a | 4 years ago |
Tom-Oliver Heidel | e8d46fc979 | 4 years ago |
Tom-Oliver Heidel | b662fc8d20 | 4 years ago |
Tom-Oliver Heidel | 929576bb9e | 4 years ago |
Tom-Oliver Heidel | 7dde463e86 | 4 years ago |
Tom-Oliver Heidel | e29288d667 | 4 years ago |
Tom-Oliver Heidel | 9e4043faa9 | 4 years ago |
Tom-Oliver Heidel | 94c29091d0 | 4 years ago |
Tom-Oliver Heidel | 7b400ac40b | 4 years ago |
Tom-Oliver Heidel | e8dfaa0fd7 | 4 years ago |
Tom-Oliver Heidel | 9693a34773 | 4 years ago |
Tom-Oliver Heidel | 6a03f4f2a8 | 4 years ago |
Tom-Oliver Heidel | 6248b34ad2 | 4 years ago |
Tom-Oliver Heidel | 8e423ae86a | 4 years ago |
Tom-Oliver Heidel | 284ec6f48a | 4 years ago |
Tom-Oliver Heidel | bccdb02e93 | 4 years ago |
Tom-Oliver Heidel | ef5a4db06c | 4 years ago |
bopol | 9b664dc420 | 4 years ago |
Tom-Oliver Heidel | 93201d50aa | 4 years ago |
lorpus | ae7c01431d | 4 years ago |
pukkandan | c78b936af4 | 4 years ago |
pukkandan | 2fa90513e5 | 4 years ago |
pukkandan | f0c532a430 | 4 years ago |
pukkandan | a62cf34298 | 4 years ago |
pukkandan | 38d7028407 | 4 years ago |
pukkandan | 02ced43cbf | 4 years ago |
Tom-Oliver Heidel | 17fbbff940 | 4 years ago |
pukkandan | 3d3dddc948 | 4 years ago |
pukkandan | 70d5c17b08 | 4 years ago |
pukkandan | 70c5802b5d | 4 years ago |
pukkandan | a93f71ee5e | 4 years ago |
xypwn | f8fb3b8a78 | 4 years ago |
pukkandan | 036fcf3aa1 | 4 years ago |
pukkandan | 434406a982 | 4 years ago |
pukkandan | 386e1dd908 | 4 years ago |
pukkandan | 7bd4a9b611 | 4 years ago |
pukkandan | ef2f3c7f58 | 4 years ago |
pukkandan | a0566bbf5c | 4 years ago |
pukkandan | 3462ffa892 | 4 years ago |
pukkandan | d3260f40cb | 4 years ago |
pukkandan | 097f1663a9 | 4 years ago |
pukkandan | 8bdd16b499 | 4 years ago |
Diego Fernando Rodríguez Varón | d71eb83b05 | 4 years ago |
Tom-Oliver Heidel | 228385340e | 4 years ago |
Tom-Oliver Heidel | 63dcccd07c | 4 years ago |
Kyu Yeun Kim | d02f12107f | 4 years ago |
lorpus | d9c2b0a6de | 4 years ago |
lorpus | 2b547dd782 | 4 years ago |
pukkandan | ec57f903c9 | 4 years ago |
renalid | 711bd5d362 | 4 years ago |
Matthew | 9da76d30de | 4 years ago |
pukkandan | 958804ad4e | 4 years ago |
pukkandan | 55faba7ed7 | 4 years ago |
Matthew | 0366ae8756 | 4 years ago |
Diego Fernando Rodríguez Varón | a2044d57ca | 4 years ago |
pukkandan | ea6e0c2b0d | 4 years ago |
Jody Bruchon | 63c00011d4 | 4 years ago |
pukkandan | fe5caa2a7c | 4 years ago |
pukkandan | 9a68de1217 | 4 years ago |
Tom-Oliver Heidel | d052b9a112 | 4 years ago |
Unknown | 5e6cdcecdd | 4 years ago |
Tom-Oliver Heidel | c297a6c661 | 4 years ago |
Unknown | 6bd79800c3 | 4 years ago |
nao20010128nao | a1d6041497 | 4 years ago |
Tom-Oliver Heidel | b28e751688 | 4 years ago |
Tom-Oliver Heidel | 7ee5015a34 | 4 years ago |
Tom-Oliver Heidel | 00c38ef28d | 4 years ago |
Tom-Oliver Heidel | 34861f1c96 | 4 years ago |
Unknown | 104bfdd24d | 4 years ago |
Luc Ritchie | 73ac856785 | 4 years ago |
Tom-Oliver Heidel | d91fdaff03 | 4 years ago |
Tom-Oliver Heidel | c54f4aada5 | 4 years ago |
Unknown | 0f8566e90b | 4 years ago |
Tom-Oliver Heidel | 0e0cffb8fe | 4 years ago |
rigstot | d7aec208f2 | 4 years ago |
Tom-Oliver Heidel | 69e3c6df5c | 4 years ago |
pukkandan | 002ea8fe17 | 4 years ago |
Tom-Oliver Heidel | c924a219ea | 4 years ago |
Roman Karwacik | 8f109ad4ad | 4 years ago |
Luc Ritchie | 9833e7a015 | 4 years ago |
Tom-Oliver Heidel | da8fb75df5 | 4 years ago |
Robin Dunn | 142f2c8e99 | 4 years ago |
Ali Sherief | 876f1c17ff | 4 years ago |
nixxo | 5867a16789 | 4 years ago |
nixxo | 8924ddc3ee | 4 years ago |
nixxo | 595188ec71 | 4 years ago |
nixxo | 6c1c3e5b85 | 4 years ago |
nixxo | 902784a2a9 | 4 years ago |
Diego Fernando Rodríguez Varón | fff5071112 | 4 years ago |
WolfganP | 85da4055c0 | 4 years ago |
WolfganP | 6857df609b | 4 years ago |
Nicolas SAPA | 8263104fe4 | 4 years ago |
Nicolas SAPA | b860e4cc2f | 4 years ago |
Tom-Oliver Heidel | 651bae3d23 | 4 years ago |
Tom-Oliver Heidel | 5943bb6214 | 4 years ago |
Tom-Oliver Heidel | 4a82c025da | 4 years ago |
Unknown | 7d94c06743 | 4 years ago |
Unknown | 5db4014b23 | 4 years ago |
The Hatsune Daishi | 987d2e079a | 4 years ago |
nixxo | 8abd647c59 | 4 years ago |
Tom-Oliver Heidel | adb118da26 | 4 years ago |
Tom-Oliver Heidel | 06a8be981b | 4 years ago |
Tom-Oliver Heidel | f406ab6a14 | 4 years ago |
Tom-Oliver Heidel | 206de9b233 | 4 years ago |
Tom-Oliver Heidel | 123049d1ce | 4 years ago |
Tom-Oliver Heidel | f8ddb38977 | 4 years ago |
pukkandan | 503d4a44f6 | 4 years ago |
insaneracist | 366a7a4753 | 4 years ago |
insaneracist | 7f4f0b21c2 | 4 years ago |
insaneracist | 659ddd7f70 | 4 years ago |
nixxo | ab36800b1f | 4 years ago |
exwm | 9c8bc84fd2 | 4 years ago |
exwm | c434e9f504 | 4 years ago |
exwm | be5d6c213c | 4 years ago |
insaneracist | 15f6397c19 | 4 years ago |
Tom-Oliver Heidel | 7166f47b18 | 4 years ago |
Tom-Oliver Heidel | 471115dbee | 4 years ago |
Tom-Oliver Heidel | 8934f61717 | 4 years ago |
Tom-Oliver Heidel | 4481cfb570 | 4 years ago |
Roman Sebastian Karwacik | b11a88fc24 | 4 years ago |
Roman Sebastian Karwacik | aa13f124a5 | 4 years ago |
Roman Sebastian Karwacik | 81acad1279 | 4 years ago |
Roman Sebastian Karwacik | abd273e17b | 4 years ago |
Roman Sebastian Karwacik | 55cd2999ed | 4 years ago |
Roman Sebastian Karwacik | ef6be42014 | 4 years ago |
Roman Sebastian Karwacik | 3f0852e35f | 4 years ago |
exwm | 130599af94 | 4 years ago |
exwm | 73cc1b9125 | 4 years ago |
exwm | 3417362556 | 4 years ago |
exwm | 8ba3ad0a48 | 4 years ago |
exwm | 1923b146b3 | 4 years ago |
exwm | 5dcfd2508a | 4 years ago |
exwm | 0536e60b48 | 4 years ago |
insaneracist | 5c15c1a0d7 | 4 years ago |
Tom-Oliver Heidel | 167c108f70 | 4 years ago |
Diego Fernando Rodríguez Varón | 60351178a5 | 4 years ago |
Tom-Oliver Heidel | 764876a01f | 4 years ago |
Unknown | 31108ce946 | 4 years ago |
Unknown | ae306df7e0 | 4 years ago |
nixxo | e61f360157 | 4 years ago |
insaneracist | 712799bd30 | 4 years ago |
Tom-Oliver Heidel | cf37b9f875 | 4 years ago |
Unknown | 7fb5f2f29d | 4 years ago |
Tom-Oliver Heidel | 200959ec76 | 4 years ago |
Tom-Oliver Heidel | 4f51913680 | 4 years ago |
Tom-Oliver Heidel | b8c6e56725 | 4 years ago |
Tom-Oliver Heidel | 2c01ee48e0 | 4 years ago |
Tom-Oliver Heidel | 0ae154c4b4 | 4 years ago |
Tom-Oliver Heidel | e2d5e9a361 | 4 years ago |
Tom-Oliver Heidel | 12ae240c36 | 4 years ago |
Tom-Oliver Heidel | 76c2df5f0a | 4 years ago |
Tom-Oliver Heidel | 90c3f039e2 | 4 years ago |
Unknown | fa57af1ef3 | 4 years ago |
Tom-Oliver Heidel | da6403d340 | 4 years ago |
Tom-Oliver Heidel | 9891884768 | 4 years ago |
Unknown | 9f448fcb26 | 4 years ago |
Peter Oettig | 59c5fa91c1 | 4 years ago |
Tobias Gruetzmacher | 7db1d2a69e | 4 years ago |
Dan Walker | 3086aa194f | 4 years ago |
insaneracist | 5b0a6a8010 | 4 years ago |
insaneracist | 9322f1162d | 4 years ago |
nixxo | 920ad13673 | 4 years ago |
nixxo | 508649e6f5 | 4 years ago |
bopol | bb8a73a0e2 | 4 years ago |
Unknown | 4932ba4aec | 4 years ago |
Unknown | 0704d2224b | 4 years ago |
insaneracist | 139e10ad98 | 4 years ago |
insaneracist | 576d233fe6 | 4 years ago |
nixxo | a85e131b48 | 4 years ago |
nixxo | 165ce9f773 | 4 years ago |
Tom-Oliver Heidel | ddb77f30ee | 4 years ago |
nixxo | 81a20463a4 | 4 years ago |
nixxo | 0b72c2bc31 | 4 years ago |
nixxo | c3c18d7b8a | 4 years ago |
insaneracist | ccec6955f3 | 4 years ago |
Tom-Oliver Heidel | cf553deceb | 4 years ago |
nixxo | 6c6ee4905f | 4 years ago |
Tom-Oliver Heidel | 1808b9e28c | 4 years ago |
Unknown | 079a941282 | 4 years ago |
Unknown | 3467b3e28f | 4 years ago |
Tom-Oliver Heidel | 565e61d488 | 4 years ago |
Tom-Oliver Heidel | ba2136cba2 | 4 years ago |
insaneracist | 48aac9fc86 | 4 years ago |
Peter Oettig | 67b19799a5 | 4 years ago |
Unknown | 6f8557ec4d | 4 years ago |
amigatomte | fb9c36c45f | 4 years ago |
Dan Walker | 87ab4fb11a | 4 years ago |
Tom-Oliver Heidel | ee69b02a4f | 4 years ago |
bopol | 9754a441e3 | 4 years ago |
nixxo | d4ca287459 | 4 years ago |
nixxo | a916af123c | 4 years ago |
nixxo | 5fec75c81c | 4 years ago |
Tom-Oliver Heidel | 6410992da9 | 4 years ago |
@ -1 +1 @@
|
||||
py -m PyInstaller youtube_dlc\__main__.py --onefile --name youtube-dlc --version-file win\ver.txt --icon win\icon\cloud.ico
|
||||
py -m PyInstaller youtube_dlc\__main__.py --onefile --name youtube-dlc --version-file win\ver.txt --icon win\icon\cloud.ico --upx-exclude=vcruntime140.dll
|
@ -0,0 +1,103 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from .vimeo import VimeoIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class AmaraIE(InfoExtractor):
    # Video pages on amara.org; one optional path segment (e.g. "en/") may
    # precede "videos/".
    _VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
    _TESTS = [{
        # Youtube
        'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
        'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
        'info_dict': {
            'id': 'h6ZuVdvYnfE',
            'ext': 'mp4',
            'title': 'Why jury trials are becoming less common',
            'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'upload_date': '20160813',
            'uploader': 'PBS NewsHour',
            'uploader_id': 'PBSNewsHour',
            'timestamp': 1549639570,
        }
    }, {
        # Vimeo
        'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
        'md5': '99392c75fa05d432a8f11df03612195e',
        'info_dict': {
            'id': '18622084',
            'ext': 'mov',
            'title': 'Vimeo at CES 2011!',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'timestamp': 1294763658,
            'upload_date': '20110111',
            'uploader': 'Sam Morrill',
            'uploader_id': 'sammorrill'
        }
    }, {
        # Direct Link
        'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
        'md5': 'd3970f08512738ee60c5807311ff5d3f',
        'info_dict': {
            'id': 's8KL7I3jLmh6',
            'ext': 'mp4',
            'title': 'The danger of a single story',
            'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'upload_date': '20091007',
            'timestamp': 1254942511,
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for an Amara video from its JSON API record.

        Subtitles are listed for every language entry that has a
        ``subtitles_uri`` and is marked ``published``, each in the json,
        srt and vtt formats.  When the first entry of ``all_urls`` is
        handled by YoutubeIE or VimeoIE the result is turned into a
        ``url_transparent`` one that delegates to that extractor.
        """
        video_id = self._match_id(url)
        api_url = 'https://amara.org/api/videos/%s/' % video_id
        meta = self._download_json(api_url, video_id, query={'format': 'json'})
        title = meta['title']
        video_url = meta['all_urls'][0]

        subtitles = {}
        for track in meta.get('languages') or ():
            track_uri = track.get('subtitles_uri')
            if track_uri and track.get('published'):
                # Language entries without a code are filed under 'en'.
                subtitles.setdefault(track.get('code') or 'en', []).extend(
                    {
                        'ext': sub_format,
                        'url': update_url_query(track_uri, {'format': sub_format}),
                    }
                    for sub_format in ('json', 'srt', 'vtt'))

        info = {
            'url': video_url,
            'id': video_id,
            'subtitles': subtitles,
            'title': title,
            'description': meta.get('description'),
            'thumbnail': meta.get('thumbnail'),
            'duration': int_or_none(meta.get('duration')),
            'timestamp': parse_iso8601(meta.get('created')),
        }

        # First hosting-site extractor (if any) that claims the media URL.
        hosting_ie = next(
            (ie for ie in (YoutubeIE, VimeoIE) if ie.suitable(video_url)),
            None)
        if hosting_ie is not None:
            info['_type'] = 'url_transparent'
            info['ie_key'] = hosting_ie.ie_key()

        return info
|
@ -0,0 +1,61 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class BitwaveReplayIE(InfoExtractor):
    IE_NAME = 'bitwave:replay'
    # <user>/replay/<id>, optionally followed by a single trailing slash.
    _VALID_URL = r'https?://(?:www\.)?bitwave\.tv/(?P<user>\w+)/replay/(?P<id>\w+)/?$'
    _TEST = {
        'url': 'https://bitwave.tv/RhythmicCarnage/replay/z4P6eq5L7WDrM85UCrVr',
        'only_matching': True
    }

    def _real_extract(self, url):
        """Return the info dict for a replay, read from the replays API."""
        replay_id = self._match_id(url)
        api_url = 'https://api.bitwave.tv/v1/replays/' + replay_id
        data = self._download_json(api_url, replay_id)['data']

        title = data['title']
        # The API's "name" field is used for both uploader fields.
        uploader = data['name']
        uploader_id = data['name']
        media_url = data['url']
        thumbnails = [{'url': thumb_url} for thumb_url in data['thumbnails']]

        return {
            'id': replay_id,
            'title': title,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'url': media_url,
            'thumbnails': thumbnails,
        }
|
||||
|
||||
|
||||
class BitwaveStreamIE(InfoExtractor):
    IE_NAME = 'bitwave:stream'
    # A bare channel name, optionally followed by a single trailing slash.
    _VALID_URL = r'https?://(?:www\.)?bitwave\.tv/(?P<id>\w+)/?$'
    _TEST = {
        'url': 'https://bitwave.tv/doomtube',
        'only_matching': True
    }

    def _real_extract(self, url):
        """Return the live-stream info dict for a channel.

        The channel record comes from the channels API; its ``url`` field
        is parsed as an m3u8 manifest to obtain the formats.
        """
        username = self._match_id(url)
        api_url = 'https://api.bitwave.tv/v1/channels/' + username
        data = self._download_json(api_url, username)['data']

        formats = self._extract_m3u8_formats(data['url'], username, 'mp4')
        self._sort_formats(formats)

        live_title = self._live_title(data['title'])
        thumbnail = data['thumbnail']
        view_count = data['viewCount']

        return {
            'id': username,
            'title': live_title,
            'uploader': username,
            'uploader_id': username,
            'formats': formats,
            'thumbnail': thumbnail,
            'is_live': True,
            'view_count': view_count
        }
|
@ -0,0 +1,98 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
parse_iso8601,
|
||||
# try_get,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class BoxIE(InfoExtractor):
    # Shared-link file pages: /s/<shared_name>/file/<numeric id>, on
    # app.box.com with an optional extra subdomain.
    _VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/]+)/file/(?P<id>\d+)'
    _TEST = {
        'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
        'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
        'info_dict': {
            'id': '510727257538',
            'ext': 'mp4',
            'title': 'Garber St. Louis will be 28th MLS team +scarving.mp4',
            'uploader': 'MLS Video',
            'timestamp': 1566320259,
            'upload_date': '20190820',
            'uploader_id': '235196876',
        }
    }

    def _real_extract(self, url):
        """Extract a shared Box file.

        Steps: read ``requestToken`` from the page's ``Box.config`` object,
        exchange it for a per-file read token, fetch the file metadata with
        that token, and expose the authenticated download URL (when the
        file is downloadable) as the only format.
        """
        mobj = re.match(self._VALID_URL, url)
        shared_name = mobj.group('shared_name')
        file_id = mobj.group('id')

        webpage = self._download_webpage(url, file_id)
        box_config = self._parse_json(
            self._search_regex(
                r'Box\.config\s*=\s*({.+?});', webpage, 'Box config'),
            file_id)
        request_token = box_config['requestToken']

        token_headers = {
            'Content-Type': 'application/json',
            'X-Request-Token': request_token,
            'X-Box-EndUser-API': 'sharedName=' + shared_name,
        }
        tokens = self._download_json(
            'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
            'Downloading token JSON metadata',
            data=json.dumps({'fileIDs': [file_id]}).encode(),
            headers=token_headers)
        access_token = tokens[file_id]['read']

        shared_link = 'https://app.box.com/s/' + shared_name
        file_headers = {
            'Authorization': 'Bearer ' + access_token,
            'BoxApi': 'shared_link=' + shared_link,
            'X-Rep-Hints': '[dash]',  # TODO: extract `hls` formats
        }
        f = self._download_json(
            'https://api.box.com/2.0/files/' + file_id, file_id,
            'Downloading file JSON metadata',
            headers=file_headers,
            query={
                'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size'
            })
        title = f['name']

        # Credentials appended to the download URL as query parameters.
        query = {
            'access_token': access_token,
            'shared_link': shared_link
        }

        # TODO: extract DASH formats from f['representations']['entries']
        # (content.url_template with '{+asset_path}' replaced by
        # 'manifest.mpd'); `query` must be appended to every fragment URL.

        download_url = f.get('authenticated_download_url')
        if download_url and f.get('is_download_available'):
            formats = [{
                'ext': f.get('extension') or determine_ext(title),
                'filesize': f.get('size'),
                'format_id': 'download',
                'url': update_url_query(download_url, query),
            }]
        else:
            formats = []

        self._sort_formats(formats)

        creator = f.get('created_by') or {}

        return {
            'id': file_id,
            'title': title,
            'formats': formats,
            # An empty description is reported as None.
            'description': f.get('description') or None,
            'uploader': creator.get('name'),
            'timestamp': parse_iso8601(f.get('created_at')),
            'uploader_id': creator.get('id'),
        }
|
@ -0,0 +1,266 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
base_url,
|
||||
url_basename,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class GediBaseIE(InfoExtractor):
    """Shared extraction logic for the Gedi extractors defined in this module.

    Subclasses supply ``_VALID_URL`` / ``_TESTS``; the page itself is parsed
    here from its ``PlayerFactory.setParam(type, name, value)`` calls.
    """

    @staticmethod
    def _clean_audio_fmts(formats):
        """Keep, in place, only the entries of *formats* that have an 'acodec' key."""
        formats[:] = [f for f in formats if 'acodec' in f]

    def _real_extract(self, url):
        """Extract formats, title and thumbnail from a Gedi video page.

        Every ``PlayerFactory.setParam('<type>', '<name>', '<value>');``
        call found in the page is inspected:

        * type ``format``: the name encodes media type (video/audio),
          delivery (``hls`` manifest or ``rrtv`` direct file), a height
          token and an optional bitrate; the value is the media URL.
        * type ``param``: ``videotitle`` and ``image_full_play`` provide
          the title and the thumbnail.

        Returns the info dict with audio, HLS and direct-HTTP formats
        concatenated in that order, each group sorted on its own.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        player_data = re.findall(
            r'PlayerFactory\.setParam\(\'(?P<type>.+?)\',\s*\'(?P<name>.+?)\',\s*\'(?P<val>.+?)\'\);',
            webpage)

        formats = []
        audio_fmts = []
        hls_fmts = []
        http_fmts = []
        title = ''
        thumb = ''

        fmt_reg = r'(?P<t>video|audio)-(?P<p>rrtv|hls)-(?P<h>[\w\d]+)(?:-(?P<br>[\w\d]+))?$'
        # Bitrate embedded in the media URL, used when the name has none.
        br_reg = r'video-rrtv-(?P<br>\d+)-'

        for t, n, v in player_data:
            if t == 'format':
                m = re.match(fmt_reg, n)
                if not m:
                    continue
                media_type = m.group('t')
                delivery = m.group('p')

                # audio formats
                if media_type == 'audio':
                    if delivery == 'hls':
                        audio_fmts.extend(self._extract_m3u8_formats(
                            v, video_id, 'm4a', m3u8_id='hls', fatal=False))
                    elif delivery == 'rrtv':
                        audio_fmts.append({
                            'format_id': 'mp3',
                            'url': v,
                            'tbr': 128,
                            'ext': 'mp3',
                            'vcodec': 'none',
                            'acodec': 'mp3',
                        })

                # video formats
                elif media_type == 'video':
                    # hls manifest video
                    if delivery == 'hls':
                        hls_fmts.extend(self._extract_m3u8_formats(
                            v, video_id, 'mp4', m3u8_id='hls', fatal=False))
                    # direct mp4 video
                    elif delivery == 'rrtv':
                        bitrate = m.group('br')
                        if not bitrate:
                            # re.search() returns None when the URL carries
                            # no bitrate either; the previous code called
                            # .group() on that None and crashed.  Fall back
                            # to the 0 default instead.
                            mm = re.search(br_reg, v)
                            bitrate = mm.group('br') if mm else None
                        http_fmts.append({
                            'format_id': 'https-' + m.group('h'),
                            'protocol': 'https',
                            'url': v,
                            'tbr': int(bitrate) if bitrate else 0,
                            'height': int(m.group('h'))
                        })

            elif t == 'param':
                if n == 'videotitle':
                    title = v
                if n == 'image_full_play':
                    thumb = v

        # Fall back to the OpenGraph title when the player did not set one.
        title = self._og_search_title(webpage) if title == '' else title

        # clean weird char: drop the UTF-8 byte pair C3 82 from the title
        title = compat_str(title).encode('utf8', 'replace').replace(b'\xc3\x82', b'').decode('utf8', 'replace')

        # Each group is sorted separately so the concatenation below keeps
        # the audio < hls < http ordering.
        if audio_fmts:
            self._clean_audio_fmts(audio_fmts)
            self._sort_formats(audio_fmts)
        if hls_fmts:
            self._sort_formats(hls_fmts)
        if http_fmts:
            self._sort_formats(http_fmts)

        formats.extend(audio_fmts)
        formats.extend(hls_fmts)
        formats.extend(http_fmts)

        return {
            'id': video_id,
            'title': title,
            'description': self._html_search_meta('twitter:description', webpage),
            'thumbnail': thumb,
            'formats': formats,
        }
|
||||
|
||||
|
||||
class GediIE(GediBaseIE):
    # Regular (non-embed) video pages on the listed video.<site>.it and
    # video.<site>.gelocal.it hosts.  The id is the trailing run of digits
    # and slashes; "embed/" paths are excluded by the negative lookahead.
    _VALID_URL = r'''(?x)https?://video\.
        (?:
            (?:espresso\.)?repubblica
            |lastampa
            |huffingtonpost
            |ilsecoloxix
            |iltirreno
            |messaggeroveneto
            |ilpiccolo
            |gazzettadimantova
            |mattinopadova
            |laprovinciapavese
            |tribunatreviso
            |nuovavenezia
            |gazzettadimodena
            |lanuovaferrara
            |corrierealpi
            |lasentinella
        )
        (?:\.gelocal)?\.it/(?!embed/).+?/(?P<id>[\d/]+)(?:\?|\&|$)'''
    _TESTS = [
        {
            'url': 'https://video.lastampa.it/politica/il-paradosso-delle-regionali-la-lega-vince-ma-sembra-aver-perso/121559/121683',
            'md5': '84658d7fb9e55a6e57ecc77b73137494',
            'info_dict': {
                'id': '121559/121683',
                'ext': 'mp4',
                'title': 'Il paradosso delle Regionali: ecco perché la Lega vince ma sembra aver perso',
                'description': 'md5:de7f4d6eaaaf36c153b599b10f8ce7ca',
                'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-social-play\.jpg$',
            },
        },
        {
            'url': 'https://video.repubblica.it/motori/record-della-pista-a-spa-francorchamps-la-pagani-huayra-roadster-bc-stupisce/367415/367963',
            'md5': 'e763b94b7920799a0e0e23ffefa2d157',
            'info_dict': {
                'id': '367415/367963',
                'ext': 'mp4',
                'title': 'Record della pista a Spa Francorchamps, la Pagani Huayra Roadster BC stupisce',
                'description': 'md5:5deb503cefe734a3eb3f07ed74303920',
                'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-social-play\.jpg$',
            },
        },
        {
            'url': 'https://video.ilsecoloxix.it/sport/cassani-e-i-brividi-azzurri-ai-mondiali-di-imola-qui-mi-sono-innamorato-del-ciclismo-da-ragazzino-incredibile-tornarci-da-ct/66184/66267',
            'md5': 'e48108e97b1af137d22a8469f2019057',
            'info_dict': {
                'id': '66184/66267',
                'ext': 'mp4',
                'title': 'Cassani e i brividi azzurri ai Mondiali di Imola: \\"Qui mi sono innamorato del ciclismo da ragazzino, incredibile tornarci da ct\\"',
                'description': 'md5:fc9c50894f70a2469bb9b54d3d0a3d3b',
                'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-social-play\.jpg$',
            },
        },
        {
            'url': 'https://video.iltirreno.gelocal.it/sport/dentro-la-notizia-ferrari-cosa-succede-a-maranello/141059/142723',
            'md5': 'a6e39f3bdc1842bbd92abbbbef230817',
            'info_dict': {
                'id': '141059/142723',
                'ext': 'mp4',
                'title': 'Dentro la notizia - Ferrari, cosa succede a Maranello',
                'description': 'md5:9907d65b53765681fa3a0b3122617c1f',
                'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-social-play\.jpg$',
            },
        },
    ]
|
||||
|
||||
|
||||
class GediEmbedsIE(GediBaseIE):
    # Same hosts as the regular Gedi pages, restricted to "/embed/" paths.
    _VALID_URL = r'''(?x)https?://video\.
        (?:
            (?:espresso\.)?repubblica
            |lastampa
            |huffingtonpost
            |ilsecoloxix
            |iltirreno
            |messaggeroveneto
            |ilpiccolo
            |gazzettadimantova
            |mattinopadova
            |laprovinciapavese
            |tribunatreviso
            |nuovavenezia
            |gazzettadimodena
            |lanuovaferrara
            |corrierealpi
            |lasentinella
        )
        (?:\.gelocal)?\.it/embed/.+?/(?P<id>[\d/]+)(?:\?|\&|$)'''
    _TESTS = [
        {
            'url': 'https://video.huffingtonpost.it/embed/politica/cotticelli-non-so-cosa-mi-sia-successo-sto-cercando-di-capire-se-ho-avuto-un-malore/29312/29276?responsive=true&el=video971040871621586700',
            'md5': 'f4ac23cadfea7fef89bea536583fa7ed',
            'info_dict': {
                'id': '29312/29276',
                'ext': 'mp4',
                'title': 'Cotticelli: \\"Non so cosa mi sia successo. Sto cercando di capire se ho avuto un malore\\"',
                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
                'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-social-play\.jpg$',
            },
        },
        {
            'url': 'https://video.espresso.repubblica.it/embed/tutti-i-video/01-ted-villa/14772/14870&width=640&height=360',
            'md5': '0391c2c83c6506581003aaf0255889c0',
            'info_dict': {
                'id': '14772/14870',
                'ext': 'mp4',
                'title': 'Festival EMERGENCY, Villa: «La buona informazione aiuta la salute» (14772-14870)',
                'description': 'md5:2bce954d278248f3c950be355b7c2226',
                'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-social-play\.jpg$',
            },
        },
    ]

    @staticmethod
    def _sanitize_urls(urls):
        """Normalise the embed URLs in *urls* in place and return the list.

        Protocol-relative entries ("//host/...") get an "https:" prefix,
        then every entry is rebuilt from its base URL and basename.
        """
        for idx, embed_url in enumerate(urls):
            # add protocol if missing
            if embed_url.startswith('//'):
                embed_url = 'https:%s' % embed_url
            # clean iframes urls
            urls[idx] = urljoin(base_url(embed_url), url_basename(embed_url))
        return urls

    @staticmethod
    def _extract_urls(webpage):
        """Return the sanitized Gedi embed URLs found in *webpage*.

        A URL is picked up when it is the quoted value of a
        ``data-frame-src`` attribute or of an ``<iframe ... src=``.
        """
        embed_re = r'''(?x)
            (?:
                data-frame-src=|
                <iframe[^\n]+src=
            )
            (["'])
            (?P<url>https?://video\.
                (?:
                    (?:espresso\.)?repubblica
                    |lastampa
                    |huffingtonpost
                    |ilsecoloxix
                    |iltirreno
                    |messaggeroveneto
                    |ilpiccolo
                    |gazzettadimantova
                    |mattinopadova
                    |laprovinciapavese
                    |tribunatreviso
                    |nuovavenezia
                    |gazzettadimodena
                    |lanuovaferrara
                    |corrierealpi
                    |lasentinella
                )
                (?:\.gelocal)?\.it/embed/.+?)
            \1'''
        found = [mobj.group('url') for mobj in re.finditer(embed_re, webpage)]
        return GediEmbedsIE._sanitize_urls(found)

    @staticmethod
    def _extract_url(webpage):
        """Return the first embed URL found in *webpage*, or None."""
        candidates = GediEmbedsIE._extract_urls(webpage)
        if not candidates:
            return None
        return candidates[0]
|
@ -0,0 +1,91 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class LBRYIE(InfoExtractor):
    """Extractor for single video/audio streams on lbry.tv and odysee.com."""

    IE_NAME = 'lbry.tv'
    # The id is "@<channel>:<claim>/<stream>:<claim>".  Both claim suffixes
    # may be several characters long; the trailing one previously lacked a
    # quantifier, so only its first character was captured.
    _VALID_URL = r'https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/(?P<id>@[^:]+:[0-9a-z]+/[^:]+:[0-9a-z]+)'
    _TESTS = [{
        # Video
        'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1',
        'md5': '65bd7ec1f6744ada55da8e4c48a2edf9',
        'info_dict': {
            'id': '17f983b61f53091fb8ea58a9c56804e4ff8cff4d',
            'ext': 'mp4',
            'title': 'First day in LBRY? Start HERE!',
            'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
            'timestamp': 1595694354,
            'upload_date': '20200725',
        }
    }, {
        # Audio
        'url': 'https://lbry.tv/@LBRYFoundation:0/Episode-1:e',
        'md5': 'c94017d3eba9b49ce085a8fad6b98d00',
        'info_dict': {
            'id': 'e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
            'ext': 'mp3',
            'title': 'The LBRY Foundation Community Podcast Episode 1 - Introduction, Streaming on LBRY, Transcoding',
            'description': 'md5:661ac4f1db09f31728931d7b88807a61',
            'timestamp': 1591312601,
            'upload_date': '20200604',
        }
    }, {
        'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
        'only_matching': True,
    }, {
        'url': "https://odysee.com/@ScammerRevolts:b0/I-SYSKEY'D-THE-SAME-SCAMMERS-3-TIMES!:b",
        'only_matching': True,
    }]

    def _call_api_proxy(self, method, display_id, params):
        """POST a JSON request to the lbry.tv API proxy and return its 'result'.

        *method* and *params* are sent as the JSON body; *display_id* is
        passed to ``_download_json`` as the video id.
        """
        return self._download_json(
            'https://api.lbry.tv/api/v1/proxy', display_id,
            headers={'Content-Type': 'application/json-rpc'},
            data=json.dumps({
                'method': method,
                'params': params,
            }).encode())['result']

    def _real_extract(self, url):
        """Resolve the claim behind *url* and return its info dict.

        Raises ExtractorError (expected) when the resolved claim's
        ``stream_type`` is neither 'video' nor 'audio'.
        """
        # The API is addressed with "lbry://" URIs that use '#' where the
        # web URL uses ':'.
        display_id = self._match_id(url).replace(':', '#')
        uri = 'lbry://' + display_id
        result = self._call_api_proxy(
            'resolve', display_id, {'urls': [uri]})[uri]
        result_value = result['value']
        if result_value.get('stream_type') not in ('video', 'audio'):
            raise ExtractorError('Unsupported URL', expected=True)
        streaming_url = self._call_api_proxy(
            'get', display_id, {'uri': uri})['streaming_url']
        source = result_value.get('source') or {}
        media = result_value.get('video') or result_value.get('audio') or {}
        signing_channel = result_value.get('signing_channel') or {}

        return {
            'id': result['claim_id'],
            'title': result_value['title'],
            'thumbnail': try_get(result_value, lambda x: x['thumbnail']['url'], compat_str),
            'description': result_value.get('description'),
            'license': result_value.get('license'),
            'timestamp': int_or_none(result.get('timestamp')),
            'tags': result_value.get('tags'),
            'width': int_or_none(media.get('width')),
            'height': int_or_none(media.get('height')),
            'duration': int_or_none(media.get('duration')),
            'channel': signing_channel.get('name'),
            'channel_id': signing_channel.get('claim_id'),
            # NOTE(review): if determine_ext() returns a truthy placeholder
            # when the name is missing, the mimetype2ext() fallback can never
            # be reached -- confirm against utils.determine_ext.
            'ext': determine_ext(source.get('name')) or mimetype2ext(source.get('media_type')),
            'filesize': int_or_none(source.get('size')),
            'url': streaming_url,
        }
|
@ -0,0 +1,131 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class MedalTVIE(InfoExtractor):
    # Extractor for medal.tv clip pages. All metadata is read from the
    # ``hydrationData`` JSON object embedded in a <script> tag of the page.
    _VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://medal.tv/clips/34934644/3Is9zyGMoBMr',
        'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
        'info_dict': {
            'id': '34934644',
            'ext': 'mp4',
            'title': 'Quad Cold',
            'description': 'Medal,https://medal.tv/desktop/',
            'uploader': 'MowgliSB',
            'timestamp': 1603165266,
            'upload_date': '20201020',
            # _real_extract passes the id through str_or_none(), so the
            # extracted value is a string, not an int.
            'uploader_id': '10619174',
        }
    }, {
        'url': 'https://medal.tv/clips/36787208',
        'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
        'info_dict': {
            'id': '36787208',
            'ext': 'mp4',
            'title': 'u tk me i tk u bigger',
            'description': 'Medal,https://medal.tv/desktop/',
            'uploader': 'Mimicc',
            'timestamp': 1605580939,
            'upload_date': '20201117',
            'uploader_id': '5156321',
        }
    }]

    def _real_extract(self, url):
        """Extract formats, thumbnails and metadata for a single clip.

        Raises ExtractorError when the page carries no data for the clip,
        or when the clip data reports an error and no format was found.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        hydration_data = self._parse_json(self._search_regex(
            r'<script[^>]*>\s*(?:var\s*)?hydrationData\s*=\s*({.+?})\s*</script>',
            webpage, 'hydration data', default='{}'), video_id)

        clip = try_get(
            hydration_data, lambda x: x['clips'][video_id], dict) or {}
        if not clip:
            raise ExtractorError(
                'Could not find video information.', video_id=video_id)

        title = clip['contentTitle']

        source_width = int_or_none(clip.get('sourceWidth'))
        source_height = int_or_none(clip.get('sourceHeight'))

        # Force float arithmetic: with plain ``/`` on ints Python 2 would
        # truncate the ratio (16 / 9 == 1) and produce wrong widths.
        if source_width and source_height:
            aspect_ratio = float(source_width) / source_height
        else:
            aspect_ratio = 16.0 / 9

        def add_item(container, item_url, height, id_key='format_id', item_id=None):
            # Append a format/thumbnail entry for item_url to container.
            # The entry is only added when its id ('<height>p' unless given
            # explicitly) occurs in the URL itself.
            if not item_id:
                if height is None:
                    # No '<N>p' id can be derived without a height.
                    return
                item_id = '%dp' % height
            if item_id not in item_url:
                return
            # The source rendition may have an unknown height; report an
            # unknown width then instead of failing on ``ratio * None``.
            width = None if height is None else int(round(aspect_ratio * height))
            container.append({
                'url': item_url,
                id_key: item_id,
                'width': width,
                'height': height
            })

        formats = []
        thumbnails = []
        for k, v in clip.items():
            if not (v and isinstance(v, compat_str)):
                continue
            # Keys look like contentUrl, contentUrl720p, thumbnail1080p, ...
            mobj = re.match(r'(contentUrl|thumbnail)(?:(\d+)p)?$', k)
            if not mobj:
                continue
            prefix = mobj.group(1)
            height = int_or_none(mobj.group(2))
            if prefix == 'contentUrl':
                add_item(
                    formats, v, height or source_height,
                    item_id=None if height else 'source')
            elif prefix == 'thumbnail':
                add_item(thumbnails, v, height, 'id')

        error = clip.get('error')
        if not formats and error:
            if error == 404:
                raise ExtractorError(
                    'That clip does not exist.',
                    expected=True, video_id=video_id)
            else:
                raise ExtractorError(
                    'An unknown error occurred ({0}).'.format(error),
                    video_id=video_id)

        self._sort_formats(formats)

        # Necessary because the id of the author is not known in advance.
        # Won't raise an issue if no profile can be found as this is optional.
        author = try_get(
            hydration_data, lambda x: list(x['profiles'].values())[0], dict) or {}
        author_id = str_or_none(author.get('id'))
        author_url = 'https://medal.tv/users/{0}'.format(author_id) if author_id else None

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': clip.get('contentDescription'),
            'uploader': author.get('displayName'),
            'timestamp': float_or_none(clip.get('created'), 1000),
            'uploader_id': author_id,
            'uploader_url': author_url,
            'duration': int_or_none(clip.get('videoLengthSeconds')),
            'view_count': int_or_none(clip.get('views')),
            'like_count': int_or_none(clip.get('likes')),
            'comment_count': int_or_none(clip.get('comments')),
        }
|
@ -0,0 +1,167 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
parse_count,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
remove_end,
|
||||
determine_ext,
|
||||
)
|
||||
import re
|
||||
|
||||
|
||||
class NitterIE(InfoExtractor):
    # Extractor for tweets with video served through a Nitter instance.
    # Taken from https://github.com/zedeus/nitter/wiki/Instances
    INSTANCES = ('nitter.net',
                 'nitter.snopyta.org',
                 'nitter.42l.fr',
                 'nitter.nixnet.services',
                 'nitter.13ad.de',
                 'nitter.pussthecat.org',
                 'nitter.mastodont.cat',
                 'nitter.dark.fail',
                 'nitter.tedomum.net',
                 'nitter.cattube.org',
                 'nitter.fdn.fr',
                 'nitter.1d4.us',
                 'nitter.kavin.rocks',
                 'tweet.lambda.dance',
                 'nitter.cc',
                 'nitter.weaponizedhumiliation.com',
                 '3nzoldnxplag42gqjs23xvghtzf6t6yzssrtytnntc6ppc7xxuoneoad.onion',
                 'nitter.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd.onion',
                 'nitterlgj3n5fgwesu3vxc5h67ruku33nqaoeoocae2mvlzhsu6k7fqd.onion')

    _INSTANCES_RE = '(?:' + '|'.join([re.escape(instance) for instance in INSTANCES]) + ')'
    _VALID_URL = r'https?://%(instance)s/(?P<uploader_id>.+)/status/(?P<id>[0-9]+)(#.)?' % {'instance': _INSTANCES_RE}
    current_instance = INSTANCES[0]  # the test and official instance
    _TESTS = [
        {
            # GIF (wrapped in mp4)
            'url': 'https://' + current_instance + '/firefox/status/1314279897502629888#m',
            'info_dict': {
                'id': '1314279897502629888',
                'ext': 'mp4',
                'title': 'Firefox 🔥 - You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg #UnfckTheInternet',
                'description': 'You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg #UnfckTheInternet',
                'thumbnail': r're:^https?://.*\.jpg$',
                'uploader': 'Firefox 🔥',
                'uploader_id': 'firefox',
                'uploader_url': 'https://' + current_instance + '/firefox',
                'upload_date': '20201008',
                'timestamp': 1602183720,
            },
        }, {  # normal video
            'url': 'https://' + current_instance + '/Le___Doc/status/1299715685392756737#m',
            'info_dict': {
                'id': '1299715685392756737',
                'ext': 'mp4',
                'title': 'Le Doc - "Je ne prédis jamais rien" D Raoult, Août 2020...',
                'description': '"Je ne prédis jamais rien" D Raoult, Août 2020...',
                'thumbnail': r're:^https?://.*\.jpg$',
                'uploader': 'Le Doc',
                'uploader_id': 'Le___Doc',
                'uploader_url': 'https://' + current_instance + '/Le___Doc',
                'upload_date': '20200829',
                'timestamp': 1598711341,
                'view_count': int,
                'like_count': int,
                'repost_count': int,
                'comment_count': int,
            },
        }, {  # video embed in a "Streaming Political Ads" box
            'url': 'https://' + current_instance + '/mozilla/status/1321147074491092994#m',
            'info_dict': {
                'id': '1321147074491092994',
                'ext': 'mp4',
                'title': "Mozilla - Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows? This isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. Learn more ➡️ https://mzl.la/StreamingAds",
                'description': "Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows? This isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. Learn more ➡️ https://mzl.la/StreamingAds",
                'thumbnail': r're:^https?://.*\.jpg$',
                'uploader': 'Mozilla',
                'uploader_id': 'mozilla',
                'uploader_url': 'https://' + current_instance + '/mozilla',
                'upload_date': '20201027',
                'timestamp': 1603820982
            },
        },
    ]

    def _real_extract(self, url):
        """Extract the video attached to a tweet shown on a Nitter instance.

        The video URL and the title are mandatory; every other field is
        looked up non-fatally and left as None when it cannot be found.
        """
        video_id = self._match_id(url)
        parsed_url = compat_urlparse.urlparse(url)
        base_url = parsed_url.scheme + '://' + parsed_url.netloc

        self._set_cookie(parsed_url.netloc, 'hlsPlayback', 'on')
        webpage = self._download_webpage(url, video_id)

        # The page only carries instance-relative paths.
        video_url = base_url + self._html_search_regex(r'(?:<video[^>]+data-url|<source[^>]+src)="([^"]+)"', webpage, 'video url')
        ext = determine_ext(video_url)

        # No recognisable extension: the URL is treated as an HLS manifest.
        if ext == 'unknown_video':
            formats = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
        else:
            formats = [{
                'url': video_url,
                'ext': ext
            }]

        # The og:description lookup may come back empty; guard it so that
        # the tweet-content fallback is actually reachable.
        og_description = self._og_search_description(webpage)
        title = (
            (og_description or '').replace('\n', ' ')
            or self._html_search_regex(r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title'))
        description = title

        mobj = re.match(self._VALID_URL, url)
        uploader_id = (
            mobj.group('uploader_id')
            or self._html_search_regex(r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False))

        # Always bind uploader_url so the result dict can be built even
        # when no uploader id is available.
        uploader_url = base_url + '/' + uploader_id if uploader_id else None

        uploader = self._html_search_regex(r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False)

        if uploader:
            title = uploader + ' - ' + title

        view_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-play[^>]*></span>\s([^<]+)</div>', webpage, 'view count', fatal=False))
        like_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-heart[^>]*></span>\s([^<]+)</div>', webpage, 'like count', fatal=False))
        repost_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-retweet[^>]*></span>\s([^<]+)</div>', webpage, 'repost count', fatal=False))
        comment_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-comment[^>]*></span>\s([^<]+)</div>', webpage, 'comment count', fatal=False))

        thumbnail_path = (
            self._html_search_meta('og:image', webpage, 'thumbnail url')
            or self._html_search_regex(r'<video[^>]+poster="([^"]+)"', webpage, 'thumbnail url', fatal=False))

        thumbnail = None
        thumbnails = []
        # Both lookups are optional; only build thumbnail URLs when one
        # of them produced a path.
        if thumbnail_path:
            # if parsed with regex, it should contain this
            thumbnail = remove_end(base_url + thumbnail_path, '%3Asmall')
            thumbnails = [{
                'id': thumbnail_id,
                'url': thumbnail + '%3A' + thumbnail_id,
            } for thumbnail_id in ('thumb', 'small', 'large', 'medium', 'orig')]

        date = self._html_search_regex(r'<span[^>]+class="tweet-date"[^>]*><a[^>]+title="([^"]+)"', webpage, 'upload date', fatal=False)
        upload_date = unified_strdate(date)
        timestamp = unified_timestamp(date)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'uploader': uploader,
            'timestamp': timestamp,
            'uploader_id': uploader_id,
            'uploader_url': uploader_url,
            'view_count': view_count,
            'like_count': like_count,
            'repost_count': repost_count,
            'comment_count': comment_count,
            'formats': formats,
            'thumbnails': thumbnails,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }
|
@ -0,0 +1,201 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class PinterestBaseIE(InfoExtractor):
    # Shared helpers for the Pinterest extractors: the resource API call
    # and the conversion of a pin's JSON data into an info dict.
    _VALID_URL_BASE = r'https?://(?:[^/]+\.)?pinterest\.(?:com|fr|de|ch|jp|cl|ca|it|co\.uk|nz|ru|com\.au|at|pt|co\.kr|es|com\.mx|dk|ph|th|com\.uy|co|nl|info|kr|ie|vn|com\.vn|ec|mx|in|pe|co\.at|hu|co\.in|co\.nz|id|com\.ec|com\.py|tw|be|uk|com\.bo|com\.pe)'

    def _call_api(self, resource, video_id, options):
        """Query ``<resource>Resource`` and return its 'resource_response'.

        options is serialised to JSON and sent as the ``data`` query
        parameter, wrapped in an ``{'options': ...}`` object.
        """
        return self._download_json(
            'https://www.pinterest.com/resource/%sResource/get/' % resource,
            video_id, 'Download %s JSON metadata' % resource, query={
                'data': json.dumps({'options': options})
            })['resource_response']

    def _extract_video(self, data, extract_formats=True):
        """Build an info dict from the JSON data of a single pin.

        With extract_formats=False the formats list is left empty and is
        not sorted/validated.
        """
        video_id = data['id']

        title = (data.get('title') or data.get('grid_title') or video_id).strip()

        formats = []
        duration = None
        if extract_formats:
            # Pins without video data must not fail with a raw
            # KeyError/TypeError here; with an empty list _sort_formats
            # below reports the missing formats instead.
            video_list = try_get(
                data, lambda x: x['videos']['video_list'], dict) or {}
            for format_id, format_dict in video_list.items():
                if not isinstance(format_dict, dict):
                    continue
                format_url = url_or_none(format_dict.get('url'))
                if not format_url:
                    continue
                # The value is divided by 1000 (scale) before being used.
                duration = float_or_none(format_dict.get('duration'), scale=1000)
                ext = determine_ext(format_url)
                if 'hls' in format_id.lower() or ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                        m3u8_id=format_id, fatal=False))
                else:
                    formats.append({
                        'url': format_url,
                        'format_id': format_id,
                        'width': int_or_none(format_dict.get('width')),
                        'height': int_or_none(format_dict.get('height')),
                        'duration': duration,
                    })
            self._sort_formats(
                formats, field_preference=('height', 'width', 'tbr', 'format_id'))

        description = data.get('description') or data.get('description_html') or data.get('seo_description')
        timestamp = unified_timestamp(data.get('created_at'))

        def _u(field):
            # String field of the pin's 'closeup_attribution' object, or None.
            return try_get(data, lambda x: x['closeup_attribution'][field], compat_str)

        uploader = _u('full_name')
        uploader_id = _u('id')

        repost_count = int_or_none(data.get('repin_count'))
        comment_count = int_or_none(data.get('comment_count'))
        categories = try_get(data, lambda x: x['pin_join']['visual_annotation'], list)
        tags = data.get('hashtags')

        thumbnails = []
        images = data.get('images')
        if isinstance(images, dict):
            for thumbnail_id, thumbnail in images.items():
                if not isinstance(thumbnail, dict):
                    continue
                thumbnail_url = url_or_none(thumbnail.get('url'))
                if not thumbnail_url:
                    continue
                thumbnails.append({
                    'url': thumbnail_url,
                    'width': int_or_none(thumbnail.get('width')),
                    'height': int_or_none(thumbnail.get('height')),
                })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'timestamp': timestamp,
            'thumbnails': thumbnails,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'repost_count': repost_count,
            'comment_count': comment_count,
            'categories': categories,
            'tags': tags,
            'formats': formats,
            'extractor_key': PinterestIE.ie_key(),
        }
|
||||
|
||||
|
||||
class PinterestIE(PinterestBaseIE):
    # Extractor for a single pin URL (``/pin/<numeric id>``).
    _VALID_URL = r'%s/pin/(?P<id>\d+)' % PinterestBaseIE._VALID_URL_BASE
    _TESTS = [{
        'url': 'https://www.pinterest.com/pin/664281013778109217/',
        'md5': '6550c2af85d6d9f3fe3b88954d1577fc',
        'info_dict': {
            'id': '664281013778109217',
            'ext': 'mp4',
            'title': 'Origami',
            'description': 'md5:b9d90ddf7848e897882de9e73344f7dd',
            'duration': 57.7,
            'timestamp': 1593073622,
            'upload_date': '20200625',
            'uploader': 'Love origami -I am Dafei',
            'uploader_id': '586523688879454212',
            'repost_count': 50,
            'comment_count': 0,
            'categories': list,
            'tags': list,
        },
    }, {
        'url': 'https://co.pinterest.com/pin/824721750502199491/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the pin's JSON through the 'Pin' resource and convert it."""
        pin_id = self._match_id(url)
        api_options = {
            'field_set_key': 'unauth_react_main_pin',
            'id': pin_id,
        }
        response = self._call_api('Pin', pin_id, api_options)
        return self._extract_video(response['data'])
|
||||
|
||||
|
||||
class PinterestCollectionIE(PinterestBaseIE):
    # Extractor for a board URL (``/<username>/<board slug>``); returns
    # the board's video pins as a playlist.
    _VALID_URL = r'%s/(?P<username>[^/]+)/(?P<id>[^/?#&]+)' % PinterestBaseIE._VALID_URL_BASE
    _TESTS = [{
        'url': 'https://www.pinterest.ca/mashal0407/cool-diys/',
        'info_dict': {
            'id': '585890301462791043',
            'title': 'cool diys',
        },
        'playlist_count': 8,
    }, {
        'url': 'https://www.pinterest.ca/fudohub/videos/',
        'info_dict': {
            'id': '682858430939307450',
            'title': 'VIDEOS',
        },
        'playlist_mincount': 365,
        'skip': 'Test with extract_formats=False',
    }]

    @classmethod
    def suitable(cls, url):
        # Pin URLs also match the generic two-segment pattern above, so
        # leave them to PinterestIE.
        return False if PinterestIE.suitable(url) else super(
            PinterestCollectionIE, cls).suitable(url)

    def _real_extract(self, url):
        """Collect every video pin of a board, following feed bookmarks."""
        username, slug = re.match(self._VALID_URL, url).groups()
        board = self._call_api(
            'Board', slug, {
                'slug': slug,
                'username': username
            })['data']
        board_id = board['id']
        options = {
            'board_id': board_id,
            'page_size': 250,
        }
        bookmark = None
        entries = []
        while True:
            if bookmark:
                options['bookmarks'] = [bookmark]
            board_feed = self._call_api('BoardFeed', board_id, options)
            for item in (board_feed.get('data') or []):
                if not isinstance(item, dict) or item.get('type') != 'pin':
                    continue
                if not item.get('id'):
                    continue
                # Pins without a video list cannot be extracted; skip
                # them so that one such pin does not abort the whole
                # playlist.
                if not try_get(item, lambda x: x['videos']['video_list'], dict):
                    continue
                # Some pins may not be available anonymously via pin URL,
                # so the feed item itself is extracted instead of
                # delegating to the pin page.
                entries.append(self._extract_video(item))
            bookmark = board_feed.get('bookmark')
            if not bookmark:
                break
        return self.playlist_result(
            entries, playlist_id=board_id, playlist_title=board.get('name'))
|
@ -0,0 +1,413 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
base_url,
|
||||
url_basename,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class RCSBaseIE(InfoExtractor):
    # Shared logic for the RCS extractors (corriere.it, gazzetta.it,
    # leitv.it, youreporter.it, ...): locating the video JSON in a page
    # and rewriting the media URLs it contains.

    # Substring replacements applied to every media URL.
    _ALL_REPLACE = {
        'media2vam.corriere.it.edgesuite.net':
            'media2vam-corriere-it.akamaized.net',
        'media.youreporter.it.edgesuite.net':
            'media-youreporter-it.akamaized.net',
        'corrierepmd.corriere.it.edgesuite.net':
            'corrierepmd-corriere-it.akamaized.net',
        'media2vam-corriere-it.akamaized.net/fcs.quotidiani/vr/videos/':
            'video.corriere.it/vr360/videos/',
        '.net//': '.net/',
    }
    # Further host replacements (as in the original code these are
    # applied to every media URL as well, not only to mp4 ones).
    _MP4_REPLACE = {
        'media2vam.corbologna.corriere.it.edgesuite.net':
            'media2vam-bologna-corriere-it.akamaized.net',
        'media2vam.corfiorentino.corriere.it.edgesuite.net':
            'media2vam-fiorentino-corriere-it.akamaized.net',
        'media2vam.cormezzogiorno.corriere.it.edgesuite.net':
            'media2vam-mezzogiorno-corriere-it.akamaized.net',
        'media2vam.corveneto.corriere.it.edgesuite.net':
            'media2vam-veneto-corriere-it.akamaized.net',
        'media2.oggi.it.edgesuite.net':
            'media2-oggi-it.akamaized.net',
        'media2.quimamme.it.edgesuite.net':
            'media2-quimamme-it.akamaized.net',
        'media2.amica.it.edgesuite.net':
            'media2-amica-it.akamaized.net',
        'media2.living.corriere.it.edgesuite.net':
            'media2-living-corriere-it.akamaized.net',
        'media2.style.corriere.it.edgesuite.net':
            'media2-style-corriere-it.akamaized.net',
        'media2.iodonna.it.edgesuite.net':
            'media2-iodonna-it.akamaized.net',
        'media2.leitv.it.edgesuite.net':
            'media2-leitv-it.akamaized.net',
    }
    # Old host (without the '.net' suffix) -> path segment used on
    # vod.rcsobjects.it.
    _MIGRATION_MAP = {
        'videoamica-vh.akamaihd': 'amica',
        'media2-amica-it.akamaized': 'amica',
        'corrierevam-vh.akamaihd': 'corriere',
        'media2vam-corriere-it.akamaized': 'corriere',
        'cormezzogiorno-vh.akamaihd': 'corrieredelmezzogiorno',
        'media2vam-mezzogiorno-corriere-it.akamaized': 'corrieredelmezzogiorno',
        'corveneto-vh.akamaihd': 'corrieredelveneto',
        'media2vam-veneto-corriere-it.akamaized': 'corrieredelveneto',
        'corbologna-vh.akamaihd': 'corrieredibologna',
        'media2vam-bologna-corriere-it.akamaized': 'corrieredibologna',
        'corfiorentino-vh.akamaihd': 'corrierefiorentino',
        'media2vam-fiorentino-corriere-it.akamaized': 'corrierefiorentino',
        'corinnovazione-vh.akamaihd': 'corriereinnovazione',
        'media2-gazzanet-gazzetta-it.akamaized': 'gazzanet',
        'videogazzanet-vh.akamaihd': 'gazzanet',
        'videogazzaworld-vh.akamaihd': 'gazzaworld',
        'gazzettavam-vh.akamaihd': 'gazzetta',
        'media2vam-gazzetta-it.akamaized': 'gazzetta',
        'videoiodonna-vh.akamaihd': 'iodonna',
        'media2-leitv-it.akamaized': 'leitv',
        'videoleitv-vh.akamaihd': 'leitv',
        'videoliving-vh.akamaihd': 'living',
        'media2-living-corriere-it.akamaized': 'living',
        'media2-oggi-it.akamaized': 'oggi',
        'videooggi-vh.akamaihd': 'oggi',
        'media2-quimamme-it.akamaized': 'quimamme',
        'quimamme-vh.akamaihd': 'quimamme',
        'videorunning-vh.akamaihd': 'running',
        'media2-style-corriere-it.akamaized': 'style',
        'style-vh.akamaihd': 'style',
        'videostyle-vh.akamaihd': 'style',
        'media2-stylepiccoli-it.akamaized': 'stylepiccoli',
        'stylepiccoli-vh.akamaihd': 'stylepiccoli',
        'doveviaggi-vh.akamaihd': 'viaggi',
        'media2-doveviaggi-it.akamaized': 'viaggi',
        'media2-vivimilano-corriere-it.akamaized': 'vivimilano',
        'vivimilano-vh.akamaihd': 'vivimilano',
        'media2-youreporter-it.akamaized': 'youreporter'
    }
    # Hosts whose mp4 files are served from corriereobjects.it instead;
    # only the keys are used.
    _MIGRATION_MEDIA = {
        'advrcs-vh.akamaihd': '',
        'corriere-f.akamaihd': '',
        'corrierepmd-corriere-it.akamaized': '',
        'corrprotetto-vh.akamaihd': '',
        'gazzetta-f.akamaihd': '',
        'gazzettapmd-gazzetta-it.akamaized': '',
        'gazzprotetto-vh.akamaihd': '',
        'periodici-f.akamaihd': '',
        'periodicisecure-vh.akamaihd': '',
        'videocoracademy-vh.akamaihd': ''
    }

    def _get_video_src(self, video):
        """Return the rewritten media URLs of a video JSON object.

        The result maps 'mp3' (audio items) or 'm3u8' / 'mp4' (video
        items) to a URL. Types for which the JSON carries no URL are
        left out, so the result may be empty.
        """
        media_profile = video.get('mediaProfile') or {}
        media_files = media_profile.get('mediaFile') or []
        src = {}
        # audio
        if video.get('mediaType') == 'AUDIO':
            for aud in media_files:
                # todo: check
                if aud.get('value'):
                    src['mp3'] = aud['value']
        # video
        else:
            for vid in media_files:
                value = vid.get('value')
                if not value:
                    continue
                mime_type = vid.get('mimeType')
                if mime_type == 'application/vnd.apple.mpegurl':
                    src['m3u8'] = value
                elif mime_type == 'video/mp4':
                    src['mp4'] = value

        # replace host
        for t in src:
            for s, r in self._ALL_REPLACE.items():
                src[t] = src[t].replace(s, r)
            for s, r in self._MP4_REPLACE.items():
                src[t] = src[t].replace(s, r)

        # switch cdn
        cdn_re = r'(?:https*:)?\/\/(?P<host>.*)\.net\/i(?P<path>.*)$'
        if ('mp4' in src and 'm3u8' in src
                and '-lh.akamaihd' not in src['m3u8']
                and 'akamai' in src['mp4']):
            matches = re.search(cdn_re, src['m3u8'])
            # Leave the URL untouched when it has an unexpected shape or
            # an unmapped host instead of failing on a None match or a
            # KeyError.
            if matches and matches.group('host') in self._MIGRATION_MAP:
                src['m3u8'] = 'https://vod.rcsobjects.it/hls/%s%s' % (
                    self._MIGRATION_MAP[matches.group('host')],
                    matches.group('path').replace(
                        '///', '/').replace(
                        '//', '/').replace(
                        '.csmil', '.urlset'
                    )
                )
            matches = re.search(cdn_re, src['mp4'])
            if matches:
                host = matches.group('host')
                if host in self._MIGRATION_MEDIA:
                    # Membership test: str.find() returns -1 (truthy)
                    # when the substring is absent, so it cannot be used
                    # as a condition.
                    if 'fcs.quotidiani_!' in src['mp4']:
                        vh_stream = 'https://media2-it.corriereobjects.it'
                    else:
                        vh_stream = 'https://media2.corriereobjects.it'
                    src['mp4'] = '%s%s' % (
                        vh_stream,
                        matches.group('path').replace(
                            '///', '/').replace(
                            '//', '/').replace(
                            '/fcs.quotidiani/mediacenter', '').replace(
                            '/fcs.quotidiani_!/mediacenter', '').replace(
                            'corriere/content/mediacenter/', '').replace(
                            'gazzetta/content/mediacenter/', '')
                    )
                elif host in self._MIGRATION_MAP:
                    src['mp4'] = 'https://vod.rcsobjects.it/%s%s' % (
                        self._MIGRATION_MAP[host],
                        matches.group('path').replace('///', '/').replace('//', '/')
                    )

        if 'mp3' in src:
            src['mp3'] = src['mp3'].replace(
                'media2vam-corriere-it.akamaized.net',
                'vod.rcsobjects.it/corriere')

        # Switch to the 'vod-it' host when the profile has a
        # 'geoblocking' entry or the URL contains the 'fcs.quotidiani_!'
        # marker.
        geoblocked = 'geoblocking' in media_profile
        for t in ('mp4', 'm3u8'):
            if t in src and (geoblocked or 'fcs.quotidiani_!' in src[t]):
                src[t] = src[t].replace('vod.rcsobjects', 'vod-it.rcsobjects')

        if 'm3u8' in src:
            if 'csmil' in src['m3u8'] and 'vod' in src['m3u8']:
                src['m3u8'] = src['m3u8'].replace('.csmil', '.urlset')

        return src

    def _create_formats(self, urls, video_id):
        """Build the sorted formats list from the dict of media URLs.

        HLS is preferred; the direct mp4 / mp3 URL is only used when the
        HLS manifest is missing or yields no formats.
        """
        formats = []
        # Only query the manifest when there is one: passing None as the
        # URL would fail despite fatal=False.
        if urls.get('m3u8'):
            formats = self._extract_m3u8_formats(
                urls['m3u8'], video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False)

        if not formats:
            if urls.get('mp4'):
                formats.append({
                    'format_id': 'http-mp4',
                    'url': urls['mp4']
                })
            if urls.get('mp3'):
                formats.append({
                    'format_id': 'http-mp3',
                    'url': urls['mp3'],
                    'vcodec': 'none',
                })
        self._sort_formats(formats)
        return formats

    def _real_extract(self, url):
        """Extract a video from an RCS page or embed page.

        Video data is looked up, in order, as a JSON URL referenced by
        the page, as a ``video = {...}`` object inside the page, and
        finally through an embedded RCS iframe (returned as a
        url_transparent result). Raises ExtractorError when none is found.
        """
        video_id = self._match_id(url)
        mobj = re.search(self._VALID_URL, url)

        if 'cdn' not in mobj.groupdict():
            raise ExtractorError('CDN not found in url: %s' % url)

        # for leitv/youreporter/viaggi don't use the embed page
        if ((mobj.group('cdn') not in ['leitv.it', 'youreporter.it'])
                and (mobj.group('vid') == 'video')):
            url = 'https://video.%s/video-embed/%s' % (mobj.group('cdn'), video_id)

        page = self._download_webpage(url, video_id)

        video_data = None
        # look for json video data url
        # (the pattern is verbose, so the blank between 'var' and 'url'
        # has to be spelled as \s+ to be matched at all)
        json_url = self._search_regex(
            r'''(?x)var\s+url\s*=\s*["']((?:https?:)?
                //video\.rcs\.it
                /fragment-includes/video-includes/.+?\.json)["'];''',
            page, video_id, default=None)
        if json_url:
            if json_url.startswith('//'):
                json_url = 'https:%s' % json_url
            video_data = self._download_json(json_url, video_id)

        # if json url not found, look for json video data directly in the page
        else:
            video_json = self._search_regex(
                r'[\s;]video\s*=\s*({[\s\S]+?})(?:;|,playlist=)',
                page, video_id, default=None)
            if video_json:
                video_data = self._parse_json(
                    video_json, video_id, transform_source=js_to_json)
            else:
                # if no video data found try search for iframes
                emb = RCSEmbedsIE._extract_url(page)
                if emb:
                    return {
                        '_type': 'url_transparent',
                        'url': emb,
                        'ie_key': RCSEmbedsIE.ie_key()
                    }

        if not video_data:
            raise ExtractorError('Video data not found in the page')

        formats = self._create_formats(
            self._get_video_src(video_data), video_id)

        description = (video_data.get('description')
                       or clean_html(video_data.get('htmlDescription')))
        uploader = video_data.get('provider') or mobj.group('cdn')

        return {
            'id': video_id,
            'title': video_data.get('title'),
            'description': description,
            'uploader': uploader,
            'formats': formats
        }
|
||||
|
||||
|
||||
class RCSEmbedsIE(RCSBaseIE):
    # Extractor for RCS ``/video-embed/<id>`` pages; also provides the
    # helpers that find such embeds inside other pages.
    _VALID_URL = r'''(?x)
                    https?://(?P<vid>video)\.
                    (?P<cdn>
                    (?:
                        rcs|
                        (?:corriere\w+\.)?corriere|
                        (?:gazzanet\.)?gazzetta
                    )\.it)
                    /video-embed/(?P<id>[^/=&\?]+?)(?:$|\?)'''
    _TESTS = [{
        'url': 'https://video.rcs.it/video-embed/iodonna-0001585037',
        'md5': '623ecc8ffe7299b2d0c1046d8331a9df',
        'info_dict': {
            'id': 'iodonna-0001585037',
            'ext': 'mp4',
            'title': 'Sky Arte racconta Madonna nella serie "Artist to icon"',
            'description': 'md5:65b09633df9ffee57f48b39e34c9e067',
            'uploader': 'rcs.it',
        }
    }, {
        'url': 'https://video.gazzanet.gazzetta.it/video-embed/gazzanet-mo05-0000260789',
        'md5': 'a043e3fecbe4d9ed7fc5d888652a5440',
        'info_dict': {
            'id': 'gazzanet-mo05-0000260789',
            'ext': 'mp4',
            'title': 'Valentino Rossi e papà Graziano si divertono col drifting',
            'description': 'md5:a8bf90d6adafd9815f70fc74c0fc370a',
            'uploader': 'rcd',
        }
    }, {
        # 'only_matching' is the key the test harness recognises for
        # URL-match-only test cases.
        'url': 'https://video.corriere.it/video-embed/b727632a-f9d0-11ea-91b0-38d50a849abb?player',
        'only_matching': True
    }, {
        'url': 'https://video.gazzetta.it/video-embed/49612410-00ca-11eb-bcd8-30d4253e0140',
        'only_matching': True
    }]

    @staticmethod
    def _sanitize_urls(urls):
        """Normalise embed URLs in place and return the same list.

        Protocol-relative URLs get an explicit ``https:`` scheme, then
        each URL is rebuilt from its base URL and basename (cleaning the
        iframe URLs).
        """
        for i, e in enumerate(urls):
            # add protocol if missing
            if e.startswith('//'):
                e = 'https:%s' % e
            # clean iframes urls
            urls[i] = urljoin(base_url(e), url_basename(e))
        return urls

    @staticmethod
    def _extract_urls(webpage):
        """Return the sanitised URLs of all RCS embeds found in webpage."""
        entries = [
            mobj.group('url')
            for mobj in re.finditer(r'''(?x)
            (?:
                data-frame-src=|
                <iframe[^\n]+src=
            )
            (["'])
                (?P<url>(?:https?:)?//video\.
                    (?:
                        rcs|
                        (?:corriere\w+\.)?corriere|
                        (?:gazzanet\.)?gazzetta
                    )
                \.it/video-embed/.+?)
            \1''', webpage)]
        return RCSEmbedsIE._sanitize_urls(entries)

    @staticmethod
    def _extract_url(webpage):
        """Return the first RCS embed URL found in webpage, or None."""
        urls = RCSEmbedsIE._extract_urls(webpage)
        return urls[0] if urls else None
|
||||
|
||||
|
||||
class RCSIE(RCSBaseIE):
    # Extractor for regular (non-embed) video pages on the corriere.it
    # and gazzetta.it sites; the extraction logic lives in RCSBaseIE.
    _VALID_URL = r'''(?x)https?://(?P<vid>video|viaggi)\.
                    (?P<cdn>
                    (?:
                        corrieredelmezzogiorno\.
                        |corrieredelveneto\.
                        |corrieredibologna\.
                        |corrierefiorentino\.
                    )?corriere\.it
                    |(?:gazzanet\.)?gazzetta\.it)
                    /(?!video-embed/).+?/(?P<id>[^/\?]+)(?=\?|/$|$)'''
    _TESTS = [{
        'url': 'https://video.corriere.it/sport/formula-1/vettel-guida-ferrari-sf90-mugello-suo-fianco-c-elecrerc-bendato-video-esilarante/b727632a-f9d0-11ea-91b0-38d50a849abb',
        'md5': '0f4ededc202b0f00b6e509d831e2dcda',
        'info_dict': {
            'id': 'b727632a-f9d0-11ea-91b0-38d50a849abb',
            'ext': 'mp4',
            'title': 'Vettel guida la Ferrari SF90 al Mugello e al suo fianco c\'è Leclerc (bendato): il video è esilarante',
            'description': 'md5:93b51c9161ac8a64fb2f997b054d0152',
            'uploader': 'Corriere Tv',
        }
    }, {
        'url': 'https://viaggi.corriere.it/video/norvegia-il-nuovo-ponte-spettacolare-sopra-la-cascata-di-voringsfossen/',
        'md5': 'da378e4918d2afbf7d61c35abb948d4c',
        'info_dict': {
            'id': '5b7cd134-e2c1-11ea-89b3-b56dd0df2aa2',
            'ext': 'mp4',
            'title': 'La nuova spettacolare attrazione in Norvegia: il ponte sopra Vøringsfossen',
            'description': 'md5:18b35a291f6746c0c8dacd16e5f5f4f8',
            'uploader': 'DOVE Viaggi',
        }
    }, {
        'url': 'https://video.gazzetta.it/video-motogp-catalogna-cadute-dovizioso-vale-rossi/49612410-00ca-11eb-bcd8-30d4253e0140?vclk=Videobar',
        'md5': 'eedc1b5defd18e67383afef51ff7bdf9',
        'info_dict': {
            'id': '49612410-00ca-11eb-bcd8-30d4253e0140',
            'ext': 'mp4',
            'title': 'Dovizioso, il contatto con Zarco e la caduta. E anche Vale finisce a terra',
            'description': 'md5:8c6e905dc3b9413218beca11ebd69778',
            'uploader': 'AMorici',
        }
    }, {
        # 'only_matching' is the key the test harness recognises for
        # URL-match-only test cases.
        'url': 'https://video.corriere.it/video-360/metro-copenaghen-tutta-italiana/a248a7f0-e2db-11e9-9830-af2de6b1f945',
        'only_matching': True
    }]
|
||||
|
||||
|
||||
class RCSVariousIE(RCSBaseIE):
    # Extractor for video pages on www.leitv.it and www.youreporter.it.
    # The class only supplies the URL pattern and tests; extraction is
    # inherited from RCSBaseIE.
    # The pattern defines the 'cdn' and 'id' groups; the '/video/' path
    # prefix is optional.
    _VALID_URL = r'''(?x)https?://www\.
                    (?P<cdn>
                        leitv\.it|
                        youreporter\.it
                    )/(?:video/)?(?P<id>[^/]+?)(?:$|\?|/)'''
    _TESTS = [{
        'url': 'https://www.leitv.it/video/marmellata-di-ciliegie-fatta-in-casa/',
        'md5': '618aaabac32152199c1af86784d4d554',
        'info_dict': {
            'id': 'marmellata-di-ciliegie-fatta-in-casa',
            'ext': 'mp4',
            'title': 'Marmellata di ciliegie fatta in casa',
            'description': 'md5:89133864d6aad456dbcf6e7a29f86263',
            'uploader': 'leitv.it',
        }
    }, {
        'url': 'https://www.youreporter.it/fiume-sesia-3-ottobre-2020/',
        'md5': '8dccd436b47a830bab5b4a88232f391a',
        'info_dict': {
            'id': 'fiume-sesia-3-ottobre-2020',
            'ext': 'mp4',
            'title': 'Fiume Sesia 3 ottobre 2020',
            'description': 'md5:0070eef1cc884d13c970a4125063de55',
            'uploader': 'youreporter.it',
        }
    }]
|
@ -0,0 +1,67 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class RumbleEmbedIE(InfoExtractor):
    """Extractor for rumble.com ``/embed/<id>`` player URLs."""

    _VALID_URL = r'https?://(?:www\.)?rumble\.com/embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)'
    _TESTS = [{
        'url': 'https://rumble.com/embed/v5pv5f',
        'md5': '36a18a049856720189f30977ccbb2c34',
        'info_dict': {
            'id': 'v5pv5f',
            'ext': 'mp4',
            'title': 'WMAR 2 News Latest Headlines | October 20, 6pm',
            'timestamp': 1571611968,
            'upload_date': '20191020',
        }
    }, {
        'url': 'https://rumble.com/embed/ufe9n.v5pv5f',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Query the embedJS endpoint and build the info dict for *url*."""
        video_id = self._match_id(url)
        video = self._download_json(
            'https://rumble.com/embedJS/', video_id,
            query={'request': 'video', 'v': video_id})
        title = video['title']

        formats = []
        sources = video.get('ua') or {}
        for height, ua in sources.items():
            # Slots 0 and 1 of each entry are read as URLs; slot idx + 2 is
            # looked up for a matching 'bitrate' value.
            for idx in (0, 1):
                f_url = try_get(ua, lambda x: x[idx], compat_str)
                if not f_url:
                    continue
                ext = determine_ext(f_url)
                fmt = {
                    'ext': ext,
                    'format_id': '%s-%sp' % (ext, height),
                    'height': int_or_none(height),
                    'url': f_url,
                }
                bitrate = try_get(ua, lambda x: x[idx + 2]['bitrate'])
                if bitrate:
                    fmt['tbr'] = int_or_none(bitrate)
                formats.append(fmt)
        self._sort_formats(formats)

        author = video.get('author') or {}

        info = {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': video.get('i'),
            'timestamp': parse_iso8601(video.get('pubDate')),
            'channel': author.get('name'),
            'channel_url': author.get('url'),
            'duration': int_or_none(video.get('duration')),
        }
        return info
|
@ -0,0 +1,239 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class SkyItPlayerIE(InfoExtractor):
    """Extractor for the player.sky.it embedded player.

    Also serves as the base class for the other Sky Italia extractors, which
    resolve a numeric video id and delegate to this player URL.
    """

    IE_NAME = 'player.sky.it'
    _VALID_URL = r'https?://player\.sky\.it/player/(?:external|social)\.html\?.*?\bid=(?P<id>\d+)'
    _GEO_BYPASS = False
    _DOMAIN = 'sky'
    _PLAYER_TMPL = 'https://player.sky.it/player/external.html?id=%s&domain=%s'
    # http://static.sky.it/static/skyplayer/conf.json
    _TOKEN_MAP = {
        'cielo': 'Hh9O7M8ks5yi6nSROL7bKYz933rdf3GhwZlTLMgvy4Q',
        'hotclub': 'kW020K2jq2lk2eKRJD2vWEg832ncx2EivZlTLQput2C',
        'mtv8': 'A5Nn9GGb326CI7vP5e27d7E4PIaQjota',
        'salesforce': 'C6D585FD1615272C98DE38235F38BD86',
        'sitocommerciale': 'VJwfFuSGnLKnd9Phe9y96WkXgYDCguPMJ2dLhGMb2RE',
        'sky': 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk',
        'skyacademy': 'A6LAn7EkO2Q26FRy0IAMBekX6jzDXYL3',
        'skyarte': 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd',
        'theupfront': 'PRSGmDMsg6QMGc04Obpoy7Vsbn7i2Whp',
    }

    def _player_url_result(self, video_id):
        """Return a url_result pointing at the player page for *video_id*."""
        player_url = self._PLAYER_TMPL % (video_id, self._DOMAIN)
        return self.url_result(player_url, SkyItPlayerIE.ie_key(), video_id)

    def _parse_video(self, video, video_id):
        """Turn a getVideoData/getLivestream JSON object into an info dict."""
        title = video['title']
        is_live = video.get('type') == 'live'
        # Live and on-demand payloads use different key names.
        if is_live:
            url_key, geo_key = 'streaming_url', 'geoblock'
        else:
            url_key, geo_key = 'hls_url', 'geob'
        hls_url = video.get(url_key)
        if not hls_url and video.get(geo_key):
            self.raise_geo_restricted(countries=['IT'])

        if is_live:
            formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4')
        else:
            formats = self._extract_akamai_formats(
                hls_url, video_id, {'http': 'videoplatform.sky.it'})
        self._sort_formats(formats)

        if is_live:
            display_title = self._live_title(title)
        else:
            display_title = title

        return {
            'id': video_id,
            'title': display_title,
            'formats': formats,
            'thumbnail': dict_get(video, ('video_still', 'video_still_medium', 'thumb')),
            'description': video.get('short_desc') or None,
            'timestamp': unified_timestamp(video.get('create_date')),
            'duration': int_or_none(video.get('duration_sec')) or parse_duration(video.get('duration')),
            'is_live': is_live,
        }

    def _real_extract(self, url):
        """Fetch video data using the token for the URL's ``domain`` parameter."""
        video_id = self._match_id(url)
        query_string = compat_urllib_parse_urlparse(url).query
        domain = compat_parse_qs(query_string).get('domain', [None])[0]
        # Falls back to the 'sky' token when the domain is missing or unknown.
        token = dict_get(self._TOKEN_MAP, (domain, 'sky'))
        api_query = {
            'caller': 'sky',
            'id': video_id,
            'token': token
        }
        video = self._download_json(
            'https://apid.sky.it/vdp/v1/getVideoData',
            video_id, query=api_query,
            headers=self.geo_verification_headers())
        return self._parse_video(video, video_id)
|
||||
|
||||
|
||||
class SkyItVideoIE(SkyItPlayerIE):
    """Extractor for video pages on video/xfactor/masterchef.sky.it."""

    IE_NAME = 'video.sky.it'
    _VALID_URL = r'https?://(?:masterchef|video|xfactor)\.sky\.it(?:/[^/]+)*/video/[0-9a-z-]+-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://video.sky.it/news/mondo/video/uomo-ucciso-da-uno-squalo-in-australia-631227',
        'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd',
        'info_dict': {
            'id': '631227',
            'ext': 'mp4',
            'title': 'Uomo ucciso da uno squalo in Australia',
            'timestamp': 1606036192,
            'upload_date': '20201122',
        }
    }, {
        'url': 'https://xfactor.sky.it/video/x-factor-2020-replay-audizioni-1-615820',
        'only_matching': True,
    }, {
        'url': 'https://masterchef.sky.it/video/masterchef-9-cosa-e-successo-nella-prima-puntata-562831',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to the player URL for the numeric id found in *url*."""
        return self._player_url_result(self._match_id(url))
|
||||
|
||||
|
||||
class SkyItVideoLiveIE(SkyItPlayerIE):
    """Extractor for live streams under video.sky.it/diretta/."""

    IE_NAME = 'video.sky.it:live'
    _VALID_URL = r'https?://video\.sky\.it/diretta/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://video.sky.it/diretta/tg24',
        'info_dict': {
            'id': '1',
            'ext': 'mp4',
            'title': r're:Diretta TG24 \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
            'description': 'Guarda la diretta streaming di SkyTg24, segui con Sky tutti gli appuntamenti e gli speciali di Tg24.',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Read the asset id from the page's __NEXT_DATA__ and fetch the stream."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        next_data_json = self._search_regex(
            r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>',
            webpage, 'next data')
        next_data = self._parse_json(next_data_json, display_id)
        content = next_data['props']['initialState']['livePage']['content']
        asset_id = compat_str(content['asset_id'])
        livestream = self._download_json(
            'https://apid.sky.it/vdp/v1/getLivestream',
            asset_id, query={'id': asset_id})
        return self._parse_video(livestream, asset_id)
|
||||
|
||||
|
||||
class SkyItIE(SkyItPlayerIE):
    """Extractor for dated article pages on sport.sky.it and tg24.sky.it.

    Subclasses override ``_VALID_URL``, ``_DOMAIN`` and ``_VIDEO_ID_REGEX``.
    """

    IE_NAME = 'sky.it'
    _VALID_URL = r'https?://(?:sport|tg24)\.sky\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://sport.sky.it/calcio/serie-a/2020/11/21/juventus-cagliari-risultato-gol',
        'info_dict': {
            'id': '631201',
            'ext': 'mp4',
            'title': 'Un rosso alla violenza: in campo per i diritti delle donne',
            'upload_date': '20201121',
            'timestamp': 1605995753,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
        'url': 'https://tg24.sky.it/mondo/2020/11/22/australia-squalo-uccide-uomo',
        'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd',
        'info_dict': {
            'id': '631227',
            'ext': 'mp4',
            'title': 'Uomo ucciso da uno squalo in Australia',
            'timestamp': 1606036192,
            'upload_date': '20201122',
        },
    }]
    _VIDEO_ID_REGEX = r'data-videoid="(\d+)"'

    def _real_extract(self, url):
        """Scrape the numeric video id from the page and hand off to the player."""
        page_slug = self._match_id(url)
        html = self._download_webpage(url, page_slug)
        numeric_id = self._search_regex(self._VIDEO_ID_REGEX, html, 'video id')
        return self._player_url_result(numeric_id)
|
||||
|
||||
|
||||
class SkyItAcademyIE(SkyItIE):
    # Extractor for dated article pages on skyacademy.it.
    # Only configuration is overridden here; the inherited
    # SkyItIE._real_extract scrapes the page with _VIDEO_ID_REGEX and builds
    # the player URL using _DOMAIN.
    IE_NAME = 'skyacademy.it'
    _VALID_URL = r'https?://(?:www\.)?skyacademy\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://www.skyacademy.it/eventi-speciali/2019/07/05/a-lezione-di-cinema-con-sky-academy-/',
        'md5': 'ced5c26638b7863190cbc44dd6f6ba08',
        'info_dict': {
            'id': '523458',
            'ext': 'mp4',
            'title': 'Sky Academy "The Best CineCamp 2019"',
            'timestamp': 1562843784,
            'upload_date': '20190711',
        }
    }]
    # Value substituted for ``domain`` in the player URL template.
    _DOMAIN = 'skyacademy'
    # The numeric video id is embedded in an element id on these pages.
    _VIDEO_ID_REGEX = r'id="news-videoId_(\d+)"'
|
||||
|
||||
|
||||
class SkyItArteIE(SkyItIE):
    # Extractor for arte.sky.it video pages.
    # Only configuration is overridden here; the inherited
    # SkyItIE._real_extract scrapes the page with _VIDEO_ID_REGEX and builds
    # the player URL using _DOMAIN.
    IE_NAME = 'arte.sky.it'
    _VALID_URL = r'https?://arte\.sky\.it/video/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://arte.sky.it/video/serie-musei-venezia-collezionismo-12-novembre/',
        'md5': '515aee97b87d7a018b6c80727d3e7e17',
        'info_dict': {
            'id': '627926',
            'ext': 'mp4',
            'title': "Musei Galleria Franchetti alla Ca' d'Oro Palazzo Grimani",
            'upload_date': '20201106',
            'timestamp': 1604664493,
        }
    }]
    # Value substituted for ``domain`` in the player URL template.
    _DOMAIN = 'skyarte'
    # The id is taken from the ``id=`` query parameter of an embedded
    # player.sky.it iframe.
    _VIDEO_ID_REGEX = r'(?s)<iframe[^>]+src="(?:https:)?//player\.sky\.it/player/external\.html\?[^"]*\bid=(\d+)'
|
||||
|
||||
|
||||
class CieloTVItIE(SkyItIE):
    # Extractor for cielotv.it ``/video/<name>.html`` pages.
    # Only configuration is overridden here; the inherited
    # SkyItIE._real_extract scrapes the page with _VIDEO_ID_REGEX and builds
    # the player URL using _DOMAIN.
    IE_NAME = 'cielotv.it'
    _VALID_URL = r'https?://(?:www\.)?cielotv\.it/video/(?P<id>[^.]+)\.html'
    _TESTS = [{
        'url': 'https://www.cielotv.it/video/Il-lunedi-e-sempre-un-dramma.html',
        'md5': 'c4deed77552ba901c2a0d9258320304b',
        'info_dict': {
            'id': '499240',
            'ext': 'mp4',
            'title': 'Il lunedì è sempre un dramma',
            'upload_date': '20190329',
            'timestamp': 1553862178,
        }
    }]
    # Value substituted for ``domain`` in the player URL template.
    _DOMAIN = 'cielo'
    # The numeric id appears as a ``videoId = "<digits>"`` assignment.
    _VIDEO_ID_REGEX = r'videoId\s*=\s*"(\d+)"'
|
||||
|
||||
|
||||
class TV8ItIE(SkyItVideoIE):
    # Extractor for tv8.it ``/showvideo/<id>`` pages.
    # The numeric id is part of the URL, so the inherited
    # SkyItVideoIE._real_extract passes it straight to the player URL, which
    # is built with the _DOMAIN set below.
    IE_NAME = 'tv8.it'
    _VALID_URL = r'https?://tv8\.it/showvideo/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://tv8.it/showvideo/630529/ogni-mattina-ucciso-asino-di-andrea-lo-cicero/18-11-2020/',
        'md5': '9ab906a3f75ea342ed928442f9dabd21',
        'info_dict': {
            'id': '630529',
            'ext': 'mp4',
            'title': 'Ogni mattina - Ucciso asino di Andrea Lo Cicero',
            'timestamp': 1605721374,
            'upload_date': '20201118',
        }
    }]
    # Value substituted for ``domain`` in the player URL template.
    _DOMAIN = 'mtv8'
|
@ -0,0 +1,176 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
def _extract_episode(data, episode_id=None):
    """Build an info dict for one Spreaker episode from its API JSON.

    ``data`` must contain 'title' and 'download_url'. When ``episode_id`` is
    not given, ``data['episode_id']`` is used instead.
    """
    title = data['title']
    download_url = data['download_url']

    series = try_get(data, lambda x: x['show']['title'], compat_str)
    uploader = try_get(data, lambda x: x['author']['fullname'], compat_str)

    image_urls = (
        url_or_none(data.get('%s_url' % image))
        for image in ('image_original', 'image_medium', 'image'))
    thumbnails = [{'url': image_url} for image_url in image_urls if image_url]

    def stats(key):
        # Tries '<key>s_count' at the top level first, then stats['<key>s'].
        getters = (
            lambda x: x['%ss_count' % key],
            lambda x: x['stats']['%ss' % key],
        )
        return int_or_none(try_get(data, getters))

    def duration(key):
        # The raw value is divided by 1000 via ``scale``.
        return float_or_none(data.get(key), scale=1000)

    info = {
        'id': compat_str(episode_id or data['episode_id']),
        'url': download_url,
        'display_id': data.get('permalink'),
        'title': title,
        'description': data.get('description'),
        'timestamp': unified_timestamp(data.get('published_at')),
        'uploader': uploader,
        'uploader_id': str_or_none(data.get('author_id')),
        'creator': uploader,
        'duration': duration('duration') or duration('length'),
        'view_count': stats('play'),
        'like_count': stats('like'),
        'comment_count': stats('message'),
        'format': 'MPEG Layer 3',
        'format_id': 'mp3',
        'container': 'mp3',
        'ext': 'mp3',
        'thumbnails': thumbnails,
        'series': series,
        'extractor_key': SpreakerIE.ie_key(),
    }
    return info
|
||||
|
||||
|
||||
class SpreakerIE(InfoExtractor):
    """Extractor for api.spreaker.com episode URLs."""

    _VALID_URL = r'''(?x)
                    https?://
                        api\.spreaker\.com/
                        (?:
                            (?:download/)?episode|
                            v2/episodes
                        )/
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'https://api.spreaker.com/episode/12534508',
        'info_dict': {
            'id': '12534508',
            'display_id': 'swm-ep15-how-to-market-your-music-part-2',
            'ext': 'mp3',
            'title': 'EP:15 | Music Marketing (Likes) - Part 2',
            'description': 'md5:0588c43e27be46423e183076fa071177',
            'timestamp': 1502250336,
            'upload_date': '20170809',
            'uploader': 'SWM',
            'uploader_id': '9780658',
            'duration': 1063.42,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'series': 'Success With Music (SWM)',
        },
    }, {
        'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',
        'only_matching': True,
    }, {
        'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the v2 episode JSON and convert it to an info dict."""
        episode_id = self._match_id(url)
        api_url = 'https://api.spreaker.com/v2/episodes/%s' % episode_id
        payload = self._download_json(api_url, episode_id)
        episode = payload['response']['episode']
        return _extract_episode(episode, episode_id)
|
||||
|
||||
|
||||
class SpreakerPageIE(InfoExtractor):
    """Extractor for www.spreaker.com ``/user/<user>/<episode>`` pages."""

    _VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Find the numeric episode id in the page and defer to SpreakerIE."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        id_patterns = (
            r'data-episode_id=["\'](?P<id>\d+)',
            r'episode_id\s*:\s*(?P<id>\d+)',
        )
        episode_id = self._search_regex(id_patterns, webpage, 'episode id')
        api_url = 'https://api.spreaker.com/episode/%s' % episode_id
        return self.url_result(
            api_url, ie=SpreakerIE.ie_key(), video_id=episode_id)
|
||||
|
||||
|
||||
class SpreakerShowIE(InfoExtractor):
    """Extractor for api.spreaker.com show URLs; yields a playlist of episodes."""

    _VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://api.spreaker.com/show/4652058',
        'info_dict': {
            'id': '4652058',
        },
        'playlist_mincount': 118,
    }]

    def _entries(self, show_id):
        """Yield an info dict per episode, paging through the show listing."""
        for page_num in itertools.count(1):
            page = self._download_json(
                'https://api.spreaker.com/show/%s/episodes' % show_id,
                show_id, note='Downloading JSON page %d' % page_num,
                query={
                    'page': page_num,
                    'max_per_page': 100,
                })
            pager = try_get(page, lambda x: x['response']['pager'], dict)
            if not pager:
                return
            results = pager.get('results')
            if not (results and isinstance(results, list)):
                return
            for episode in results:
                # Entries that are not dicts are skipped.
                if isinstance(episode, dict):
                    yield _extract_episode(episode)
            if pager.get('last_page') == page_num:
                return

    def _real_extract(self, url):
        """Return a playlist result wrapping the lazily-fetched episodes."""
        show_id = self._match_id(url)
        entries = self._entries(show_id)
        return self.playlist_result(entries, playlist_id=show_id)
|
||||
|
||||
|
||||
class SpreakerShowPageIE(InfoExtractor):
    """Extractor for www.spreaker.com ``/show/<slug>`` pages."""

    _VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.spreaker.com/show/success-with-music',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Find the numeric show id in the page and defer to SpreakerShowIE."""
        slug = self._match_id(url)
        html = self._download_webpage(url, slug)
        show_id = self._search_regex(
            r'show_id\s*:\s*(?P<id>\d+)', html, 'show id')
        api_url = 'https://api.spreaker.com/show/%s' % show_id
        return self.url_result(
            api_url, ie=SpreakerShowIE.ie_key(), video_id=show_id)
|
@ -0,0 +1,97 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ThisVidIE(InfoExtractor):
    """Extractor for thisvid.com video pages and embeds."""

    # The optional trailing slash sits outside the ``id`` group so that it
    # does not end up in the display id.
    _VALID_URL = r'https?://(?:www\.)?thisvid\.com/(?P<type>videos|embed)/(?P<id>[A-Za-z0-9-]+)/?'
    _TESTS = [{
        'url': 'https://thisvid.com/videos/french-boy-pantsed/',
        'md5': '3397979512c682f6b85b3b04989df224',
        'info_dict': {
            'id': '2400174',
            'ext': 'mp4',
            'title': 'French Boy Pantsed',
            'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
            'age_limit': 18,
        }
    }, {
        'url': 'https://thisvid.com/embed/2400174/',
        'md5': '3397979512c682f6b85b3b04989df224',
        'info_dict': {
            'id': '2400174',
            'ext': 'mp4',
            'title': 'French Boy Pantsed',
            'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
            'age_limit': 18,
        }
    }]

    def _real_extract(self, url):
        """Scrape the player variables from the page and decode the media URL.

        The title, video id, obfuscated video URL and license code are
        required. The player version, thumbnail and (for embeds) display id
        are optional and left as None when they cannot be found.
        """
        main_id = self._match_id(url)
        webpage = self._download_webpage(url, main_id)

        # URL decryptor was reversed from version 4.0.4, later verified working with 5.2.0 and may change in the future.
        kvs_version = self._html_search_regex(
            r'<script [^>]+?src="https://thisvid\.com/player/kt_player\.js\?v=(\d+(\.\d+)+)">',
            webpage, 'kvs_version', fatal=False)
        # The lookup is non-fatal, so kvs_version may be None; the helper has
        # already warned in that case.
        if kvs_version and not kvs_version.startswith("5."):
            self.report_warning("Major version change (" + kvs_version + ") in player engine--Download may fail.")

        title = self._html_search_regex(
            r'<title>(?:Video: )?(.+?)(?: - (?:\w+ porn at )?ThisVid(?:\.com| tube))?</title>',
            webpage, 'title')
        # video_id, video_url and license_code from the 'flashvars' JSON object:
        video_id = self._html_search_regex(r"video_id: '([0-9]+)',", webpage, 'video_id')
        video_url = self._html_search_regex(r"video_url: '(function/0/.+?)',", webpage, 'video_url')
        license_code = self._html_search_regex(r"license_code: '([0-9$]{16})',", webpage, 'license_code')
        thumbnail = self._html_search_regex(
            r"preview_url: '((?:https?:)?//media\.thisvid\.com/.+?\.jpg)',",
            webpage, 'thumbnail', fatal=False)
        # Protocol-relative thumbnails get an explicit scheme; None is kept.
        if thumbnail and thumbnail.startswith("//"):
            thumbnail = "https:" + thumbnail

        if re.match(self._VALID_URL, url).group('type') == "videos":
            display_id = main_id
        else:
            # Embed URLs only carry the numeric id; take the slug from the
            # canonical link. ``group='id'`` is required because the first
            # group of _VALID_URL is ``type``.
            display_id = self._search_regex(
                r'<link rel="canonical" href="' + self._VALID_URL + r'">',
                webpage, 'display_id', fatal=False, group='id')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'url': getrealurl(video_url, license_code),
            'thumbnail': thumbnail,
            'age_limit': 18,
        }
|
||||
|
||||
|
||||
def getrealurl(video_url, license_code):
    """Decode an obfuscated ``function/0/...`` video URL.

    The first two '/'-separated parts of ``video_url`` are dropped. The first
    32 characters of the part at index 5 of the remainder are then
    unscrambled by a series of character swaps driven by the token derived
    from ``license_code``.
    """
    segments = video_url.split('/')[2:]
    token = getlicensetoken(license_code)
    scrambled = list(segments[5][:32])

    # Walk the positions from last to first, swapping each with the position
    # computed from the digit sum of the token's tail.
    for pos in reversed(range(len(scrambled))):
        other = (pos + sum([int(digit) for digit in token[pos:]])) % 32
        scrambled[pos], scrambled[other] = scrambled[other], scrambled[pos]

    segments[5] = "".join(scrambled) + segments[5][32:]
    return "/".join(segments)
|
||||
|
||||
|
||||
def getlicensetoken(license):
    """Derive the digit-string token used to unscramble video URLs.

    ``license`` is the player's license code. It is normalised by removing
    '$' and replacing '0' with '1', then split into two halves that overlap
    on the middle digit. Four times the absolute difference of the halves
    gives a string of offset digits. Each offset digit is added (mod 10) to
    the four characters of the raw ``license`` that follow its position.
    """
    normalized = license.replace("$", "").replace("0", "1")
    center = len(normalized) // 2
    front_half = int(normalized[:center + 1])
    back_half = int(normalized[center:])

    offsets = str(4 * abs(front_half - back_half))
    return "".join(
        str((int(license[pos + step]) + int(offsets[pos])) % 10)
        for pos in range(center + 1)
        for step in range(1, 5))
|
@ -1,74 +1,24 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
from .nbc import NBCIE
|
||||
|
||||
|
||||
class USANetworkIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?usanetwork\.com/(?:[^/]+/videos|movies)/(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.usanetwork.com/mrrobot/videos/hpe-cybersecurity',
|
||||
'md5': '33c0d2ba381571b414024440d08d57fd',
|
||||
class USANetworkIE(NBCIE):
|
||||
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?usanetwork\.com/(?:[^/]+/videos?|movies?)/(?:[^/]+/)?(?P<id>\d+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.usanetwork.com/peacock-trailers/video/intelligence-trailer/4185302',
|
||||
'info_dict': {
|
||||
'id': '3086229',
|
||||
'id': '4185302',
|
||||
'ext': 'mp4',
|
||||
'title': 'HPE Cybersecurity',
|
||||
'description': 'The more we digitize our world, the more vulnerable we are.',
|
||||
'upload_date': '20160818',
|
||||
'timestamp': 1471535460,
|
||||
'uploader': 'NBCU-USA',
|
||||
'title': 'Intelligence (Trailer)',
|
||||
'description': 'A maverick NSA agent enlists the help of a junior systems analyst in a workplace power grab.',
|
||||
'upload_date': '20200715',
|
||||
'timestamp': 1594785600,
|
||||
'uploader': 'NBCU-MPAT',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
def _x(name, default=NO_DEFAULT):
|
||||
return self._search_regex(
|
||||
r'data-%s\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % name,
|
||||
webpage, name, default=default, group='value')
|
||||
|
||||
video_id = _x('mpx-guid')
|
||||
title = _x('episode-title')
|
||||
mpx_account_id = _x('mpx-account-id', '2304992029')
|
||||
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
}
|
||||
if _x('is-full-episode', None) == '1':
|
||||
query['manifest'] = 'm3u'
|
||||
|
||||
if _x('is-entitlement', None) == '1':
|
||||
adobe_pass = {}
|
||||
drupal_settings = self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||
webpage, 'drupal settings', fatal=False)
|
||||
if drupal_settings:
|
||||
drupal_settings = self._parse_json(drupal_settings, video_id, fatal=False)
|
||||
if drupal_settings:
|
||||
adobe_pass = drupal_settings.get('adobePass', {})
|
||||
resource = self._get_mvpd_resource(
|
||||
adobe_pass.get('adobePassResourceId', 'usa'),
|
||||
title, video_id, _x('episode-rating', 'TV-14'))
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, video_id, adobe_pass.get('adobePassRequestorId', 'usa'), resource)
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(update_url_query(
|
||||
'http://link.theplatform.com/s/HNK2IC/media/guid/%s/%s' % (mpx_account_id, video_id),
|
||||
query), {'force_smil_url': True}),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'series': _x('show-title', None),
|
||||
'episode': title,
|
||||
'ie_key': 'ThePlatform',
|
||||
})
|
||||
return info
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue