[tudou] Merge into youku extractor (fixes #12214)

Also, there are no tudou playlists anymore. All playlist URLs points to youku
playlists.
master
Yen Chi Hsuan 7 years ago
parent d3d4ba7f24
commit c130f0a37b
No known key found for this signature in database
GPG Key ID: 7F902A182457CA23

@ -5,6 +5,7 @@ Core
* [postprocessor/ffmpeg] Fix metadata filename handling on Python 2 (#13182) * [postprocessor/ffmpeg] Fix metadata filename handling on Python 2 (#13182)
Extractors Extractors
* [tudou] Merge into youku extractor (#12214)
* [youku:show] Fix extraction * [youku:show] Fix extraction
* [youku] Fix extraction (#13191) * [youku] Fix extraction (#13191)

@ -1019,11 +1019,6 @@ from .trilulilu import TriluliluIE
from .trutv import TruTVIE from .trutv import TruTVIE
from .tube8 import Tube8IE from .tube8 import Tube8IE
from .tubitv import TubiTvIE from .tubitv import TubiTvIE
from .tudou import (
TudouIE,
TudouPlaylistIE,
TudouAlbumIE,
)
from .tumblr import TumblrIE from .tumblr import TumblrIE
from .tunein import ( from .tunein import (
TuneInClipIE, TuneInClipIE,

@ -3,138 +3,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
InAdvancePagedList,
float_or_none,
unescapeHTML,
)
class TudouIE(InfoExtractor):
IE_NAME = 'tudou'
_VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:(?:programs|wlplay)/view|(?:listplay|albumplay)/[\w-]{11})/(?P<id>[\w-]{11})'
_TESTS = [{
'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
'md5': '140a49ed444bd22f93330985d8475fcb',
'info_dict': {
'id': '159448201',
'ext': 'f4v',
'title': '卡马乔国足开大脚长传冲吊集锦',
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1372113489000,
'description': '卡马乔卡家军,开大脚先进战术不完全集锦!',
'duration': 289.04,
'view_count': int,
'filesize': int,
}
}, {
'url': 'http://www.tudou.com/programs/view/ajX3gyhL0pc/',
'info_dict': {
'id': '117049447',
'ext': 'f4v',
'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012',
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1349207518000,
'description': 'md5:294612423894260f2dcd5c6c04fe248b',
'duration': 5478.33,
'view_count': int,
'filesize': int,
}
}]
_PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf'
# Translated from tudou/tools/TVCHelper.as in PortalPlayer_193.swf
# 0001, 0002 and 4001 are not included as they indicate temporary issues
TVC_ERRORS = {
'0003': 'The video is deleted or does not exist',
'1001': 'This video is unavailable due to licensing issues',
'1002': 'This video is unavailable as it\'s under review',
'1003': 'This video is unavailable as it\'s under review',
'3001': 'Password required',
'5001': 'This video is available in Mainland China only due to licensing issues',
'7001': 'This video is unavailable',
'8001': 'This video is unavailable due to licensing issues',
}
def _url_for_id(self, video_id, quality=None):
info_url = 'http://v2.tudou.com/f?id=' + compat_str(video_id)
if quality:
info_url += '&hd' + quality
xml_data = self._download_xml(info_url, video_id, 'Opening the info XML page')
error = xml_data.attrib.get('error')
if error is not None:
raise ExtractorError('Tudou said: %s' % error, expected=True)
final_url = xml_data.text
return final_url
def _real_extract(self, url):
video_id = self._match_id(url)
item_data = self._download_json(
'http://www.tudou.com/tvp/getItemInfo.action?ic=%s' % video_id, video_id)
youku_vcode = item_data.get('vcode')
if youku_vcode:
return self.url_result('youku:' + youku_vcode, ie='Youku')
if not item_data.get('itemSegs'):
tvc_code = item_data.get('tvcCode')
if tvc_code:
err_msg = self.TVC_ERRORS.get(tvc_code)
if err_msg:
raise ExtractorError('Tudou said: %s' % err_msg, expected=True)
raise ExtractorError('Unexpected error %s returned from Tudou' % tvc_code)
raise ExtractorError('Unxpected error returned from Tudou')
title = unescapeHTML(item_data['kw'])
description = item_data.get('desc')
thumbnail_url = item_data.get('pic')
view_count = int_or_none(item_data.get('playTimes'))
timestamp = int_or_none(item_data.get('pt'))
segments = self._parse_json(item_data['itemSegs'], video_id)
# It looks like the keys are the arguments that have to be passed as
# the hd field in the request url, we pick the higher
# Also, filter non-number qualities (see issue #3643).
quality = sorted(filter(lambda k: k.isdigit(), segments.keys()),
key=lambda k: int(k))[-1]
parts = segments[quality]
len_parts = len(parts)
if len_parts > 1:
self.to_screen('%s: found %s parts' % (video_id, len_parts))
def part_func(partnum):
part = parts[partnum]
part_id = part['k']
final_url = self._url_for_id(part_id, quality)
ext = (final_url.split('?')[0]).split('.')[-1]
return [{
'id': '%s' % part_id,
'url': final_url,
'ext': ext,
'title': title,
'thumbnail': thumbnail_url,
'description': description,
'view_count': view_count,
'timestamp': timestamp,
'duration': float_or_none(part.get('seconds'), 1000),
'filesize': int_or_none(part.get('size')),
'http_headers': {
'Referer': self._PLAYER_URL,
},
}]
entries = InAdvancePagedList(part_func, len_parts, 1)
return {
'_type': 'multi_video',
'entries': entries,
'id': video_id,
'title': title,
}
class TudouPlaylistIE(InfoExtractor): class TudouPlaylistIE(InfoExtractor):

@ -22,7 +22,9 @@ class YoukuIE(InfoExtractor):
IE_DESC = '优酷' IE_DESC = '优酷'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
(?: (?:
http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)| https?://(
(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
video\.tudou\.com/v/)|
youku:) youku:)
(?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|) (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
''' '''
@ -71,6 +73,16 @@ class YoukuIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': '我的世界☆明月庄主☆车震猎杀☆杀人艺术Minecraft', 'title': '我的世界☆明月庄主☆车震猎杀☆杀人艺术Minecraft',
}, },
}, {
'url': 'http://video.tudou.com/v/XMjIyNzAzMTQ4NA==.html?f=46177805',
'info_dict': {
'id': 'XMjIyNzAzMTQ4NA',
'ext': 'mp4',
'title': '卡马乔国足开大脚长传冲吊集锦',
},
}, {
'url': 'http://video.tudou.com/v/XMjE4ODI3OTg2MA==.html',
'only_matching': True,
}] }]
@staticmethod @staticmethod
@ -107,7 +119,7 @@ class YoukuIE(InfoExtractor):
# request basic data # request basic data
basic_data_params = { basic_data_params = {
'vid': video_id, 'vid': video_id,
'ccode': '0401', 'ccode': '0402' if 'tudou.com' in url else '0401',
'client_ip': '192.168.1.1', 'client_ip': '192.168.1.1',
'utid': cna, 'utid': cna,
'client_ts': time.time() / 1000, 'client_ts': time.time() / 1000,

Loading…
Cancel
Save