[khanacademy] Add support (Fixes #2066)

11 years ago · 3d3538e422
parent 0cdad20c75
commit 3d3538e422
4 changed files with 93 additions and 1 deletions
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@ -1,7 +1,6 @@
 #!/usr/bin/env python
 # encoding: utf-8
 # Allow direct execution
 import os
 import sys
@ -30,6 +29,7 @@ from youtube_dl.extractor import (
    SmotriUserIE,
    IviCompilationIE,
    ImdbListIE,
    KhanAcademyIE,
 )
@ -198,6 +198,16 @@ class TestPlaylists(unittest.TestCase):
        self.assertEqual(result['title'], u'Animated and Family Films')
        self.assertTrue(len(result['entries']) >= 48)
    def test_khanacademy_topic(self):
        # A Khan Academy topic URL must be extracted as a playlist whose
        # metadata matches the live "cryptography" topic.
        topic_url = 'https://www.khanacademy.org/math/applied-math/cryptography'
        extractor = KhanAcademyIE(FakeYDL())
        playlist = extractor.extract(topic_url)

        self.assertIsPlaylist(playlist)
        expected_fields = (
            ('id', u'cryptography'),
            ('title', u'Journey into cryptography'),
            ('description', u'How have humans protected their secret messages through history? What has changed today?'),
        )
        for field, expected in expected_fields:
            self.assertEqual(playlist[field], expected)
        # The topic may grow over time, so only a lower bound is checked.
        self.assertTrue(len(playlist['entries']) >= 3)
 # Run this module's tests when the file is executed directly as a script.
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@ -98,6 +98,7 @@ from .justintv import JustinTVIE
 from .jpopsukitv import JpopsukiIE
 from .kankan import KankanIE
 from .keezmovies import KeezMoviesIE
 from .khanacademy import KhanAcademyIE
 from .kickstarter import KickStarterIE
 from .keek import KeekIE
 from .liveleak import LiveLeakIE
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -1,4 +1,5 @@
 import base64
 import json
 import os
 import re
 import socket
@ -260,6 +261,15 @@ class InfoExtractor(object):
            xml_string = transform_source(xml_string)
        return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
    def _download_json(self, url_or_request, video_id,
                       note=u'Downloading JSON metadata',
                       errnote=u'Unable to download JSON metadata'):
        json_string = self._download_webpage(url_or_request, video_id, note, errnote)
        try:
            return json.loads(json_string)
        except ValueError as ve:
            raise ExtractorError('Failed to download JSON', cause=ve)
    def report_warning(self, msg, video_id=None):
        idstr = u'' if video_id is None else u'%s: ' % video_id
        self._downloader.report_warning(
--- a/youtube_dl/extractor/khanacademy.py
+++ b/youtube_dl/extractor/khanacademy.py
@ -0,0 +1,71 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    unified_strdate,
 )
 class KhanAcademyIE(InfoExtractor):
    """Extractor for khanacademy.org video pages and topic pages.

    URLs whose first path component is ``video`` are resolved through the
    Khan Academy videos API and returned as a ``url_transparent`` result;
    every other matching URL is treated as a topic and returned as a
    playlist of its ``Video`` and ``Topic`` children.
    """

    _VALID_URL = r'^https?://(?:www\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
    IE_NAME = 'KhanAcademy'

    _TEST = {
        'url': 'http://www.khanacademy.org/video/one-time-pad',
        'file': 'one-time-pad.mp4',
        'md5': '7021db7f2d47d4fff89b13177cb1e8f4',
        'info_dict': {
            'title': 'The one-time pad',
            'description': 'The perfect cipher',
            'duration': 176,
            'uploader': 'Brit Cruise',
            'upload_date': '20120411',
        }
    }

    def _real_extract(self, url):
        """Return the info dict (video) or playlist dict (topic) for url.

        Only the fields needed to continue extraction ('url', and 'title'
        for a video) are read strictly and raise KeyError when absent from
        the API response; all other metadata is optional and becomes None
        when the response does not carry it.
        """
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')

        if m.group('key') == 'video':
            data = self._download_json(
                'http://api.khanacademy.org/api/v1/videos/' + video_id,
                video_id, 'Downloading video info')

            # Optional metadata: a missing field must not abort extraction.
            date_added = data.get('date_added')
            author_names = data.get('author_names')
            return {
                '_type': 'url_transparent',
                'url': data['url'],
                'id': video_id,
                'title': data['title'],
                'thumbnail': data.get('image_url'),
                'duration': data.get('duration'),
                'description': data.get('description'),
                'uploader': (
                    ', '.join(author_names)
                    if author_names is not None else None),
                'upload_date': (
                    unified_strdate(date_added) if date_added else None),
            }

        # Anything that is not a video page is handled as a topic.
        data = self._download_json(
            'http://api.khanacademy.org/api/v1/topic/' + video_id,
            video_id, 'Downloading topic info')

        # Children of other kinds are skipped; a topic without a children
        # list yields an empty playlist.
        entries = [
            {
                '_type': 'url',
                'url': c['url'],
                'id': c.get('id'),
                'title': c.get('title'),
            }
            for c in data.get('children') or []
            if c.get('kind') in ('Video', 'Topic')]

        return {
            '_type': 'playlist',
            'id': video_id,
            'title': data.get('title'),
            'description': data.get('description'),
            'entries': entries,
        }