Add an extractor for faz.net (closes #1582)

rtmp_test
Jaime Marquínez Ferrándiz 11 years ago
parent 4193a453c2
commit 63da13e829

@ -34,6 +34,7 @@ from .eighttracks import EightTracksIE
from .escapist import EscapistIE from .escapist import EscapistIE
from .exfm import ExfmIE from .exfm import ExfmIE
from .facebook import FacebookIE from .facebook import FacebookIE
from .faz import FazIE
from .fktv import ( from .fktv import (
FKTVIE, FKTVIE,
FKTVPosteckeIE, FKTVPosteckeIE,

@ -0,0 +1,60 @@
# encoding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
determine_ext,
clean_html,
get_element_by_attribute,
)
class FazIE(InfoExtractor):
IE_NAME = u'faz.net'
_VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+).html'
_TEST = {
u'url': u'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
u'file': u'12610585.mp4',
u'info_dict': {
u'title': u'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher',
u'description': u'md5:1453fbf9a0d041d985a47306192ea253',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
self.to_screen(video_id)
webpage = self._download_webpage(url, video_id)
config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage,
u'config xml url')
config_xml = self._download_webpage(config_xml_url, video_id,
u'Downloading config xml')
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
encodings = config.find('ENCODINGS')
formats = []
for code in ['LOW', 'HIGH', 'HQ']:
encoding = encodings.find(code)
if encoding is None:
continue
encoding_url = encoding.find('FILENAME').text
formats.append({
'url': encoding_url,
'ext': determine_ext(encoding_url),
'format_id': code.lower(),
})
descr_html = get_element_by_attribute('class', 'Content Copy', webpage)
info = {
'id': video_id,
'title': self._og_search_title(webpage),
'formats': formats,
'description': clean_html(descr_html),
'thumbnail': config.find('STILL/STILL_BIG').text,
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info
Loading…
Cancel
Save