[bloomberg] Extract the available formats (closes #2776)

It uses a helper method in the InfoExtractor class.
The downloader will pick the requested formats using the bitrate in the info dict.
totalwebcasting
Jaime Marquínez Ferrándiz 10 years ago
parent 4958ae2058
commit 31bb8d3f51

@ -220,6 +220,7 @@ class F4mFD(FileDownloader):
def real_download(self, filename, info_dict):
man_url = info_dict['url']
requested_bitrate = info_dict.get('tbr')
self.to_screen('[download] Downloading f4m manifest')
manifest = self.ydl.urlopen(man_url).read()
self.report_destination(filename)
@ -233,8 +234,14 @@ class F4mFD(FileDownloader):
doc = etree.fromstring(manifest)
formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
formats = sorted(formats, key=lambda f: f[0])
rate, media = formats[-1]
if requested_bitrate is None:
# get the best format
formats = sorted(formats, key=lambda f: f[0])
rate, media = formats[-1]
else:
rate, media = list(filter(
lambda f: int(f[0]) == requested_bitrate, formats))[0]
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text)
metadata = base64.b64decode(media.find(_add_ns('metadata')).text)

@ -10,7 +10,7 @@ class BloombergIE(InfoExtractor):
_TEST = {
'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
'md5': '7bf08858ff7c203c870e8a6190e221e5',
# The md5 checksum changes
'info_dict': {
'id': 'qurhIVlJSB6hzkVi229d8g',
'ext': 'flv',
@ -31,8 +31,7 @@ class BloombergIE(InfoExtractor):
return {
'id': name.split('-')[-1],
'title': title,
'url': f4m_url,
'ext': 'flv',
'formats': self._extract_f4m_formats(f4m_url, name),
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
}

@ -18,6 +18,7 @@ from ..utils import (
clean_html,
compiled_regex_type,
ExtractorError,
int_or_none,
RegexNotFoundError,
sanitize_filename,
unescapeHTML,
@ -590,6 +591,22 @@ class InfoExtractor(object):
self.to_screen(msg)
time.sleep(timeout)
def _extract_f4m_formats(self, manifest_url, video_id):
manifest = self._download_xml(manifest_url, video_id)
formats = []
for media_el in manifest.findall('{http://ns.adobe.com/f4m/1.0}media'):
formats.append({
'url': manifest_url,
'ext': 'flv',
'tbr': int_or_none(media_el.attrib.get('bitrate')),
'width': int_or_none(media_el.attrib.get('width')),
'height': int_or_none(media_el.attrib.get('height')),
})
self._sort_formats(formats)
return formats
class SearchInfoExtractor(InfoExtractor):
"""

Loading…
Cancel
Save