From 7a6d33a9a5390d0dab7e9162d6b2552cb0fe23a5 Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 5 May 2016 21:42:37 +0100 Subject: [PATCH] [pbs] extract chapters information --- youtube_dl/extractor/pbs.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 6166dc2ad..0727e381b 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -8,6 +8,7 @@ from ..utils import ( ExtractorError, determine_ext, int_or_none, + float_or_none, js_to_json, strip_jsonp, strip_or_none, @@ -464,6 +465,7 @@ class PBSIE(InfoExtractor): redirects.append(redirect) redirect_urls.add(redirect_url) + chapters = [] # Player pages may also serve different qualities for page in ('widget/partnerplayer', 'portalplayer'): player = self._download_webpage( @@ -479,6 +481,20 @@ class PBSIE(InfoExtractor): extract_redirect_urls(video_info) if not info: info = video_info + if not chapters: + for chapter_data in re.findall(r'(?s)chapters\.push\(({.*?})\)', player): + chapter = self._parse_json(chapter_data, video_id, js_to_json, fatal=False) + if not chapter: + continue + start_time = float_or_none(chapter.get('start_time'), 1000) + duration = float_or_none(chapter.get('duration'), 1000) + if start_time is None or duration is None: + continue + chapters.append({ + 'start_time': start_time, + 'end_time': start_time + duration, + 'title': chapter.get('title'), + }) formats = [] http_url = None @@ -588,4 +604,5 @@ class PBSIE(InfoExtractor): 'upload_date': upload_date, 'formats': formats, 'subtitles': subtitles, + 'chapters': chapters, }