From 86b868c6a52a9fba2c11ceb4268c7d088d2960f6 Mon Sep 17 00:00:00 2001 From: Unknown Date: Tue, 13 Oct 2020 01:58:59 +0200 Subject: [PATCH 1/4] [Twitter/t.co] simple extractor added. modification needed. --- youtube_dlc/extractor/extractors.py | 1 + youtube_dlc/extractor/twitter.py | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/youtube_dlc/extractor/extractors.py b/youtube_dlc/extractor/extractors.py index f0860e04d..d31edd7c8 100644 --- a/youtube_dlc/extractor/extractors.py +++ b/youtube_dlc/extractor/extractors.py @@ -1268,6 +1268,7 @@ from .twitter import ( TwitterIE, TwitterAmplifyIE, TwitterBroadcastIE, + TwitterShortenerIE, ) from .udemy import ( UdemyIE, diff --git a/youtube_dlc/extractor/twitter.py b/youtube_dlc/extractor/twitter.py index 4284487db..8c35e285c 100644 --- a/youtube_dlc/extractor/twitter.py +++ b/youtube_dlc/extractor/twitter.py @@ -608,3 +608,11 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE): info['formats'] = self._extract_pscp_m3u8_formats( m3u8_url, broadcast_id, m3u8_id, state, width, height) return info + + +class TwitterShortenerIE(TwitterBaseIE): + IE_NAME = 'twitter:shortener' + _VALID_URL = r'https?://t.co/' + + def _real_extract(self, url): + print(url) \ No newline at end of file From f33b7b5eb454ab182a3c8740875274469f5c3e0b Mon Sep 17 00:00:00 2001 From: Unknown Date: Tue, 13 Oct 2020 02:03:48 +0200 Subject: [PATCH 2/4] [Twitter/t.co] showcase expanded how to use generic --- youtube_dlc/extractor/twitter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dlc/extractor/twitter.py b/youtube_dlc/extractor/twitter.py index 8c35e285c..78d8ebdd5 100644 --- a/youtube_dlc/extractor/twitter.py +++ b/youtube_dlc/extractor/twitter.py @@ -615,4 +615,5 @@ class TwitterShortenerIE(TwitterBaseIE): _VALID_URL = r'https?://t.co/' def _real_extract(self, url): - print(url) \ No newline at end of file + print(url) + return self.url_result(url, 'Generic') \ No newline at end of file From 
9e20a9c4473d083189456e438424ec979de4ca6f Mon Sep 17 00:00:00 2001 From: Unknown Date: Sat, 17 Oct 2020 10:24:57 +0200 Subject: [PATCH 3/4] [twitter/t.co] implemented. --- youtube_dlc/extractor/twitter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dlc/extractor/twitter.py b/youtube_dlc/extractor/twitter.py index 78d8ebdd5..95cfa8154 100644 --- a/youtube_dlc/extractor/twitter.py +++ b/youtube_dlc/extractor/twitter.py @@ -615,5 +615,5 @@ class TwitterShortenerIE(TwitterBaseIE): _VALID_URL = r'https?://t.co/' def _real_extract(self, url): - print(url) - return self.url_result(url, 'Generic') \ No newline at end of file + new_url = self._request_webpage(url, None, headers={'User-Agent': 'curl'}).geturl() + return self.url_result(new_url) From a537ab1a094cf782915b654250a6110061b4377a Mon Sep 17 00:00:00 2001 From: Unknown Date: Sun, 18 Oct 2020 02:14:13 +0200 Subject: [PATCH 4/4] [twitter/t.co] update supportedsites, failover replace, tco:id feature --- docs/supportedsites.md | 2 ++ youtube_dlc/extractor/twitter.py | 13 +++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 10c12b87a..c46d122ff 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -540,6 +540,7 @@ - **natgeo:video** - **NationalGeographicTV** - **Naver** + - **Naver:live** - **NBA** - **NBC** - **NBCNews** @@ -976,6 +977,7 @@ - **twitter:amplify** - **twitter:broadcast** - **twitter:card** + - **twitter:shortener** - **udemy** - **udemy:course** - **UDNEmbed**: 聯合影音 diff --git a/youtube_dlc/extractor/twitter.py b/youtube_dlc/extractor/twitter.py index 95cfa8154..ca5e040c6 100644 --- a/youtube_dlc/extractor/twitter.py +++ b/youtube_dlc/extractor/twitter.py @@ -612,8 +612,17 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE): class TwitterShortenerIE(TwitterBaseIE): IE_NAME = 'twitter:shortener' - _VALID_URL = r'https?://t.co/' + _VALID_URL = 
r'https?://t.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)' + _BASE_URL = 'https://t.co/' def _real_extract(self, url): - new_url = self._request_webpage(url, None, headers={'User-Agent': 'curl'}).geturl() + mobj = re.match(self._VALID_URL, url) + eid, id = mobj.group('eid', 'id') + if eid: + id = eid + url = self._BASE_URL + id + new_url = self._request_webpage(url, id, headers={'User-Agent': 'curl'}).geturl() + __UNSAFE_LINK = "https://twitter.com/safety/unsafe_link_warning?unsafe_link=" + if new_url.startswith(__UNSAFE_LINK): + new_url = new_url.replace(__UNSAFE_LINK, "") return self.url_result(new_url)