From 1ba2d65ffd2c5b2a82547471bc72a6efdc848a7a Mon Sep 17 00:00:00 2001
From: Gilles Pietri <gilles@wolface.fr>
Date: Wed, 23 Sep 2020 23:09:00 +0200
Subject: [PATCH 1/8] [bandcamp] fix regexp for JSON matching on bandcamp

---
 youtube_dl/extractor/bandcamp.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index f14b407dc..ad1812320 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -91,10 +91,11 @@ class BandcampIE(InfoExtractor):
         duration = None
 
         formats = []
-        track_info = self._parse_json(
-            self._search_regex(
-                r'trackinfo\s*:\s*\[\s*({.+?})\s*\]\s*,\s*?\n',
-                webpage, 'track info', default='{}'), title)
+        trackinfo_block = self._search_regex(
+            r'trackinfo&quot;:\[\s*({.+?})\s*\],&quot;',
+            webpage, 'track info', default='{}')
+        quoted_json = trackinfo_block.replace('&quot;', '"')
+        track_info = self._parse_json(quoted_json, title)
         if track_info:
             file_ = track_info.get('file')
             if isinstance(file_, dict):
@@ -117,7 +118,7 @@ class BandcampIE(InfoExtractor):
 
         def extract(key):
             return self._search_regex(
-                r'\b%s\s*["\']?\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % key,
+                r',&quot;%s&quot;:(&quot;)(?P<value>(?:(?!&quot;).)+)&quot;' % key,
                 webpage, key, default=None, group='value')
 
         artist = extract('artist')

From c0acb120831d18d177ec013a8334370c8c10cc0e Mon Sep 17 00:00:00 2001
From: Gilou <contact+dev@gilouweb.com>
Date: Sat, 26 Sep 2020 17:34:35 +0200
Subject: [PATCH 2/8] [bandcamp] use unescapeHTML instead of a simple replace
 of quotes

---
 youtube_dl/extractor/bandcamp.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index ad1812320..55d110e28 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -92,10 +92,10 @@ class BandcampIE(InfoExtractor):
 
         formats = []
         trackinfo_block = self._search_regex(
-            r'trackinfo&quot;:\[\s*({.+?})\s*\],&quot;',
+            r'trackinfo(?:["\']|&quot;):\[\s*({.+?})\s*\],(?:["\']|&quot;)',
             webpage, 'track info', default='{}')
-        quoted_json = trackinfo_block.replace('&quot;', '"')
-        track_info = self._parse_json(quoted_json, title)
+        unescaped_json = unescapeHTML(trackinfo_block)
+        track_info = self._parse_json(unescaped_json, title)
         if track_info:
             file_ = track_info.get('file')
             if isinstance(file_, dict):
@@ -118,7 +118,7 @@ class BandcampIE(InfoExtractor):
 
         def extract(key):
             return self._search_regex(
-                r',&quot;%s&quot;:(&quot;)(?P<value>(?:(?!&quot;).)+)&quot;' % key,
+                r',(["\']|&quot;)%s\1:\1(?P<value>(?:(?!\1).)+)\1' % key,
                 webpage, key, default=None, group='value')
 
         artist = extract('artist')

From d5a55b1725ff4182664245e9aa9c57817f7c012b Mon Sep 17 00:00:00 2001
From: Gilou <contact+dev@gilouweb.com>
Date: Sun, 27 Sep 2020 14:51:42 +0200
Subject: [PATCH 3/8] [bandcamp] match album titles inside the new JSON data
 block, and unescape the title properly

---
 youtube_dl/extractor/bandcamp.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index 55d110e28..f036a89eb 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -316,10 +316,10 @@ class BandcampAlbumIE(InfoExtractor):
             if self._html_search_meta('duration', elem_content, default=None)]
 
         title = self._html_search_regex(
-            r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
+            r'album_title\s*(?:&quot;|["\']):\s*(?:&quot;|["\'])((?:\\.|[^"\\])+?)(?:&quot;|["\'])',
             webpage, 'title', fatal=False)
         if title:
-            title = title.replace(r'\"', '"')
+            title = unescapeHTML(title)
         return {
             '_type': 'playlist',
             'uploader_id': uploader_id,

From 4014677cb337dc1886a74dc20e7d24c18deaf911 Mon Sep 17 00:00:00 2001
From: Gilou <contact+dev@gilouweb.com>
Date: Sun, 27 Sep 2020 15:11:08 +0200
Subject: [PATCH 4/8] [bandcamp] fix the freeDownloadPage JSON lookup, and use
 the id from the URL to match the tracks

---
 youtube_dl/extractor/bandcamp.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index f036a89eb..eccb867a0 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -128,12 +128,12 @@ class BandcampIE(InfoExtractor):
         release_date = unified_strdate(extract('album_release_date'))
 
         download_link = self._search_regex(
-            r'freeDownloadPage\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+            r'freeDownloadPage(?:["\']|&quot;):\s*(["\']|&quot;)(?P<url>(?:(?!\1).)+)\1', webpage,
             'download link', default=None, group='url')
         if download_link:
             track_id = self._search_regex(
-                r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
-                webpage, 'track id')
+                r'\?id=(?P<id>\d+)&',
+                download_link, 'track id')
 
             download_webpage = self._download_webpage(
                 download_link, track_id, 'Downloading free downloads page')

From 20cab1e2ca4fcff1925c1223705f5e9d8249d49a Mon Sep 17 00:00:00 2001
From: Gilou <contact+dev@gilouweb.com>
Date: Sun, 27 Sep 2020 15:52:55 +0200
Subject: [PATCH 5/8] [bandcamp] update youtube-dl test song information to
 match title as artist - track, and add missing keys from info_dict

---
 youtube_dl/extractor/bandcamp.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index eccb867a0..3d32b1e0f 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -33,8 +33,11 @@ class BandcampIE(InfoExtractor):
         'info_dict': {
             'id': '1812978515',
             'ext': 'mp3',
-            'title': "youtube-dl  \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
+            'title': "youtube-dl  \\ - youtube-dl  \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
             'duration': 9.8485,
+            'uploader': 'youtube-dl  \\',
+            'timestamp': 1354224127,
+            'upload_date': '20121129',
         },
         '_skip': 'There is a limit of 200 free downloads / month for the test song'
     }, {

From a4e1c3e3712a55a78f48c71dd82ab73fa7fc7375 Mon Sep 17 00:00:00 2001
From: Gilou <contact+dev@gilouweb.com>
Date: Mon, 28 Sep 2020 19:42:56 +0200
Subject: [PATCH 6/8] [bandcamp] fix test song uploader name, clean up remaining
 " and \ in data, including album titles

---
 youtube_dl/extractor/bandcamp.py | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index 3d32b1e0f..3405b570a 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -33,9 +33,9 @@ class BandcampIE(InfoExtractor):
         'info_dict': {
             'id': '1812978515',
             'ext': 'mp3',
-            'title': "youtube-dl  \\ - youtube-dl  \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
+            'title': "youtube-dl  \"'/\\\u00e4\u21ad - youtube-dl  \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
             'duration': 9.8485,
-            'uploader': 'youtube-dl  \\',
+            'uploader': "youtube-dl  \"'/\\\u00e4\u21ad",
             'timestamp': 1354224127,
             'upload_date': '20121129',
         },
@@ -43,7 +43,7 @@ class BandcampIE(InfoExtractor):
     }, {
         # free download
         'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
-        'md5': '853e35bf34aa1d6fe2615ae612564b36',
+        'md5': '5d92af55811e47f38962a54c30b07ef0',
         'info_dict': {
             'id': '2650410135',
             'ext': 'aiff',
@@ -94,11 +94,12 @@ class BandcampIE(InfoExtractor):
         duration = None
 
         formats = []
-        trackinfo_block = self._search_regex(
+        trackinfo_block = self._html_search_regex(
             r'trackinfo(?:["\']|&quot;):\[\s*({.+?})\s*\],(?:["\']|&quot;)',
             webpage, 'track info', default='{}')
-        unescaped_json = unescapeHTML(trackinfo_block)
-        track_info = self._parse_json(unescaped_json, title)
+
+        track_info = self._parse_json(trackinfo_block, title)
+
         if track_info:
             file_ = track_info.get('file')
             if isinstance(file_, dict):
@@ -120,9 +121,10 @@ class BandcampIE(InfoExtractor):
             duration = float_or_none(track_info.get('duration'))
 
         def extract(key):
-            return self._search_regex(
-                r',(["\']|&quot;)%s\1:\1(?P<value>(?:(?!\1).)+)\1' % key,
+            data = self._html_search_regex(
+                r',(["\']|&quot;)%s\1:\1(?P<value>(?:\\\1|((?!\1).))+)\1' % key,
                 webpage, key, default=None, group='value')
+            return data.replace(r'\"', '"').replace('\\\\', '\\') if data else data
 
         artist = extract('artist')
         album = extract('album_title')
@@ -319,10 +321,12 @@ class BandcampAlbumIE(InfoExtractor):
             if self._html_search_meta('duration', elem_content, default=None)]
 
         title = self._html_search_regex(
-            r'album_title\s*(?:&quot;|["\']):\s*(?:&quot;|["\'])((?:\\.|[^"\\])+?)(?:&quot;|["\'])',
-            webpage, 'title', fatal=False)
+            r'album_title\s*(?:&quot;|["\']):\s*(&quot;|["\'])(?P<album>(?:\\\1|((?!\1).))+)\1',
+            webpage, 'title', fatal=False, group='album')
+
         if title:
-            title = unescapeHTML(title)
+            title = title.replace(r'\"', '"')
+
         return {
             '_type': 'playlist',
             'uploader_id': uploader_id,

From 7117b849c1202321bff4d4995f92a3626cfdd20c Mon Sep 17 00:00:00 2001
From: Gilou <contact+dev@gilouweb.com>
Date: Tue, 29 Sep 2020 12:09:55 +0200
Subject: [PATCH 7/8] [bandcamp] Revert test song title, and extract title
 generally (which may fail, as the other title JSON values might come up),
 instead of out of trackinfo, as bandcamp prefixes it with "artist -"

---
 youtube_dl/extractor/bandcamp.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index 3405b570a..04b8aa80f 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -33,7 +33,7 @@ class BandcampIE(InfoExtractor):
         'info_dict': {
             'id': '1812978515',
             'ext': 'mp3',
-            'title': "youtube-dl  \"'/\\\u00e4\u21ad - youtube-dl  \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
+            'title': "youtube-dl  \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
             'duration': 9.8485,
             'uploader': "youtube-dl  \"'/\\\u00e4\u21ad",
             'timestamp': 1354224127,
@@ -99,7 +99,6 @@ class BandcampIE(InfoExtractor):
             webpage, 'track info', default='{}')
 
         track_info = self._parse_json(trackinfo_block, title)
-
         if track_info:
             file_ = track_info.get('file')
             if isinstance(file_, dict):
@@ -115,7 +114,7 @@ class BandcampIE(InfoExtractor):
                         'acodec': ext,
                         'abr': int_or_none(abr_str),
                     })
-            track = track_info.get('title')
+
             track_id = str_or_none(track_info.get('track_id') or track_info.get('id'))
             track_number = int_or_none(track_info.get('track_num'))
             duration = float_or_none(track_info.get('duration'))
@@ -126,6 +125,7 @@ class BandcampIE(InfoExtractor):
                 webpage, key, default=None, group='value')
             return data.replace(r'\"', '"').replace('\\\\', '\\') if data else data
 
+        track = extract('title')
         artist = extract('artist')
         album = extract('album_title')
         timestamp = unified_timestamp(

From 955906045fdd24b21a9b6edd8c410d36394f7241 Mon Sep 17 00:00:00 2001
From: Gilou <contact+dev@gilouweb.com>
Date: Tue, 29 Sep 2020 14:15:53 +0200
Subject: [PATCH 8/8] [bandcamp] alternative approach using JSON data elements
 parsing to fetch the data

---
 youtube_dl/extractor/bandcamp.py | 45 +++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 15 deletions(-)

diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index 04b8aa80f..073409bd5 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -93,12 +93,30 @@ class BandcampIE(InfoExtractor):
         track_number = None
         duration = None
 
+        scriptdatablocks = re.findall(r'<script type="text/javascript" src="[^"]+" nonce="[^"]+" (?:data-[a-z-]+="[^"]+" ?)+>', webpage)
+        bandcamp_data = {}
+        for block in scriptdatablocks:
+            datablocks = re.findall(r'data-([a-z]+)="([^"]+)"', block)
+            for name, dblock in datablocks:
+                if name in ('tralbum', 'embed'):
+                    data = self._parse_json(dblock, 'test', transform_source=unescapeHTML)
+                    bandcamp_data[name] = data
+                else:
+                    continue
+        for key, value in bandcamp_data.items():
+            print(key)
+            for ikey, ivalue in value.items():
+                if isinstance(ivalue, dict):
+                    print('\t', ikey)
+                    for iikey, iivalue in ivalue.items():
+                        print('\t\t', iikey, iivalue)
+                else:
+                    print('\t', ikey, ivalue)
+
         formats = []
-        trackinfo_block = self._html_search_regex(
-            r'trackinfo(?:["\']|&quot;):\[\s*({.+?})\s*\],(?:["\']|&quot;)',
-            webpage, 'track info', default='{}')
 
-        track_info = self._parse_json(trackinfo_block, title)
+        track_info = bandcamp_data['tralbum']['trackinfo'][0]
+
         if track_info:
             file_ = track_info.get('file')
             if isinstance(file_, dict):
@@ -120,10 +138,11 @@ class BandcampIE(InfoExtractor):
             duration = float_or_none(track_info.get('duration'))
 
         def extract(key):
-            data = self._html_search_regex(
-                r',(["\']|&quot;)%s\1:\1(?P<value>(?:\\\1|((?!\1).))+)\1' % key,
-                webpage, key, default=None, group='value')
-            return data.replace(r'\"', '"').replace('\\\\', '\\') if data else data
+            for values in bandcamp_data['tralbum']['current'], bandcamp_data['embed'], bandcamp_data['tralbum']:
+                if key in values and values[key]:
+                    return values[key]
+            else:
+                return None
 
         track = extract('title')
         artist = extract('artist')
@@ -132,14 +151,9 @@ class BandcampIE(InfoExtractor):
             extract('publish_date') or extract('album_publish_date'))
         release_date = unified_strdate(extract('album_release_date'))
 
-        download_link = self._search_regex(
-            r'freeDownloadPage(?:["\']|&quot;):\s*(["\']|&quot;)(?P<url>(?:(?!\1).)+)\1', webpage,
-            'download link', default=None, group='url')
+        download_link = bandcamp_data['tralbum'].get('freeDownloadPage')
         if download_link:
-            track_id = self._search_regex(
-                r'\?id=(?P<id>\d+)&',
-                download_link, 'track id')
-
+            print(download_link)
             download_webpage = self._download_webpage(
                 download_link, track_id, 'Downloading free downloads page')
 
@@ -202,6 +216,7 @@ class BandcampIE(InfoExtractor):
         self._sort_formats(formats)
 
         title = '%s - %s' % (artist, track) if artist else track
+        print(title)
 
         if not duration:
             duration = float_or_none(self._html_search_meta(