From 05c105b8371e3766dba35e815601881d83ef6383 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Thu, 17 Feb 2022 22:10:34 +0100 Subject: [PATCH 1/2] [fix] bandcamp: fix itemtype (album|track) and exceptions BTW: polish implementation and show tracklist for albums Closes: https://github.com/searxng/searxng/issues/883 Signed-off-by: Markus Heiser --- searx/engines/bandcamp.py | 48 ++++++++++++------- .../themes/simple/src/less/embedded.less | 10 ++++ 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/searx/engines/bandcamp.py b/searx/engines/bandcamp.py index f868b44ed..f83ca6d4f 100644 --- a/searx/engines/bandcamp.py +++ b/searx/engines/bandcamp.py @@ -1,16 +1,23 @@ -""" -Bandcamp (Music) +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +"""Bandcamp (Music) @website https://bandcamp.com/ @provide-api no @results HTML @parse url, title, content, publishedDate, iframe_src, thumbnail + """ from urllib.parse import urlencode, urlparse, parse_qs from dateutil.parser import parse as dateparse from lxml import html -from searx.utils import extract_text + +from searx.utils import ( + eval_xpath_getindex, + eval_xpath_list, + extract_text, +) # about about = { @@ -26,12 +33,13 @@ categories = ['music'] paging = True base_url = "https://bandcamp.com/" -search_string = search_string = 'search?{query}&page={page}' -iframe_src = "https://bandcamp.com/EmbeddedPlayer/{type}={result_id}/size=large/bgcol=ffffff/linkcol=0687f5/tracklist=false/artwork=small/transparent=true/" +search_string = 'search?{query}&page={page}' +iframe_src = "https://bandcamp.com/EmbeddedPlayer/{type}={result_id}/size=large/bgcol=000/linkcol=fff/artwork=small" def request(query, params): '''pre-request callback + params: method : POST/GET headers : {} @@ -42,37 +50,45 @@ def request(query, params): ''' search_path = search_string.format(query=urlencode({'q': query}), page=params['pageno']) - params['url'] = base_url + search_path - return params def response(resp): '''post-response callback + resp: requests response object ''' results = [] - tree = html.fromstring(resp.text) - search_results = tree.xpath('//li[contains(@class, "searchresult")]') - for result in search_results: - link = result.xpath('.//div[@class="itemurl"]/a')[0] - result_id = parse_qs(urlparse(link.get('href')).query)["search_item_id"][0] + dom = html.fromstring(resp.text) + + for result in eval_xpath_list(dom, '//li[contains(@class, "searchresult")]'): + + link = eval_xpath_getindex(result, './/div[@class="itemurl"]/a', 0, default=None) + if link is None: + continue + title = result.xpath('.//div[@class="heading"]/a/text()') - date = dateparse(result.xpath('//div[@class="released"]/text()')[0].replace("released ", "")) content = result.xpath('.//div[@class="subhead"]/text()') new_result = { "url": extract_text(link), "title": extract_text(title), "content": extract_text(content), - "publishedDate": date, } + + date = eval_xpath_getindex(result, '//div[@class="released"]/text()', 0, default=None) + if date: + new_result["publishedDate"] = dateparse(date.replace("released ", "")) + thumbnail = result.xpath('.//div[@class="art"]/img/@src') if thumbnail: new_result['thumbnail'] = thumbnail[0] - if "album" in result.classes: + + result_id = parse_qs(urlparse(link.get('href')).query)["search_item_id"][0] + itemtype = extract_text(result.xpath('.//div[@class="itemtype"]')).lower() + if "album" == itemtype: new_result["iframe_src"] = iframe_src.format(type='album', result_id=result_id) - elif "track" in result.classes: + elif "track" == itemtype: new_result["iframe_src"] = iframe_src.format(type='track', result_id=result_id) results.append(new_result) diff --git a/searx/static/themes/simple/src/less/embedded.less b/searx/static/themes/simple/src/less/embedded.less index 4a43ea78d..1074802f2 100644 --- a/searx/static/themes/simple/src/less/embedded.less +++ b/searx/static/themes/simple/src/less/embedded.less @@ -17,3 +17,13 @@ iframe[src^="https://www.mixcloud.com"] { // 200px, somtimes 250px. height: 250px; } + +iframe[src^="https://bandcamp.com/EmbeddedPlayer"] { + // show playlist + height: 350px; +} + +iframe[src^="https://bandcamp.com/EmbeddedPlayer/track"] { + // hide playlist + height: 120px; +} From fa3cd7a6967f7b621491eccc5b3d4d1495de21db Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Fri, 18 Feb 2022 22:46:51 +0100 Subject: [PATCH 2/2] [build] /static --- .../themes/oscar/css/logicodev.min.css.map | Bin 10142 -> 10142 bytes .../themes/simple/css/searxng-rtl.min.css | Bin 58779 -> 58913 bytes .../themes/simple/css/searxng-rtl.min.css.map | Bin 96820 -> 97056 bytes .../static/themes/simple/css/searxng.min.css | Bin 57751 -> 57885 bytes .../themes/simple/css/searxng.min.css.map | Bin 95213 -> 95449 bytes 5 files changed, 0 insertions(+), 0 deletions(-) diff --git a/searx/static/themes/oscar/css/logicodev.min.css.map b/searx/static/themes/oscar/css/logicodev.min.css.map index c28e2c7b3d2f4b40a16c03301b904198075a4d1e..52840b8ad15eb62d21541dd672c80de04cba5e78 100644 GIT binary patch delta 45 zcmV+|0Mh@SPo7VZd6R$w7O{x<0g-tRlWYMAv!4RyAOS;@h9hnPWV7`nd;ATcDSmpR4bho0OW8lA02blUSKrq!e47k(!yF nQDS9mIyrH=qyZka`Xxn)$=L`UhDHVj6_fAHl-T@u`VL}I<)&;goj_#g1xh{^bUON7cj*dFMAbuqh-`RLGldZ_@>8+xS5>82pc`3<> zxdnR3`MLV8xk;%hDXA#|If<32MKMaT3e_=r3JUuA3dI@uGT>=Mnx+;=IECcB_?Mho0gH8k_tC%dY&kw_;xV~M!6&axA{O* delta 22 ecmZ4RjdjZx)&;gq&RLt4Y(;KwXOd);O9B9HZwM&> diff --git a/searx/static/themes/simple/css/searxng.min.css b/searx/static/themes/simple/css/searxng.min.css index 500b39a15d1675ab735b41ee0a0c1ce418f00022..a773e43c81a0ea7522b5f296829cf4b99e192a27 100644 GIT binary patch delta 115 zcmbP!n0f9I<_%}2PfndKs-BdXmy(>ATcDSmpR4bho0OW8lA02blUSKrq!e47k(!yF nQDS9mIyrH=qyZka`Xxn)$=L`UhDHVj6_fAHl-T@u`W8tsmn#$ig>y2COF&{7shQ~+ qB~}W?rUug=@G~lS;4w(Qq$n{t8`-#w%#>8D#u*xIKPJeio&*41B1RSf delta 22 ecmccllJ)I#)&;gq&RLt4Y|mcVzFUY