Merge pull request #483 from misnyo/master

[fix] soundcloud.com guest client_id is now fetched dynamically
This commit is contained in:
Adam Tauber 2016-01-03 01:42:20 +01:00
commit 5b5d9524db

View file

@ -10,17 +10,19 @@
@parse url, title, content, publishedDate, embedded @parse url, title, content, publishedDate, embedded
""" """
import re
from StringIO import StringIO
from json import loads from json import loads
from lxml import etree
from urllib import urlencode, quote_plus from urllib import urlencode, quote_plus
from dateutil import parser from dateutil import parser
from searx import logger
from searx.poolrequests import get as http_get
# engine dependent config # engine dependent config
categories = ['music'] categories = ['music']
paging = True paging = True
# api-key
guest_client_id = 'b45b1aa10f1ac2941910a7f0d10f8e28'
# search-url # search-url
url = 'https://api.soundcloud.com/' url = 'https://api.soundcloud.com/'
search_url = url + 'search?{query}'\ search_url = url + 'search?{query}'\
@ -35,6 +37,30 @@ embedded_url = '<iframe width="100%" height="166" ' +\
'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>' 'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>'
def get_client_id():
    """Scrape a guest ``client_id`` from the SoundCloud web front end.

    Requests https://soundcloud.com, selects every ``<script>`` element whose
    ``src`` attribute matches ``.*app.*js`` and requests those scripts one by
    one, in the order the XPath query returns them, until one of them
    contains a ``client_id:"..."`` assignment.

    Returns the first client id found. When the landing page response is not
    ``ok`` or no candidate script yields an id, a warning is logged and an
    empty string is returned. Exceptions raised by ``http_get`` are not
    caught here.
    """
    landing_page = http_get("https://soundcloud.com")

    if landing_page.ok:
        # EXSLT namespace that makes re:match() available inside the XPath.
        exslt_re = {"re": "http://exslt.org/regular-expressions"}
        document = etree.parse(StringIO(landing_page.content), etree.HTMLParser())
        matching_scripts = document.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=exslt_re)

        for node in matching_scripts:
            if node is None:
                continue

            # Download the candidate script and look for the embedded id.
            script = http_get(node.get('src'))
            if not script.ok:
                continue

            found = re.search(r'client_id:"([^"]*)"', script.content, re.M | re.I)
            if found is not None and len(found.groups()):
                return found.groups()[0]

    logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")
    return ""
# api-key
# Obtained by calling get_client_id() at this point instead of being
# hard-coded; get_client_id() returns "" when no id could be scraped.
guest_client_id = get_client_id()
# do search-request # do search-request
def request(query, params): def request(query, params):
offset = (params['pageno'] - 1) * 20 offset = (params['pageno'] - 1) * 20