[fix] fixes google play engines and adds thumbnails to their results (#1612)

fix google play apps, google play movies, google play music engines

xpath engine: thumbnail_xpath can define an optional thumbnail
This commit is contained in:
Venca24 2019-07-25 07:46:41 +02:00 committed by Alexandre Flament
parent d0dd296424
commit 87baa74a86
2 changed files with 37 additions and 17 deletions

View file

@ -7,6 +7,7 @@ search_url = None
url_xpath = None url_xpath = None
content_xpath = None content_xpath = None
title_xpath = None title_xpath = None
thumbnail_xpath = False
paging = False paging = False
suggestion_xpath = '' suggestion_xpath = ''
results_xpath = '' results_xpath = ''
@ -40,7 +41,9 @@ def extract_text(xpath_results):
return ''.join(xpath_results) return ''.join(xpath_results)
else: else:
# it's a element # it's a element
text = html.tostring(xpath_results, encoding='unicode', method='text', with_tail=False) text = html.tostring(
xpath_results, encoding='unicode', method='text', with_tail=False
)
text = text.strip().replace('\n', ' ') text = text.strip().replace('\n', ' ')
return ' '.join(text.split()) return ' '.join(text.split())
@ -105,7 +108,18 @@ def response(resp):
url = extract_url(result.xpath(url_xpath), search_url) url = extract_url(result.xpath(url_xpath), search_url)
title = extract_text(result.xpath(title_xpath)) title = extract_text(result.xpath(title_xpath))
content = extract_text(result.xpath(content_xpath)) content = extract_text(result.xpath(content_xpath))
results.append({'url': url, 'title': title, 'content': content}) tmp_result = {'url': url, 'title': title, 'content': content}
# add thumbnail if available
thumbnail = None
if thumbnail_xpath:
thumbnail = extract_url(
result.xpath(thumbnail_xpath), search_url
)
if thumbnail:
tmp_result['img_src'] = thumbnail
results.append(tmp_result)
else: else:
for url, title, content in zip( for url, title, content in zip(
(extract_url(x, search_url) for (extract_url(x, search_url) for

View file

@ -311,31 +311,37 @@ engines:
shortcut : gos shortcut : gos
- name : google play apps - name : google play apps
engine : xpath engine : xpath
search_url : https://play.google.com/store/search?q={query}&c=apps search_url : https://play.google.com/store/search?q={query}&c=apps
url_xpath : //a[@class="title"]/@href results_xpath : '//div[@class="WHE7ib mpg5gc"]'
title_xpath : //a[@class="title"] title_xpath : './/div[@class="RZEgze"]//div[@title and not(@title="")]/a'
content_xpath : //a[@class="subtitle"] url_xpath : './/div[@class="RZEgze"]//div[@title and not(@title="")]/a/@href'
content_xpath : './/div[@class="RZEgze"]//a[@class="mnKHRc"]'
thumbnail_xpath : './/div[@class="uzcko"]/div/span[1]/img/@data-src'
categories : files categories : files
shortcut : gpa shortcut : gpa
disabled : True disabled : True
- name : google play movies - name : google play movies
engine : xpath engine : xpath
search_url : https://play.google.com/store/search?q={query}&c=movies search_url : https://play.google.com/store/search?q={query}&c=movies
url_xpath : //a[@class="title"]/@href results_xpath : '//div[@class="WHE7ib mpg5gc"]'
title_xpath : //a[@class="title"]/@title title_xpath : './/div[@class="RZEgze"]//div[@title and not(@title="")]/a'
content_xpath : //a[contains(@class, "subtitle")] url_xpath : './/div[@class="RZEgze"]//div[@title and not(@title="")]/a/@href'
content_xpath : './/div[@class="RZEgze"]//a[@class="mnKHRc"]'
thumbnail_xpath : './/div[@class="uzcko"]/div/span[1]/img/@data-src'
categories : videos categories : videos
shortcut : gpm shortcut : gpm
disabled : True disabled : True
- name : google play music - name : google play music
engine : xpath engine : xpath
search_url : https://play.google.com/store/search?q={query}&c=music search_url : https://play.google.com/store/search?q={query}&c=music
url_xpath : //a[@class="title"]/@href results_xpath : '//div[@class="WHE7ib mpg5gc"]'
title_xpath : //a[@class="title"] title_xpath : './/div[@class="RZEgze"]//div[@title and not(@title="")]/a'
content_xpath : //a[@class="subtitle"] url_xpath : './/div[@class="RZEgze"]//div[@title and not(@title="")]/a/@href'
content_xpath : './/div[@class="RZEgze"]//a[@class="mnKHRc"]'
thumbnail_xpath : './/div[@class="uzcko"]/div/span[1]/img/@data-src'
categories : music categories : music
shortcut : gps shortcut : gps
disabled : True disabled : True