forked from Ponysearch/Ponysearch
[mod] add/modify image fetching for bing_news, qwant and twitter engines
This commit is contained in:
parent 4cffd78650
commit f5128c7cb9
4 changed files with 28 additions and 11 deletions
|
@@ -112,12 +112,11 @@ def response(resp):
|
||||||
|
|
||||||
# append result
|
# append result
|
||||||
if thumbnail is not None:
|
if thumbnail is not None:
|
||||||
results.append({'template': 'videos.html',
|
results.append({'url': url,
|
||||||
'url': url,
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'publishedDate': publishedDate,
|
'publishedDate': publishedDate,
|
||||||
'content': content,
|
'content': content,
|
||||||
'thumbnail': thumbnail})
|
'img_src': thumbnail})
|
||||||
else:
|
else:
|
||||||
results.append({'url': url,
|
results.append({'url': url,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
|
|
@@ -96,14 +96,27 @@ def response(resp):
|
||||||
'thumbnail_src': thumbnail_src,
|
'thumbnail_src': thumbnail_src,
|
||||||
'img_src': img_src})
|
'img_src': img_src})
|
||||||
|
|
||||||
elif (category_to_keyword.get(categories[0], '') == 'news' or
|
elif category_to_keyword.get(categories[0], '') == 'social':
|
||||||
category_to_keyword.get(categories[0], '') == 'social'):
|
|
||||||
published_date = datetime.fromtimestamp(result['date'], None)
|
published_date = datetime.fromtimestamp(result['date'], None)
|
||||||
|
img_src = result.get('img', None)
|
||||||
results.append({'url': res_url,
|
results.append({'url': res_url,
|
||||||
'title': title,
|
'title': title,
|
||||||
'publishedDate': published_date,
|
'publishedDate': published_date,
|
||||||
'content': content})
|
'content': content,
|
||||||
|
'img_src': img_src})
|
||||||
|
|
||||||
|
elif category_to_keyword.get(categories[0], '') == 'news':
|
||||||
|
published_date = datetime.fromtimestamp(result['date'], None)
|
||||||
|
media = result.get('media', [])
|
||||||
|
if len(media) > 0:
|
||||||
|
img_src = media[0].get('pict', {}).get('url', None)
|
||||||
|
else:
|
||||||
|
img_src = None
|
||||||
|
results.append({'url': res_url,
|
||||||
|
'title': title,
|
||||||
|
'publishedDate': published_date,
|
||||||
|
'content': content,
|
||||||
|
'img_src': img_src})
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
|
@@ -27,6 +27,7 @@ search_url = base_url + 'search?'
|
||||||
|
|
||||||
# specific xpath variables
|
# specific xpath variables
|
||||||
results_xpath = '//li[@data-item-type="tweet"]'
|
results_xpath = '//li[@data-item-type="tweet"]'
|
||||||
|
avatar_xpath = './/img[contains(@class, "avatar")]/@src'
|
||||||
link_xpath = './/small[@class="time"]//a'
|
link_xpath = './/small[@class="time"]//a'
|
||||||
title_xpath = './/span[contains(@class, "username")]'
|
title_xpath = './/span[contains(@class, "username")]'
|
||||||
content_xpath = './/p[contains(@class, "tweet-text")]'
|
content_xpath = './/p[contains(@class, "tweet-text")]'
|
||||||
|
@@ -57,6 +58,8 @@ def response(resp):
|
||||||
try:
|
try:
|
||||||
link = tweet.xpath(link_xpath)[0]
|
link = tweet.xpath(link_xpath)[0]
|
||||||
content = extract_text(tweet.xpath(content_xpath)[0])
|
content = extract_text(tweet.xpath(content_xpath)[0])
|
||||||
|
img_src = tweet.xpath(avatar_xpath)[0]
|
||||||
|
img_src = img_src.replace('_bigger', '_normal')
|
||||||
except Exception:
|
except Exception:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@@ -71,12 +74,14 @@ def response(resp):
|
||||||
results.append({'url': url,
|
results.append({'url': url,
|
||||||
'title': title,
|
'title': title,
|
||||||
'content': content,
|
'content': content,
|
||||||
|
'img_src': img_src,
|
||||||
'publishedDate': publishedDate})
|
'publishedDate': publishedDate})
|
||||||
else:
|
else:
|
||||||
# append result
|
# append result
|
||||||
results.append({'url': url,
|
results.append({'url': url,
|
||||||
'title': title,
|
'title': title,
|
||||||
'content': content})
|
'content': content,
|
||||||
|
'img_src': img_src})
|
||||||
|
|
||||||
# return results
|
# return results
|
||||||
return results
|
return results
|
||||||
|
|
|
@@ -81,11 +81,11 @@ class TestBingNewsEngine(SearxTestCase):
|
||||||
self.assertEqual(results[0]['title'], 'Title')
|
self.assertEqual(results[0]['title'], 'Title')
|
||||||
self.assertEqual(results[0]['url'], 'http://url.of.article/')
|
self.assertEqual(results[0]['url'], 'http://url.of.article/')
|
||||||
self.assertEqual(results[0]['content'], 'Article Content')
|
self.assertEqual(results[0]['content'], 'Article Content')
|
||||||
self.assertEqual(results[0]['thumbnail'], 'https://www.bing.com/th?id=ON.13371337133713371337133713371337')
|
self.assertEqual(results[0]['img_src'], 'https://www.bing.com/th?id=ON.13371337133713371337133713371337')
|
||||||
self.assertEqual(results[1]['title'], 'Another Title')
|
self.assertEqual(results[1]['title'], 'Another Title')
|
||||||
self.assertEqual(results[1]['url'], 'http://another.url.of.article/')
|
self.assertEqual(results[1]['url'], 'http://another.url.of.article/')
|
||||||
self.assertEqual(results[1]['content'], 'Another Article Content')
|
self.assertEqual(results[1]['content'], 'Another Article Content')
|
||||||
self.assertNotIn('thumbnail', results[1])
|
self.assertNotIn('img_src', results[1])
|
||||||
|
|
||||||
html = """<?xml version="1.0" encoding="utf-8" ?>
|
html = """<?xml version="1.0" encoding="utf-8" ?>
|
||||||
<rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&setmkt=en-US&first=1&format=RSS">
|
<rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&setmkt=en-US&first=1&format=RSS">
|
||||||
|
@@ -120,7 +120,7 @@ class TestBingNewsEngine(SearxTestCase):
|
||||||
self.assertEqual(results[0]['title'], 'Title')
|
self.assertEqual(results[0]['title'], 'Title')
|
||||||
self.assertEqual(results[0]['url'], 'http://another.url.of.article/')
|
self.assertEqual(results[0]['url'], 'http://another.url.of.article/')
|
||||||
self.assertEqual(results[0]['content'], 'Article Content')
|
self.assertEqual(results[0]['content'], 'Article Content')
|
||||||
self.assertEqual(results[0]['thumbnail'], 'http://another.bing.com/image')
|
self.assertEqual(results[0]['img_src'], 'http://another.bing.com/image')
|
||||||
|
|
||||||
html = """<?xml version="1.0" encoding="utf-8" ?>
|
html = """<?xml version="1.0" encoding="utf-8" ?>
|
||||||
<rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&setmkt=en-US&first=1&format=RSS">
|
<rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&setmkt=en-US&first=1&format=RSS">
|
||||||
|
|
Loading…
Reference in a new issue