forked from Ponysearch/Ponysearch
Merge pull request #1628 from MarcAbonce/google_fix
[fix] Update xpaths for new Google results page
This commit is contained in:
commit
4cddb829f9
2 changed files with 44 additions and 94 deletions
|
@ -107,13 +107,12 @@ images_path = '/images'
|
||||||
supported_languages_url = 'https://www.google.com/preferences?#languages'
|
supported_languages_url = 'https://www.google.com/preferences?#languages'
|
||||||
|
|
||||||
# specific xpath variables
|
# specific xpath variables
|
||||||
results_xpath = '//div[@class="g"]'
|
results_xpath = '//div[contains(@class, "ZINbbc")]'
|
||||||
url_xpath = './/h3/a/@href'
|
url_xpath = './/div[@class="kCrYT"][1]/a/@href'
|
||||||
title_xpath = './/h3'
|
title_xpath = './/div[@class="kCrYT"][1]/a/div[1]'
|
||||||
content_xpath = './/span[@class="st"]'
|
content_xpath = './/div[@class="kCrYT"][2]//div[contains(@class, "BNeawe")]//div[contains(@class, "BNeawe")]'
|
||||||
content_misc_xpath = './/div[@class="f slp"]'
|
suggestion_xpath = '//div[contains(@class, "ZINbbc")][last()]//div[@class="rVLSBd"]/a//div[contains(@class, "BNeawe")]'
|
||||||
suggestion_xpath = '//p[@class="_Bmc"]'
|
spelling_suggestion_xpath = '//div[@id="scc"]//a'
|
||||||
spelling_suggestion_xpath = '//a[@class="spell"]'
|
|
||||||
|
|
||||||
# map : detail location
|
# map : detail location
|
||||||
map_address_xpath = './/div[@class="s"]//table//td[2]/span/text()'
|
map_address_xpath = './/div[@class="s"]//table//td[2]/span/text()'
|
||||||
|
@ -199,10 +198,6 @@ def request(query, params):
|
||||||
params['headers']['Accept-Language'] = language + ',' + language + '-' + country
|
params['headers']['Accept-Language'] = language + ',' + language + '-' + country
|
||||||
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
|
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
|
||||||
|
|
||||||
# Force Safari 3.1 on Mac OS X (Leopard) user agent to avoid loading the new UI that Searx can't parse
|
|
||||||
params['headers']['User-Agent'] = ("Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_4)"
|
|
||||||
"AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1")
|
|
||||||
|
|
||||||
params['google_hostname'] = google_hostname
|
params['google_hostname'] = google_hostname
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
@ -274,9 +269,7 @@ def response(resp):
|
||||||
content = extract_text_from_dom(result, content_xpath)
|
content = extract_text_from_dom(result, content_xpath)
|
||||||
if content is None:
|
if content is None:
|
||||||
continue
|
continue
|
||||||
content_misc = extract_text_from_dom(result, content_misc_xpath)
|
|
||||||
if content_misc is not None:
|
|
||||||
content = content_misc + "<br />" + content
|
|
||||||
# append result
|
# append result
|
||||||
results.append({'url': url,
|
results.append({'url': url,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
|
|
@ -58,93 +58,50 @@ class TestGoogleEngine(SearxTestCase):
|
||||||
self.assertEqual(google.response(response), [])
|
self.assertEqual(google.response(response), [])
|
||||||
|
|
||||||
html = """
|
html = """
|
||||||
<div class="g">
|
<div class="ZINbbc xpd O9g5cc uUPGi">
|
||||||
<h3 class="r">
|
<div>
|
||||||
<a href="http://this.should.be.the.link/">
|
<div class="kCrYT">
|
||||||
<b>This</b> is <b>the</b> title
|
<a href="/url?q=http://this.should.be.the.link/">
|
||||||
</a>
|
<div class="BNeawe">
|
||||||
</h3>
|
<b>This</b> is <b>the</b> title
|
||||||
<div class="s">
|
|
||||||
<div class="kv" style="margin-bottom:2px">
|
|
||||||
<cite>
|
|
||||||
<b>test</b>.psychologies.com/
|
|
||||||
</cite>
|
|
||||||
<div class="_nBb">
|
|
||||||
<div style="display:inline" onclick="google.sham(this);" aria-expanded="false"
|
|
||||||
aria-haspopup="true" tabindex="0" data-ved="0CBUQ7B0wAA">
|
|
||||||
<span class="_O0">
|
|
||||||
</span>
|
|
||||||
</div>
|
</div>
|
||||||
<div style="display:none" class="am-dropdown-menu" role="menu" tabindex="-1">
|
<div class="BNeawe">
|
||||||
<ul>
|
http://website
|
||||||
<li class="_Ykb">
|
</div>
|
||||||
<a class="_Zkb" href="http://www.google.fr/url?url=http://webcache.googleusercontent
|
</a>
|
||||||
.com/search%3Fcache:R1Z_4pGXjuIJ:http://test.psychologies.com/">
|
</div>
|
||||||
En cache
|
<div class="kCrYT">
|
||||||
</a>
|
<div>
|
||||||
</li>
|
<div class="BNeawe">
|
||||||
<li class="_Ykb">
|
<div>
|
||||||
<a class="_Zkb" href="/search?safe=off&q=related:test.psy.com/">
|
<div class="BNeawe">
|
||||||
Pages similaires
|
This should be the content.
|
||||||
</a>
|
</div>
|
||||||
</li>
|
</div>
|
||||||
</ul>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<span class="st">
|
</div>
|
||||||
This should be the content.
|
</p>
|
||||||
</span>
|
<div class="ZINbbc xpd O9g5cc uUPGi">
|
||||||
<br>
|
<div>
|
||||||
<div class="osl">
|
<div class="kCrYT">
|
||||||
<a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/">
|
<span>
|
||||||
Test Personnalité
|
<div class="BNeawe">
|
||||||
</a> -
|
Related searches
|
||||||
<a href="http://www.google.fr/url?url=http://test.psychologies.com/test/">
|
</div>
|
||||||
Tests - Moi
|
</span>
|
||||||
</a> -
|
</div>
|
||||||
<a href="http://www.google.fr/url?url=http://test.psychologies.com/test/tests-couple">
|
<div class="rVLSBd">
|
||||||
Test Couple
|
<a>
|
||||||
</a>
|
<div>
|
||||||
-
|
<div class="BNeawe">
|
||||||
<a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/tests-amour">
|
suggestion title
|
||||||
Test Amour
|
</div>
|
||||||
|
</div>
|
||||||
</a>
|
</a>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
|
||||||
<div class="g">
|
|
||||||
<h3 class="r">
|
|
||||||
<a href="http://www.google.com/images?q=toto">
|
|
||||||
<b>This</b>
|
|
||||||
</a>
|
|
||||||
</h3>
|
|
||||||
</div>
|
|
||||||
<div class="g">
|
|
||||||
<h3 class="r">
|
|
||||||
<a href="http://www.google.com/search?q=toto">
|
|
||||||
<b>This</b> is
|
|
||||||
</a>
|
|
||||||
</h3>
|
|
||||||
</div>
|
|
||||||
<div class="g">
|
|
||||||
<h3 class="r">
|
|
||||||
<a href="€">
|
|
||||||
<b>This</b> is <b>the</b>
|
|
||||||
</a>
|
|
||||||
</h3>
|
|
||||||
</div>
|
|
||||||
<div class="g">
|
|
||||||
<h3 class="r">
|
|
||||||
<a href="/url?q=url">
|
|
||||||
<b>This</b> is <b>the</b>
|
|
||||||
</a>
|
|
||||||
</h3>
|
|
||||||
</div>
|
|
||||||
<p class="_Bmc" style="margin:3px 8px">
|
|
||||||
<a href="/search?num=20&safe=off&q=t&revid=1754833769&sa=X&ei=-&ved=">
|
|
||||||
suggestion <b>title</b>
|
|
||||||
</a>
|
|
||||||
</p>
|
</p>
|
||||||
"""
|
"""
|
||||||
response = self.mock_response(html)
|
response = self.mock_response(html)
|
||||||
|
|
Loading…
Reference in a new issue