forked from Ponysearch/Ponysearch
Merge pull request #2094 from xywei/xpath-handle-relative-url
Extract relative urls that do not start with / using xpath engine
This commit is contained in:
commit
7b71954d7e
1 changed file with 4 additions and 0 deletions
|
@@ -61,6 +61,10 @@ def extract_url(xpath_results, search_url):
|
||||||
# fix relative url to the search engine
|
# fix relative url to the search engine
|
||||||
url = urljoin(search_url, url)
|
url = urljoin(search_url, url)
|
||||||
|
|
||||||
|
# fix relative urls that fall through the crack
|
||||||
|
if '://' not in url:
|
||||||
|
url = urljoin(search_url, url)
|
||||||
|
|
||||||
# normalize url
|
# normalize url
|
||||||
url = normalize_url(url)
|
url = normalize_url(url)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue