Update mullvad_leta.py to account for img_elem

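A recent Mullvad Leta update introduced an extra leading element
(img_elem) in some search results. This broke the existing logic, which
only accepted results with exactly three child elements. extract_result
now checks the length of dom_result to see whether img_elem was included
in the returned result and unpacks the elements accordingly; results of
any other length (e.g. info boxes) return None and are skipped.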
This commit is contained in:
Grant Lanham 2024-07-08 15:16:53 -04:00 committed by Markus Heiser
parent 2039060b64
commit 9a4fa7cc4f

View file

@@ -128,7 +128,14 @@ def request(query: str, params: dict):
def extract_result(dom_result: list[html.HtmlElement]): def extract_result(dom_result: list[html.HtmlElement]):
# Infoboxes sometimes appear in the beginning and will have a length of 0
if len(dom_result) == 3:
[a_elem, h3_elem, p_elem] = dom_result [a_elem, h3_elem, p_elem] = dom_result
elif len(dom_result) == 4:
[_, a_elem, h3_elem, p_elem] = dom_result
else:
return None
return { return {
'url': extract_text(a_elem.text), 'url': extract_text(a_elem.text),
'title': extract_text(h3_elem), 'title': extract_text(h3_elem),
@@ -139,9 +146,9 @@ def extract_result(dom_result: list[html.HtmlElement]):
def extract_results(search_results: html.HtmlElement): def extract_results(search_results: html.HtmlElement):
for search_result in search_results: for search_result in search_results:
dom_result = eval_xpath_list(search_result, 'div/div/*') dom_result = eval_xpath_list(search_result, 'div/div/*')
# sometimes an info box pops up, will need to filter that out result = extract_result(dom_result)
if len(dom_result) == 3: if result is not None:
yield extract_result(dom_result) yield result
def response(resp: Response): def response(resp: Response):