From 066bd916bf0c0344c978d2ea46cf9e9960841a61 Mon Sep 17 00:00:00 2001
From: Alexandre Flament <alex@al-f.net>
Date: Sun, 28 May 2017 15:46:45 +0200
Subject: [PATCH 1/2] [mod] fetch firefox versions in a standalone script

---
 searx/data/useragents.json     | 15 ++++++++
 searx/utils.py                 | 28 ++++----------
 utils/fetch_firefox_version.py | 69 ++++++++++++++++++++++++++++++++++
 3 files changed, 92 insertions(+), 20 deletions(-)
 create mode 100644 searx/data/useragents.json
 create mode 100755 utils/fetch_firefox_version.py

diff --git a/searx/data/useragents.json b/searx/data/useragents.json
new file mode 100644
index 000000000..ba80ce885
--- /dev/null
+++ b/searx/data/useragents.json
@@ -0,0 +1,15 @@
+{
+    "os": [
+        "Windows NT 10; WOW64", 
+        "X11; Linux x86_64"
+    ], 
+    "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}", 
+    "versions": [
+        "59.0.2", 
+        "59.0.1", 
+        "59.0", 
+        "58.0.2", 
+        "58.0.1", 
+        "58.0"
+    ]
+}
\ No newline at end of file
diff --git a/searx/utils.py b/searx/utils.py
index bd6c3fe2f..f457284e3 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -10,8 +10,10 @@ from codecs import getincrementalencoder
 from imp import load_source
 from numbers import Number
 from os.path import splitext, join
+from io import open
 from random import choice
 import sys
+import json
 
 from searx import settings
 from searx.version import VERSION_STRING
@@ -39,29 +41,11 @@ else:
 
 logger = logger.getChild('utils')
 
-ua_versions = ('52.8.1',
-               '53.0',
-               '54.0',
-               '55.0',
-               '56.0',
-               '57.0',
-               '58.0',
-               '59.0',
-               '60.0.2')
-
-ua_os = ('Windows NT 6.3; WOW64',
-         'X11; Linux x86_64',
-         'X11; Linux x86')
-
-ua = "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}"
-
 blocked_tags = ('script',
                 'style')
 
-
-def gen_useragent(os=None):
-    # TODO
-    return ua.format(os=os or choice(ua_os), version=choice(ua_versions))
+with open(os.path.dirname(os.path.realpath(__file__)) + "/data/useragents.json", 'r', encoding='utf-8') as _ua_file:
+    useragents = json.load(_ua_file)  # the 'with' block closes the handle once the JSON is parsed
 
 
 def searx_useragent():
@@ -70,6 +54,10 @@ def searx_useragent():
            suffix=settings['outgoing'].get('useragent_suffix', ''))
 
 
+def gen_useragent():
+    return str(useragents['ua'].format(os=choice(useragents['os']), version=choice(useragents['versions'])))
+
+
 def highlight_content(content, query):
 
     if not content:
diff --git a/utils/fetch_firefox_version.py b/utils/fetch_firefox_version.py
new file mode 100755
index 000000000..21d6e82ff
--- /dev/null
+++ b/utils/fetch_firefox_version.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+
+# set path
+from sys import path
+from os.path import realpath, dirname
+path.append(realpath(dirname(realpath(__file__)) + '/../'))
+
+#
+import json
+import requests
+import re
+from distutils.version import LooseVersion, StrictVersion
+from lxml import html
+from searx.url_utils import urlparse, urljoin
+
+URL = 'https://ftp.mozilla.org/pub/firefox/releases/'
+RELEASE_PATH = '/pub/firefox/releases/'
+
+NORMAL_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?(esr)?$')
+# BETA_REGEX = re.compile('.*[0-9]b([0-9\-a-z]+)$')
+# ESR_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?esr$')
+
+# 
+useragent = {
+    "versions": (),
+    "os": ('Windows NT 10; WOW64',
+           'X11; Linux x86_64'),
+    "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}"
+}
+
+
+def fetch_firefox_versions():
+    """Return the versions linked under RELEASE_PATH that match NORMAL_REGEX, newest first.
+
+    Raises Exception when the listing request does not answer with HTTP 200.
+    """
+    resp = requests.get(URL, timeout=2.0)
+    if resp.status_code != 200:
+        # status_code is an int: convert it, otherwise the concatenation raises TypeError
+        raise Exception("Error fetching firefox versions, HTTP code " + str(resp.status_code))
+    versions = []
+    for link in html.fromstring(resp.text).xpath('//a/@href'):
+        path = urlparse(urljoin(URL, link)).path
+        if path.startswith(RELEASE_PATH):
+            version = path[len(RELEASE_PATH):-1]  # strip the prefix and the final character (expected '/')
+            if NORMAL_REGEX.match(version):
+                versions.append(LooseVersion(version))
+    versions.sort(reverse=True)
+    return versions
+
+
+def fetch_firefox_last_versions():
+    versions = fetch_firefox_versions()
+
+    result = []
+    major_last = versions[0].version[0]
+    major_list = (major_last, major_last - 1)
+    for version in versions:
+        major_current = version.version[0]
+        if major_current in major_list and 'esr' not in version.version:
+            result.append(version.vstring)
+
+    return result
+
+
+useragent["versions"] = fetch_firefox_last_versions()
+f = open("../searx/data/useragents.json", "wb")
+json.dump(useragent, f, sort_keys=True, indent=4, ensure_ascii=False, encoding="utf-8")
+f.close()

From 50c836864a9a7a765561d886b11f44d8cea0bce9 Mon Sep 17 00:00:00 2001
From: Alex <alex@al-f.net>
Date: Sun, 5 Aug 2018 10:55:42 +0200
Subject: [PATCH 2/2] fetch_firefox_version.py : compatible with Python 3 and
 minor fixes.

---
 searx/data/useragents.json     | 21 ++++++++++-----------
 searx/utils.py                 |  4 ++--
 utils/fetch_firefox_version.py | 20 ++++++++++++--------
 3 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/searx/data/useragents.json b/searx/data/useragents.json
index ba80ce885..850bc418a 100644
--- a/searx/data/useragents.json
+++ b/searx/data/useragents.json
@@ -1,15 +1,14 @@
 {
-    "os": [
-        "Windows NT 10; WOW64", 
-        "X11; Linux x86_64"
-    ], 
-    "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}", 
+    "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}",
     "versions": [
-        "59.0.2", 
-        "59.0.1", 
-        "59.0", 
-        "58.0.2", 
-        "58.0.1", 
-        "58.0"
+        "61.0.1",
+        "61.0",
+        "60.0.2",
+        "60.0.1",
+        "60.0"
+    ],
+    "os": [
+        "Windows NT 10; WOW64",
+        "X11; Linux x86_64"
     ]
 }
\ No newline at end of file
diff --git a/searx/utils.py b/searx/utils.py
index f457284e3..dfa22c5fc 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -54,8 +54,8 @@ def searx_useragent():
            suffix=settings['outgoing'].get('useragent_suffix', ''))
 
 
-def gen_useragent():
-    return str(useragents['ua'].format(os=choice(useragents['os']), version=choice(useragents['versions'])))
+def gen_useragent(os=None):
+    return str(useragents['ua'].format(os=os or choice(useragents['os']), version=choice(useragents['versions'])))
 
 
 def highlight_content(content, query):
diff --git a/utils/fetch_firefox_version.py b/utils/fetch_firefox_version.py
index 21d6e82ff..ed179585b 100755
--- a/utils/fetch_firefox_version.py
+++ b/utils/fetch_firefox_version.py
@@ -2,7 +2,7 @@
 
 # set path
 from sys import path
-from os.path import realpath, dirname
+from os.path import realpath, dirname, join
 path.append(realpath(dirname(realpath(__file__)) + '/../'))
 
 #
@@ -12,16 +12,17 @@ import re
 from distutils.version import LooseVersion, StrictVersion
 from lxml import html
 from searx.url_utils import urlparse, urljoin
+from searx import searx_dir
 
 URL = 'https://ftp.mozilla.org/pub/firefox/releases/'
 RELEASE_PATH = '/pub/firefox/releases/'
 
-NORMAL_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?(esr)?$')
+NORMAL_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?$')  # raw string: '\.' is not a valid str escape
 # BETA_REGEX = re.compile('.*[0-9]b([0-9\-a-z]+)$')
 # ESR_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?esr$')
 
 # 
-useragent = {
+useragents = {
     "versions": (),
     "os": ('Windows NT 10; WOW64',
            'X11; Linux x86_64'),
@@ -57,13 +58,16 @@ def fetch_firefox_last_versions():
     major_list = (major_last, major_last - 1)
     for version in versions:
         major_current = version.version[0]
-        if major_current in major_list and 'esr' not in version.version:
+        if major_current in major_list:
             result.append(version.vstring)
 
     return result
 
 
-useragent["versions"] = fetch_firefox_last_versions()
-f = open("../searx/data/useragents.json", "wb")
-json.dump(useragent, f, sort_keys=True, indent=4, ensure_ascii=False, encoding="utf-8")
-f.close()
+def get_useragents_filename():
+    return join(join(searx_dir, "data"), "useragents.json")
+
+
+useragents["versions"] = fetch_firefox_last_versions()
+with open(get_useragents_filename(), "w") as f:
+    json.dump(useragents, f, indent=4, ensure_ascii=False)