Merge pull request #2246 from dalf/mod-searx-data

[mod] Add searx.data module
This commit is contained in:
Alexandre Flament 2020-10-07 10:38:13 +02:00 committed by GitHub
commit 8b278cbfad
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 37 additions and 41 deletions

21
searx/data/__init__.py Normal file
View file

@ -0,0 +1,21 @@
import json
from pathlib import Path
# Public API of the data package. ``__all__`` is the name Python consults for
# ``from searx.data import *``; every entry must match a name defined in this
# module, otherwise the star-import raises AttributeError.
__all__ = ['ENGINES_LANGUAGES', 'CURRENCIES', 'USER_AGENTS', 'bangs_loader']

# Directory containing this module; data file names are resolved against it.
data_dir = Path(__file__).parent
def load(filename):
    """Parse a JSON file located next to this module.

    :param filename: name of the file, relative to ``data_dir``.
    :returns: the decoded JSON document.
    """
    # open() only accepts path-like objects from Python 3.6 on, so the path
    # is converted to str to keep Python 3.5 working.
    json_path = str(data_dir / filename)
    with open(json_path, encoding='utf-8') as json_file:
        content = json_file.read()
    return json.loads(content)
def bangs_loader():
    """Load and return the parsed content of ``bangs.json``.

    Unlike the module-level constants, the file is read on every call.
    """
    bangs = load('bangs.json')
    return bangs
# Data sets parsed eagerly, in this order, when the module is first imported.
ENGINES_LANGUAGES, CURRENCIES, USER_AGENTS = map(
    load,
    ('engines_languages.json', 'currencies.json', 'useragents.json'),
)

View file

@ -19,13 +19,12 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
import sys
import threading
from os.path import realpath, dirname
from io import open
from babel.localedata import locale_identifiers
from flask_babel import gettext
from operator import itemgetter
from json import loads
from searx import settings
from searx import logger
from searx.data import ENGINES_LANGUAGES
from searx.poolrequests import get
from searx.utils import load_module, match_language, get_engine_from_settings
@ -38,7 +37,6 @@ engines = {}
categories = {'general': []}
languages = loads(open(engine_dir + '/../data/engines_languages.json', 'r', encoding='utf-8').read())
babel_langs = [lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())]
@ -108,8 +106,8 @@ def load_engine(engine_data):
sys.exit(1)
# assign supported languages from json file
if engine_data['name'] in languages:
setattr(engine, 'supported_languages', languages[engine_data['name']])
if engine_data['name'] in ENGINES_LANGUAGES:
setattr(engine, 'supported_languages', ENGINES_LANGUAGES[engine_data['name']])
# find custom aliases for non standard language codes
if hasattr(engine, 'supported_languages'):

View file

@ -1,11 +1,11 @@
import json
import re
import os
import unicodedata
from io import open
from datetime import datetime
from searx.data import CURRENCIES
categories = []
url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'
@ -13,8 +13,6 @@ weight = 100
parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
db = 1
def normalize_name(name):
name = name.lower().replace('-', ' ').rstrip('s')
@ -23,17 +21,17 @@ def normalize_name(name):
def name_to_iso4217(name):
global db
global CURRENCIES
name = normalize_name(name)
currencies = db['names'].get(name, [name])
return currencies[0]
currency = CURRENCIES['names'].get(name, [name])
return currency[0]
def iso4217_to_name(iso4217, language):
global db
global CURRENCIES
return db['iso4217'].get(iso4217, {}).get(language, iso4217)
return CURRENCIES['iso4217'].get(iso4217, {}).get(language, iso4217)
def request(query, params):
@ -82,15 +80,3 @@ def response(resp):
results.append({'answer': answer, 'url': url})
return results
def load():
global db
current_dir = os.path.dirname(os.path.realpath(__file__))
json_data = open(current_dir + "/../data/currencies.json", 'r', encoding='utf-8').read()
db = json.loads(json_data)
load()

View file

@ -1,7 +1,4 @@
import json
from os.path import join
from searx import searx_dir
from searx.data import bangs_loader
# bangs data coming from the following url convert to json with
# https://raw.githubusercontent.com/jivesearch/jivesearch/master/bangs/bangs.toml
@ -9,10 +6,9 @@ from searx import searx_dir
# NOTE only use the get_bang_url
bangs_data = {}
with open(join(searx_dir, 'data/bangs.json'), encoding='utf-8') as json_file:
for bang in json.load(json_file)['bang']:
for trigger in bang["triggers"]:
bangs_data[trigger] = {x: y for x, y in bang.items() if x != "triggers"}
for bang in bangs_loader()['bang']:
for trigger in bang["triggers"]:
bangs_data[trigger] = {x: y for x, y in bang.items() if x != "triggers"}
def get_bang_url(search_query):

View file

@ -1,13 +1,10 @@
# -*- coding: utf-8 -*-
import os
import sys
import re
import json
import importlib
from numbers import Number
from os.path import splitext, join
from io import open
from random import choice
from html.parser import HTMLParser
from urllib.parse import urljoin, urlparse, unquote
@ -18,6 +15,7 @@ from babel.core import get_global
from searx import settings
from searx.data import USER_AGENTS
from searx.version import VERSION_STRING
from searx.languages import language_codes
from searx import logger
@ -31,9 +29,6 @@ blocked_tags = ('script',
ecma_unescape4_re = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE)
ecma_unescape2_re = re.compile(r'%([0-9a-fA-F]{2})', re.UNICODE)
useragents = json.loads(open(os.path.dirname(os.path.realpath(__file__))
+ "/data/useragents.json", 'r', encoding='utf-8').read())
xpath_cache = dict()
lang_to_lc_cache = dict()
@ -50,7 +45,7 @@ def gen_useragent(os=None):
See searx/data/useragents.json
"""
return str(useragents['ua'].format(os=os or choice(useragents['os']), version=choice(useragents['versions'])))
return str(USER_AGENTS['ua'].format(os=os or choice(USER_AGENTS['os']), version=choice(USER_AGENTS['versions'])))
class HTMLTextExtractorException(Exception):