Merge pull request #2500 from dalf/github-action-data

[enh] every Sunday, call utils/fetch_*.py scripts and create a PR automatically
This commit is contained in:
Alexandre Flament 2021-02-01 17:16:58 +01:00 committed by GitHub
commit 34de715e62
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 71 additions and 3 deletions

67
.github/workflows/data-update.yml vendored Normal file
View file

@ -0,0 +1,67 @@
# Scheduled workflow: runs the utils/fetch_*.py scripts and proposes the
# resulting changes as a pull request from the "automatic-update-data" branch.
name: "Update searx.data"
on:
  schedule:
    # 13:37 every Sunday (GitHub evaluates cron schedules in UTC).
    - cron: "37 13 * * 0"
jobs:
  # Gate job. A job-level `if` can read neither the `env` nor the `secrets`
  # context, so the presence of the token is probed inside a step and
  # published as a job output that the next job can test through `needs`.
  checkToken:
    name: Check DATA_PR_TOKEN
    runs-on: ubuntu-20.04
    outputs:
      available: ${{ steps.check.outputs.available }}
    steps:
      - name: Check whether DATA_PR_TOKEN is set
        id: check
        env:
          # An unset secret expands to an empty string.
          DATA_PR_TOKEN: ${{ secrets.DATA_PR_TOKEN }}
        run: |
          if [ -n "$DATA_PR_TOKEN" ]; then
            echo "available=true" >> "$GITHUB_OUTPUT"
          else
            echo "available=false" >> "$GITHUB_OUTPUT"
          fi
  updateData:
    name: Update data
    runs-on: ubuntu-20.04
    needs: checkToken
    # Skip the whole job when no token is configured (e.g. in forks).
    if: needs.checkToken.outputs.available == 'true'
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Install Ubuntu packages
        run: |
          sudo ./utils/searx.sh install packages
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.9'
          architecture: 'x64'
      - name: Cache Python dependencies
        id: cache-python
        uses: actions/cache@v2
        with:
          path: ./local
          # This workflow defines no build matrix, so the key is scoped with
          # runner.os instead of matrix.os (which would expand to nothing).
          key: python-${{ runner.os }}-3.9-${{ hashFiles('requirements*.txt', 'setup.py') }}
      - name: Install Python dependencies
        # Only needed when ./local was not restored from the cache.
        if: steps.cache-python.outputs.cache-hit != 'true'
        run: |
          make V=1 install
      - name: Fetch data
        run: |
          source local/py3/bin/activate
          python utils/fetch_firefox_version.py
          python utils/fetch_languages.py
          python utils/fetch_ahmia_blacklist.py
          python utils/fetch_wikidata_units.py
          # python utils/fetch_currencies.py
      - name: Create Pull Request
        id: cpr
        uses: peter-evans/create-pull-request@v3
        with:
          token: ${{ secrets.DATA_PR_TOKEN }}
          commit-message: Update searx.data
          committer: searx-bot <noreply@github.com>
          author: ${{ github.actor }} <${{ github.actor }}@users.noreply.github.com>
          signoff: false
          branch: automatic-update-data
          delete-branch: true
          title: 'Update searx.data'
          body: |
            Update searx.data
          labels: |
            data
          draft: false
      - name: Check outputs
        # Echo the outputs of the "cpr" step into the job log.
        run: |
          echo "Pull Request Number - ${{ steps.cpr.outputs.pull-request-number }}"
          echo "Pull Request URL - ${{ steps.cpr.outputs.pull-request-url }}"

View file

@ -6,18 +6,19 @@
# are written in current directory to avoid overwriting in case something goes wrong. # are written in current directory to avoid overwriting in case something goes wrong.
import json import json
from pathlib import Path
from pprint import pformat from pprint import pformat
from sys import path from sys import path
from babel import Locale, UnknownLocaleError from babel import Locale, UnknownLocaleError
from babel.languages import get_global from babel.languages import get_global
path.append('../searx') # noqa path.append('../searx') # noqa
from searx import settings from searx import settings, searx_dir
from searx.engines import initialize_engines, engines from searx.engines import initialize_engines, engines
# Output files. # Output files.
engines_languages_file = 'engines_languages.json' engines_languages_file = Path(searx_dir) / 'data' / 'engines_languages.json'
languages_file = 'languages.py' languages_file = Path(searx_dir) / 'languages.py'
# Fetches supported languages for each engine and writes json file with those. # Fetches supported languages for each engine and writes json file with those.