From d3aa690a7a7250f2052338d20e179fbb2252fa43 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Mon, 3 Oct 2022 10:06:17 +0200 Subject: [PATCH] [mod] bing: fetch engine traits (data_type: supported_languages) Implements a fetch_traits function for the Bing engines. .. note:: Does not include migration of the request method from 'supported_languages' to 'traits' (EngineTraits) object! Signed-off-by: Markus Heiser --- searx/data/engine_traits.json | 1576 ++++++++++++++++++++++++++++++++- searx/engines/bing.py | 97 ++ searx/engines/bing_images.py | 1 + searx/engines/bing_news.py | 1 + searx/engines/bing_videos.py | 1 + 5 files changed, 1668 insertions(+), 8 deletions(-) diff --git a/searx/data/engine_traits.json b/searx/data/engine_traits.json index 658b7582f..b4fdd8b28 100644 --- a/searx/data/engine_traits.json +++ b/searx/data/engine_traits.json @@ -3,8 +3,398 @@ "all_locale": null, "custom": {}, "data_type": "supported_languages", - "languages": {}, - "regions": {}, + "languages": { + "af": "af", + "am": "am", + "ar": "ar", + "as": "as", + "az": "az", + "be": "be", + "bg": "bg", + "bn": "bn", + "bs": "bs", + "ca": "ca", + "chr": "chr", + "cs": "cs", + "cy": "cy", + "da": "da", + "de": "de", + "el": "el", + "en": "en", + "es": "es", + "et": "et", + "eu": "eu", + "fa": "fa", + "fi": "fi", + "fil": "fil", + "fr": "fr", + "ga": "ga", + "gd": "gd", + "gl": "gl", + "gu": "gu", + "ha": "ha", + "he": "he", + "hi": "hi", + "hr": "hr", + "hu": "hu", + "hy": "hy", + "id": "id", + "ig": "ig", + "is": "is", + "it": "it", + "ja": "ja", + "ka": "ka", + "kk": "kk", + "km": "km", + "kn": "kn", + "ko": "ko", + "kok": "kok", + "ku": "ku", + "ky": "ky", + "lb": "lb", + "lo": "lo", + "lt": "lt", + "lv": "lv", + "mi": "mi", + "mk": "mk", + "ml": "ml", + "mn": "MN", + "mr": "mr", + "ms": "ms", + "mt": "mt", + "nb": "nb", + "ne": "ne", + "nl": "nl", + "nn": "nn", + "or": "or", + "pa": "pa", + "pa_Arab": "pa-Arab", + "pl": "pl", + "pt": "pt", + "qu": "quz", + "ro": "ro", + "ru": "ru", + 
"rw": "rw", + "sd": "sd", + "si": "si", + "sk": "sk", + "sl": "sl", + "sq": "sq", + "sr": "sr", + "sr_Latn": "sr_latn", + "sv": "sv", + "sw": "sw", + "ta": "ta", + "te": "te", + "tg": "tg", + "th": "th", + "ti": "ti", + "tk": "tk", + "tr": "tr", + "tt": "tt", + "ug": "ug", + "uk": "uk", + "ur": "ur", + "uz": "uz", + "vi": "vi", + "wo": "wo", + "xh": "xh", + "yo": "yo", + "zh": "zh_Hans", + "zh_Hans": "zh_Hans", + "zh_Hant": "zh_Hant", + "zu": "zu" + }, + "regions": { + "am-ET": "et", + "ar-AE": "ae", + "ar-BH": "bh", + "ar-DJ": "dj", + "ar-DZ": "dz", + "ar-EG": "eg", + "ar-ER": "er", + "ar-IL": "il", + "ar-IQ": "iq", + "ar-JO": "jo", + "ar-KM": "km", + "ar-KW": "kw", + "ar-LB": "lb", + "ar-LY": "ly", + "ar-MA": "ma", + "ar-MR": "mr", + "ar-OM": "om", + "ar-PS": "ps", + "ar-QA": "qa", + "ar-SA": "sa", + "ar-SD": "sd", + "ar-SO": "so", + "ar-SY": "sy", + "ar-TD": "td", + "ar-TN": "tn", + "ar-YE": "ye", + "az-AZ": "az", + "be-BY": "by", + "bg-BG": "bg", + "bn-BD": "bd", + "bs-BA": "ba", + "ca-AD": "ad", + "cs-CZ": "cz", + "da-DK": "dk", + "de-AT": "at", + "de-BE": "be", + "de-CH": "ch", + "de-DE": "de", + "de-LI": "li", + "de-LU": "lu", + "el-CY": "cy", + "el-GR": "gr", + "en-AG": "ag", + "en-AI": "ai", + "en-AS": "as", + "en-AU": "au", + "en-BB": "bb", + "en-BI": "bi", + "en-BM": "bm", + "en-BS": "bs", + "en-BW": "bw", + "en-BZ": "bz", + "en-CA": "ca", + "en-CC": "cc", + "en-CK": "ck", + "en-CM": "cm", + "en-CX": "cx", + "en-DM": "dm", + "en-ER": "er", + "en-FJ": "fj", + "en-FK": "fk", + "en-FM": "fm", + "en-GB": "gb", + "en-GD": "gd", + "en-GG": "gg", + "en-GH": "gh", + "en-GI": "gi", + "en-GM": "gm", + "en-GU": "gu", + "en-GY": "gy", + "en-HK": "hk", + "en-IE": "ie", + "en-IN": "in", + "en-JE": "je", + "en-JM": "jm", + "en-KE": "ke", + "en-KI": "ki", + "en-KN": "kn", + "en-KY": "ky", + "en-LC": "lc", + "en-LR": "lr", + "en-LS": "ls", + "en-MG": "mg", + "en-MH": "mh", + "en-MP": "mp", + "en-MS": "ms", + "en-MT": "mt", + "en-MU": "mu", + "en-MW": "mw", + "en-NA": 
"na", + "en-NF": "nf", + "en-NG": "ng", + "en-NR": "nr", + "en-NU": "nu", + "en-NZ": "nz", + "en-PG": "pg", + "en-PH": "ph", + "en-PK": "pk", + "en-PN": "pn", + "en-PR": "pr", + "en-PW": "pw", + "en-RW": "rw", + "en-SB": "sb", + "en-SC": "sc", + "en-SD": "sd", + "en-SG": "sg", + "en-SH": "sh", + "en-SL": "sl", + "en-SS": "ss", + "en-SX": "sx", + "en-SZ": "sz", + "en-TC": "tc", + "en-TK": "tk", + "en-TO": "to", + "en-TT": "tt", + "en-TV": "tv", + "en-TZ": "tz", + "en-UG": "ug", + "en-US": "us", + "en-VC": "vc", + "en-VG": "vg", + "en-VI": "vi", + "en-VU": "vu", + "en-WS": "ws", + "en-ZA": "za", + "en-ZM": "zm", + "en-ZW": "zw", + "es-AR": "ar", + "es-BO": "bo", + "es-CL": "cl", + "es-CO": "co", + "es-CR": "cr", + "es-DO": "do", + "es-EC": "ec", + "es-ES": "es", + "es-GQ": "gq", + "es-GT": "gt", + "es-HN": "hn", + "es-MX": "mx", + "es-NI": "ni", + "es-PA": "pa", + "es-PE": "pe", + "es-PR": "pr", + "es-PY": "py", + "es-SV": "sv", + "es-UY": "uy", + "es-VE": "ve", + "et-EE": "ee", + "fa-AF": "af", + "fa-IR": "ir", + "fi-FI": "fi", + "fil-PH": "ph", + "fr-BE": "be", + "fr-BF": "bf", + "fr-BI": "bi", + "fr-BJ": "bj", + "fr-BL": "bl", + "fr-CA": "ca", + "fr-CD": "cd", + "fr-CF": "cf", + "fr-CG": "cg", + "fr-CH": "ch", + "fr-CI": "ci", + "fr-CM": "cm", + "fr-DJ": "dj", + "fr-DZ": "dz", + "fr-FR": "fr", + "fr-GA": "ga", + "fr-GF": "gf", + "fr-GN": "gn", + "fr-GP": "gp", + "fr-GQ": "gq", + "fr-HT": "ht", + "fr-KM": "km", + "fr-LU": "lu", + "fr-MA": "ma", + "fr-MC": "mc", + "fr-MF": "mf", + "fr-MG": "mg", + "fr-ML": "ml", + "fr-MQ": "mq", + "fr-MU": "mu", + "fr-NC": "nc", + "fr-NE": "ne", + "fr-PF": "pf", + "fr-PM": "pm", + "fr-RE": "re", + "fr-RW": "rw", + "fr-SC": "sc", + "fr-SN": "sn", + "fr-SY": "sy", + "fr-TD": "td", + "fr-TG": "tg", + "fr-TN": "tn", + "fr-VU": "vu", + "fr-WF": "wf", + "fr-YT": "yt", + "ga-IE": "ie", + "he-IL": "il", + "hi-IN": "in", + "hr-BA": "ba", + "hr-HR": "hr", + "hu-HU": "hu", + "hy-AM": "am", + "id-ID": "id", + "is-IS": "is", + "it-CH": "ch", + 
"it-IT": "it", + "it-SM": "sm", + "it-VA": "va", + "ja-JP": "jp", + "ka-GE": "ge", + "kk-KZ": "kz", + "km-KH": "kh", + "ko-KR": "kr", + "ky-KG": "kg", + "lb-LU": "lu", + "lo-LA": "la", + "lt-LT": "lt", + "lv-LV": "lv", + "mi-NZ": "nz", + "mk-MK": "mk", + "mn-MN": "mn", + "ms-BN": "bn", + "ms-MY": "my", + "ms-SG": "sg", + "mt-MT": "mt", + "nb-NO": "no", + "ne-NP": "np", + "nl-AW": "aw", + "nl-BE": "be", + "nl-CW": "cw", + "nl-NL": "nl", + "nl-SR": "sr", + "nl-SX": "sx", + "nn-NO": "no", + "pl-PL": "pl", + "pt-AO": "ao", + "pt-BR": "br", + "pt-CV": "cv", + "pt-GQ": "gq", + "pt-GW": "gw", + "pt-MO": "mo", + "pt-MZ": "mz", + "pt-PT": "pt", + "pt-ST": "st", + "qu-BO": "bo", + "qu-EC": "ec", + "qu-PE": "pe", + "ro-MD": "md", + "ro-RO": "ro", + "ru-BY": "by", + "ru-KG": "kg", + "ru-KZ": "kz", + "ru-RU": "ru", + "ru-UA": "ua", + "rw-RW": "rw", + "si-LK": "lk", + "sk-SK": "sk", + "sl-SI": "si", + "sq-AL": "al", + "sr-BA": "ba", + "sr-ME": "me", + "sr-RS": "rs", + "sv-FI": "fi", + "sv-SE": "se", + "sw-KE": "ke", + "sw-TZ": "tz", + "sw-UG": "ug", + "ta-LK": "lk", + "ta-SG": "sg", + "tg-TJ": "tj", + "th-TH": "th", + "ti-ER": "er", + "tk-TM": "tm", + "tr-CY": "cy", + "tr-TR": "tr", + "uk-UA": "ua", + "ur-PK": "pk", + "uz-UZ": "uz", + "vi-VN": "vn", + "wo-SN": "sn", + "yo-NG": "ng", + "zh-CN": "cn", + "zh-HK": "hk", + "zh-MO": "mo", + "zh-SG": "sg", + "zh-TW": "tw" + }, "supported_languages": [ "af", "am", @@ -117,8 +507,398 @@ "all_locale": null, "custom": {}, "data_type": "supported_languages", - "languages": {}, - "regions": {}, + "languages": { + "af": "af", + "am": "am", + "ar": "ar", + "as": "as", + "az": "az", + "be": "be", + "bg": "bg", + "bn": "bn", + "bs": "bs", + "ca": "ca", + "chr": "chr", + "cs": "cs", + "cy": "cy", + "da": "da", + "de": "de", + "el": "el", + "en": "en", + "es": "es", + "et": "et", + "eu": "eu", + "fa": "fa", + "fi": "fi", + "fil": "fil", + "fr": "fr", + "ga": "ga", + "gd": "gd", + "gl": "gl", + "gu": "gu", + "ha": "ha", + "he": "he", + "hi": "hi", 
+ "hr": "hr", + "hu": "hu", + "hy": "hy", + "id": "id", + "ig": "ig", + "is": "is", + "it": "it", + "ja": "ja", + "ka": "ka", + "kk": "kk", + "km": "km", + "kn": "kn", + "ko": "ko", + "kok": "kok", + "ku": "ku", + "ky": "ky", + "lb": "lb", + "lo": "lo", + "lt": "lt", + "lv": "lv", + "mi": "mi", + "mk": "mk", + "ml": "ml", + "mn": "MN", + "mr": "mr", + "ms": "ms", + "mt": "mt", + "nb": "nb", + "ne": "ne", + "nl": "nl", + "nn": "nn", + "or": "or", + "pa": "pa", + "pa_Arab": "pa-Arab", + "pl": "pl", + "pt": "pt", + "qu": "quz", + "ro": "ro", + "ru": "ru", + "rw": "rw", + "sd": "sd", + "si": "si", + "sk": "sk", + "sl": "sl", + "sq": "sq", + "sr": "sr", + "sr_Latn": "sr_latn", + "sv": "sv", + "sw": "sw", + "ta": "ta", + "te": "te", + "tg": "tg", + "th": "th", + "ti": "ti", + "tk": "tk", + "tr": "tr", + "tt": "tt", + "ug": "ug", + "uk": "uk", + "ur": "ur", + "uz": "uz", + "vi": "vi", + "wo": "wo", + "xh": "xh", + "yo": "yo", + "zh": "zh_Hans", + "zh_Hans": "zh_Hans", + "zh_Hant": "zh_Hant", + "zu": "zu" + }, + "regions": { + "am-ET": "et", + "ar-AE": "ae", + "ar-BH": "bh", + "ar-DJ": "dj", + "ar-DZ": "dz", + "ar-EG": "eg", + "ar-ER": "er", + "ar-IL": "il", + "ar-IQ": "iq", + "ar-JO": "jo", + "ar-KM": "km", + "ar-KW": "kw", + "ar-LB": "lb", + "ar-LY": "ly", + "ar-MA": "ma", + "ar-MR": "mr", + "ar-OM": "om", + "ar-PS": "ps", + "ar-QA": "qa", + "ar-SA": "sa", + "ar-SD": "sd", + "ar-SO": "so", + "ar-SY": "sy", + "ar-TD": "td", + "ar-TN": "tn", + "ar-YE": "ye", + "az-AZ": "az", + "be-BY": "by", + "bg-BG": "bg", + "bn-BD": "bd", + "bs-BA": "ba", + "ca-AD": "ad", + "cs-CZ": "cz", + "da-DK": "dk", + "de-AT": "at", + "de-BE": "be", + "de-CH": "ch", + "de-DE": "de", + "de-LI": "li", + "de-LU": "lu", + "el-CY": "cy", + "el-GR": "gr", + "en-AG": "ag", + "en-AI": "ai", + "en-AS": "as", + "en-AU": "au", + "en-BB": "bb", + "en-BI": "bi", + "en-BM": "bm", + "en-BS": "bs", + "en-BW": "bw", + "en-BZ": "bz", + "en-CA": "ca", + "en-CC": "cc", + "en-CK": "ck", + "en-CM": "cm", + "en-CX": 
"cx", + "en-DM": "dm", + "en-ER": "er", + "en-FJ": "fj", + "en-FK": "fk", + "en-FM": "fm", + "en-GB": "gb", + "en-GD": "gd", + "en-GG": "gg", + "en-GH": "gh", + "en-GI": "gi", + "en-GM": "gm", + "en-GU": "gu", + "en-GY": "gy", + "en-HK": "hk", + "en-IE": "ie", + "en-IN": "in", + "en-JE": "je", + "en-JM": "jm", + "en-KE": "ke", + "en-KI": "ki", + "en-KN": "kn", + "en-KY": "ky", + "en-LC": "lc", + "en-LR": "lr", + "en-LS": "ls", + "en-MG": "mg", + "en-MH": "mh", + "en-MP": "mp", + "en-MS": "ms", + "en-MT": "mt", + "en-MU": "mu", + "en-MW": "mw", + "en-NA": "na", + "en-NF": "nf", + "en-NG": "ng", + "en-NR": "nr", + "en-NU": "nu", + "en-NZ": "nz", + "en-PG": "pg", + "en-PH": "ph", + "en-PK": "pk", + "en-PN": "pn", + "en-PR": "pr", + "en-PW": "pw", + "en-RW": "rw", + "en-SB": "sb", + "en-SC": "sc", + "en-SD": "sd", + "en-SG": "sg", + "en-SH": "sh", + "en-SL": "sl", + "en-SS": "ss", + "en-SX": "sx", + "en-SZ": "sz", + "en-TC": "tc", + "en-TK": "tk", + "en-TO": "to", + "en-TT": "tt", + "en-TV": "tv", + "en-TZ": "tz", + "en-UG": "ug", + "en-US": "us", + "en-VC": "vc", + "en-VG": "vg", + "en-VI": "vi", + "en-VU": "vu", + "en-WS": "ws", + "en-ZA": "za", + "en-ZM": "zm", + "en-ZW": "zw", + "es-AR": "ar", + "es-BO": "bo", + "es-CL": "cl", + "es-CO": "co", + "es-CR": "cr", + "es-DO": "do", + "es-EC": "ec", + "es-ES": "es", + "es-GQ": "gq", + "es-GT": "gt", + "es-HN": "hn", + "es-MX": "mx", + "es-NI": "ni", + "es-PA": "pa", + "es-PE": "pe", + "es-PR": "pr", + "es-PY": "py", + "es-SV": "sv", + "es-UY": "uy", + "es-VE": "ve", + "et-EE": "ee", + "fa-AF": "af", + "fa-IR": "ir", + "fi-FI": "fi", + "fil-PH": "ph", + "fr-BE": "be", + "fr-BF": "bf", + "fr-BI": "bi", + "fr-BJ": "bj", + "fr-BL": "bl", + "fr-CA": "ca", + "fr-CD": "cd", + "fr-CF": "cf", + "fr-CG": "cg", + "fr-CH": "ch", + "fr-CI": "ci", + "fr-CM": "cm", + "fr-DJ": "dj", + "fr-DZ": "dz", + "fr-FR": "fr", + "fr-GA": "ga", + "fr-GF": "gf", + "fr-GN": "gn", + "fr-GP": "gp", + "fr-GQ": "gq", + "fr-HT": "ht", + "fr-KM": "km", + 
"fr-LU": "lu", + "fr-MA": "ma", + "fr-MC": "mc", + "fr-MF": "mf", + "fr-MG": "mg", + "fr-ML": "ml", + "fr-MQ": "mq", + "fr-MU": "mu", + "fr-NC": "nc", + "fr-NE": "ne", + "fr-PF": "pf", + "fr-PM": "pm", + "fr-RE": "re", + "fr-RW": "rw", + "fr-SC": "sc", + "fr-SN": "sn", + "fr-SY": "sy", + "fr-TD": "td", + "fr-TG": "tg", + "fr-TN": "tn", + "fr-VU": "vu", + "fr-WF": "wf", + "fr-YT": "yt", + "ga-IE": "ie", + "he-IL": "il", + "hi-IN": "in", + "hr-BA": "ba", + "hr-HR": "hr", + "hu-HU": "hu", + "hy-AM": "am", + "id-ID": "id", + "is-IS": "is", + "it-CH": "ch", + "it-IT": "it", + "it-SM": "sm", + "it-VA": "va", + "ja-JP": "jp", + "ka-GE": "ge", + "kk-KZ": "kz", + "km-KH": "kh", + "ko-KR": "kr", + "ky-KG": "kg", + "lb-LU": "lu", + "lo-LA": "la", + "lt-LT": "lt", + "lv-LV": "lv", + "mi-NZ": "nz", + "mk-MK": "mk", + "mn-MN": "mn", + "ms-BN": "bn", + "ms-MY": "my", + "ms-SG": "sg", + "mt-MT": "mt", + "nb-NO": "no", + "ne-NP": "np", + "nl-AW": "aw", + "nl-BE": "be", + "nl-CW": "cw", + "nl-NL": "nl", + "nl-SR": "sr", + "nl-SX": "sx", + "nn-NO": "no", + "pl-PL": "pl", + "pt-AO": "ao", + "pt-BR": "br", + "pt-CV": "cv", + "pt-GQ": "gq", + "pt-GW": "gw", + "pt-MO": "mo", + "pt-MZ": "mz", + "pt-PT": "pt", + "pt-ST": "st", + "qu-BO": "bo", + "qu-EC": "ec", + "qu-PE": "pe", + "ro-MD": "md", + "ro-RO": "ro", + "ru-BY": "by", + "ru-KG": "kg", + "ru-KZ": "kz", + "ru-RU": "ru", + "ru-UA": "ua", + "rw-RW": "rw", + "si-LK": "lk", + "sk-SK": "sk", + "sl-SI": "si", + "sq-AL": "al", + "sr-BA": "ba", + "sr-ME": "me", + "sr-RS": "rs", + "sv-FI": "fi", + "sv-SE": "se", + "sw-KE": "ke", + "sw-TZ": "tz", + "sw-UG": "ug", + "ta-LK": "lk", + "ta-SG": "sg", + "tg-TJ": "tj", + "th-TH": "th", + "ti-ER": "er", + "tk-TM": "tm", + "tr-CY": "cy", + "tr-TR": "tr", + "uk-UA": "ua", + "ur-PK": "pk", + "uz-UZ": "uz", + "vi-VN": "vn", + "wo-SN": "sn", + "yo-NG": "ng", + "zh-CN": "cn", + "zh-HK": "hk", + "zh-MO": "mo", + "zh-SG": "sg", + "zh-TW": "tw" + }, "supported_languages": [ "af", "am", @@ -231,8 +1011,398 @@ 
"all_locale": null, "custom": {}, "data_type": "supported_languages", - "languages": {}, - "regions": {}, + "languages": { + "af": "af", + "am": "am", + "ar": "ar", + "as": "as", + "az": "az", + "be": "be", + "bg": "bg", + "bn": "bn", + "bs": "bs", + "ca": "ca", + "chr": "chr", + "cs": "cs", + "cy": "cy", + "da": "da", + "de": "de", + "el": "el", + "en": "en", + "es": "es", + "et": "et", + "eu": "eu", + "fa": "fa", + "fi": "fi", + "fil": "fil", + "fr": "fr", + "ga": "ga", + "gd": "gd", + "gl": "gl", + "gu": "gu", + "ha": "ha", + "he": "he", + "hi": "hi", + "hr": "hr", + "hu": "hu", + "hy": "hy", + "id": "id", + "ig": "ig", + "is": "is", + "it": "it", + "ja": "ja", + "ka": "ka", + "kk": "kk", + "km": "km", + "kn": "kn", + "ko": "ko", + "kok": "kok", + "ku": "ku", + "ky": "ky", + "lb": "lb", + "lo": "lo", + "lt": "lt", + "lv": "lv", + "mi": "mi", + "mk": "mk", + "ml": "ml", + "mn": "MN", + "mr": "mr", + "ms": "ms", + "mt": "mt", + "nb": "nb", + "ne": "ne", + "nl": "nl", + "nn": "nn", + "or": "or", + "pa": "pa", + "pa_Arab": "pa-Arab", + "pl": "pl", + "pt": "pt", + "qu": "quz", + "ro": "ro", + "ru": "ru", + "rw": "rw", + "sd": "sd", + "si": "si", + "sk": "sk", + "sl": "sl", + "sq": "sq", + "sr": "sr", + "sr_Latn": "sr_latn", + "sv": "sv", + "sw": "sw", + "ta": "ta", + "te": "te", + "tg": "tg", + "th": "th", + "ti": "ti", + "tk": "tk", + "tr": "tr", + "tt": "tt", + "ug": "ug", + "uk": "uk", + "ur": "ur", + "uz": "uz", + "vi": "vi", + "wo": "wo", + "xh": "xh", + "yo": "yo", + "zh": "zh_Hans", + "zh_Hans": "zh_Hans", + "zh_Hant": "zh_Hant", + "zu": "zu" + }, + "regions": { + "am-ET": "et", + "ar-AE": "ae", + "ar-BH": "bh", + "ar-DJ": "dj", + "ar-DZ": "dz", + "ar-EG": "eg", + "ar-ER": "er", + "ar-IL": "il", + "ar-IQ": "iq", + "ar-JO": "jo", + "ar-KM": "km", + "ar-KW": "kw", + "ar-LB": "lb", + "ar-LY": "ly", + "ar-MA": "ma", + "ar-MR": "mr", + "ar-OM": "om", + "ar-PS": "ps", + "ar-QA": "qa", + "ar-SA": "sa", + "ar-SD": "sd", + "ar-SO": "so", + "ar-SY": "sy", + "ar-TD": 
"td", + "ar-TN": "tn", + "ar-YE": "ye", + "az-AZ": "az", + "be-BY": "by", + "bg-BG": "bg", + "bn-BD": "bd", + "bs-BA": "ba", + "ca-AD": "ad", + "cs-CZ": "cz", + "da-DK": "dk", + "de-AT": "at", + "de-BE": "be", + "de-CH": "ch", + "de-DE": "de", + "de-LI": "li", + "de-LU": "lu", + "el-CY": "cy", + "el-GR": "gr", + "en-AG": "ag", + "en-AI": "ai", + "en-AS": "as", + "en-AU": "au", + "en-BB": "bb", + "en-BI": "bi", + "en-BM": "bm", + "en-BS": "bs", + "en-BW": "bw", + "en-BZ": "bz", + "en-CA": "ca", + "en-CC": "cc", + "en-CK": "ck", + "en-CM": "cm", + "en-CX": "cx", + "en-DM": "dm", + "en-ER": "er", + "en-FJ": "fj", + "en-FK": "fk", + "en-FM": "fm", + "en-GB": "gb", + "en-GD": "gd", + "en-GG": "gg", + "en-GH": "gh", + "en-GI": "gi", + "en-GM": "gm", + "en-GU": "gu", + "en-GY": "gy", + "en-HK": "hk", + "en-IE": "ie", + "en-IN": "in", + "en-JE": "je", + "en-JM": "jm", + "en-KE": "ke", + "en-KI": "ki", + "en-KN": "kn", + "en-KY": "ky", + "en-LC": "lc", + "en-LR": "lr", + "en-LS": "ls", + "en-MG": "mg", + "en-MH": "mh", + "en-MP": "mp", + "en-MS": "ms", + "en-MT": "mt", + "en-MU": "mu", + "en-MW": "mw", + "en-NA": "na", + "en-NF": "nf", + "en-NG": "ng", + "en-NR": "nr", + "en-NU": "nu", + "en-NZ": "nz", + "en-PG": "pg", + "en-PH": "ph", + "en-PK": "pk", + "en-PN": "pn", + "en-PR": "pr", + "en-PW": "pw", + "en-RW": "rw", + "en-SB": "sb", + "en-SC": "sc", + "en-SD": "sd", + "en-SG": "sg", + "en-SH": "sh", + "en-SL": "sl", + "en-SS": "ss", + "en-SX": "sx", + "en-SZ": "sz", + "en-TC": "tc", + "en-TK": "tk", + "en-TO": "to", + "en-TT": "tt", + "en-TV": "tv", + "en-TZ": "tz", + "en-UG": "ug", + "en-US": "us", + "en-VC": "vc", + "en-VG": "vg", + "en-VI": "vi", + "en-VU": "vu", + "en-WS": "ws", + "en-ZA": "za", + "en-ZM": "zm", + "en-ZW": "zw", + "es-AR": "ar", + "es-BO": "bo", + "es-CL": "cl", + "es-CO": "co", + "es-CR": "cr", + "es-DO": "do", + "es-EC": "ec", + "es-ES": "es", + "es-GQ": "gq", + "es-GT": "gt", + "es-HN": "hn", + "es-MX": "mx", + "es-NI": "ni", + "es-PA": "pa", + 
"es-PE": "pe", + "es-PR": "pr", + "es-PY": "py", + "es-SV": "sv", + "es-UY": "uy", + "es-VE": "ve", + "et-EE": "ee", + "fa-AF": "af", + "fa-IR": "ir", + "fi-FI": "fi", + "fil-PH": "ph", + "fr-BE": "be", + "fr-BF": "bf", + "fr-BI": "bi", + "fr-BJ": "bj", + "fr-BL": "bl", + "fr-CA": "ca", + "fr-CD": "cd", + "fr-CF": "cf", + "fr-CG": "cg", + "fr-CH": "ch", + "fr-CI": "ci", + "fr-CM": "cm", + "fr-DJ": "dj", + "fr-DZ": "dz", + "fr-FR": "fr", + "fr-GA": "ga", + "fr-GF": "gf", + "fr-GN": "gn", + "fr-GP": "gp", + "fr-GQ": "gq", + "fr-HT": "ht", + "fr-KM": "km", + "fr-LU": "lu", + "fr-MA": "ma", + "fr-MC": "mc", + "fr-MF": "mf", + "fr-MG": "mg", + "fr-ML": "ml", + "fr-MQ": "mq", + "fr-MU": "mu", + "fr-NC": "nc", + "fr-NE": "ne", + "fr-PF": "pf", + "fr-PM": "pm", + "fr-RE": "re", + "fr-RW": "rw", + "fr-SC": "sc", + "fr-SN": "sn", + "fr-SY": "sy", + "fr-TD": "td", + "fr-TG": "tg", + "fr-TN": "tn", + "fr-VU": "vu", + "fr-WF": "wf", + "fr-YT": "yt", + "ga-IE": "ie", + "he-IL": "il", + "hi-IN": "in", + "hr-BA": "ba", + "hr-HR": "hr", + "hu-HU": "hu", + "hy-AM": "am", + "id-ID": "id", + "is-IS": "is", + "it-CH": "ch", + "it-IT": "it", + "it-SM": "sm", + "it-VA": "va", + "ja-JP": "jp", + "ka-GE": "ge", + "kk-KZ": "kz", + "km-KH": "kh", + "ko-KR": "kr", + "ky-KG": "kg", + "lb-LU": "lu", + "lo-LA": "la", + "lt-LT": "lt", + "lv-LV": "lv", + "mi-NZ": "nz", + "mk-MK": "mk", + "mn-MN": "mn", + "ms-BN": "bn", + "ms-MY": "my", + "ms-SG": "sg", + "mt-MT": "mt", + "nb-NO": "no", + "ne-NP": "np", + "nl-AW": "aw", + "nl-BE": "be", + "nl-CW": "cw", + "nl-NL": "nl", + "nl-SR": "sr", + "nl-SX": "sx", + "nn-NO": "no", + "pl-PL": "pl", + "pt-AO": "ao", + "pt-BR": "br", + "pt-CV": "cv", + "pt-GQ": "gq", + "pt-GW": "gw", + "pt-MO": "mo", + "pt-MZ": "mz", + "pt-PT": "pt", + "pt-ST": "st", + "qu-BO": "bo", + "qu-EC": "ec", + "qu-PE": "pe", + "ro-MD": "md", + "ro-RO": "ro", + "ru-BY": "by", + "ru-KG": "kg", + "ru-KZ": "kz", + "ru-RU": "ru", + "ru-UA": "ua", + "rw-RW": "rw", + "si-LK": "lk", + "sk-SK": 
"sk", + "sl-SI": "si", + "sq-AL": "al", + "sr-BA": "ba", + "sr-ME": "me", + "sr-RS": "rs", + "sv-FI": "fi", + "sv-SE": "se", + "sw-KE": "ke", + "sw-TZ": "tz", + "sw-UG": "ug", + "ta-LK": "lk", + "ta-SG": "sg", + "tg-TJ": "tj", + "th-TH": "th", + "ti-ER": "er", + "tk-TM": "tm", + "tr-CY": "cy", + "tr-TR": "tr", + "uk-UA": "ua", + "ur-PK": "pk", + "uz-UZ": "uz", + "vi-VN": "vn", + "wo-SN": "sn", + "yo-NG": "ng", + "zh-CN": "cn", + "zh-HK": "hk", + "zh-MO": "mo", + "zh-SG": "sg", + "zh-TW": "tw" + }, "supported_languages": [ "af", "am", @@ -345,8 +1515,398 @@ "all_locale": null, "custom": {}, "data_type": "supported_languages", - "languages": {}, - "regions": {}, + "languages": { + "af": "af", + "am": "am", + "ar": "ar", + "as": "as", + "az": "az", + "be": "be", + "bg": "bg", + "bn": "bn", + "bs": "bs", + "ca": "ca", + "chr": "chr", + "cs": "cs", + "cy": "cy", + "da": "da", + "de": "de", + "el": "el", + "en": "en", + "es": "es", + "et": "et", + "eu": "eu", + "fa": "fa", + "fi": "fi", + "fil": "fil", + "fr": "fr", + "ga": "ga", + "gd": "gd", + "gl": "gl", + "gu": "gu", + "ha": "ha", + "he": "he", + "hi": "hi", + "hr": "hr", + "hu": "hu", + "hy": "hy", + "id": "id", + "ig": "ig", + "is": "is", + "it": "it", + "ja": "ja", + "ka": "ka", + "kk": "kk", + "km": "km", + "kn": "kn", + "ko": "ko", + "kok": "kok", + "ku": "ku", + "ky": "ky", + "lb": "lb", + "lo": "lo", + "lt": "lt", + "lv": "lv", + "mi": "mi", + "mk": "mk", + "ml": "ml", + "mn": "MN", + "mr": "mr", + "ms": "ms", + "mt": "mt", + "nb": "nb", + "ne": "ne", + "nl": "nl", + "nn": "nn", + "or": "or", + "pa": "pa", + "pa_Arab": "pa-Arab", + "pl": "pl", + "pt": "pt", + "qu": "quz", + "ro": "ro", + "ru": "ru", + "rw": "rw", + "sd": "sd", + "si": "si", + "sk": "sk", + "sl": "sl", + "sq": "sq", + "sr": "sr", + "sr_Latn": "sr_latn", + "sv": "sv", + "sw": "sw", + "ta": "ta", + "te": "te", + "tg": "tg", + "th": "th", + "ti": "ti", + "tk": "tk", + "tr": "tr", + "tt": "tt", + "ug": "ug", + "uk": "uk", + "ur": "ur", + "uz": 
"uz", + "vi": "vi", + "wo": "wo", + "xh": "xh", + "yo": "yo", + "zh": "zh_Hans", + "zh_Hans": "zh_Hans", + "zh_Hant": "zh_Hant", + "zu": "zu" + }, + "regions": { + "am-ET": "et", + "ar-AE": "ae", + "ar-BH": "bh", + "ar-DJ": "dj", + "ar-DZ": "dz", + "ar-EG": "eg", + "ar-ER": "er", + "ar-IL": "il", + "ar-IQ": "iq", + "ar-JO": "jo", + "ar-KM": "km", + "ar-KW": "kw", + "ar-LB": "lb", + "ar-LY": "ly", + "ar-MA": "ma", + "ar-MR": "mr", + "ar-OM": "om", + "ar-PS": "ps", + "ar-QA": "qa", + "ar-SA": "sa", + "ar-SD": "sd", + "ar-SO": "so", + "ar-SY": "sy", + "ar-TD": "td", + "ar-TN": "tn", + "ar-YE": "ye", + "az-AZ": "az", + "be-BY": "by", + "bg-BG": "bg", + "bn-BD": "bd", + "bs-BA": "ba", + "ca-AD": "ad", + "cs-CZ": "cz", + "da-DK": "dk", + "de-AT": "at", + "de-BE": "be", + "de-CH": "ch", + "de-DE": "de", + "de-LI": "li", + "de-LU": "lu", + "el-CY": "cy", + "el-GR": "gr", + "en-AG": "ag", + "en-AI": "ai", + "en-AS": "as", + "en-AU": "au", + "en-BB": "bb", + "en-BI": "bi", + "en-BM": "bm", + "en-BS": "bs", + "en-BW": "bw", + "en-BZ": "bz", + "en-CA": "ca", + "en-CC": "cc", + "en-CK": "ck", + "en-CM": "cm", + "en-CX": "cx", + "en-DM": "dm", + "en-ER": "er", + "en-FJ": "fj", + "en-FK": "fk", + "en-FM": "fm", + "en-GB": "gb", + "en-GD": "gd", + "en-GG": "gg", + "en-GH": "gh", + "en-GI": "gi", + "en-GM": "gm", + "en-GU": "gu", + "en-GY": "gy", + "en-HK": "hk", + "en-IE": "ie", + "en-IN": "in", + "en-JE": "je", + "en-JM": "jm", + "en-KE": "ke", + "en-KI": "ki", + "en-KN": "kn", + "en-KY": "ky", + "en-LC": "lc", + "en-LR": "lr", + "en-LS": "ls", + "en-MG": "mg", + "en-MH": "mh", + "en-MP": "mp", + "en-MS": "ms", + "en-MT": "mt", + "en-MU": "mu", + "en-MW": "mw", + "en-NA": "na", + "en-NF": "nf", + "en-NG": "ng", + "en-NR": "nr", + "en-NU": "nu", + "en-NZ": "nz", + "en-PG": "pg", + "en-PH": "ph", + "en-PK": "pk", + "en-PN": "pn", + "en-PR": "pr", + "en-PW": "pw", + "en-RW": "rw", + "en-SB": "sb", + "en-SC": "sc", + "en-SD": "sd", + "en-SG": "sg", + "en-SH": "sh", + "en-SL": "sl", + 
"en-SS": "ss", + "en-SX": "sx", + "en-SZ": "sz", + "en-TC": "tc", + "en-TK": "tk", + "en-TO": "to", + "en-TT": "tt", + "en-TV": "tv", + "en-TZ": "tz", + "en-UG": "ug", + "en-US": "us", + "en-VC": "vc", + "en-VG": "vg", + "en-VI": "vi", + "en-VU": "vu", + "en-WS": "ws", + "en-ZA": "za", + "en-ZM": "zm", + "en-ZW": "zw", + "es-AR": "ar", + "es-BO": "bo", + "es-CL": "cl", + "es-CO": "co", + "es-CR": "cr", + "es-DO": "do", + "es-EC": "ec", + "es-ES": "es", + "es-GQ": "gq", + "es-GT": "gt", + "es-HN": "hn", + "es-MX": "mx", + "es-NI": "ni", + "es-PA": "pa", + "es-PE": "pe", + "es-PR": "pr", + "es-PY": "py", + "es-SV": "sv", + "es-UY": "uy", + "es-VE": "ve", + "et-EE": "ee", + "fa-AF": "af", + "fa-IR": "ir", + "fi-FI": "fi", + "fil-PH": "ph", + "fr-BE": "be", + "fr-BF": "bf", + "fr-BI": "bi", + "fr-BJ": "bj", + "fr-BL": "bl", + "fr-CA": "ca", + "fr-CD": "cd", + "fr-CF": "cf", + "fr-CG": "cg", + "fr-CH": "ch", + "fr-CI": "ci", + "fr-CM": "cm", + "fr-DJ": "dj", + "fr-DZ": "dz", + "fr-FR": "fr", + "fr-GA": "ga", + "fr-GF": "gf", + "fr-GN": "gn", + "fr-GP": "gp", + "fr-GQ": "gq", + "fr-HT": "ht", + "fr-KM": "km", + "fr-LU": "lu", + "fr-MA": "ma", + "fr-MC": "mc", + "fr-MF": "mf", + "fr-MG": "mg", + "fr-ML": "ml", + "fr-MQ": "mq", + "fr-MU": "mu", + "fr-NC": "nc", + "fr-NE": "ne", + "fr-PF": "pf", + "fr-PM": "pm", + "fr-RE": "re", + "fr-RW": "rw", + "fr-SC": "sc", + "fr-SN": "sn", + "fr-SY": "sy", + "fr-TD": "td", + "fr-TG": "tg", + "fr-TN": "tn", + "fr-VU": "vu", + "fr-WF": "wf", + "fr-YT": "yt", + "ga-IE": "ie", + "he-IL": "il", + "hi-IN": "in", + "hr-BA": "ba", + "hr-HR": "hr", + "hu-HU": "hu", + "hy-AM": "am", + "id-ID": "id", + "is-IS": "is", + "it-CH": "ch", + "it-IT": "it", + "it-SM": "sm", + "it-VA": "va", + "ja-JP": "jp", + "ka-GE": "ge", + "kk-KZ": "kz", + "km-KH": "kh", + "ko-KR": "kr", + "ky-KG": "kg", + "lb-LU": "lu", + "lo-LA": "la", + "lt-LT": "lt", + "lv-LV": "lv", + "mi-NZ": "nz", + "mk-MK": "mk", + "mn-MN": "mn", + "ms-BN": "bn", + "ms-MY": "my", + "ms-SG": 
"sg", + "mt-MT": "mt", + "nb-NO": "no", + "ne-NP": "np", + "nl-AW": "aw", + "nl-BE": "be", + "nl-CW": "cw", + "nl-NL": "nl", + "nl-SR": "sr", + "nl-SX": "sx", + "nn-NO": "no", + "pl-PL": "pl", + "pt-AO": "ao", + "pt-BR": "br", + "pt-CV": "cv", + "pt-GQ": "gq", + "pt-GW": "gw", + "pt-MO": "mo", + "pt-MZ": "mz", + "pt-PT": "pt", + "pt-ST": "st", + "qu-BO": "bo", + "qu-EC": "ec", + "qu-PE": "pe", + "ro-MD": "md", + "ro-RO": "ro", + "ru-BY": "by", + "ru-KG": "kg", + "ru-KZ": "kz", + "ru-RU": "ru", + "ru-UA": "ua", + "rw-RW": "rw", + "si-LK": "lk", + "sk-SK": "sk", + "sl-SI": "si", + "sq-AL": "al", + "sr-BA": "ba", + "sr-ME": "me", + "sr-RS": "rs", + "sv-FI": "fi", + "sv-SE": "se", + "sw-KE": "ke", + "sw-TZ": "tz", + "sw-UG": "ug", + "ta-LK": "lk", + "ta-SG": "sg", + "tg-TJ": "tj", + "th-TH": "th", + "ti-ER": "er", + "tk-TM": "tm", + "tr-CY": "cy", + "tr-TR": "tr", + "uk-UA": "ua", + "ur-PK": "pk", + "uz-UZ": "uz", + "vi-VN": "vn", + "wo-SN": "sn", + "yo-NG": "ng", + "zh-CN": "cn", + "zh-HK": "hk", + "zh-MO": "mo", + "zh-SG": "sg", + "zh-TW": "tw" + }, "supported_languages": [ "af", "am", diff --git a/searx/engines/bing.py b/searx/engines/bing.py index 783c0056a..81b051797 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -12,6 +12,10 @@ from lxml import html from searx.utils import eval_xpath, extract_text, eval_xpath_list, match_language, eval_xpath_getindex from searx.network import multi_requests, Request +from searx.enginelib.traits import EngineTraits + +traits: EngineTraits + about = { "website": 'https://www.bing.com', "wikidata_id": 'Q182496', @@ -181,3 +185,117 @@ def _fetch_supported_languages(resp): lang_tags.add(tag) return list(lang_tags)
+
+
+def fetch_traits(engine_traits: EngineTraits):
+    """Fetch languages and regions from bing.
+
+    Scrapes the settings page ``https://www.bing.com/account/general`` and
+    updates ``engine_traits`` in place:
+
+    - ``engine_traits.languages`` maps the tag built by ``language_tag`` to
+      the code bing uses in its "Display language" list.
+    - ``engine_traits.regions`` maps the tag built by ``region_tag`` to the
+      ``cc`` argument of the links in bing's region list.
+
+    If bing does not answer with an OK response, an error is printed and the
+    languages and regions of ``engine_traits`` are left untouched.
+
+    :param engine_traits: traits object that is updated in place.
+    """
+
+    # pylint: disable=import-outside-toplevel, disable=too-many-branches,
+    # pylint: disable=too-many-locals, too-many-statements
+
+    engine_traits.data_type = 'supported_languages'  # deprecated
+
+    import babel
+    import babel.languages
+    from searx import network
+    from searx.locales import get_offical_locales, language_tag, region_tag
+    from searx.utils import gen_useragent
+
+    headers = {
+        'User-Agent': gen_useragent(),
+        'Accept-Language': "en-US,en;q=0.5",  # bing needs to set the English language
+    }
+    resp = network.get('https://www.bing.com/account/general', headers=headers)
+
+    if not resp.ok:
+        print("ERROR: response from bing is not OK.")
+        # An error page has no language or region list; parsing it would only
+        # produce bogus traits.
+        return
+
+    dom = html.fromstring(resp.text)
+
+    # languages: items from the "Display language" list
+
+    # bing codes replaced by another code before they are passed to babel
+    lang_map = {
+        'prs': 'fa',  # Persian
+        'pt_BR': 'pt',  # Portuguese (Brasil)
+        'pt_PT': 'pt',  # Portuguese (Portugal)
+        'ca-ES-VALENCIA': 'ca',  # Catalan (Spain, Valencian)
+    }
+
+    # bing codes that are skipped
+    unknown_langs = {
+        'quc',  # K'iche'
+        'nso',  # Sesotho sa Leboa
+        'tn',  # Setswana
+    }
+
+    for div in eval_xpath(dom, '//div[@id="limit-languages"]//input/..'):
+
+        eng_lang = eval_xpath(div, './/input/@value')[0]
+        if eng_lang in unknown_langs:
+            continue
+
+        eng_lang = lang_map.get(eng_lang, eng_lang)
+        label = extract_text(eval_xpath(div, './/label'))
+
+        # The 'language:xx' query string in the request function (above) does
+        # only support the language codes from the "Display languages" list.
+        # Examples of items from the "Display languages" not supported in the
+        # query string: zh_Hans --> zh / sr_latn --> sr
+        #
+        # eng_lang = eng_lang.split('_')[0]
+
+        try:
+            sxng_tag = language_tag(babel.Locale.parse(eng_lang.replace('-', '_'), sep='_'))
+        except babel.UnknownLocaleError:
+            print("ERROR: %s (%s) is unknown by babel" % (label, eng_lang))
+            continue
+
+        conflict = engine_traits.languages.get(sxng_tag)
+        if conflict:
+            # the first code stored for a tag wins; report only real differences
+            if conflict != eng_lang:
+                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_lang))
+            continue
+        engine_traits.languages[sxng_tag] = eng_lang
+
+    # 'zh' always maps to 'zh_Hans', overriding whatever the loop stored
+    engine_traits.languages['zh'] = 'zh_Hans'
+
+    # regions
+
+    for a in eval_xpath(dom, '//div[@id="region-section-content"]//li/a'):
+        href = eval_xpath(a, './/@href')[0]
+        query = parse_qs(urlparse(href).query, keep_blank_values=True)
+        # Links without a 'cc' argument (or with an empty one) carry no
+        # country code.
+        cc = query.get('cc', [''])[0]  # pylint:disable=invalid-name
+        if not cc or cc == 'clear':
+            continue
+
+        # Assert babel supports these locales
+        sxng_locales = get_offical_locales(cc.upper(), engine_traits.languages.keys())
+
+        if not sxng_locales:
+            # no locale was found for bing's country code: nothing to store
+            continue
+
+        for sxng_locale in sxng_locales:
+            engine_traits.regions[region_tag(sxng_locale)] = cc
diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index 107ce3cff..19f3ad01a 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -13,6 +13,7 @@ from searx.utils import match_language from searx.engines.bing import language_aliases from searx.engines.bing import ( # pylint: disable=unused-import _fetch_supported_languages, + fetch_traits, supported_languages_url, ) diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py index 7eea17bb4..8a5f70259 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -17,6 +17,7 @@ from searx.utils import match_language, eval_xpath_getindex from searx.engines.bing import ( # pylint: disable=unused-import language_aliases, _fetch_supported_languages, + fetch_traits, supported_languages_url, ) diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py index 85071de21..4946eef64 100644 --- a/searx/engines/bing_videos.py +++ b/searx/engines/bing_videos.py @@ -14,6 +14,7 @@ from searx.engines.bing import language_aliases from searx.engines.bing import ( # pylint: disable=unused-import _fetch_supported_languages, + fetch_traits, supported_languages_url, )