#!/usr/bin/env python3

# Script by Ben Limmer
# https://github.com/l1m5
#
# This Python script will combine all the host files you provide
# as sources into one, unique host file to keep your internet browsing happy.

import argparse
import fnmatch
import ipaddress
import json
import locale
import os
import platform
from pathlib import Path
import re
import shutil
import socket
import subprocess
import sys
import tempfile
import time
from glob import glob
from typing import Optional, Tuple

# Detecting Python 3 for version-dependent implementations
PY3 = sys.version_info >= (3, 0)

if not PY3:
    raise Exception("We do not support Python 2 anymore.")

try:
    import requests
except ImportError:
    raise ImportError(
        "This project's dependencies have changed. The Requests library ("
        "https://docs.python-requests.org/en/latest/) is now required."
    )

# Syntactic sugar for "sudo" command in UNIX / Linux
if platform.system() == "OpenBSD":
    SUDO = ["/usr/bin/doas"]
elif platform.system() == "Windows":
    SUDO = ["powershell", "Start-Process", "powershell", "-Verb", "runAs"]
else:
    SUDO = ["/usr/bin/env", "sudo"]

# Project Settings
BASEDIR_PATH = os.path.dirname(os.path.realpath(__file__))


def get_defaults():
    """
    Helper method for getting the default settings.

    Returns
    -------
    default_settings : dict
        A dictionary of the default settings when updating host information.
    """

    return {
        "numberofrules": 0,
        "datapath": path_join_robust(BASEDIR_PATH, "data"),
        "freshen": True,
        "replace": False,
        "backup": False,
        "skipstatichosts": False,
        "keepdomaincomments": True,
        "extensionspath": path_join_robust(BASEDIR_PATH, "extensions"),
        "extensions": [],
        "nounifiedhosts": False,
        "compress": False,
        "minimise": False,
        "outputsubfolder": "",
        "hostfilename": "hosts",
        "targetip": "0.0.0.0",
        "sourcedatafilename": "update.json",
        "sourcesdata": [],
        "readmefilename": "readme.md",
        "readmetemplate": path_join_robust(BASEDIR_PATH, "readme_template.md"),
        "readmedata": {},
        "readmedatafilename": path_join_robust(BASEDIR_PATH, "readmeData.json"),
        "exclusionpattern": r"([a-zA-Z\d-]+\.){0,}",
        "exclusionregexes": [],
        "exclusions": [],
        "commonexclusions": ["hulu.com"],
        "blacklistfile": path_join_robust(BASEDIR_PATH, "blacklist"),
        "whitelistfile": path_join_robust(BASEDIR_PATH, "whitelist"),
    }
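
# main() below overrides these defaults with the parsed CLI options, roughly:
#
#     settings = get_defaults()
#     settings.update(vars(parser.parse_args()))
#
# so any key exposed as a command-line flag (e.g. "targetip", "compress")
# takes the user-supplied value and everything else keeps its default.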


# End Project Settings


def main():
    parser = argparse.ArgumentParser(
        description="Creates a unified hosts "
        "file from hosts stored in the data subfolders."
    )
    parser.add_argument(
        "--auto",
        "-a",
        dest="auto",
        default=False,
        action="store_true",
        help="Run without prompting.",
    )
    parser.add_argument(
        "--backup",
        "-b",
        dest="backup",
        default=False,
        action="store_true",
        help="Back up the hosts files before they are overridden.",
    )
    parser.add_argument(
        "--extensions",
        "-e",
        dest="extensions",
        default=[],
        nargs="*",
        help="Host extensions to include in the final hosts file.",
    )
    parser.add_argument(
        "--nounifiedhosts",
        dest="nounifiedhosts",
        default=False,
        action="store_true",
        help="Do not include the unified hosts file in the final hosts file. Usually used together with `--extensions`.",
    )
    parser.add_argument(
        "--ip",
        "-i",
        dest="targetip",
        default="0.0.0.0",
        help="Target IP address. Default is 0.0.0.0.",
    )
    parser.add_argument(
        "--keepdomaincomments",
        "-k",
        dest="keepdomaincomments",
        action="store_false",
        default=True,
        help="Do not keep domain line comments.",
    )
    parser.add_argument(
        "--noupdate",
        "-n",
        dest="noupdate",
        default=False,
        action="store_true",
        help="Don't update from host data sources.",
    )
    parser.add_argument(
        "--skipstatichosts",
        "-s",
        dest="skipstatichosts",
        default=False,
        action="store_true",
        help="Skip static localhost entries in the final hosts file.",
    )
    parser.add_argument(
        "--nogendata",
        "-g",
        dest="nogendata",
        default=False,
        action="store_true",
        help="Skip generation of readmeData.json.",
    )
    parser.add_argument(
        "--output",
        "-o",
        dest="outputsubfolder",
        default="",
        help="Output subfolder for generated hosts file.",
    )
    parser.add_argument(
        "--replace",
        "-r",
        dest="replace",
        default=False,
        action="store_true",
        help="Replace your active hosts file with this new hosts file.",
    )
    parser.add_argument(
        "--flush-dns-cache",
        "-f",
        dest="flushdnscache",
        default=False,
        action="store_true",
        help="Attempt to flush DNS cache after replacing the hosts file.",
    )
    parser.add_argument(
        "--compress",
        "-c",
        dest="compress",
        default=False,
        action="store_true",
        help="Compress the hosts file, ignoring unnecessary lines "
        "(empty lines and comments) and putting multiple domains on "
        "each line. This improves performance under Windows.",
    )
    parser.add_argument(
        "--minimise",
        "-m",
        dest="minimise",
        default=False,
        action="store_true",
        help="Minimise the hosts file, ignoring unnecessary lines "
        "(empty lines and comments).",
    )
    parser.add_argument(
        "--whitelist",
        "-w",
        dest="whitelistfile",
        default=path_join_robust(BASEDIR_PATH, "whitelist"),
        help="Whitelist file to use while generating hosts files.",
    )
    parser.add_argument(
        "--blacklist",
        "-x",
        dest="blacklistfile",
        default=path_join_robust(BASEDIR_PATH, "blacklist"),
        help="Blacklist file to use while generating hosts files.",
    )

    global settings

    options = vars(parser.parse_args())

    options["outputpath"] = path_join_robust(BASEDIR_PATH, options["outputsubfolder"])
    options["freshen"] = not options["noupdate"]

    settings = get_defaults()
    settings.update(options)

    data_path = settings["datapath"]
    extensions_path = settings["extensionspath"]

    settings["sources"] = list_dir_no_hidden(data_path)
    settings["extensionsources"] = list_dir_no_hidden(extensions_path)

    # All our extensions folders...
    settings["extensions"] = [
        os.path.basename(item) for item in list_dir_no_hidden(extensions_path)
    ]
    # ... intersected with the extensions passed-in as arguments, then sorted.
    settings["extensions"] = sorted(
        list(set(options["extensions"]).intersection(settings["extensions"]))
    )

    auto = settings["auto"]
    exclusion_regexes = settings["exclusionregexes"]
    source_data_filename = settings["sourcedatafilename"]
    no_unified_hosts = settings["nounifiedhosts"]

    update_sources = prompt_for_update(freshen=settings["freshen"], update_auto=auto)
    if update_sources:
        update_all_sources(source_data_filename, settings["hostfilename"])

    gather_exclusions = prompt_for_exclusions(skip_prompt=auto)

    if gather_exclusions:
        common_exclusions = settings["commonexclusions"]
        exclusion_pattern = settings["exclusionpattern"]
        exclusion_regexes = display_exclusion_options(
            common_exclusions=common_exclusions,
            exclusion_pattern=exclusion_pattern,
            exclusion_regexes=exclusion_regexes,
        )

    extensions = settings["extensions"]
    sources_data = update_sources_data(
        settings["sourcesdata"],
        datapath=data_path,
        extensions=extensions,
        extensionspath=extensions_path,
        sourcedatafilename=source_data_filename,
        nounifiedhosts=no_unified_hosts,
    )

    merge_file = create_initial_file(
        nounifiedhosts=no_unified_hosts,
    )
    remove_old_hosts_file(settings["outputpath"], "hosts", settings["backup"])
    if settings["compress"]:
        final_file = open(path_join_robust(settings["outputpath"], "hosts"), "w+b")
        compressed_file = tempfile.NamedTemporaryFile()
        remove_dups_and_excl(merge_file, exclusion_regexes, compressed_file)
        compress_file(compressed_file, settings["targetip"], final_file)
    elif settings["minimise"]:
        final_file = open(path_join_robust(settings["outputpath"], "hosts"), "w+b")
        minimised_file = tempfile.NamedTemporaryFile()
        remove_dups_and_excl(merge_file, exclusion_regexes, minimised_file)
        minimise_file(minimised_file, settings["targetip"], final_file)
    else:
        final_file = remove_dups_and_excl(merge_file, exclusion_regexes)

    number_of_rules = settings["numberofrules"]
    output_subfolder = settings["outputsubfolder"]
    skip_static_hosts = settings["skipstatichosts"]

    write_opening_header(
        final_file,
        extensions=extensions,
        numberofrules=number_of_rules,
        outputsubfolder=output_subfolder,
        skipstatichosts=skip_static_hosts,
        nounifiedhosts=no_unified_hosts,
    )
    final_file.close()

    if not settings["nogendata"]:
        update_readme_data(
            settings["readmedatafilename"],
            extensions=extensions,
            numberofrules=number_of_rules,
            outputsubfolder=output_subfolder,
            sourcesdata=sources_data,
            nounifiedhosts=no_unified_hosts,
        )

    print_success(
        "Success! The hosts file has been saved in folder "
        + output_subfolder
        + "\nIt contains "
        + "{:,}".format(number_of_rules)
        + " unique entries."
    )

    move_file = prompt_for_move(
        final_file,
        auto=auto,
        replace=settings["replace"],
        skipstatichosts=skip_static_hosts,
    )

    # We only flush the DNS cache if we have
    # moved a new hosts file into place.
    if move_file:
        prompt_for_flush_dns_cache(
            flush_cache=settings["flushdnscache"], prompt_flush=not auto
        )


# Prompt the User
def prompt_for_update(freshen, update_auto):
    """
    Prompt the user to update all hosts files.

    If requested, the function will update all data sources after it
    checks that a hosts file does indeed exist.

    Parameters
    ----------
    freshen : bool
        Whether data sources should be updated. This function will return
        if it is requested that data sources not be updated.
    update_auto : bool
        Whether or not to automatically update all data sources.

    Returns
    -------
    update_sources : bool
        Whether or not we should update data sources for exclusion files.
    """

    # Create a hosts file if it doesn't exist.
    hosts_file = path_join_robust(BASEDIR_PATH, "hosts")

    if not os.path.isfile(hosts_file):
        try:
            open(hosts_file, "w+").close()
        except (IOError, OSError):
            # Starting in Python 3.3, IOError is aliased to
            # OSError. However, we have to catch both for
            # Python 2.x failures.
            print_failure(
                "ERROR: No 'hosts' file in the folder. Try creating one manually."
            )

    if not freshen:
        return False

    prompt = "Do you want to update all data sources?"

    if update_auto or query_yes_no(prompt):
        return True
    elif not update_auto:
        print("OK, we'll stick with what we've got locally.")

    return False


def prompt_for_exclusions(skip_prompt):
    """
    Prompt the user to exclude any custom domains from being blocked.

    Parameters
    ----------
    skip_prompt : bool
        Whether or not to skip prompting for custom domains to be excluded.
        If true, the function returns immediately.

    Returns
    -------
    gather_exclusions : bool
        Whether or not we should proceed to prompt the user to exclude any
        custom domains beyond those in the whitelist.
    """

    prompt = (
        "Do you want to exclude any domains?\n"
        "For example, hulu.com video streaming must be able to access "
        "its tracking and ad servers in order to play video."
    )

    if not skip_prompt:
        if query_yes_no(prompt):
            return True
        else:
            print("OK, we'll only exclude domains in the whitelist.")

    return False


def prompt_for_flush_dns_cache(flush_cache, prompt_flush):
    """
    Prompt the user to flush the DNS cache.

    Parameters
    ----------
    flush_cache : bool
        Whether to flush the DNS cache without prompting.
    prompt_flush : bool
        If `flush_cache` is False, whether we should prompt for flushing the
        cache. Otherwise, the function returns immediately.
    """

    if flush_cache:
        flush_dns_cache()
    elif prompt_flush:
        if query_yes_no("Attempt to flush the DNS cache?"):
            flush_dns_cache()


def prompt_for_move(final_file, **move_params):
    """
    Prompt the user to move the newly created hosts file to its designated
    location in the OS.

    Parameters
    ----------
    final_file : file
        The file object that contains the newly created hosts data.
    move_params : kwargs
        Dictionary providing additional parameters for moving the hosts file
        into place. Currently, those fields are:

        1) auto
        2) replace
        3) skipstatichosts

    Returns
    -------
    move_file : bool
        Whether or not the final hosts file was moved.
    """

    skip_static_hosts = move_params["skipstatichosts"]

    if move_params["replace"] and not skip_static_hosts:
        move_file = True
    elif move_params["auto"] or skip_static_hosts:
        move_file = False
    else:
        prompt = "Do you want to replace your existing hosts file with the newly generated file?"
        move_file = query_yes_no(prompt)

    if move_file:
        move_file = move_hosts_file_into_place(final_file)

    return move_file


# End Prompt the User


def sort_sources(sources):
    """
    Sort the sources.

    The idea is that all of Steven Black's lists, files, or entries
    go on top and the rest are sorted alphabetically.

    Parameters
    ----------
    sources : list
        The sources to sort.
    """

    result = sorted(
        sources.copy(),
        key=lambda x: x.lower().replace("-", "").replace("_", "").replace(" ", ""),
    )

    # Steven Black's repositories/files/lists should be on top!
    steven_black_positions = [
        x for x, y in enumerate(result) if "stevenblack" in y.lower()
    ]

    for index in steven_black_positions:
        result.insert(0, result.pop(index))

    return result
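
# A quick illustration of the ordering sort_sources produces. The paths below
# are hypothetical examples, not a statement about this repository's data folders:
#
#     sort_sources(
#         [
#             "data/URLHaus/update.json",
#             "data/adaway.org/update.json",
#             "data/StevenBlack/update.json",
#         ]
#     )
#     # -> ['data/StevenBlack/update.json',
#     #     'data/adaway.org/update.json',
#     #     'data/URLHaus/update.json']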


# Exclusion logic
def display_exclusion_options(common_exclusions, exclusion_pattern, exclusion_regexes):
    """
    Display the exclusion options to the user.

    This function checks whether a user wants to exclude particular domains,
    and if so, excludes them.

    Parameters
    ----------
    common_exclusions : list
        A list of common domains that are excluded from being blocked. One
        example is Hulu. This setting is set directly in the script and cannot
        be overwritten by the user.
    exclusion_pattern : str
        The exclusion pattern with which to create the domain regex.
    exclusion_regexes : list
        The list of regex patterns used to exclude domains.

    Returns
    -------
    aug_exclusion_regexes : list
        The original list of regex patterns potentially with additional
        patterns from domains that the user chooses to exclude.
    """

    for exclusion_option in common_exclusions:
        prompt = "Do you want to exclude the domain " + exclusion_option + " ?"

        if query_yes_no(prompt):
            exclusion_regexes = exclude_domain(
                exclusion_option, exclusion_pattern, exclusion_regexes
            )
        else:
            continue

    if query_yes_no("Do you want to exclude any other domains?"):
        exclusion_regexes = gather_custom_exclusions(
            exclusion_pattern, exclusion_regexes
        )

    return exclusion_regexes


def gather_custom_exclusions(exclusion_pattern, exclusion_regexes):
    """
    Gather custom exclusions from the user.

    Parameters
    ----------
    exclusion_pattern : str
        The exclusion pattern with which to create the domain regex.
    exclusion_regexes : list
        The list of regex patterns used to exclude domains.

    Returns
    -------
    aug_exclusion_regexes : list
        The original list of regex patterns potentially with additional
        patterns from domains that the user chooses to exclude.
    """

    # We continue running this while-loop until the user
    # says that they have no more domains to exclude.
    while True:
        domain_prompt = "Enter the domain you want to exclude (e.g. facebook.com): "
        user_domain = input(domain_prompt)

        if is_valid_user_provided_domain_format(user_domain):
            exclusion_regexes = exclude_domain(
                user_domain, exclusion_pattern, exclusion_regexes
            )

        continue_prompt = "Do you have more domains you want to enter?"
        if not query_yes_no(continue_prompt):
            break

    return exclusion_regexes


def exclude_domain(domain, exclusion_pattern, exclusion_regexes):
    """
    Exclude a domain from being blocked.

    This creates the domain regex by which to exclude this domain and appends
    it to the list of already-existing exclusion regexes.

    Parameters
    ----------
    domain : str
        The filename or regex pattern to exclude.
    exclusion_pattern : str
        The exclusion pattern with which to create the domain regex.
    exclusion_regexes : list
        The list of regex patterns used to exclude domains.

    Returns
    -------
    aug_exclusion_regexes : list
        The original list of regex patterns with one additional pattern from
        the `domain` input.
    """

    exclusion_regex = re.compile(exclusion_pattern + domain)
    exclusion_regexes.append(exclusion_regex)

    return exclusion_regexes


def matches_exclusions(stripped_rule, exclusion_regexes):
    """
    Check whether a rule matches an exclusion rule we already provided.

    If this function returns True, that means this rule should be excluded
    from the final hosts file.

    Parameters
    ----------
    stripped_rule : str
        The rule that we are checking.
    exclusion_regexes : list
        The list of regex patterns used to exclude domains.

    Returns
    -------
    matches_exclusion : bool
        Whether or not the rule string matches a provided exclusion.
    """

    try:
        stripped_domain = stripped_rule.split()[1]
    except IndexError:
        # Example: 'example.org' instead of '0.0.0.0 example.org'
        stripped_domain = stripped_rule

    for exclusionRegex in exclusion_regexes:
        if exclusionRegex.search(stripped_domain):
            return True

    return False


# End Exclusion Logic
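
# A small sketch of how the exclusion pieces fit together, using the default
# "exclusionpattern" from get_defaults(); the domains are hypothetical examples:
#
#     regexes = exclude_domain("hulu.com", r"([a-zA-Z\d-]+\.){0,}", [])
#     matches_exclusions("0.0.0.0 ads.hulu.com", regexes)   # -> True
#     matches_exclusions("0.0.0.0 example.org", regexes)    # -> False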


# Update Logic
def update_sources_data(sources_data, **sources_params):
    """
    Update the sources data and information for each source.

    Parameters
    ----------
    sources_data : list
        The list of sources data that we are to update.
    sources_params : kwargs
        Dictionary providing additional parameters for updating the
        sources data. Currently, those fields are:

        1) datapath
        2) extensions
        3) extensionspath
        4) sourcedatafilename
        5) nounifiedhosts

    Returns
    -------
    update_sources_data : list
        The original source data list with new source data appended.
    """

    source_data_filename = sources_params["sourcedatafilename"]

    if not sources_params["nounifiedhosts"]:
        for source in sort_sources(
            recursive_glob(sources_params["datapath"], source_data_filename)
        ):
            update_file = open(source, "r", encoding="UTF-8")
            try:
                update_data = json.load(update_file)
                sources_data.append(update_data)
            finally:
                update_file.close()

    for source in sources_params["extensions"]:
        source_dir = path_join_robust(sources_params["extensionspath"], source)
        for update_file_path in sort_sources(
            recursive_glob(source_dir, source_data_filename)
        ):
            update_file = open(update_file_path, "r")
            try:
                update_data = json.load(update_file)
                sources_data.append(update_data)
            finally:
                update_file.close()

    return sources_data


def jsonarray(json_array_string):
    """
    Transformer: convert a JSON array string of hosts into one host per
    line, prefixing each line with "127.0.0.1 ".

    Parameters
    ----------
    json_array_string : str
        The JSON array string in the form
        '["example1.com", "example2.com", ...]'
    """

    temp_list = json.loads(json_array_string)
    hostlines = "127.0.0.1 " + "\n127.0.0.1 ".join(temp_list)
    return hostlines
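
# What the "jsonarray" transform does to a downloaded source, sketched with a
# hypothetical two-entry payload:
#
#     jsonarray('["tracker.example.com", "ads.example.net"]')
#     # -> '127.0.0.1 tracker.example.com\n127.0.0.1 ads.example.net'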


def update_all_sources(source_data_filename, host_filename):
    """
    Update all host files, regardless of folder depth.

    Parameters
    ----------
    source_data_filename : str
        The name of the filename where information regarding updating
        sources for a particular URL is stored. This filename is assumed
        to be the same for all sources.
    host_filename : str
        The name of the file in which the updated source information
        is stored for a particular URL. This filename is assumed to be
        the same for all sources.
    """

    # The transforms we support
    transform_methods = {"jsonarray": jsonarray}

    all_sources = sort_sources(recursive_glob("*", source_data_filename))

    for source in all_sources:
        update_file = open(source, "r", encoding="UTF-8")
        update_data = json.load(update_file)
        update_file.close()

        # we can pause updating any given hosts source.
        # if the update.json "pause" key is missing, don't pause.
        if update_data.get("pause", False):
            continue

        update_url = update_data["url"]
        update_transforms = []
        if update_data.get("transforms"):
            update_transforms = update_data["transforms"]

        print("Updating source " + os.path.dirname(source) + " from " + update_url)

        try:
            updated_file = get_file_by_url(update_url)

            # spin the transforms as required
            for transform in update_transforms:
                updated_file = transform_methods[transform](updated_file)

            # get rid of carriage-return symbols
            updated_file = updated_file.replace("\r", "")

            hosts_file = open(
                path_join_robust(BASEDIR_PATH, os.path.dirname(source), host_filename),
                "wb",
            )
            write_data(hosts_file, updated_file)
            hosts_file.close()
        except Exception:
            print("Error in updating source: ", update_url)
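
# For reference, a minimal sketch of what a source's update.json is expected to
# provide, based only on the keys read above; the URL is a hypothetical example:
#
#     {
#         "url": "https://example.com/hosts.txt",
#         "pause": false
#     }
#
# with an optional "transforms": ["jsonarray"] entry when the URL returns a
# JSON array of domains instead of a plain hosts list.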


# End Update Logic


# File Logic
def create_initial_file(**initial_file_params):
    """
    Initialize the file in which we merge all host files for later pruning.

    Parameters
    ----------
    initial_file_params : kwargs
        Dictionary providing additional parameters for populating the initial
        file information. Currently, those fields are:

        1) nounifiedhosts
    """

    merge_file = tempfile.NamedTemporaryFile()

    if not initial_file_params["nounifiedhosts"]:
        # spin the sources for the base file
        for source in sort_sources(
            recursive_glob(settings["datapath"], settings["hostfilename"])
        ):
            start = "# Start {}\n\n".format(os.path.basename(os.path.dirname(source)))
            end = "\n# End {}\n\n".format(os.path.basename(os.path.dirname(source)))

            with open(source, "r", encoding="UTF-8") as curFile:
                write_data(merge_file, start + curFile.read() + end)

    # spin the sources for extensions to the base file
    for source in settings["extensions"]:
        for filename in sort_sources(
            recursive_glob(
                path_join_robust(settings["extensionspath"], source),
                settings["hostfilename"],
            )
        ):
            with open(filename, "r") as curFile:
                write_data(merge_file, curFile.read())

    maybe_copy_example_file(settings["blacklistfile"])

    if os.path.isfile(settings["blacklistfile"]):
        with open(settings["blacklistfile"], "r") as curFile:
            write_data(merge_file, curFile.read())

    return merge_file


def compress_file(input_file, target_ip, output_file):
    """
    Reduce the file size by removing unnecessary lines (empty lines and
    comments) and putting multiple domains on each line.

    Reducing the number of lines makes parsing under Microsoft Windows
    much faster.

    Parameters
    ----------
    input_file : file
        The file object that contains the hostnames that we are reducing.
    target_ip : str
        The target IP address.
    output_file : file
        The file object that will contain the reduced hostnames.
    """

    input_file.seek(0)  # reset file pointer
    write_data(output_file, "\n")

    target_ip_len = len(target_ip)
    lines = [target_ip]
    lines_index = 0
    for line in input_file.readlines():
        line = line.decode("UTF-8")

        if line.startswith(target_ip):
            if lines[lines_index].count(" ") < 9:
                lines[lines_index] += (
                    " " + line[target_ip_len : line.find("#")].strip()  # noqa: E203
                )
            else:
                lines[lines_index] += "\n"
                lines.append(line[: line.find("#")].strip())
                lines_index += 1

    for line in lines:
        write_data(output_file, line)

    input_file.close()
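
# A sketch of the effect, with hypothetical domains: given rules
# "0.0.0.0 a.example", "0.0.0.0 b.example", ..., compress_file packs up to nine
# domains after the target IP on a single line ("0.0.0.0 a.example b.example ..."),
# while minimise_file below keeps one domain per line and only drops blank lines
# and comments.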


def minimise_file(input_file, target_ip, output_file):
    """
    Reduce the file size by removing unnecessary lines (empty lines and
    comments).

    Parameters
    ----------
    input_file : file
        The file object that contains the hostnames that we are reducing.
    target_ip : str
        The target IP address.
    output_file : file
        The file object that will contain the reduced hostnames.
    """

    input_file.seek(0)  # reset file pointer
    write_data(output_file, "\n")

    lines = []
    for line in input_file.readlines():
        line = line.decode("UTF-8")

        if line.startswith(target_ip):
            lines.append(line[: line.find("#")].strip() + "\n")

    for line in lines:
        write_data(output_file, line)

    input_file.close()


def remove_dups_and_excl(merge_file, exclusion_regexes, output_file=None):
    """
    Remove duplicates and remove hosts that we are excluding.

    We check for duplicate hostnames as well as remove any hostnames that
    have been explicitly excluded by the user.

    Parameters
    ----------
    merge_file : file
        The file object that contains the hostnames that we are pruning.
    exclusion_regexes : list
        The list of regex patterns used to exclude domains.
    output_file : file
        The file object in which the result is written. If None, a new
        "hosts" file is created under settings["outputpath"].
    """

    number_of_rules = settings["numberofrules"]
    maybe_copy_example_file(settings["whitelistfile"])

    if os.path.isfile(settings["whitelistfile"]):
        with open(settings["whitelistfile"], "r") as ins:
            for line in ins:
                line = line.strip(" \t\n\r")
                if line and not line.startswith("#"):
                    settings["exclusions"].append(line)

    if not os.path.exists(settings["outputpath"]):
        os.makedirs(settings["outputpath"])

    if output_file is None:
        final_file = open(path_join_robust(settings["outputpath"], "hosts"), "w+b")
    else:
        final_file = output_file

    merge_file.seek(0)  # reset file pointer
    hostnames = {"localhost", "localhost.localdomain", "local", "broadcasthost"}
    exclusions = settings["exclusions"]

    for line in merge_file.readlines():
        write_line = True

        # Explicit encoding
        line = line.decode("UTF-8")

        # Replace runs of tabs with a single space.
        line = re.sub(r"\t+", " ", line)

        # see gh-271: trim trailing whitespace, periods
        line = line.rstrip(" .")

        # Testing the first character doesn't require startswith
        if line[0] == "#" or re.match(r"^\s*$", line[0]):
            write_data(final_file, line)
            continue
        if "::1" in line:
            continue

        stripped_rule = strip_rule(line)  # strip comments
        if not stripped_rule or matches_exclusions(stripped_rule, exclusion_regexes):
            continue

        # Issue #1628
        if "@" in stripped_rule:
            continue

        # Normalize rule
        hostname, normalized_rule = normalize_rule(
            stripped_rule,
            target_ip=settings["targetip"],
            keep_domain_comments=settings["keepdomaincomments"],
        )

        for exclude in exclusions:
            if re.search(r"(^|[\s\.])" + re.escape(exclude) + r"\s", line):
                write_line = False
                break

        if normalized_rule and (hostname not in hostnames) and write_line:
            write_data(final_file, normalized_rule)
            hostnames.add(hostname)
            number_of_rules += 1

    settings["numberofrules"] = number_of_rules
    merge_file.close()

    if output_file is None:
        return final_file
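
# A small sketch of the pruning behaviour above, using hypothetical merge-file
# lines:
#
#     # some comment            -> copied through unchanged
#     0.0.0.0 ads.example.com   -> written once (hostname remembered)
#     0.0.0.0 ads.example.com   -> duplicate hostname, skipped
#     0.0.0.0 hulu.com          -> skipped when hulu.com matches an exclusion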


def normalize_rule(rule, target_ip, keep_domain_comments):
    """
    Standardize and format the rule string provided.

    Parameters
    ----------
    rule : str
        The rule whose spelling and spacing we are standardizing.
    target_ip : str
        The target IP address for the rule.
    keep_domain_comments : bool
        Whether or not to keep comments regarding these domains in
        the normalized rule.

    Returns
    -------
    normalized_rule : tuple
        A tuple of the hostname and the rule string with spelling
        and spacing reformatted.
    """

    def normalize_response(
        extracted_hostname: str, extracted_suffix: Optional[str]
    ) -> Tuple[str, str]:
        """
        Normalize the response from the extracted hostname and suffix,
        if they exist.

        Parameters
        ----------
        extracted_hostname : str
            The extracted hostname to work with.
        extracted_suffix : str
            The extracted suffix to work with.

        Returns
        -------
        normalized_response : tuple
            A tuple of the hostname and the rule string with spelling
            and spacing reformatted.
        """

        rule = "%s %s" % (target_ip, extracted_hostname)

        if keep_domain_comments and extracted_suffix:
            if not extracted_suffix.strip().startswith("#"):
                # Strings are stripped, therefore we need to add the space back.
                rule += " # %s" % extracted_suffix
            else:
                rule += " %s" % extracted_suffix

        return extracted_hostname, rule + "\n"

    def is_ip(dataset: str) -> bool:
        """
        Check whether the given dataset is an IP address.

        Parameters
        ----------
        dataset : str
            The dataset to work with.

        Returns
        -------
        is_ip : bool
            Whether the dataset is an IP address.
        """

        try:
            _ = ipaddress.ip_address(dataset)
            return True
        except ValueError:
            return False

    def belch_unwanted(unwanted: str) -> Tuple[None, None]:
        """
        Belch the unwanted rule to screen.

        Parameters
        ----------
        unwanted : str
            The unwanted string to belch.

        Returns
        -------
        belched : tuple
            A tuple of None, None.
        """

        # Finally, if we get here, just belch to screen.
        print("==>%s<==" % unwanted)
        return None, None

    # First try: IP followed by domain.
    static_ip_regex = r"^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$"
    split_rule = rule.split(maxsplit=1)

    if is_ip(split_rule[0]):
        # Assume that the first item is the IP address of the rule.
        if " " in split_rule[-1] or "\t" in split_rule[-1]:
            try:
                # Example: 0.0.0.0 example.org # hello, world!
                hostname, suffix = split_rule[-1].split(maxsplit=1)
            except ValueError:
                # Example: 0.0.0.0 example.org[:space:]
                hostname, suffix = split_rule[-1], None
        else:
            # Example: 0.0.0.0 example.org
            hostname, suffix = split_rule[-1], None

        hostname = hostname.lower()

        if (
            is_ip(hostname)
            or re.search(static_ip_regex, hostname)
            or "." not in hostname
            or ".." in hostname
            or "." in hostname[-1]
            or "/" in hostname
            or ":" in hostname
        ):
            # Example: 0.0.0.0 127.0.0.1

            # If the hostname is:
            #   - an IP address, or looks like one,
            #   - doesn't contain dots, or
            #   - contains repeated dots,
            #   - ends in a dot, or
            #   - contains a slash, or
            #   - contains a colon,
            # we don't want to normalize it.
            return belch_unwanted(rule)

        return normalize_response(hostname, suffix)

    if (
        not re.search(static_ip_regex, split_rule[0])
        and ":" not in split_rule[0]
        and ".." not in split_rule[0]
        and "/" not in split_rule[0]
        and "." in split_rule[0]
    ):
        # Second try: the line is a bare domain. Accept it only if the first
        # token does not look like an IP address and contains at least one dot.
        try:
            hostname, suffix = split_rule
        except ValueError:
            hostname, suffix = split_rule[0], None

        hostname = hostname.lower()

        return normalize_response(hostname, suffix)

    return belch_unwanted(rule)
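
# A sketch of what normalize_rule returns for a couple of hypothetical inputs,
# assuming target_ip="0.0.0.0" and keep_domain_comments=True:
#
#     normalize_rule("127.0.0.1 Ads.Example.com # tracker", "0.0.0.0", True)
#     # -> ("ads.example.com", "0.0.0.0 ads.example.com # tracker\n")
#
#     normalize_rule("0.0.0.0 127.0.0.1", "0.0.0.0", True)
#     # -> (None, None)  (the "hostname" looks like an IP, so it is rejected)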


def strip_rule(line):
    """
    Sanitize a rule string provided before writing it to the output hosts file.

    Parameters
    ----------
    line : str
        The rule provided for sanitation.

    Returns
    -------
    sanitized_line : str
        The sanitized rule.
    """

    return " ".join(line.split())
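
# strip_rule simply collapses all runs of whitespace, e.g. (hypothetical input):
#
#     strip_rule("0.0.0.0\t  ads.example.com   # comment")
#     # -> "0.0.0.0 ads.example.com # comment"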
2017-06-29 20:55:41 -07:00
|
|
|
def write_opening_header(final_file, **header_params):
|
2017-05-17 23:17:37 -04:00
|
|
|
"""
|
|
|
|
Write the header information into the newly-created hosts file.
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
----------
|
|
|
|
final_file : file
|
|
|
|
The file object that points to the newly-created hosts file.
|
2017-06-29 20:55:41 -07:00
|
|
|
header_params : kwargs
|
|
|
|
Dictionary providing additional parameters for populating the header
|
|
|
|
information. Currently, those fields are:
|
|
|
|
|
|
|
|
1) extensions
|
|
|
|
2) numberofrules
|
|
|
|
3) outputsubfolder
|
|
|
|
4) skipstatichosts
|
2023-05-23 20:03:43 +02:00
|
|
|
5) nounifiedhosts
|
2017-05-17 23:17:37 -04:00
|
|
|
"""
|
|
|
|
|
2017-06-29 20:55:41 -07:00
|
|
|
final_file.seek(0) # Reset file pointer.
|
|
|
|
file_contents = final_file.read() # Save content.
|
|
|
|
|
|
|
|
final_file.seek(0) # Write at the top.
|
|
|
|
|
2023-05-23 20:03:43 +02:00
|
|
|
no_unified_hosts = header_params["nounifiedhosts"]
|
|
|
|
|
2017-06-29 20:55:41 -07:00
|
|
|
if header_params["extensions"]:
|
2023-05-23 20:03:43 +02:00
|
|
|
if no_unified_hosts:
|
|
|
|
if len(header_params["extensions"]) > 1:
|
|
|
|
write_data(
|
|
|
|
final_file,
|
|
|
|
"# Title: StevenBlack/hosts extensions {0} and {1} \n#\n".format(
|
|
|
|
", ".join(header_params["extensions"][:-1]),
|
|
|
|
header_params["extensions"][-1],
|
|
|
|
),
|
|
|
|
)
|
|
|
|
else:
|
|
|
|
write_data(
|
|
|
|
final_file,
|
|
|
|
"# Title: StevenBlack/hosts extension {0}\n#\n".format(
|
|
|
|
", ".join(header_params["extensions"])
|
|
|
|
),
|
|
|
|
)
|
2019-07-13 13:41:36 +02:00
|
|
|
else:
|
2023-05-23 20:03:43 +02:00
|
|
|
if len(header_params["extensions"]) > 1:
|
|
|
|
write_data(
|
|
|
|
final_file,
|
|
|
|
"# Title: StevenBlack/hosts with the {0} and {1} extensions\n#\n".format(
|
|
|
|
", ".join(header_params["extensions"][:-1]),
|
|
|
|
header_params["extensions"][-1],
|
|
|
|
),
|
|
|
|
)
|
|
|
|
else:
|
|
|
|
write_data(
|
|
|
|
final_file,
|
|
|
|
"# Title: StevenBlack/hosts with the {0} extension\n#\n".format(
|
|
|
|
", ".join(header_params["extensions"])
|
|
|
|
),
|
|
|
|
)
|
2019-07-13 13:41:36 +02:00
|
|
|
else:
|
2020-06-07 20:11:57 +02:00
|
|
|
write_data(final_file, "# Title: StevenBlack/hosts\n#\n")
|
2019-07-13 13:41:36 +02:00
|
|
|
|
2019-07-13 21:06:30 +02:00
|
|
|
write_data(
|
|
|
|
final_file,
|
|
|
|
"# This hosts file is a merged collection "
|
|
|
|
"of hosts from reputable sources,\n",
|
|
|
|
)
|
2020-02-22 15:24:28 +02:00
|
|
|
    write_data(final_file, "# with a dash of crowd sourcing via GitHub\n#\n")
    write_data(
        final_file,
        "# Date: " + time.strftime("%d %B %Y %H:%M:%S (%Z)", time.gmtime()) + "\n",
    )

    if header_params["extensions"]:
        if header_params["nounifiedhosts"]:
            write_data(
                final_file,
                "# The unified hosts file was not used while generating this file.\n"
                "# Extensions used to generate this file: "
                + ", ".join(header_params["extensions"])
                + "\n",
            )
        else:
            write_data(
                final_file,
                "# Extensions added to this file: "
                + ", ".join(header_params["extensions"])
                + "\n",
            )

    write_data(
        final_file,
        (
            "# Number of unique domains: {:,}\n#\n".format(
                header_params["numberofrules"]
            )
        ),
    )
    write_data(
        final_file,
        "# Fetch the latest version of this file: "
        "https://raw.githubusercontent.com/StevenBlack/hosts/master/"
        + path_join_robust(header_params["outputsubfolder"], "").replace("\\", "/")
        + "hosts\n",
    )
    write_data(
        final_file, "# Project home page: https://github.com/StevenBlack/hosts\n"
    )
    write_data(
        final_file,
        "# Project releases: https://github.com/StevenBlack/hosts/releases\n#\n",
    )
    write_data(
        final_file,
        "# ===============================================================\n",
    )
    write_data(final_file, "\n")

    if not header_params["skipstatichosts"]:
        write_data(final_file, "127.0.0.1 localhost\n")
        write_data(final_file, "127.0.0.1 localhost.localdomain\n")
        write_data(final_file, "127.0.0.1 local\n")
        write_data(final_file, "255.255.255.255 broadcasthost\n")
        write_data(final_file, "::1 localhost\n")
        write_data(final_file, "::1 ip6-localhost\n")
        write_data(final_file, "::1 ip6-loopback\n")
        write_data(final_file, "fe80::1%lo0 localhost\n")
        write_data(final_file, "ff00::0 ip6-localnet\n")
        write_data(final_file, "ff00::0 ip6-mcastprefix\n")
        write_data(final_file, "ff02::1 ip6-allnodes\n")
        write_data(final_file, "ff02::2 ip6-allrouters\n")
        write_data(final_file, "ff02::3 ip6-allhosts\n")
        write_data(final_file, "0.0.0.0 0.0.0.0\n")

        if platform.system() == "Linux":
            write_data(final_file, "127.0.1.1 " + socket.gethostname() + "\n")
            write_data(final_file, "127.0.0.53 " + socket.gethostname() + "\n")

        write_data(final_file, "\n")

    preamble = path_join_robust(BASEDIR_PATH, "myhosts")
    maybe_copy_example_file(preamble)

    if os.path.isfile(preamble):
        with open(preamble, "r") as f:
            write_data(final_file, f.read())

    final_file.write(file_contents)
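
# Illustrative output (hypothetical values, not executed): with default settings
# the header block written above ends up looking roughly like
#
#   # Date: 01 January 2024 00:00:00 (UTC)
#   # Number of unique domains: 123,456
#   # Fetch the latest version of this file: https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts
#   # Project home page: https://github.com/StevenBlack/hosts
#   # ===============================================================
#
#   127.0.0.1 localhost
#   ...
#
# followed by any "myhosts" preamble and then the merged file_contents.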


def update_readme_data(readme_file, **readme_updates):
    """
    Update the host and website information provided in the README JSON data.

    Parameters
    ----------
    readme_file : str
        The name of the README file to update.
    readme_updates : kwargs
        Dictionary providing additional JSON fields to update before
        saving the data. Currently, those fields are:

        1) extensions
        2) sourcesdata
        3) numberofrules
        4) outputsubfolder
        5) nounifiedhosts
    """

    extensions_key = "base"
    extensions = readme_updates["extensions"]
    no_unified_hosts = readme_updates["nounifiedhosts"]

    if extensions:
        extensions_key = "-".join(extensions)
        if no_unified_hosts:
            extensions_key = extensions_key + "-only"

    output_folder = readme_updates["outputsubfolder"]
    generation_data = {
        "location": path_join_robust(output_folder, ""),
        "no_unified_hosts": no_unified_hosts,
        "entries": readme_updates["numberofrules"],
        "sourcesdata": readme_updates["sourcesdata"],
    }

    with open(readme_file, "r") as f:
        readme_data = json.load(f)
        readme_data[extensions_key] = generation_data

    for denomination, data in readme_data.copy().items():
        if "location" in data and data["location"] and "\\" in data["location"]:
            # Windows compatibility: #1166
            readme_data[denomination]["location"] = data["location"].replace("\\", "/")

    with open(readme_file, "w") as f:
        json.dump(readme_data, f)
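
# Illustrative call (hypothetical values, not executed here): assuming a
# "readmeData.json" file that already contains JSON data,
#
#   update_readme_data(
#       "readmeData.json",
#       extensions=["fakenews", "gambling"],
#       sourcesdata=[],
#       numberofrules=123456,
#       outputsubfolder="alternates/fakenews-gambling",
#       nounifiedhosts=False,
#   )
#
# would store the generation data under the "fakenews-gambling" key, since the
# key is built by joining the extension names with "-".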


def move_hosts_file_into_place(final_file):
    r"""
    Move the newly-created hosts file into its correct location on the OS.

    For UNIX systems, the hosts file is "/etc/hosts." On Windows, it's
    "C:\Windows\System32\drivers\etc\hosts."

    For this move to work, you must have administrator privileges.
    On UNIX systems, this means having "sudo" access, and on Windows, it
    means being able to run command prompt in administrator mode.

    Parameters
    ----------
    final_file : file object
        The newly-created hosts file to move.
    """  # noqa: W605

    filename = os.path.abspath(final_file.name)

    try:
        if not Path(filename).exists():
            raise FileNotFoundError
    except Exception:
        print_failure(f"{filename} does not exist.")
        return False

    if platform.system() == "Windows":
        target_file = str(
            Path(os.getenv("SystemRoot")) / "system32" / "drivers" / "etc" / "hosts"
        )
    else:
        target_file = "/etc/hosts"

    if os.getenv("IN_CONTAINER"):
        # It's not allowed to remove/replace a mounted /etc/hosts, so we replace the content.
        # This requires running the container user as root, as is the default.
        print(f"Running in container, so we will replace the content of {target_file}.")
        try:
            with open(target_file, "w") as target_stream:
                with open(filename, "r") as source_stream:
                    source = source_stream.read()
                    target_stream.write(source)
            return True
        except Exception:
            print_failure(f"Replacing content of {target_file} failed.")
            return False
    elif (
        platform.system() == "Linux"
        or platform.system() == "Windows"
        or platform.system() == "Darwin"
    ):
        print(
            f"Replacing {target_file} requires root privileges. You might need to enter your password."
        )
        try:
            subprocess.run(SUDO + ["cp", filename, target_file], check=True)
            return True
        except subprocess.CalledProcessError:
            print_failure(f"Replacing {target_file} failed.")
            return False
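
# Illustrative usage (hypothetical file name, not executed here): the caller is
# expected to pass the freshly written hosts file object, e.g.
#
#   with open("hosts", "wb") as final_file:
#       ...  # write the merged hosts data
#   moved = move_hosts_file_into_place(final_file)
#
# which copies the file to /etc/hosts (or the Windows equivalent) via SUDO
# and returns True on success, False otherwise.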


def flush_dns_cache():
    """
    Flush the DNS cache.
    """

    print("Flushing the DNS cache to utilize new hosts file...")
    print(
        "Flushing the DNS cache requires administrative privileges. You might need to enter your password."
    )

    dns_cache_found = False

    if platform.system() == "Darwin":
        if subprocess.call(SUDO + ["killall", "-HUP", "mDNSResponder"]):
            print_failure("Flushing the DNS cache failed.")
    elif os.name == "nt":
        print("Automatically flushing the DNS cache is not yet supported.")
        print(
            "Please copy and paste the command 'ipconfig /flushdns' in "
            "administrator command prompt after running this script."
        )
    else:
        nscd_prefixes = ["/etc", "/etc/rc.d"]
        nscd_msg = "Flushing the DNS cache by restarting nscd {result}"

        for nscd_prefix in nscd_prefixes:
            nscd_cache = nscd_prefix + "/init.d/nscd"

            if os.path.isfile(nscd_cache):
                dns_cache_found = True

                if subprocess.call(SUDO + [nscd_cache, "restart"]):
                    print_failure(nscd_msg.format(result="failed"))
                else:
                    print_success(nscd_msg.format(result="succeeded"))

        centos_file = "/etc/init.d/network"
        centos_msg = "Flushing the DNS cache by restarting network {result}"

        if os.path.isfile(centos_file):
            if subprocess.call(SUDO + [centos_file, "restart"]):
                print_failure(centos_msg.format(result="failed"))
            else:
                print_success(centos_msg.format(result="succeeded"))

        system_prefixes = ["/usr", ""]
        service_types = ["NetworkManager", "wicd", "dnsmasq", "networking"]
        restarted_services = []

        for system_prefix in system_prefixes:
            systemctl = system_prefix + "/bin/systemctl"
            system_dir = system_prefix + "/lib/systemd/system"

            for service_type in service_types:
                service = service_type + ".service"
                if service in restarted_services:
                    continue

                service_file = path_join_robust(system_dir, service)
                service_msg = (
                    "Flushing the DNS cache by restarting " + service + " {result}"
                )

                if os.path.isfile(service_file):
                    if 0 != subprocess.call(
                        [systemctl, "status", service], stdout=subprocess.DEVNULL
                    ):
                        continue
                    dns_cache_found = True

                    if subprocess.call(SUDO + [systemctl, "restart", service]):
                        print_failure(service_msg.format(result="failed"))
                    else:
                        print_success(service_msg.format(result="succeeded"))
                    restarted_services.append(service)

        dns_clean_file = "/etc/init.d/dns-clean"
        dns_clean_msg = "Flushing the DNS cache via dns-clean executable {result}"

        if os.path.isfile(dns_clean_file):
            dns_cache_found = True

            if subprocess.call(SUDO + [dns_clean_file, "start"]):
                print_failure(dns_clean_msg.format(result="failed"))
            else:
                print_success(dns_clean_msg.format(result="succeeded"))

    if not dns_cache_found:
        print_failure("Unable to determine DNS management tool.")


def remove_old_hosts_file(path_to_file, file_name, backup):
    """
    Remove the old hosts file.

    This is a hotfix because merging with an already existing hosts file leads
    to artifacts and duplicates.

    Parameters
    ----------
    path_to_file : str
        The directory containing the hosts file to remove.
    file_name : str
        The name of the hosts file to remove.
    backup : boolean, default False
        Whether or not to backup the existing hosts file.
    """

    full_file_path = path_join_robust(path_to_file, file_name)

    if os.path.exists(full_file_path):
        if backup:
            backup_file_path = full_file_path + "-{}".format(
                time.strftime("%Y-%m-%d-%H-%M-%S")
            )

            # Make a backup copy, marking the date on which the list was updated
            shutil.copy(full_file_path, backup_file_path)

        os.remove(full_file_path)

    # Create the directory if it does not exist
    if not os.path.exists(path_to_file):
        os.makedirs(path_to_file)

    # Create new empty hosts file
    open(full_file_path, "a").close()
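
# Illustrative call (hypothetical arguments, not executed here):
#
#   remove_old_hosts_file(BASEDIR_PATH, "hosts", backup=True)
#
# would copy an existing "hosts" file to something like
# "hosts-2024-01-01-00-00-00" before removing it and recreating an empty one.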


# End File Logic


def domain_to_idna(line):
    """
    Encode the domain present in a line into `idna`. This way we
    avoid most encoding issues.

    Parameters
    ----------
    line : str
        The line we have to encode/decode.

    Returns
    -------
    line : str
        The line in a converted format.

    Notes
    -----
    - This function encodes only the domain to `idna` format because in
        most cases, the encoding issue is due to a domain which looks like
        `b'\xc9\xa2oogle.com'.decode('idna')`.
    - About the splitting:
        We split because we only want to encode the domain and not the full
        line, which may cause some issues. Keep in mind that we split, but we
        still concatenate once we have encoded the domain.

        - The following splits off the prefix `0.0.0.0` or `127.0.0.1` of a line.
        - The following also splits off the trailing comment of a given line.
    """

    if not line.startswith("#"):
        tabs = "\t"
        space = " "

        tabs_position, space_position = (line.find(tabs), line.find(space))

        if tabs_position > -1 and space_position > -1:
            if space_position < tabs_position:
                separator = space
            else:
                separator = tabs
        elif not tabs_position == -1:
            separator = tabs
        elif not space_position == -1:
            separator = space
        else:
            separator = ""

        if separator:
            splited_line = line.split(separator)

            try:
                index = 1
                while index < len(splited_line):
                    if splited_line[index]:
                        break
                    index += 1

                if "#" in splited_line[index]:
                    index_comment = splited_line[index].find("#")

                    if index_comment > -1:
                        comment = splited_line[index][index_comment:]

                        splited_line[index] = (
                            splited_line[index]
                            .split(comment)[0]
                            .encode("IDNA")
                            .decode("UTF-8")
                            + comment
                        )

                splited_line[index] = splited_line[index].encode("IDNA").decode("UTF-8")
            except IndexError:
                pass
            return separator.join(splited_line)
        return line.encode("IDNA").decode("UTF-8")
    return line.encode("UTF-8").decode("UTF-8")


# Helper Functions
def maybe_copy_example_file(file_path):
    """
    Given a file path, copy over its ".example" if the path doesn't exist.

    If the path does exist, nothing happens in this function.

    If the path doesn't exist, and the ".example" file doesn't exist, nothing happens in this function.

    Parameters
    ----------
    file_path : str
        The full file path to check.
    """

    if not os.path.isfile(file_path):
        example_file_path = file_path + ".example"
        if os.path.isfile(example_file_path):
            shutil.copyfile(example_file_path, file_path)
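
# Illustrative usage (not executed here):
#
#   maybe_copy_example_file(path_join_robust(BASEDIR_PATH, "myhosts"))
#
# copies "myhosts.example" to "myhosts" only when "myhosts" is missing and the
# ".example" file exists; otherwise it is a no-op.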


def get_file_by_url(url, params=None, **kwargs):
    """
    Retrieve the contents of the hosts file at the URL, then pass it through domain_to_idna().

    Parameters are passed to the requests.get() function.

    Parameters
    ----------
    url : str or bytes
        URL for the new Request object.
    params :
        Dictionary, list of tuples or bytes to send in the query string for the Request.
    kwargs :
        Optional arguments that request takes.

    Returns
    -------
    url_data : str or None
        The data retrieved at that URL from the file. Returns None if the
        attempted retrieval is unsuccessful.
    """

    try:
        req = requests.get(url=url, params=params, **kwargs)
    except requests.exceptions.RequestException:
        print("Error retrieving data from {}".format(url))
        return None

    req.encoding = req.apparent_encoding
    res_text = "\n".join([domain_to_idna(line) for line in req.text.split("\n")])
    return res_text
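
# Illustrative call (hypothetical URL, not executed here):
#
#   text = get_file_by_url("https://example.com/hosts.txt", timeout=30)
#
# returns the decoded body with each line run through domain_to_idna(), or
# None if the request raised a RequestException.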


def write_data(f, data):
    """
    Write data to a file object.

    Parameters
    ----------
    f : file
        The file object at which to write the data.
    data : str
        The data to write to the file.
    """

    f.write(bytes(data, "UTF-8"))
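
# Note: because the data is encoded to UTF-8 bytes before writing, the file
# object is expected to be opened in binary mode, e.g. (hypothetical, not
# executed here):
#
#   with open("hosts", "wb") as f:
#       write_data(f, "# header line\n")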


def list_dir_no_hidden(path):
    """
    List all files in a directory, except for hidden files.

    Parameters
    ----------
    path : str
        The path of the directory whose files we wish to list.
    """

    return glob(os.path.join(path, "*"))
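
# Note: glob() with a "*" pattern does not match dotfiles, which is what
# excludes hidden files here. Illustrative call (hypothetical layout, not
# executed): list_dir_no_hidden("data") might return ["data/somesource", ...]
# while skipping entries such as "data/.hidden".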


def query_yes_no(question, default="yes"):
    """
    Ask a yes/no question via input() and get the answer from the user.

    Inspired by the following implementation:

    https://code.activestate.com/recipes/577058/

    Parameters
    ----------
    question : str
        The question presented to the user.
    default : str, default "yes"
        The presumed answer if the user just hits <Enter>. It must be "yes",
        "no", or None (means an answer is required of the user).

    Returns
    -------
    yes : bool
        Whether or not the user replied yes to the question.
    """

    valid = {"yes": "yes", "y": "yes", "ye": "yes", "no": "no", "n": "no"}
    prompt = {None: " [y/n] ", "yes": " [Y/n] ", "no": " [y/N] "}.get(default, None)

    if not prompt:
        raise ValueError("invalid default answer: '%s'" % default)

    reply = None

    while not reply:
        sys.stdout.write(colorize(question, Colors.PROMPT) + prompt)

        choice = input().lower()
        reply = None

        if default and not choice:
            reply = default
        elif choice in valid:
            reply = valid[choice]
        else:
            print_failure("Please respond with 'yes' or 'no' (or 'y' or 'n').\n")

    return reply == "yes"
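
# Illustrative prompt (interactive, so not executed here):
#
#   if query_yes_no("Do you want to continue?"):
#       ...  # proceed
#
# Hitting <Enter> accepts the default ("yes" here); any of "y", "ye", "yes",
# "n", "no" is also accepted, case-insensitively.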


def is_valid_user_provided_domain_format(domain):
    """
    Check whether a provided domain is valid.

    Parameters
    ----------
    domain : str
        The domain against which to check.

    Returns
    -------
    valid_domain : bool
        Whether or not the domain provided is valid.
    """

    if domain == "":
        print("You didn't enter a domain. Try again.")
        return False

    domain_regex = re.compile(r"www\d{0,3}[.]|https?")

    if domain_regex.match(domain):
        print(
            "The domain " + domain + " is not valid. Do not include "
            "www.domain.com or http(s)://domain.com. Try again."
        )
        return False
    else:
        return True
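
# Illustrative results (hypothetical domains, not executed here): because the
# regex only rejects a leading "www.", "http" or "https" prefix,
#
#   is_valid_user_provided_domain_format("example.com")          # -> True
#   is_valid_user_provided_domain_format("www.example.com")      # -> False
#   is_valid_user_provided_domain_format("https://example.com")  # -> False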


def recursive_glob(stem, file_pattern):
    """
    Recursively match files in a directory according to a pattern.

    Parameters
    ----------
    stem : str
        The directory in which to recurse.
    file_pattern : str
        The filename glob pattern against which to match.

    Returns
    -------
    matches_list : list
        A list of filenames in the directory that match the file pattern.
    """

    if sys.version_info >= (3, 5):
        return glob(stem + "/**/" + file_pattern, recursive=True)
    else:
        # gh-316: this will avoid invalid unicode comparisons in Python 2.x
        if stem == str("*"):
            stem = "."
        matches = []
        for root, dirnames, filenames in os.walk(stem):
            for filename in fnmatch.filter(filenames, file_pattern):
                matches.append(path_join_robust(root, filename))
        return matches
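
# Illustrative call (hypothetical layout, not executed here):
#
#   recursive_glob(path_join_robust(BASEDIR_PATH, "data"), "update.json")
#
# would return every "update.json" found anywhere under the data directory,
# using glob("**", recursive=True) on Python 3.5+.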


def path_join_robust(path, *paths):
    """
    Wrapper around `os.path.join` with handling for locale issues.

    Parameters
    ----------
    path : str
        The first path to join.
    paths : varargs
        Subsequent path strings to join.

    Returns
    -------
    joined_path : str
        The joined path string of the two path inputs.

    Raises
    ------
    locale.Error : A locale issue was detected that prevents path joining.
    """

    try:
        # gh-316: joining unicode and str can be saddening in Python 2.x
        path = str(path)
        paths = [str(another_path) for another_path in paths]

        return os.path.join(path, *paths)
    except UnicodeDecodeError as e:
        raise locale.Error(
            "Unable to construct path. This is likely a LOCALE issue:\n\n" + str(e)
        )
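
# Illustrative call (not executed here): path_join_robust(BASEDIR_PATH, "data")
# behaves exactly like os.path.join for ordinary str inputs; it only differs
# when a UnicodeDecodeError occurs, which it re-raises as locale.Error.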


# Colors
class Colors(object):
    PROMPT = "\033[94m"
    SUCCESS = "\033[92m"
    FAIL = "\033[91m"
    ENDC = "\033[0m"


def supports_color():
    """
    Check whether the running terminal or command prompt supports color.

    Inspired by the following StackOverflow answer (and the Django implementation):

    https://stackoverflow.com/questions/7445658

    Returns
    -------
    colors_supported : bool
        Whether the running terminal or command prompt supports color.
    """

    sys_platform = sys.platform
    supported = sys_platform != "Pocket PC" and (
        sys_platform != "win32" or "ANSICON" in os.environ
    )

    atty_connected = hasattr(sys.stdout, "isatty") and sys.stdout.isatty()
    return supported and atty_connected


def colorize(text, color):
    """
    Wrap a string so that it displays in a particular color.

    This function adds a prefix and suffix to a text string so that it is
    displayed as a particular color, either in command prompt or the terminal.

    If the running terminal or command prompt does not support color, the
    original text is returned without being wrapped.

    Parameters
    ----------
    text : str
        The message to display.
    color : str
        The color string prefix to put before the text.

    Returns
    -------
    wrapped_str : str
        The wrapped string to display in color, if possible.
    """

    if not supports_color():
        return text

    return color + text + Colors.ENDC
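
# Illustrative result (not executed here): on a color-capable terminal,
# colorize("Done.", Colors.SUCCESS) returns "\033[92mDone.\033[0m"; otherwise
# it returns "Done." unchanged.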


def print_success(text):
    """
    Print a success message.

    Parameters
    ----------
    text : str
        The message to display.
    """

    print(colorize(text, Colors.SUCCESS))


def print_failure(text):
    """
    Print a failure message.

    Parameters
    ----------
    text : str
        The message to display.
    """

    print(colorize(text, Colors.FAIL))


# End Helper Functions


if __name__ == "__main__":
    main()