# hosts/updateReadme.py

#!/usr/bin/env python

# Script by Steven Black
# https://github.com/StevenBlack
#
# This Python script will update the readme files in this repo.

from string import Template

import os
import sys
import time
import json

# Project settings.
# The template is looked up next to this script; the readme and data
# settings are bare file names.
BASEDIR_PATH = os.path.dirname(os.path.realpath(__file__))
README_TEMPLATE = os.path.join(BASEDIR_PATH, "readme_template.md")
README_FILENAME = "readme.md"
README_DATA_FILENAME = "readmeData.json"

# True when running under Python 3; used for version-dependent code paths.
PY3 = sys.version_info[0] >= 3


def main():
    """
    Regenerate the readme file of every entry in the readme data file.

    Loads README_DATA_FILENAME, builds one table-of-contents row per entry
    and one source row per item of the entry's "sourcesdata" list, then
    fills the placeholders of README_TEMPLATE and writes the result to
    README_FILENAME inside the entry's "location" directory.
    """

    toc_row = Template('${description} | [Readme](https://github.com/StevenBlack/'
                       'hosts/blob/master/${location}readme.md) | '
                       '[link](https://raw.githubusercontent.com/StevenBlack/'
                       'hosts/master/${location}hosts) | '
                       '${fmtentries} | '
                       '[link](http://sbc.io/hosts/${location}hosts)')
    source_row = Template('${name} | ${description} |[link](${homeurl})'
                          ' | [raw](${url}) | ${frequency} | ${license}  | [issues](${issues}) ')

    # NOTE(review): the data file and the output locations are resolved
    # against the current working directory, while README_TEMPLATE is an
    # absolute path -- confirm the script is always run from the repo root.
    with open(README_DATA_FILENAME, 'r') as f:
        data = json.load(f)

    # Sort by the number of hyphens in the key and then by the key string
    # itself. sorted() behaves the same on Python 2 and 3, so no version
    # switch is needed.
    keys = sorted(data, key=lambda item: (item.count("-"), item))

    toc_lines = []
    for key in keys:
        entry = data[key]
        entry["fmtentries"] = "{:,}".format(entry["entries"])
        if key == "base":
            entry["description"] = 'Unified hosts = **(adware + malware)**'
        else:
            entry["description"] = ('Unified hosts **+ ' +
                                    key.replace("-", " + ") + '**')

        toc_lines.append(toc_row.substitute(entry) + "\n")
    toc_rows = decode_line("".join(toc_lines))

    # Fallback values so that a source lacking a field still substitutes.
    row_defaults = {
        "name": "",
        "description": "",
        "homeurl": "",
        "frequency": "",
        "issues": "",
        "url": "",
        "license": "",
    }

    # Read the template once, before any output file is truncated; the
    # handle is closed as soon as the lines are in memory.
    with open(README_TEMPLATE) as template_file:
        template_lines = template_file.readlines()

    gen_date = time.strftime("%B %d %Y", time.gmtime())

    for key in keys:
        entry = data[key]
        extensions = key.replace("-", ", ")

        source_lines = []
        for source in entry["sourcesdata"]:
            this_row = dict(row_defaults)
            this_row.update(source)
            source_lines.append(source_row.substitute(this_row) + "\n")

        # Placeholders are applied in this order to every template line.
        replacements = [
            ('@GEN_DATE@', gen_date),
            ('@EXTENSIONS@',
             decode_line("* Extensions: **" + extensions + "**.")),
            ('@EXTENSIONS_HEADER@',
             decode_line("with " + extensions + " extensions")),
            ('@NUM_ENTRIES@', "{:,}".format(entry["entries"])),
            ('@SUBFOLDER@',
             decode_line(os.path.join(entry["location"], ''))),
            ('@TOCROWS@', toc_rows),
            ('@SOURCEROWS@', decode_line("".join(source_lines))),
        ]

        with open(os.path.join(entry["location"],
                               README_FILENAME), "wt") as out:
            for line in template_lines:
                for placeholder, value in replacements:
                    line = line.replace(placeholder, value)
                out.write(decode_line(line))


def decode_line(line):
    """
    Normalize a line of text so Python 2 can write it out.

    Parameters
    ----------
    line : str
        The text to normalize.

    Returns
    -------
    decoded_str : str
        On Python 3, `line` itself, untouched. On Python 2, the text with
        every en-dash (U+2013) replaced by "-", passed through str().
    """

    # Python 3 strings need no special treatment.
    if PY3:
        return line

    # On Python 2 the en-dash may arrive either as UTF-8 encoded bytes or
    # as an already-decoded unicode character. Try to decode; when that
    # raises UnicodeEncodeError, keep the value unchanged.
    try:
        text = line.decode("UTF-8")
    except UnicodeEncodeError:
        text = line

    # Swap the en-dash for a plain hyphen so the final conversion works.
    # Further "replace" calls can be added here for other characters that
    # Python 2 cannot handle.
    # NOTE(review): any other non-ASCII character presumably still fails in
    # the final decode/str() step -- confirm on Python 2.
    text = text.replace(u"\u2013", "-")
    return str(text.decode("UTF-8"))


# Regenerate the readme files only when run as a script, not when imported.
if __name__ == "__main__":
    main()