Alger logo
𝔸𝕝𝕘𝕖𝕣
About

Entropy

#!/usr/bin/env python3

"""
Implementation of entropy of information
https://en.wikipedia.org/wiki/Entropy_(information_theory)
"""
from __future__ import annotations

import math
from collections import Counter
from string import ascii_lowercase


def calculate_prob(text: str) -> None:
    """
    This method takes path and two dict as argument
    and than calculates entropy of them.
    :param dict:
    :param dict:
    :return: Prints
    1) Entropy of information based on 1 alphabet
    2) Entropy of information based on couples of 2 alphabet
    3) print Entropy of H(X n∣Xn−1)

    Text from random books. Also, random quotes.
    >>> text = ("Behind Winston’s back the voice "
    ...         "from the telescreen was still "
    ...         "babbling and the overfulfilment")
    >>> calculate_prob(text)
    4.0
    6.0
    2.0

    >>> text = ("The Ministry of Truth—Minitrue, in Newspeak [Newspeak was the official"
    ...         "face in elegant lettering, the three")
    >>> calculate_prob(text)
    4.0
    5.0
    1.0
    >>> text = ("Had repulsive dashwoods suspicion sincerity but advantage now him. "
    ...         "Remark easily garret nor nay.  Civil those mrs enjoy shy fat merry. "
    ...         "You greatest jointure saw horrible. He private he on be imagine "
    ...         "suppose. Fertile beloved evident through no service elderly is. Blind "
    ...         "there if every no so at. Own neglected you preferred way sincerity "
    ...         "delivered his attempted. To of message cottage windows do besides "
    ...         "against uncivil.  Delightful unreserved impossible few estimating "
    ...         "men favourable see entreaties. She propriety immediate was improving. "
    ...         "He or entrance humoured likewise moderate. Much nor game son say "
    ...         "feel. Fat make met can must form into gate. Me we offending prevailed "
    ...         "discovery.")
    >>> calculate_prob(text)
    4.0
    7.0
    3.0
    """
    single_char_strings, two_char_strings = analyze_text(text)
    my_alphas = list(" " + ascii_lowercase)
    # what is our total sum of probabilities.
    all_sum = sum(single_char_strings.values())

    # one length string
    my_fir_sum = 0
    # for each alpha we go in our dict and if it is in it we calculate entropy
    for ch in my_alphas:
        if ch in single_char_strings:
            my_str = single_char_strings[ch]
            prob = my_str / all_sum
            my_fir_sum += prob * math.log2(prob)  # entropy formula.

    # print entropy
    print(f"{round(-1 * my_fir_sum):.1f}")

    # two len string
    all_sum = sum(two_char_strings.values())
    my_sec_sum = 0
    # for each alpha (two in size) calculate entropy.
    for ch0 in my_alphas:
        for ch1 in my_alphas:
            sequence = ch0 + ch1
            if sequence in two_char_strings:
                my_str = two_char_strings[sequence]
                prob = int(my_str) / all_sum
                my_sec_sum += prob * math.log2(prob)

    # print second entropy
    print(f"{round(-1 * my_sec_sum):.1f}")

    # print the difference between them
    print(f"{round((-1 * my_sec_sum) - (-1 * my_fir_sum)):.1f}")


def analyze_text(text: str) -> tuple[dict, dict]:
    """
    Convert text input into two dicts of counts.
    The first dictionary stores the frequency of single character strings.
    The second dictionary stores the frequency of two character strings.
    """
    single_char_strings = Counter()  # type: ignore
    two_char_strings = Counter()  # type: ignore
    single_char_strings[text[-1]] += 1

    # first case when we have space at start.
    two_char_strings[" " + text[0]] += 1
    for i in range(0, len(text) - 1):
        single_char_strings[text[i]] += 1
        two_char_strings[text[i : i + 2]] += 1
    return single_char_strings, two_char_strings


def main():
    import doctest

    doctest.testmod()
    # text = (
    #     "Had repulsive dashwoods suspicion sincerity but advantage now him. Remark "
    #     "easily garret nor nay. Civil those mrs enjoy shy fat merry. You greatest "
    #     "jointure saw horrible. He private he on be imagine suppose. Fertile "
    #     "beloved evident through no service elderly is. Blind there if every no so "
    #     "at. Own neglected you preferred way sincerity delivered his attempted. To "
    #     "of message cottage windows do besides against uncivil.  Delightful "
    #     "unreserved impossible few estimating men favourable see entreaties. She "
    #     "propriety immediate was improving. He or entrance humoured likewise "
    #     "moderate. Much nor game son say feel. Fat make met can must form into "
    #     "gate. Me we offending prevailed discovery. "
    # )

    # calculate_prob(text)


if __name__ == "__main__":
    main()
Algerlogo

© Alger 2022

About us

We are a group of programmers helping each other build new things, whether it be writing complex encryption programs, or simple ciphers. Our goal is to work together to document and model beautiful, helpful and interesting algorithms using code. We are an open-source community - anyone can contribute. We check each other's work, communicate and collaborate to solve problems. We strive to be welcoming, respectful, yet make sure that our code follows the latest programming guidelines.