// Entropy.java

/**
 * Demonstrates how to calculate the Shannon entropy of a file, i.e. a measure
 * of how random its bytes are. This is useful for estimating how
 * "compressible" something is, although it is only an estimate: each byte is
 * counted independently of its neighbours, so some algorithms may not reach
 * the predicted reduction in size, while compression that exploits structure
 * or domain knowledge may do even better. Caveat emptor.
 */

import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Command-line utility that estimates how compressible a file is from the
 * per-byte Shannon entropy of its contents.
 */
public class Entropy {

    /** Number of bits in a byte; also the maximum possible entropy per byte. */
    private static final double BITS_PER_BYTE = 8.0;

    /** Natural logarithm of 2, used to convert natural logs to base-2 logs. */
    private static final double LN_2 = Math.log(2);

    /**
     * Reads the file named by the first argument and prints its entropy along
     * with the size reduction and compression ratio that entropy implies.
     *
     * @param args command-line arguments; {@code args[0]} is the path of the
     *             file to analyse (any further arguments are ignored)
     *
     * @throws Exception if the file cannot be read
     */
    public static void main(String[] args) throws Exception {
        /* Fail with a usage hint rather than an ArrayIndexOutOfBoundsException */
        if (args.length < 1) {
            System.err.println("Usage: java Entropy <file>");
            System.exit(1);
            return;
        }

        byte[] test = Files.readAllBytes(Paths.get(args[0]));
        if (test.length == 0) {
            /* No data means no byte distribution to measure */
            System.out.println("File is empty; entropy is undefined.");
            return;
        }

        double entr = entropy(test);
        System.out.println("Entropy (bits per byte): "
                + String.format("%.2f", entr));
        System.out.println("Optimal Size Reduction: "
                + String.format("%.0f%%", (1 - (entr / BITS_PER_BYTE)) * 100));

        /* Zero entropy (one repeated byte value) would divide by zero below */
        String ratio = (entr == 0.0)
                ? "unbounded"
                : String.format("%.0f:1", BITS_PER_BYTE / entr);
        System.out.println("Optimal Compression Ratio: " + ratio);
    }

    /**
     * Calculates the entropy per character/byte of a byte array.
     *
     * @param input array to calculate entropy of; must not be {@code null}
     *
     * @return entropy in bits per byte, in the range 0 to 8; 0 for an empty
     *         array
     */
    public static double entropy(byte[] input) {
        if (input.length == 0) {
            return 0.0;
        }

        /* Total up the occurrences of each byte (masked to 0..255) */
        int[] charCounts = new int[256];
        for (byte b : input) {
            charCounts[b & 0xFF]++;
        }

        double total = input.length;
        double entropy = 0.0;
        for (int count : charCounts) {
            /* Byte values that never occur contribute nothing */
            if (count == 0) {
                continue;
            }

            double freq = count / total;
            entropy -= freq * (Math.log(freq) / LN_2);
        }

        /*
         * Rounding error can push the sum marginally past the theoretical
         * maximum, which would make the size reduction print as "-0%".
         */
        return Math.min(entropy, BITS_PER_BYTE);
    }

}