// Entropy.java
/**
* Demonstrates how to calculate the entropy of a file, or basically a measure
* of file randomness. This can be useful when determining how "compressible"
* something is, although it is worth noting that it is just an estimate. Some
* algorithms may not be able to reach optimal reduction in size, while
* domain-specific compression may be able to do even better. Caveat emptor.
*/
import java.nio.file.Files;
import java.nio.file.Paths;
/**
 * Command-line utility that reports the Shannon entropy of a file's bytes,
 * along with the size reduction and compression ratio that entropy implies.
 */
public class Entropy {
    /** Number of distinct values a single byte can take. */
    private static final int BYTE_VALUES = 256;

    /** Bits in a byte; also the maximum possible entropy per byte. */
    private static final double BITS_PER_BYTE = 8.0;

    /** Natural log of 2, used to convert natural logarithms to base 2. */
    private static final double LN_2 = Math.log(2);

    /**
     * Reads the file named by the first argument and prints its entropy,
     * the optimal size reduction and the optimal compression ratio.
     *
     * If no file argument is supplied, a usage message is written to
     * standard error and the method returns without reading anything.
     *
     * @param args command-line arguments; {@code args[0]} is the file path
     *
     * @throws Exception if the file cannot be read
     */
    public static void main(String[] args)
            throws Exception {
        if (args == null || args.length < 1) {
            System.err.println("Usage: java Entropy <file>");
            return;
        }

        byte[] test = Files.readAllBytes(Paths.get(args[0]));
        double entr = entropy(test);

        System.out.println("Entropy (bits per byte): "
                + String.format("%.2f", entr));
        System.out.println("Optimal Size Reduction: "
                + String.format("%.0f%%",
                        (1 - (entr / BITS_PER_BYTE)) * 100));

        /*
         * Zero entropy (empty file, or every byte identical) would make the
         * ratio a division by zero, so report it explicitly instead.
         */
        if (entr > 0.0) {
            System.out.println("Optimal Compression Ratio: "
                    + String.format("%.0f:1", BITS_PER_BYTE / entr));
        } else {
            System.out.println("Optimal Compression Ratio: "
                    + "unbounded (zero entropy)");
        }
    }

    /**
     * Calculates the entropy per character/byte of a byte array.
     *
     * @param input array to calculate entropy of; must not be null
     *
     * @return entropy bits per byte, between 0 and 8; 0 for an empty array
     *
     * @throws NullPointerException if {@code input} is null
     */
    public static double entropy(byte[] input) {
        if (input == null) {
            throw new NullPointerException("input must not be null");
        }
        if (input.length == 0) {
            return 0.0;
        }

        /* Total up the occurrences of each byte */
        int[] charCounts = new int[BYTE_VALUES];
        for (byte b : input) {
            /* Mask to treat the signed byte as an unsigned index 0..255 */
            charCounts[b & 0xFF]++;
        }

        double entropy = 0.0;
        for (int count : charCounts) {
            /* Skip absent values: log(0) is undefined and they add nothing */
            if (count == 0) {
                continue;
            }
            double freq = (double) count / input.length;
            entropy -= freq * (Math.log(freq) / LN_2);
        }
        return entropy;
    }
}