You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

#### 86 lines 1.8 KiB Raw Normal View History

 2 years ago `package compress` ``` ``` `import "math"` ``` ``` ```// Estimate returns a normalized compressibility estimate of block b. ``` ```// Values close to zero are likely uncompressible. ``` ```// Values above 0.1 are likely to be compressible. ``` ```// Values above 0.5 are very compressible. ``` ```// Very small lengths will return 0. ``` `func Estimate(b []byte) float64 {` ` if len(b) < 16 {` ` return 0` ` }` ``` ``` ``` // Correctly predicted order 1 ``` ` hits := 0` ` lastMatch := false` ` var o1 [256]byte` ` var hist [256]int` ` c1 := byte(0)` ` for _, c := range b {` ` if c == o1[c1] {` ``` // We only count a hit if there was two correct predictions in a row. ``` ` if lastMatch {` ` hits++` ` }` ` lastMatch = true` ` } else {` ` lastMatch = false` ` }` ` o1[c1] = c` ` c1 = c` ` hist[c]++` ` }` ``` ``` ``` // Use x^0.6 to give better spread ``` ` prediction := math.Pow(float64(hits)/float64(len(b)), 0.6)` ``` ``` ``` // Calculate histogram distribution ``` ` variance := float64(0)` ` avg := float64(len(b)) / 256` ``` ``` ` for _, v := range hist {` ` Δ := float64(v) - avg` ` variance += Δ * Δ` ` }` ``` ``` ` stddev := math.Sqrt(float64(variance)) / float64(len(b))` ` exp := math.Sqrt(1 / float64(len(b)))` ``` ``` ``` // Subtract expected stddev ``` ` stddev -= exp` ` if stddev < 0 {` ` stddev = 0` ` }` ` stddev *= 1 + exp` ``` ``` ``` // Use x^0.4 to give better spread ``` ` entropy := math.Pow(stddev, 0.4)` ``` ``` ``` // 50/50 weight between prediction and histogram distribution ``` ` return math.Pow((prediction+entropy)/2, 0.9)` `}` ``` ``` ```// ShannonEntropyBits returns the number of bits minimum required to represent ``` ```// an entropy encoding of the input bytes. 
// ShannonEntropyBits returns the number of bits minimum required to represent
// an entropy encoding of the input bytes.
// https://en.wiktionary.org/wiki/Shannon_entropy
//
// Each distinct byte value contributes count * -log2(count/len(b)) bits, and
// that contribution is rounded up to a whole bit before being added to the
// total. Empty input yields 0.
func ShannonEntropyBits(b []byte) int {
	if len(b) == 0 {
		return 0
	}
	var hist [256]int
	for _, c := range b {
		hist[c]++
	}
	total := float64(len(b))
	shannon := float64(0)
	for _, v := range hist {
		if v == 0 {
			continue
		}
		n := float64(v)
		// Divide instead of multiplying by a precomputed reciprocal: n/total
		// is exact for probabilities such as 1 and 1/2, whereas n*(1/total)
		// can land one ulp off and Ceil would then inflate the symbol's cost
		// by a whole bit (e.g. 49 identical bytes reporting 1 bit, not 0).
		shannon += math.Ceil(-math.Log2(n/total) * n)
	}
	return int(math.Ceil(shannon))
}