diff options
Diffstat (limited to 'squeeze.c')
-rw-r--r-- | squeeze.c | 546 |
1 files changed, 0 insertions, 546 deletions
diff --git a/squeeze.c b/squeeze.c deleted file mode 100644 index 09e7e2e..0000000 --- a/squeeze.c +++ /dev/null @@ -1,546 +0,0 @@ -/* -Copyright 2011 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -#include "squeeze.h" - -#include <assert.h> -#include <math.h> -#include <stdio.h> - -#include "blocksplitter.h" -#include "deflate.h" -#include "tree.h" -#include "util.h" - -typedef struct SymbolStats { - /* The literal and length symbols. */ - size_t litlens[288]; - /* The 32 unique dist symbols, not the 32768 possible dists. */ - size_t dists[32]; - - double ll_symbols[288]; /* Length of each lit/len symbol in bits. */ - double d_symbols[32]; /* Length of each dist symbol in bits. */ -} SymbolStats; - -/* Sets everything to 0. */ -static void InitStats(SymbolStats* stats) { - memset(stats->litlens, 0, 288 * sizeof(stats->litlens[0])); - memset(stats->dists, 0, 32 * sizeof(stats->dists[0])); - - memset(stats->ll_symbols, 0, 288 * sizeof(stats->ll_symbols[0])); - memset(stats->d_symbols, 0, 32 * sizeof(stats->d_symbols[0])); -} - -static void CopyStats(SymbolStats* source, SymbolStats* dest) { - memcpy(dest->litlens, source->litlens, 288 * sizeof(dest->litlens[0])); - memcpy(dest->dists, source->dists, 32 * sizeof(dest->dists[0])); - - memcpy(dest->ll_symbols, source->ll_symbols, - 288 * sizeof(dest->ll_symbols[0])); - memcpy(dest->d_symbols, source->d_symbols, 32 * sizeof(dest->d_symbols[0])); -} - -/* Adds the bit lengths. */ -static void AddWeighedStatFreqs(const SymbolStats* stats1, double w1, - const SymbolStats* stats2, double w2, - SymbolStats* result) { - size_t i; - for (i = 0; i < 288; i++) { - result->litlens[i] = - (size_t) (stats1->litlens[i] * w1 + stats2->litlens[i] * w2); - } - for (i = 0; i < 32; i++) { - result->dists[i] = - (size_t) (stats1->dists[i] * w1 + stats2->dists[i] * w2); - } - result->litlens[256] = 1; /* End symbol. */ -} - -typedef struct RanState { - unsigned int m_w, m_z; -} RanState; - -static void InitRanState(RanState* state) { - state->m_w = 1; - state->m_z = 2; -} - -/* Get random number: "Multiply-With-Carry" generator of G. Marsaglia */ -static unsigned int Ran(RanState* state) { - state->m_z = 36969 * (state->m_z & 65535) + (state->m_z >> 16); - state->m_w = 18000 * (state->m_w & 65535) + (state->m_w >> 16); - return (state->m_z << 16) + state->m_w; /* 32-bit result. */ -} - -static void RandomizeFreqs(RanState* state, size_t* freqs, int n) { - int i; - for (i = 0; i < n; i++) { - if ((Ran(state) >> 4) % 3 == 0) freqs[i] = freqs[Ran(state) % n]; - } -} - -static void RandomizeStatFreqs(RanState* state, SymbolStats* stats) { - RandomizeFreqs(state, stats->litlens, 288); - RandomizeFreqs(state, stats->dists, 32); - stats->litlens[256] = 1; /* End symbol. */ -} - -static void ClearStatFreqs(SymbolStats* stats) { - size_t i; - for (i = 0; i < 288; i++) stats->litlens[i] = 0; - for (i = 0; i < 32; i++) stats->dists[i] = 0; -} - -/* -Function that calculates a cost based on a model for the given LZ77 symbol. -litlen: means literal symbol if dist is 0, length otherwise. -*/ -typedef double CostModelFun(unsigned litlen, unsigned dist, void* context); - -/* -Cost model which should exactly match fixed tree. -type: CostModelFun -*/ -static double GetCostFixed(unsigned litlen, unsigned dist, void* unused) { - (void)unused; - if (dist == 0) { - if (litlen <= 143) return 8; - else return 9; - } else { - int dbits = ZopfliGetDistExtraBits(dist); - int lbits = ZopfliGetLengthExtraBits(litlen); - int lsym = ZopfliGetLengthSymbol(litlen); - double cost = 0; - if (lsym <= 279) cost += 7; - else cost += 8; - cost += 5; /* Every dist symbol has length 5. */ - return cost + dbits + lbits; - } -} - -/* -Cost model based on symbol statistics. -type: CostModelFun -*/ -static double GetCostStat(unsigned litlen, unsigned dist, void* context) { - SymbolStats* stats = (SymbolStats*)context; - if (dist == 0) { - return stats->ll_symbols[litlen]; - } else { - int lsym = ZopfliGetLengthSymbol(litlen); - int lbits = ZopfliGetLengthExtraBits(litlen); - int dsym = ZopfliGetDistSymbol(dist); - int dbits = ZopfliGetDistExtraBits(dist); - return stats->ll_symbols[lsym] + lbits + stats->d_symbols[dsym] + dbits; - } -} - -/* -Finds the minimum possible cost this cost model can return for valid length and -distance symbols. -*/ -static double GetCostModelMinCost(CostModelFun* costmodel, void* costcontext) { - double mincost; - int bestlength = 0; /* length that has lowest cost in the cost model */ - int bestdist = 0; /* distance that has lowest cost in the cost model */ - int i; - /* - Table of distances that have a different distance symbol in the deflate - specification. Each value is the first distance that has a new symbol. Only - different symbols affect the cost model so only these need to be checked. - See RFC 1951 section 3.2.5. Compressed blocks (length and distance codes). - */ - static const int dsymbols[30] = { - 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, - 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577 - }; - - mincost = ZOPFLI_LARGE_FLOAT; - for (i = 3; i < 259; i++) { - double c = costmodel(i, 1, costcontext); - if (c < mincost) { - bestlength = i; - mincost = c; - } - } - - mincost = ZOPFLI_LARGE_FLOAT; - for (i = 0; i < 30; i++) { - double c = costmodel(3, dsymbols[i], costcontext); - if (c < mincost) { - bestdist = dsymbols[i]; - mincost = c; - } - } - - return costmodel(bestlength, bestdist, costcontext); -} - -/* -Performs the forward pass for "squeeze". Gets the most optimal length to reach -every byte from a previous byte, using cost calculations. -s: the ZopfliBlockState -in: the input data array -instart: where to start -inend: where to stop (not inclusive) -costmodel: function to calculate the cost of some lit/len/dist pair. -costcontext: abstract context for the costmodel function -length_array: output array of size (inend - instart) which will receive the best - length to reach this byte from a previous byte. -returns the cost that was, according to the costmodel, needed to get to the end. -*/ -static double GetBestLengths(ZopfliBlockState *s, - const unsigned char* in, - size_t instart, size_t inend, - CostModelFun* costmodel, void* costcontext, - unsigned short* length_array) { - /* Best cost to get here so far. */ - size_t blocksize = inend - instart; - float* costs; - size_t i = 0, k; - unsigned short leng; - unsigned short dist; - unsigned short sublen[259]; - size_t windowstart = instart > ZOPFLI_WINDOW_SIZE - ? instart - ZOPFLI_WINDOW_SIZE : 0; - ZopfliHash hash; - ZopfliHash* h = &hash; - double result; - double mincost = GetCostModelMinCost(costmodel, costcontext); - - if (instart == inend) return 0; - - costs = (float*)malloc(sizeof(float) * (blocksize + 1)); - if (!costs) exit(-1); /* Allocation failed. */ - - ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h); - ZopfliWarmupHash(in, windowstart, inend, h); - for (i = windowstart; i < instart; i++) { - ZopfliUpdateHash(in, i, inend, h); - } - - for (i = 1; i < blocksize + 1; i++) costs[i] = ZOPFLI_LARGE_FLOAT; - costs[0] = 0; /* Because it's the start. */ - length_array[0] = 0; - - for (i = instart; i < inend; i++) { - size_t j = i - instart; /* Index in the costs array and length_array. */ - ZopfliUpdateHash(in, i, inend, h); - -#ifdef ZOPFLI_SHORTCUT_LONG_REPETITIONS - /* If we're in a long repetition of the same character and have more than - ZOPFLI_MAX_MATCH characters before and after our position. */ - if (h->same[i & ZOPFLI_WINDOW_MASK] > ZOPFLI_MAX_MATCH * 2 - && i > instart + ZOPFLI_MAX_MATCH + 1 - && i + ZOPFLI_MAX_MATCH * 2 + 1 < inend - && h->same[(i - ZOPFLI_MAX_MATCH) & ZOPFLI_WINDOW_MASK] - > ZOPFLI_MAX_MATCH) { - double symbolcost = costmodel(ZOPFLI_MAX_MATCH, 1, costcontext); - /* Set the length to reach each one to ZOPFLI_MAX_MATCH, and the cost to - the cost corresponding to that length. Doing this, we skip - ZOPFLI_MAX_MATCH values to avoid calling ZopfliFindLongestMatch. */ - for (k = 0; k < ZOPFLI_MAX_MATCH; k++) { - costs[j + ZOPFLI_MAX_MATCH] = costs[j] + symbolcost; - length_array[j + ZOPFLI_MAX_MATCH] = ZOPFLI_MAX_MATCH; - i++; - j++; - ZopfliUpdateHash(in, i, inend, h); - } - } -#endif - - ZopfliFindLongestMatch(s, h, in, i, inend, ZOPFLI_MAX_MATCH, sublen, - &dist, &leng); - - /* Literal. */ - if (i + 1 <= inend) { - double newCost = costs[j] + costmodel(in[i], 0, costcontext); - assert(newCost >= 0); - if (newCost < costs[j + 1]) { - costs[j + 1] = newCost; - length_array[j + 1] = 1; - } - } - /* Lengths. */ - for (k = 3; k <= leng && i + k <= inend; k++) { - double newCost; - - /* Calling the cost model is expensive, avoid this if we are already at - the minimum possible cost that it can return. */ - if (costs[j + k] - costs[j] <= mincost) continue; - - newCost = costs[j] + costmodel(k, sublen[k], costcontext); - assert(newCost >= 0); - if (newCost < costs[j + k]) { - assert(k <= ZOPFLI_MAX_MATCH); - costs[j + k] = newCost; - length_array[j + k] = k; - } - } - } - - assert(costs[blocksize] >= 0); - result = costs[blocksize]; - - ZopfliCleanHash(h); - free(costs); - - return result; -} - -/* -Calculates the optimal path of lz77 lengths to use, from the calculated -length_array. The length_array must contain the optimal length to reach that -byte. The path will be filled with the lengths to use, so its data size will be -the amount of lz77 symbols. -*/ -static void TraceBackwards(size_t size, const unsigned short* length_array, - unsigned short** path, size_t* pathsize) { - size_t index = size; - if (size == 0) return; - for (;;) { - ZOPFLI_APPEND_DATA(length_array[index], path, pathsize); - assert(length_array[index] <= index); - assert(length_array[index] <= ZOPFLI_MAX_MATCH); - assert(length_array[index] != 0); - index -= length_array[index]; - if (index == 0) break; - } - - /* Mirror result. */ - for (index = 0; index < *pathsize / 2; index++) { - unsigned short temp = (*path)[index]; - (*path)[index] = (*path)[*pathsize - index - 1]; - (*path)[*pathsize - index - 1] = temp; - } -} - -static void FollowPath(ZopfliBlockState* s, - const unsigned char* in, size_t instart, size_t inend, - unsigned short* path, size_t pathsize, - ZopfliLZ77Store* store) { - size_t i, j, pos = 0; - size_t windowstart = instart > ZOPFLI_WINDOW_SIZE - ? instart - ZOPFLI_WINDOW_SIZE : 0; - - size_t total_length_test = 0; - - ZopfliHash hash; - ZopfliHash* h = &hash; - - if (instart == inend) return; - - ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h); - ZopfliWarmupHash(in, windowstart, inend, h); - for (i = windowstart; i < instart; i++) { - ZopfliUpdateHash(in, i, inend, h); - } - - pos = instart; - for (i = 0; i < pathsize; i++) { - unsigned short length = path[i]; - unsigned short dummy_length; - unsigned short dist; - assert(pos < inend); - - ZopfliUpdateHash(in, pos, inend, h); - - /* Add to output. */ - if (length >= ZOPFLI_MIN_MATCH) { - /* Get the distance by recalculating longest match. The found length - should match the length from the path. */ - ZopfliFindLongestMatch(s, h, in, pos, inend, length, 0, - &dist, &dummy_length); - assert(!(dummy_length != length && length > 2 && dummy_length > 2)); - ZopfliVerifyLenDist(in, inend, pos, dist, length); - ZopfliStoreLitLenDist(length, dist, store); - total_length_test += length; - } else { - length = 1; - ZopfliStoreLitLenDist(in[pos], 0, store); - total_length_test++; - } - - - assert(pos + length <= inend); - for (j = 1; j < length; j++) { - ZopfliUpdateHash(in, pos + j, inend, h); - } - - pos += length; - } - - ZopfliCleanHash(h); -} - -/* Calculates the entropy of the statistics */ -static void CalculateStatistics(SymbolStats* stats) { - ZopfliCalculateEntropy(stats->litlens, 288, stats->ll_symbols); - ZopfliCalculateEntropy(stats->dists, 32, stats->d_symbols); -} - -/* Appends the symbol statistics from the store. */ -static void GetStatistics(const ZopfliLZ77Store* store, SymbolStats* stats) { - size_t i; - for (i = 0; i < store->size; i++) { - if (store->dists[i] == 0) { - stats->litlens[store->litlens[i]]++; - } else { - stats->litlens[ZopfliGetLengthSymbol(store->litlens[i])]++; - stats->dists[ZopfliGetDistSymbol(store->dists[i])]++; - } - } - stats->litlens[256] = 1; /* End symbol. */ - - CalculateStatistics(stats); -} - -/* -Does a single run for ZopfliLZ77Optimal. For good compression, repeated runs -with updated statistics should be performed. - -s: the block state -in: the input data array -instart: where to start -inend: where to stop (not inclusive) -path: pointer to dynamically allocated memory to store the path -pathsize: pointer to the size of the dynamic path array -length_array: array if size (inend - instart) used to store lengths -costmodel: function to use as the cost model for this squeeze run -costcontext: abstract context for the costmodel function -store: place to output the LZ77 data -returns the cost that was, according to the costmodel, needed to get to the end. - This is not the actual cost. -*/ -static double LZ77OptimalRun(ZopfliBlockState* s, - const unsigned char* in, size_t instart, size_t inend, - unsigned short** path, size_t* pathsize, - unsigned short* length_array, CostModelFun* costmodel, - void* costcontext, ZopfliLZ77Store* store) { - double cost = GetBestLengths( - s, in, instart, inend, costmodel, costcontext, length_array); - free(*path); - *path = 0; - *pathsize = 0; - TraceBackwards(inend - instart, length_array, path, pathsize); - FollowPath(s, in, instart, inend, *path, *pathsize, store); - assert(cost < ZOPFLI_LARGE_FLOAT); - return cost; -} - -void ZopfliLZ77Optimal(ZopfliBlockState *s, - const unsigned char* in, size_t instart, size_t inend, - ZopfliLZ77Store* store) { - /* Dist to get to here with smallest cost. */ - size_t blocksize = inend - instart; - unsigned short* length_array = - (unsigned short*)malloc(sizeof(unsigned short) * (blocksize + 1)); - unsigned short* path = 0; - size_t pathsize = 0; - ZopfliLZ77Store currentstore; - SymbolStats stats, beststats, laststats; - int i; - double cost; - double bestcost = ZOPFLI_LARGE_FLOAT; - double lastcost = 0; - /* Try randomizing the costs a bit once the size stabilizes. */ - RanState ran_state; - int lastrandomstep = -1; - - if (!length_array) exit(-1); /* Allocation failed. */ - - InitRanState(&ran_state); - InitStats(&stats); - ZopfliInitLZ77Store(¤tstore); - - /* Do regular deflate, then loop multiple shortest path runs, each time using - the statistics of the previous run. */ - - /* Initial run. */ - ZopfliLZ77Greedy(s, in, instart, inend, ¤tstore); - GetStatistics(¤tstore, &stats); - - /* Repeat statistics with each time the cost model from the previous stat - run. */ - for (i = 0; i < s->options->numiterations; i++) { - ZopfliCleanLZ77Store(¤tstore); - ZopfliInitLZ77Store(¤tstore); - LZ77OptimalRun(s, in, instart, inend, &path, &pathsize, - length_array, GetCostStat, (void*)&stats, - ¤tstore); - cost = ZopfliCalculateBlockSize(currentstore.litlens, currentstore.dists, - 0, currentstore.size, 2); - if (s->options->verbose_more || (s->options->verbose && cost < bestcost)) { - fprintf(stderr, "Iteration %d: %d bit\n", i, (int) cost); - } - if (cost < bestcost) { - /* Copy to the output store. */ - ZopfliCopyLZ77Store(¤tstore, store); - CopyStats(&stats, &beststats); - bestcost = cost; - } - CopyStats(&stats, &laststats); - ClearStatFreqs(&stats); - GetStatistics(¤tstore, &stats); - if (lastrandomstep != -1) { - /* This makes it converge slower but better. Do it only once the - randomness kicks in so that if the user does few iterations, it gives a - better result sooner. */ - AddWeighedStatFreqs(&stats, 1.0, &laststats, 0.5, &stats); - CalculateStatistics(&stats); - } - if (i > 5 && cost == lastcost) { - CopyStats(&beststats, &stats); - RandomizeStatFreqs(&ran_state, &stats); - CalculateStatistics(&stats); - lastrandomstep = i; - } - lastcost = cost; - } - - free(length_array); - free(path); - ZopfliCleanLZ77Store(¤tstore); -} - -void ZopfliLZ77OptimalFixed(ZopfliBlockState *s, - const unsigned char* in, - size_t instart, size_t inend, - ZopfliLZ77Store* store) -{ - /* Dist to get to here with smallest cost. */ - size_t blocksize = inend - instart; - unsigned short* length_array = - (unsigned short*)malloc(sizeof(unsigned short) * (blocksize + 1)); - unsigned short* path = 0; - size_t pathsize = 0; - - if (!length_array) exit(-1); /* Allocation failed. */ - - s->blockstart = instart; - s->blockend = inend; - - /* Shortest path for fixed tree This one should give the shortest possible - result for fixed tree, no repeated runs are needed since the tree is known. */ - LZ77OptimalRun(s, in, instart, inend, &path, &pathsize, - length_array, GetCostFixed, 0, store); - - free(length_array); - free(path); -} |