diff options
Diffstat (limited to 'lz77.c')
-rw-r--r-- | lz77.c | 482 |
1 files changed, 0 insertions, 482 deletions
@@ -1,482 +0,0 @@ -/* -Copyright 2011 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -#include "lz77.h" -#include "util.h" - -#include <assert.h> -#include <stdio.h> -#include <stdlib.h> - -void ZopfliInitLZ77Store(ZopfliLZ77Store* store) { - store->size = 0; - store->litlens = 0; - store->dists = 0; -} - -void ZopfliCleanLZ77Store(ZopfliLZ77Store* store) { - free(store->litlens); - free(store->dists); -} - -void ZopfliCopyLZ77Store( - const ZopfliLZ77Store* source, ZopfliLZ77Store* dest) { - size_t i; - ZopfliCleanLZ77Store(dest); - dest->litlens = - (unsigned short*)malloc(sizeof(*dest->litlens) * source->size); - dest->dists = (unsigned short*)malloc(sizeof(*dest->dists) * source->size); - - if (!dest->litlens || !dest->dists) exit(-1); /* Allocation failed. */ - - dest->size = source->size; - for (i = 0; i < source->size; i++) { - dest->litlens[i] = source->litlens[i]; - dest->dists[i] = source->dists[i]; - } -} - -/* -Appends the length and distance to the LZ77 arrays of the ZopfliLZ77Store. -context must be a ZopfliLZ77Store*. -*/ -void ZopfliStoreLitLenDist(unsigned short length, unsigned short dist, - ZopfliLZ77Store* store) { - size_t size2 = store->size; /* Needed for using ZOPFLI_APPEND_DATA twice. */ - ZOPFLI_APPEND_DATA(length, &store->litlens, &store->size); - ZOPFLI_APPEND_DATA(dist, &store->dists, &size2); -} - -/* -Gets a score of the length given the distance. Typically, the score of the -length is the length itself, but if the distance is very long, decrease the -score of the length a bit to make up for the fact that long distances use large -amounts of extra bits. - -This is not an accurate score, it is a heuristic only for the greedy LZ77 -implementation. More accurate cost models are employed later. Making this -heuristic more accurate may hurt rather than improve compression. - -The two direct uses of this heuristic are: --avoid using a length of 3 in combination with a long distance. This only has - an effect if length == 3. --make a slightly better choice between the two options of the lazy matching. - -Indirectly, this affects: --the block split points if the default of block splitting first is used, in a - rather unpredictable way --the first zopfli run, so it affects the chance of the first run being closer - to the optimal output -*/ -static int GetLengthScore(int length, int distance) { - /* - At 1024, the distance uses 9+ extra bits and this seems to be the sweet spot - on tested files. - */ - return distance > 1024 ? length - 1 : length; -} - -void ZopfliVerifyLenDist(const unsigned char* data, size_t datasize, size_t pos, - unsigned short dist, unsigned short length) { - - /* TODO(lode): make this only run in a debug compile, it's for assert only. */ - size_t i; - - assert(pos + length <= datasize); - for (i = 0; i < length; i++) { - if (data[pos - dist + i] != data[pos + i]) { - assert(data[pos - dist + i] == data[pos + i]); - break; - } - } -} - -/* -Finds how long the match of scan and match is. Can be used to find how many -bytes starting from scan, and from match, are equal. Returns the last byte -after scan, which is still equal to the correspondinb byte after match. -scan is the position to compare -match is the earlier position to compare. -end is the last possible byte, beyond which to stop looking. -safe_end is a few (8) bytes before end, for comparing multiple bytes at once. -*/ -static const unsigned char* GetMatch(const unsigned char* scan, - const unsigned char* match, - const unsigned char* end, - const unsigned char* safe_end) { - - if (sizeof(size_t) == 8) { - /* 8 checks at once per array bounds check (size_t is 64-bit). */ - while (scan < safe_end && *((size_t*)scan) == *((size_t*)match)) { - scan += 8; - match += 8; - } - } else if (sizeof(unsigned int) == 4) { - /* 4 checks at once per array bounds check (unsigned int is 32-bit). */ - while (scan < safe_end - && *((unsigned int*)scan) == *((unsigned int*)match)) { - scan += 4; - match += 4; - } - } else { - /* do 8 checks at once per array bounds check. */ - while (scan < safe_end && *scan == *match && *++scan == *++match - && *++scan == *++match && *++scan == *++match - && *++scan == *++match && *++scan == *++match - && *++scan == *++match && *++scan == *++match) { - scan++; match++; - } - } - - /* The remaining few bytes. */ - while (scan != end && *scan == *match) { - scan++; match++; - } - - return scan; -} - -#ifdef ZOPFLI_LONGEST_MATCH_CACHE -/* -Gets distance, length and sublen values from the cache if possible. -Returns 1 if it got the values from the cache, 0 if not. -Updates the limit value to a smaller one if possible with more limited -information from the cache. -*/ -static int TryGetFromLongestMatchCache(ZopfliBlockState* s, - size_t pos, size_t* limit, - unsigned short* sublen, unsigned short* distance, unsigned short* length) { - /* The LMC cache starts at the beginning of the block rather than the - beginning of the whole array. */ - size_t lmcpos = pos - s->blockstart; - - /* Length > 0 and dist 0 is invalid combination, which indicates on purpose - that this cache value is not filled in yet. */ - unsigned char cache_available = s->lmc && (s->lmc->length[lmcpos] == 0 || - s->lmc->dist[lmcpos] != 0); - unsigned char limit_ok_for_cache = cache_available && - (*limit == ZOPFLI_MAX_MATCH || s->lmc->length[lmcpos] <= *limit || - (sublen && ZopfliMaxCachedSublen(s->lmc, - lmcpos, s->lmc->length[lmcpos]) >= *limit)); - - if (s->lmc && limit_ok_for_cache && cache_available) { - if (!sublen || s->lmc->length[lmcpos] - <= ZopfliMaxCachedSublen(s->lmc, lmcpos, s->lmc->length[lmcpos])) { - *length = s->lmc->length[lmcpos]; - if (*length > *limit) *length = *limit; - if (sublen) { - ZopfliCacheToSublen(s->lmc, lmcpos, *length, sublen); - *distance = sublen[*length]; - if (*limit == ZOPFLI_MAX_MATCH && *length >= ZOPFLI_MIN_MATCH) { - assert(sublen[*length] == s->lmc->dist[lmcpos]); - } - } else { - *distance = s->lmc->dist[lmcpos]; - } - return 1; - } - /* Can't use much of the cache, since the "sublens" need to be calculated, - but at least we already know when to stop. */ - *limit = s->lmc->length[lmcpos]; - } - - return 0; -} - -/* -Stores the found sublen, distance and length in the longest match cache, if -possible. -*/ -static void StoreInLongestMatchCache(ZopfliBlockState* s, - size_t pos, size_t limit, - const unsigned short* sublen, - unsigned short distance, unsigned short length) { - /* The LMC cache starts at the beginning of the block rather than the - beginning of the whole array. */ - size_t lmcpos = pos - s->blockstart; - - /* Length > 0 and dist 0 is invalid combination, which indicates on purpose - that this cache value is not filled in yet. */ - unsigned char cache_available = s->lmc && (s->lmc->length[lmcpos] == 0 || - s->lmc->dist[lmcpos] != 0); - - if (s->lmc && limit == ZOPFLI_MAX_MATCH && sublen && !cache_available) { - assert(s->lmc->length[lmcpos] == 1 && s->lmc->dist[lmcpos] == 0); - s->lmc->dist[lmcpos] = length < ZOPFLI_MIN_MATCH ? 0 : distance; - s->lmc->length[lmcpos] = length < ZOPFLI_MIN_MATCH ? 0 : length; - assert(!(s->lmc->length[lmcpos] == 1 && s->lmc->dist[lmcpos] == 0)); - ZopfliSublenToCache(sublen, lmcpos, length, s->lmc); - } -} -#endif - -void ZopfliFindLongestMatch(ZopfliBlockState* s, const ZopfliHash* h, - const unsigned char* array, - size_t pos, size_t size, size_t limit, - unsigned short* sublen, unsigned short* distance, unsigned short* length) { - unsigned short hpos = pos & ZOPFLI_WINDOW_MASK, p, pp; - unsigned short bestdist = 0; - unsigned short bestlength = 1; - const unsigned char* scan; - const unsigned char* match; - const unsigned char* arrayend; - const unsigned char* arrayend_safe; -#if ZOPFLI_MAX_CHAIN_HITS < ZOPFLI_WINDOW_SIZE - int chain_counter = ZOPFLI_MAX_CHAIN_HITS; /* For quitting early. */ -#endif - - unsigned dist = 0; /* Not unsigned short on purpose. */ - - int* hhead = h->head; - unsigned short* hprev = h->prev; - int* hhashval = h->hashval; - int hval = h->val; - -#ifdef ZOPFLI_LONGEST_MATCH_CACHE - if (TryGetFromLongestMatchCache(s, pos, &limit, sublen, distance, length)) { - assert(pos + *length <= size); - return; - } -#endif - - assert(limit <= ZOPFLI_MAX_MATCH); - assert(limit >= ZOPFLI_MIN_MATCH); - assert(pos < size); - - if (size - pos < ZOPFLI_MIN_MATCH) { - /* The rest of the code assumes there are at least ZOPFLI_MIN_MATCH bytes to - try. */ - *length = 0; - *distance = 0; - return; - } - - if (pos + limit > size) { - limit = size - pos; - } - arrayend = &array[pos] + limit; - arrayend_safe = arrayend - 8; - - assert(hval < 65536); - - pp = hhead[hval]; /* During the whole loop, p == hprev[pp]. */ - p = hprev[pp]; - - assert(pp == hpos); - - dist = p < pp ? pp - p : ((ZOPFLI_WINDOW_SIZE - p) + pp); - - /* Go through all distances. */ - while (dist < ZOPFLI_WINDOW_SIZE) { - unsigned short currentlength = 0; - - assert(p < ZOPFLI_WINDOW_SIZE); - assert(p == hprev[pp]); - assert(hhashval[p] == hval); - - if (dist > 0) { - assert(pos < size); - assert(dist <= pos); - scan = &array[pos]; - match = &array[pos - dist]; - - /* Testing the byte at position bestlength first, goes slightly faster. */ - if (pos + bestlength >= size - || *(scan + bestlength) == *(match + bestlength)) { - -#ifdef ZOPFLI_HASH_SAME - unsigned short same0 = h->same[pos & ZOPFLI_WINDOW_MASK]; - if (same0 > 2 && *scan == *match) { - unsigned short same1 = h->same[(pos - dist) & ZOPFLI_WINDOW_MASK]; - unsigned short same = same0 < same1 ? same0 : same1; - if (same > limit) same = limit; - scan += same; - match += same; - } -#endif - scan = GetMatch(scan, match, arrayend, arrayend_safe); - currentlength = scan - &array[pos]; /* The found length. */ - } - - if (currentlength > bestlength) { - if (sublen) { - unsigned short j; - for (j = bestlength + 1; j <= currentlength; j++) { - sublen[j] = dist; - } - } - bestdist = dist; - bestlength = currentlength; - if (currentlength >= limit) break; - } - } - - -#ifdef ZOPFLI_HASH_SAME_HASH - /* Switch to the other hash once this will be more efficient. */ - if (hhead != h->head2 && bestlength >= h->same[hpos] && - h->val2 == h->hashval2[p]) { - /* Now use the hash that encodes the length and first byte. */ - hhead = h->head2; - hprev = h->prev2; - hhashval = h->hashval2; - hval = h->val2; - } -#endif - - pp = p; - p = hprev[p]; - if (p == pp) break; /* Uninited prev value. */ - - dist += p < pp ? pp - p : ((ZOPFLI_WINDOW_SIZE - p) + pp); - -#if ZOPFLI_MAX_CHAIN_HITS < ZOPFLI_WINDOW_SIZE - chain_counter--; - if (chain_counter <= 0) break; -#endif - } - -#ifdef ZOPFLI_LONGEST_MATCH_CACHE - StoreInLongestMatchCache(s, pos, limit, sublen, bestdist, bestlength); -#endif - - assert(bestlength <= limit); - - *distance = bestdist; - *length = bestlength; - assert(pos + *length <= size); -} - -void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in, - size_t instart, size_t inend, - ZopfliLZ77Store* store) { - size_t i = 0, j; - unsigned short leng; - unsigned short dist; - int lengthscore; - size_t windowstart = instart > ZOPFLI_WINDOW_SIZE - ? instart - ZOPFLI_WINDOW_SIZE : 0; - unsigned short dummysublen[259]; - - ZopfliHash hash; - ZopfliHash* h = &hash; - -#ifdef ZOPFLI_LAZY_MATCHING - /* Lazy matching. */ - unsigned prev_length = 0; - unsigned prev_match = 0; - int prevlengthscore; - int match_available = 0; -#endif - - if (instart == inend) return; - - ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h); - ZopfliWarmupHash(in, windowstart, inend, h); - for (i = windowstart; i < instart; i++) { - ZopfliUpdateHash(in, i, inend, h); - } - - for (i = instart; i < inend; i++) { - ZopfliUpdateHash(in, i, inend, h); - - ZopfliFindLongestMatch(s, h, in, i, inend, ZOPFLI_MAX_MATCH, dummysublen, - &dist, &leng); - lengthscore = GetLengthScore(leng, dist); - -#ifdef ZOPFLI_LAZY_MATCHING - /* Lazy matching. */ - prevlengthscore = GetLengthScore(prev_length, prev_match); - if (match_available) { - match_available = 0; - if (lengthscore > prevlengthscore + 1) { - ZopfliStoreLitLenDist(in[i - 1], 0, store); - if (lengthscore >= ZOPFLI_MIN_MATCH && leng < ZOPFLI_MAX_MATCH) { - match_available = 1; - prev_length = leng; - prev_match = dist; - continue; - } - } else { - /* Add previous to output. */ - leng = prev_length; - dist = prev_match; - lengthscore = prevlengthscore; - /* Add to output. */ - ZopfliVerifyLenDist(in, inend, i - 1, dist, leng); - ZopfliStoreLitLenDist(leng, dist, store); - for (j = 2; j < leng; j++) { - assert(i < inend); - i++; - ZopfliUpdateHash(in, i, inend, h); - } - continue; - } - } - else if (lengthscore >= ZOPFLI_MIN_MATCH && leng < ZOPFLI_MAX_MATCH) { - match_available = 1; - prev_length = leng; - prev_match = dist; - continue; - } - /* End of lazy matching. */ -#endif - - /* Add to output. */ - if (lengthscore >= ZOPFLI_MIN_MATCH) { - ZopfliVerifyLenDist(in, inend, i, dist, leng); - ZopfliStoreLitLenDist(leng, dist, store); - } else { - leng = 1; - ZopfliStoreLitLenDist(in[i], 0, store); - } - for (j = 1; j < leng; j++) { - assert(i < inend); - i++; - ZopfliUpdateHash(in, i, inend, h); - } - } - - ZopfliCleanHash(h); -} - -void ZopfliLZ77Counts(const unsigned short* litlens, - const unsigned short* dists, - size_t start, size_t end, - size_t* ll_count, size_t* d_count) { - size_t i; - - for (i = 0; i < 288; i++) { - ll_count[i] = 0; - } - for (i = 0; i < 32; i++) { - d_count[i] = 0; - } - - for (i = start; i < end; i++) { - if (dists[i] == 0) { - ll_count[litlens[i]]++; - } else { - ll_count[ZopfliGetLengthSymbol(litlens[i])]++; - d_count[ZopfliGetDistSymbol(dists[i])]++; - } - } - - ll_count[256] = 1; /* End symbol. */ -} |