#include #include #include #include #include "ldm.h" #define LDM_MEMORY_USAGE 14 #define LDM_HASHLOG (LDM_MEMORY_USAGE-2) #define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE)) #define LDM_HASHTABLESIZE_U32 ((LDM_HASHTABLESIZE) >> 2) #define LDM_HASH_SIZE_U32 (1 << (LDM_HASHLOG)) #define WINDOW_SIZE (1 << 20) #define MAX_WINDOW_SIZE 31 #define HASH_SIZE 4 #define MINMATCH 4 #define ML_BITS 4 #define ML_MASK ((1U<>8); } } static U32 LDM_read32(const void *ptr) { return *(const U32 *)ptr; } static U64 LDM_read64(const void *ptr) { return *(const U64 *)ptr; } static void LDM_copy8(void *dst, const void *src) { memcpy(dst, src, 8); } static void LDM_wild_copy(void *dstPtr, const void *srcPtr, void *dstEnd) { BYTE *d = (BYTE *)dstPtr; const BYTE *s = (const BYTE *)srcPtr; BYTE * const e = (BYTE *)dstEnd; do { LDM_copy8(d, s); d += 8; s += 8; } while (d < e); } struct hash_entry { U64 offset; tag t; }; static U32 LDM_hash(U32 sequence) { return ((sequence * 2654435761U) >> ((32)-LDM_HASHLOG)); } static U32 LDM_hash5(U64 sequence) { static const U64 prime5bytes = 889523592379ULL; static const U64 prime8bytes = 11400714785074694791ULL; const U32 hashLog = LDM_HASHLOG; if (LDM_isLittleEndian()) return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog)); else return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog)); } static U32 LDM_hash_position(const void * const p) { return LDM_hash(LDM_read32(p)); } static void LDM_put_position_on_hash(const BYTE *p, U32 h, void *tableBase, const BYTE *srcBase) { U32 *hashTable = (U32 *) tableBase; hashTable[h] = (U32)(p - srcBase); } static void LDM_put_position(const BYTE *p, void *tableBase, const BYTE *srcBase) { U32 const h = LDM_hash_position(p); LDM_put_position_on_hash(p, h, tableBase, srcBase); } static const BYTE *LDM_get_position_on_hash( U32 h, void *tableBase, const BYTE *srcBase) { const U32 * const hashTable = (U32*)tableBase; return hashTable[h] + srcBase; } static BYTE LDM_read_byte(const void *memPtr) { BYTE val; memcpy(&val, memPtr, 1); return val; } static unsigned LDM_count(const BYTE *pIn, const BYTE *pMatch, const BYTE *pInLimit) { const BYTE * const pStart = pIn; while (pIn < pInLimit - 1) { BYTE const diff = LDM_read_byte(pMatch) ^ LDM_read_byte(pIn); if (!diff) { pIn++; pMatch++; continue; } return (unsigned)(pIn - pStart); } return (unsigned)(pIn - pStart); } void LDM_read_header(void const *source, size_t *compressed_size, size_t *decompressed_size) { const U32 *ip = (const U32 *)source; *compressed_size = *ip++; *decompressed_size = *ip; } size_t LDM_compress(void const *source, void *dest, size_t source_size, size_t max_dest_size) { const BYTE * const istart = (const BYTE*)source; const BYTE *ip = istart; const BYTE * const iend = istart + source_size; const BYTE *ilimit = iend - HASH_SIZE; const BYTE * const matchlimit = iend - HASH_SIZE; const BYTE * const mflimit = iend - MINMATCH; BYTE *op = (BYTE*) dest; U32 hashTable[LDM_HASHTABLESIZE_U32]; memset(hashTable, 0, sizeof(hashTable)); const BYTE *anchor = (const BYTE *)source; // struct LDM_cctx cctx; size_t output_size = 0; U32 forwardH; /* Hash first byte: put into hash table */ LDM_put_position(ip, hashTable, istart); ip++; forwardH = LDM_hash_position(ip); //TODO Loop terminates before ip>=ilimit. while (ip < ilimit) { const BYTE *match; BYTE *token; /* Find a match */ { const BYTE *forwardIp = ip; unsigned step = 1; do { U32 const h = forwardH; ip = forwardIp; forwardIp += step; if (forwardIp > mflimit) { goto _last_literals; } match = LDM_get_position_on_hash(h, hashTable, istart); forwardH = LDM_hash_position(forwardIp); LDM_put_position_on_hash(ip, h, hashTable, istart); } while (ip - match > WINDOW_SIZE || LDM_read64(match) != LDM_read64(ip)); } // TODO catchup while (ip > anchor && match > istart && ip[-1] == match[-1]) { ip--; match--; } /* Encode literals */ { unsigned const litLength = (unsigned)(ip - anchor); token = op++; #ifdef LDM_DEBUG printf("Cur position: %zu\n", anchor - istart); printf("LitLength %zu. (Match offset). %zu\n", litLength, ip - match); #endif /* fwrite(match, 4, 1, stdout); printf("\n"); */ if (litLength >= RUN_MASK) { int len = (int)litLength - RUN_MASK; *token = (RUN_MASK << ML_BITS); for (; len >= 255; len -= 255) { *op++ = 255; } *op++ = (BYTE)len; } else { *token = (BYTE)(litLength << ML_BITS); } #ifdef LDM_DEBUG printf("Literals "); fwrite(anchor, litLength, 1, stdout); printf("\n"); #endif memcpy(op, anchor, litLength); //LDM_wild_copy(op, anchor, op + litLength); op += litLength; } _next_match: /* Encode offset */ { LDM_write32(op, ip - match); op += 4; } /* Encode Match Length */ { unsigned matchCode; matchCode = LDM_count(ip + MINMATCH, match + MINMATCH, matchlimit); #ifdef LDM_DEBUG printf("Match length %zu\n", matchCode + MINMATCH); fwrite(ip, MINMATCH + matchCode, 1, stdout); printf("\n"); #endif ip += MINMATCH + matchCode; if (matchCode >= ML_MASK) { *token += ML_MASK; matchCode -= ML_MASK; LDM_write32(op, 0xFFFFFFFF); while (matchCode >= 4*0xFF) { op += 4; LDM_write32(op, 0xffffffff); matchCode -= 4*0xFF; } op += matchCode / 255; *op++ = (BYTE)(matchCode % 255); } else { *token += (BYTE)(matchCode); } #ifdef LDM_DEBUG printf("\n"); #endif } anchor = ip; LDM_put_position(ip, hashTable, istart); forwardH = LDM_hash_position(++ip); } _last_literals: /* Encode last literals */ { size_t const lastRun = (size_t)(iend - anchor); if (lastRun >= RUN_MASK) { size_t accumulator = lastRun - RUN_MASK; *op++ = RUN_MASK << ML_BITS; for(; accumulator >= 255; accumulator -= 255) { *op++ = 255; } *op++ = (BYTE)accumulator; } else { *op++ = (BYTE)(lastRun << ML_BITS); } memcpy(op, anchor, lastRun); op += lastRun; } return (op - (BYTE *)dest); } size_t LDM_decompress(void const *source, void *dest, size_t compressed_size, size_t max_decompressed_size) { const BYTE *ip = (const BYTE *)source; const BYTE * const iend = ip + compressed_size; BYTE *op = (BYTE *)dest; BYTE * const oend = op + max_decompressed_size; BYTE *cpy; while (ip < iend) { size_t length; const BYTE *match; size_t offset; /* get literal length */ unsigned const token = *ip++; if ((length=(token >> ML_BITS)) == RUN_MASK) { unsigned s; do { s = *ip++; length += s; } while (s == 255); } #ifdef LDM_DEBUG printf("Literal length: %zu\n", length); #endif /* copy literals */ cpy = op + length; #ifdef LDM_DEBUG printf("Literals "); fwrite(ip, length, 1, stdout); printf("\n"); #endif memcpy(op, ip, length); // LDM_wild_copy(op, ip, cpy); ip += length; op = cpy; /* get offset */ offset = LDM_read32(ip); #ifdef LDM_DEBUG printf("Offset: %zu\n", offset); #endif ip += 4; match = op - offset; // LDM_write32(op, (U32)offset); /* get matchlength */ length = token & ML_MASK; if (length == ML_MASK) { unsigned s; do { s = *ip++; length += s; } while (s == 255); } length += MINMATCH; #ifdef LDM_DEBUG printf("Match length: %zu\n", length); #endif /* copy match */ cpy = op + length; // Inefficient for now while (match < cpy - offset && op < oend) { *op++ = *match++; } } // memcpy(dest, source, compressed_size); return op - (BYTE *)dest; }