diff options
Diffstat (limited to 'lib/zstd/compress/fse_compress.c')
| -rw-r--r-- | lib/zstd/compress/fse_compress.c | 151 |
1 files changed, 76 insertions, 75 deletions
diff --git a/lib/zstd/compress/fse_compress.c b/lib/zstd/compress/fse_compress.c index 436985b620e5..44a3c10becf2 100644 --- a/lib/zstd/compress/fse_compress.c +++ b/lib/zstd/compress/fse_compress.c @@ -1,6 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause /* ****************************************************************** * FSE : Finite State Entropy encoder - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * * You can contact the author at : * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -25,7 +26,8 @@ #include "../common/error_private.h" #define ZSTD_DEPS_NEED_MALLOC #define ZSTD_DEPS_NEED_MATH64 -#include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */ +#include "../common/zstd_deps.h" /* ZSTD_memset */ +#include "../common/bits.h" /* ZSTD_highbit32 */ /* ************************************************************** @@ -75,13 +77,14 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ; FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); U32 const step = FSE_TABLESTEP(tableSize); + U32 const maxSV1 = maxSymbolValue+1; - U32* cumul = (U32*)workSpace; - FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2)); + U16* cumul = (U16*)workSpace; /* size = maxSV1 */ + FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1)); /* size = tableSize */ U32 highThreshold = tableSize-1; - if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */ + assert(((size_t)workSpace & 1) == 0); /* Must be 2 bytes-aligned */ if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge); /* CTable header */ tableU16[-2] = (U16) tableLog; @@ -89,7 +92,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, assert(tableLog < 16); /* required for threshold strategy to work */ /* For explanations on how to distribute symbol values over the table : - * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ + * https://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ #ifdef __clang_analyzer__ ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */ @@ -98,20 +101,61 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, /* symbol start positions */ { U32 u; cumul[0] = 0; - for (u=1; u <= maxSymbolValue+1; u++) { + for (u=1; u <= maxSV1; u++) { if (normalizedCounter[u-1]==-1) { /* Low proba symbol */ cumul[u] = cumul[u-1] + 1; tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); } else { - cumul[u] = cumul[u-1] + normalizedCounter[u-1]; + assert(normalizedCounter[u-1] >= 0); + cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1]; + assert(cumul[u] >= cumul[u-1]); /* no overflow */ } } - cumul[maxSymbolValue+1] = tableSize+1; + cumul[maxSV1] = (U16)(tableSize+1); } /* Spread symbols */ - { U32 position = 0; + if (highThreshold == tableSize - 1) { + /* Case for no low prob count symbols. Lay down 8 bytes at a time + * to reduce branch misses since we are operating on a small block + */ + BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */ + { U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s<maxSV1; ++s, sv += add) { + int i; + int const n = normalizedCounter[s]; + MEM_write64(spread + pos, sv); + for (i = 8; i < n; i += 8) { + MEM_write64(spread + pos + i, sv); + } + assert(n>=0); + pos += (size_t)n; + } + } + /* Spread symbols across the table. Lack of lowprob symbols means that + * we don't need variable sized inner loop, so we can unroll the loop and + * reduce branch misses. + */ + { size_t position = 0; + size_t s; + size_t const unroll = 2; /* Experimentally determined optimal unroll */ + assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */ + for (s = 0; s < (size_t)tableSize; s += unroll) { + size_t u; + for (u = 0; u < unroll; ++u) { + size_t const uPosition = (position + (u * step)) & tableMask; + tableSymbol[uPosition] = spread[s + u]; + } + position = (position + (unroll * step)) & tableMask; + } + assert(position == 0); /* Must have initialized all positions */ + } + } else { + U32 position = 0; U32 symbol; - for (symbol=0; symbol<=maxSymbolValue; symbol++) { + for (symbol=0; symbol<maxSV1; symbol++) { int nbOccurrences; int const freq = normalizedCounter[symbol]; for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) { @@ -120,7 +164,6 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, while (position > highThreshold) position = (position + step) & tableMask; /* Low proba area */ } } - assert(position==0); /* Must have initialized all positions */ } @@ -144,16 +187,17 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, case -1: case 1: symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog); - symbolTT[s].deltaFindState = total - 1; + assert(total <= INT_MAX); + symbolTT[s].deltaFindState = (int)(total - 1); total ++; break; default : - { - U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1); - U32 const minStatePlus = normalizedCounter[s] << maxBitsOut; + assert(normalizedCounter[s] > 1); + { U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1); + U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut; symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus; - symbolTT[s].deltaFindState = total - normalizedCounter[s]; - total += normalizedCounter[s]; + symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]); + total += (unsigned)normalizedCounter[s]; } } } } #if 0 /* debug : symbol costs */ @@ -164,8 +208,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, symbol, normalizedCounter[symbol], FSE_getMaxNbBits(symbolTT, symbol), (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256); - } - } + } } #endif return 0; @@ -173,16 +216,18 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, - #ifndef FSE_COMMONDEFS_ONLY - /*-************************************************************** * FSE NCount encoding ****************************************************************/ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) { - size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3; + size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog + + 4 /* bitCount initialized at 4 */ + + 2 /* first two symbols may use one additional bit each */) / 8) + + 1 /* round up to whole nb bytes */ + + 2 /* additional two bytes for bitstream flush */; return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ } @@ -211,7 +256,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize, /* Init */ remaining = tableSize+1; /* +1 for extra accuracy */ threshold = tableSize; - nbBits = tableLog+1; + nbBits = (int)tableLog+1; while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */ if (previousIs0) { @@ -230,7 +275,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize, } while (symbol >= start+3) { start+=3; - bitStream += 3 << bitCount; + bitStream += 3U << bitCount; bitCount += 2; } bitStream += (symbol-start) << bitCount; @@ -250,7 +295,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize, count++; /* +1 for extra accuracy */ if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ - bitStream += count << bitCount; + bitStream += (U32)count << bitCount; bitCount += nbBits; bitCount -= (count<max); previousIs0 = (count==1); @@ -278,7 +323,8 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize, out[1] = (BYTE)(bitStream>>8); out+= (bitCount+7) /8; - return (out-ostart); + assert(out >= ostart); + return (size_t)(out-ostart); } @@ -299,21 +345,11 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize, * FSE Compression Code ****************************************************************/ -FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) -{ - size_t size; - if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; - size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); - return (FSE_CTable*)ZSTD_malloc(size); -} - -void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); } - /* provides the minimum logSize to safely represent a distribution */ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) { - U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1; - U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; + U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1; + U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2; U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; assert(srcSize > 1); /* Not supported, RLE should be used instead */ return minBits; @@ -321,7 +357,7 @@ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) { - U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; + U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus; U32 tableLog = maxTableLog; U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); assert(srcSize > 1); /* Not supported, RLE should be used instead */ @@ -489,40 +525,6 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, return tableLog; } - -/* fake FSE_CTable, for raw (uncompressed) input */ -size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits) -{ - const unsigned tableSize = 1 << nbBits; - const unsigned tableMask = tableSize - 1; - const unsigned maxSymbolValue = tableMask; - void* const ptr = ct; - U16* const tableU16 = ( (U16*) ptr) + 2; - void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */ - FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); - unsigned s; - - /* Sanity checks */ - if (nbBits < 1) return ERROR(GENERIC); /* min size */ - - /* header */ - tableU16[-2] = (U16) nbBits; - tableU16[-1] = (U16) maxSymbolValue; - - /* Build table */ - for (s=0; s<tableSize; s++) - tableU16[s] = (U16)(tableSize + s); - - /* Build Symbol Transformation Table */ - { const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits); - for (s=0; s<=maxSymbolValue; s++) { - symbolTT[s].deltaNbBits = deltaNbBits; - symbolTT[s].deltaFindState = s-1; - } } - - return 0; -} - /* fake FSE_CTable, for rle input (always same symbol) */ size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue) { @@ -621,5 +623,4 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize, size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } - #endif /* FSE_COMMONDEFS_ONLY */ |
