summary | refs | log | tree | commit | diff
path: root/lib/zstd/compress/fse_compress.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/zstd/compress/fse_compress.c')
-rw-r--r-- lib/zstd/compress/fse_compress.c 151
1 files changed, 76 insertions, 75 deletions
diff --git a/lib/zstd/compress/fse_compress.c b/lib/zstd/compress/fse_compress.c
index 436985b620e5..44a3c10becf2 100644
--- a/lib/zstd/compress/fse_compress.c
+++ b/lib/zstd/compress/fse_compress.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* ******************************************************************
* FSE : Finite State Entropy encoder
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -25,7 +26,8 @@
#include "../common/error_private.h"
#define ZSTD_DEPS_NEED_MALLOC
#define ZSTD_DEPS_NEED_MATH64
-#include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
+#include "../common/zstd_deps.h" /* ZSTD_memset */
+#include "../common/bits.h" /* ZSTD_highbit32 */
/* **************************************************************
@@ -75,13 +77,14 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
U32 const step = FSE_TABLESTEP(tableSize);
+ U32 const maxSV1 = maxSymbolValue+1;
- U32* cumul = (U32*)workSpace;
- FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2));
+ U16* cumul = (U16*)workSpace; /* size = maxSV1 */
+ FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1)); /* size = tableSize */
U32 highThreshold = tableSize-1;
- if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */
+ assert(((size_t)workSpace & 1) == 0); /* Must be 2 bytes-aligned */
if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
/* CTable header */
tableU16[-2] = (U16) tableLog;
@@ -89,7 +92,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
assert(tableLog < 16); /* required for threshold strategy to work */
/* For explanations on how to distribute symbol values over the table :
- * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
+ * https://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
#ifdef __clang_analyzer__
ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
@@ -98,20 +101,61 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
/* symbol start positions */
{ U32 u;
cumul[0] = 0;
- for (u=1; u <= maxSymbolValue+1; u++) {
+ for (u=1; u <= maxSV1; u++) {
if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
cumul[u] = cumul[u-1] + 1;
tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
} else {
- cumul[u] = cumul[u-1] + normalizedCounter[u-1];
+ assert(normalizedCounter[u-1] >= 0);
+ cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
+ assert(cumul[u] >= cumul[u-1]); /* no overflow */
} }
- cumul[maxSymbolValue+1] = tableSize+1;
+ cumul[maxSV1] = (U16)(tableSize+1);
}
/* Spread symbols */
- { U32 position = 0;
+ if (highThreshold == tableSize - 1) {
+ /* Case for no low prob count symbols. Lay down 8 bytes at a time
+ * to reduce branch misses since we are operating on a small block
+ */
+ BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */
+ { U64 const add = 0x0101010101010101ull;
+ size_t pos = 0;
+ U64 sv = 0;
+ U32 s;
+ for (s=0; s<maxSV1; ++s, sv += add) {
+ int i;
+ int const n = normalizedCounter[s];
+ MEM_write64(spread + pos, sv);
+ for (i = 8; i < n; i += 8) {
+ MEM_write64(spread + pos + i, sv);
+ }
+ assert(n>=0);
+ pos += (size_t)n;
+ }
+ }
+ /* Spread symbols across the table. Lack of lowprob symbols means that
+ * we don't need variable sized inner loop, so we can unroll the loop and
+ * reduce branch misses.
+ */
+ { size_t position = 0;
+ size_t s;
+ size_t const unroll = 2; /* Experimentally determined optimal unroll */
+ assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
+ for (s = 0; s < (size_t)tableSize; s += unroll) {
+ size_t u;
+ for (u = 0; u < unroll; ++u) {
+ size_t const uPosition = (position + (u * step)) & tableMask;
+ tableSymbol[uPosition] = spread[s + u];
+ }
+ position = (position + (unroll * step)) & tableMask;
+ }
+ assert(position == 0); /* Must have initialized all positions */
+ }
+ } else {
+ U32 position = 0;
U32 symbol;
- for (symbol=0; symbol<=maxSymbolValue; symbol++) {
+ for (symbol=0; symbol<maxSV1; symbol++) {
int nbOccurrences;
int const freq = normalizedCounter[symbol];
for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
@@ -120,7 +164,6 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
while (position > highThreshold)
position = (position + step) & tableMask; /* Low proba area */
} }
-
assert(position==0); /* Must have initialized all positions */
}
@@ -144,16 +187,17 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
case -1:
case 1:
symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
- symbolTT[s].deltaFindState = total - 1;
+ assert(total <= INT_MAX);
+ symbolTT[s].deltaFindState = (int)(total - 1);
total ++;
break;
default :
- {
- U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
- U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
+ assert(normalizedCounter[s] > 1);
+ { U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1);
+ U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
- symbolTT[s].deltaFindState = total - normalizedCounter[s];
- total += normalizedCounter[s];
+ symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
+ total += (unsigned)normalizedCounter[s];
} } } }
#if 0 /* debug : symbol costs */
@@ -164,8 +208,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
symbol, normalizedCounter[symbol],
FSE_getMaxNbBits(symbolTT, symbol),
(double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
- }
- }
+ } }
#endif
return 0;
@@ -173,16 +216,18 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
-
#ifndef FSE_COMMONDEFS_ONLY
-
/*-**************************************************************
* FSE NCount encoding
****************************************************************/
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
{
- size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
+ size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
+ + 4 /* bitCount initialized at 4 */
+ + 2 /* first two symbols may use one additional bit each */) / 8)
+ + 1 /* round up to whole nb bytes */
+ + 2 /* additional two bytes for bitstream flush */;
return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
}
@@ -211,7 +256,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
/* Init */
remaining = tableSize+1; /* +1 for extra accuracy */
threshold = tableSize;
- nbBits = tableLog+1;
+ nbBits = (int)tableLog+1;
while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */
if (previousIs0) {
@@ -230,7 +275,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
}
while (symbol >= start+3) {
start+=3;
- bitStream += 3 << bitCount;
+ bitStream += 3U << bitCount;
bitCount += 2;
}
bitStream += (symbol-start) << bitCount;
@@ -250,7 +295,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
count++; /* +1 for extra accuracy */
if (count>=threshold)
count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
- bitStream += count << bitCount;
+ bitStream += (U32)count << bitCount;
bitCount += nbBits;
bitCount -= (count<max);
previousIs0 = (count==1);
@@ -278,7 +323,8 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
out[1] = (BYTE)(bitStream>>8);
out+= (bitCount+7) /8;
- return (out-ostart);
+ assert(out >= ostart);
+ return (size_t)(out-ostart);
}
@@ -299,21 +345,11 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize,
* FSE Compression Code
****************************************************************/
-FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
-{
- size_t size;
- if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
- size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
- return (FSE_CTable*)ZSTD_malloc(size);
-}
-
-void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); }
-
/* provides the minimum logSize to safely represent a distribution */
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
{
- U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
- U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
+ U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1;
+ U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2;
U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
assert(srcSize > 1); /* Not supported, RLE should be used instead */
return minBits;
@@ -321,7 +357,7 @@ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
{
- U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
+ U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus;
U32 tableLog = maxTableLog;
U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
assert(srcSize > 1); /* Not supported, RLE should be used instead */
@@ -489,40 +525,6 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
return tableLog;
}
-
-/* fake FSE_CTable, for raw (uncompressed) input */
-size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
-{
- const unsigned tableSize = 1 << nbBits;
- const unsigned tableMask = tableSize - 1;
- const unsigned maxSymbolValue = tableMask;
- void* const ptr = ct;
- U16* const tableU16 = ( (U16*) ptr) + 2;
- void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */
- FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
- unsigned s;
-
- /* Sanity checks */
- if (nbBits < 1) return ERROR(GENERIC); /* min size */
-
- /* header */
- tableU16[-2] = (U16) nbBits;
- tableU16[-1] = (U16) maxSymbolValue;
-
- /* Build table */
- for (s=0; s<tableSize; s++)
- tableU16[s] = (U16)(tableSize + s);
-
- /* Build Symbol Transformation Table */
- { const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
- for (s=0; s<=maxSymbolValue; s++) {
- symbolTT[s].deltaNbBits = deltaNbBits;
- symbolTT[s].deltaFindState = s-1;
- } }
-
- return 0;
-}
-
/* fake FSE_CTable, for rle input (always same symbol) */
size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
{
@@ -621,5 +623,4 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
-
#endif /* FSE_COMMONDEFS_ONLY */