diff options
author | Kuan-Wei Chiu <visitorckw@gmail.com> | 2024-04-07 11:39:04 +0800 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2024-05-08 17:29:18 -0400 |
commit | 0ddb5f0854a8f42d949f5d8a49980d8174f266a3 (patch) | |
tree | 5edc9913119e8675029c9d7bac3caa03e3289f3d /fs/bcachefs/eytzinger.c | |
parent | be31bf439c2154e4c1174186cf2bd6fd1e6b7a88 (diff) |
bcachefs: Optimize eytzinger0_sort() with bottom-up heapsort
This optimization reduces the average number of comparisons required
from 2*n*log2(n) - 3*n + o(n) to n*log2(n) + 0.37*n + o(n). When n is
sufficiently large, it results in approximately 50% fewer comparisons.
Currently, eytzinger0_sort employs the textbook version of heapsort,
where during the heapify process, each level requires two comparisons
to determine the maximum among three elements. In contrast, the
bottom-up heapsort, during heapify, only compares two children at each
level until reaching a leaf node. Then, it backtracks from the leaf
node to find the correct position. Since heapify typically continues
until very close to the leaf node, the standard heapify requires about
2*log2(n) comparisons, while the bottom-up variant only needs log2(n)
comparisons.
The experimental data presented below is based on an array generated
by get_random_u32().
| N | comparisons(old) | comparisons(new) | time(old) | time(new) |
|-------|------------------|------------------|-----------|-----------|
| 10000 | 235381 | 136615 | 25545 us | 20366 us |
| 20000 | 510694 | 293425 | 31336 us | 18312 us |
| 30000 | 800384 | 457412 | 35042 us | 27386 us |
| 40000 | 1101617 | 626831 | 48779 us | 38253 us |
| 50000 | 1409762 | 799637 | 62238 us | 46950 us |
| 60000 | 1721191 | 974521 | 75588 us | 58367 us |
| 70000 | 2038536 | 1152171 | 90823 us | 68778 us |
| 80000 | 2362958 | 1333472 | 104165 us | 78625 us |
| 90000 | 2690900 | 1516065 | 116111 us | 89573 us |
| 100000| 3019413 | 1699879 | 133638 us | 100998 us |
Refs:
BOTTOM-UP-HEAPSORT, a new variant of HEAPSORT beating, on an average,
QUICKSORT (if n is not very small)
Ingo Wegener
Theoretical Computer Science, 118(1); Pages 81-98, 13 September 1993
https://doi.org/10.1016/0304-3975(93)90364-Y
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs/bcachefs/eytzinger.c')
-rw-r--r-- | fs/bcachefs/eytzinger.c | 105 |
1 files changed, 88 insertions, 17 deletions
diff --git a/fs/bcachefs/eytzinger.c b/fs/bcachefs/eytzinger.c index 0f955c3c76a7..2eaffe37b5e7 100644 --- a/fs/bcachefs/eytzinger.c +++ b/fs/bcachefs/eytzinger.c @@ -171,7 +171,7 @@ void eytzinger0_sort_r(void *base, size_t n, size_t size, swap_r_func_t swap_func, const void *priv) { - int i, c, r; + int i, j, k; /* called from 'sort' without swap function, let's pick the default */ if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap_func) @@ -188,17 +188,22 @@ void eytzinger0_sort_r(void *base, size_t n, size_t size, /* heapify */ for (i = n / 2 - 1; i >= 0; --i) { - for (r = i; r * 2 + 1 < n; r = c) { - c = r * 2 + 1; + /* Find the sift-down path all the way to the leaves. */ + for (j = i; k = j * 2 + 1, k + 1 < n;) + j = eytzinger0_do_cmp(base, n, size, cmp_func, priv, k, k + 1) > 0 ? k : k + 1; - if (c + 1 < n && - eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0) - c++; + /* Special case for the last leaf with no sibling. */ + if (j * 2 + 2 == n) + j = j * 2 + 1; - if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0) - break; + /* Backtrack to the correct location. */ + while (j != i && eytzinger0_do_cmp(base, n, size, cmp_func, priv, i, j) >= 0) + j = (j - 1) / 2; - eytzinger0_do_swap(base, n, size, swap_func, priv, r, c); + /* Shift the element into its correct place. */ + for (k = j; j != i;) { + j = (j - 1) / 2; + eytzinger0_do_swap(base, n, size, swap_func, priv, j, k); } } @@ -206,17 +211,22 @@ void eytzinger0_sort_r(void *base, size_t n, size_t size, for (i = n - 1; i > 0; --i) { eytzinger0_do_swap(base, n, size, swap_func, priv, 0, i); - for (r = 0; r * 2 + 1 < i; r = c) { - c = r * 2 + 1; + /* Find the sift-down path all the way to the leaves. */ + for (j = 0; k = j * 2 + 1, k + 1 < i;) + j = eytzinger0_do_cmp(base, n, size, cmp_func, priv, k, k + 1) > 0 ? k : k + 1; - if (c + 1 < i && - eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0) - c++; + /* Special case for the last leaf with no sibling. */ + if (j * 2 + 2 == i) + j = j * 2 + 1; - if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0) - break; + /* Backtrack to the correct location. */ + while (j && eytzinger0_do_cmp(base, n, size, cmp_func, priv, 0, j) >= 0) + j = (j - 1) / 2; - eytzinger0_do_swap(base, n, size, swap_func, priv, r, c); + /* Shift the element into its correct place. */ + for (k = j; j;) { + j = (j - 1) / 2; + eytzinger0_do_swap(base, n, size, swap_func, priv, j, k); } } } @@ -232,3 +242,64 @@ void eytzinger0_sort(void *base, size_t n, size_t size, return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w); } + +#if 0 +#include <linux/slab.h> +#include <linux/random.h> +#include <linux/ktime.h> + +static u64 cmp_count; + +static int mycmp(const void *a, const void *b) +{ + u32 _a = *(u32 *)a; + u32 _b = *(u32 *)b; + + cmp_count++; + if (_a < _b) + return -1; + else if (_a > _b) + return 1; + else + return 0; +} + +static int test(void) +{ + size_t N, i; + ktime_t start, end; + s64 delta; + u32 *arr; + + for (N = 10000; N <= 100000; N += 10000) { + arr = kmalloc_array(N, sizeof(u32), GFP_KERNEL); + cmp_count = 0; + + for (i = 0; i < N; i++) + arr[i] = get_random_u32(); + + start = ktime_get(); + eytzinger0_sort(arr, N, sizeof(u32), mycmp, NULL); + end = ktime_get(); + + delta = ktime_us_delta(end, start); + printk(KERN_INFO "time: %lld\n", delta); + printk(KERN_INFO "comparisons: %lld\n", cmp_count); + + u32 prev = 0; + + eytzinger0_for_each(i, N) { + if (prev > arr[i]) + goto err; + prev = arr[i]; + } + + kfree(arr); + } + return 0; + +err: + kfree(arr); + return -1; +} +#endif |