summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/cpumask.h17
-rw-r--r--include/linux/find.h37
-rw-r--r--include/linux/percpu_counter.h6
-rw-r--r--kernel/fork.c5
-rw-r--r--lib/find_bit.c9
-rw-r--r--lib/percpu_counter.c37
6 files changed, 77 insertions, 34 deletions
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index d4901ca8883c..ca736b05ec7b 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -351,6 +351,23 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta
for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits)
/**
+ * for_each_cpu_or - iterate over every cpu present in either mask
+ * @cpu: the (optionally unsigned) integer iterator
+ * @mask1: the first cpumask pointer
+ * @mask2: the second cpumask pointer
+ *
+ * This saves a temporary CPU mask in many places. It is equivalent to:
+ * struct cpumask tmp;
+ * cpumask_or(&tmp, &mask1, &mask2);
+ * for_each_cpu(cpu, &tmp)
+ * ...
+ *
+ * After the loop, cpu is >= nr_cpu_ids.
+ */
+#define for_each_cpu_or(cpu, mask1, mask2) \
+ for_each_or_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits)
+
+/**
* cpumask_any_but - return a "random" in a cpumask, but not this one.
* @mask: the cpumask to search
* @cpu: the cpu to ignore.
diff --git a/include/linux/find.h b/include/linux/find.h
index 4647864a5ffd..5e4f39ef2e72 100644
--- a/include/linux/find.h
+++ b/include/linux/find.h
@@ -14,6 +14,8 @@ unsigned long _find_next_and_bit(const unsigned long *addr1, const unsigned long
unsigned long nbits, unsigned long start);
unsigned long _find_next_andnot_bit(const unsigned long *addr1, const unsigned long *addr2,
unsigned long nbits, unsigned long start);
+unsigned long _find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2,
+ unsigned long nbits, unsigned long start);
unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits,
unsigned long start);
extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size);
@@ -127,6 +129,36 @@ unsigned long find_next_andnot_bit(const unsigned long *addr1,
}
#endif
+#ifndef find_next_or_bit
+/**
+ * find_next_or_bit - find the next set bit in either memory regions
+ * @addr1: The first address to base the search on
+ * @addr2: The second address to base the search on
+ * @size: The bitmap size in bits
+ * @offset: The bitnumber to start searching at
+ *
+ * Returns the bit number for the next set bit
+ * If no bits are set, returns @size.
+ */
+static inline
+unsigned long find_next_or_bit(const unsigned long *addr1,
+ const unsigned long *addr2, unsigned long size,
+ unsigned long offset)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = (*addr1 | *addr2) & GENMASK(size - 1, offset);
+ return val ? __ffs(val) : size;
+ }
+
+ return _find_next_or_bit(addr1, addr2, size, offset);
+}
+#endif
+
#ifndef find_next_zero_bit
/**
* find_next_zero_bit - find the next cleared bit in a memory region
@@ -536,6 +568,11 @@ unsigned long find_next_bit_le(const void *addr, unsigned
(bit) = find_next_andnot_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\
(bit)++)
+#define for_each_or_bit(bit, addr1, addr2, size) \
+ for ((bit) = 0; \
+ (bit) = find_next_or_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\
+ (bit)++)
+
/* same as for_each_set_bit() but use bit as value to start with */
#define for_each_set_bit_from(bit, addr, size) \
for (; (bit) = find_next_bit((addr), (size), (bit)), (bit) < (size); (bit)++)
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 521a733e21a9..75b73c83bc9d 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -45,7 +45,6 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount,
s32 batch);
s64 __percpu_counter_sum(struct percpu_counter *fbc);
-s64 percpu_counter_sum_all(struct percpu_counter *fbc);
int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch);
void percpu_counter_sync(struct percpu_counter *fbc);
@@ -196,11 +195,6 @@ static inline s64 percpu_counter_sum(struct percpu_counter *fbc)
return percpu_counter_read(fbc);
}
-static inline s64 percpu_counter_sum_all(struct percpu_counter *fbc)
-{
- return percpu_counter_read(fbc);
-}
-
static inline bool percpu_counter_initialized(struct percpu_counter *fbc)
{
return true;
diff --git a/kernel/fork.c b/kernel/fork.c
index d8cda4c6de6c..c0257cbee093 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -755,11 +755,6 @@ static void check_mm(struct mm_struct *mm)
for (i = 0; i < NR_MM_COUNTERS; i++) {
long x = percpu_counter_sum(&mm->rss_stat[i]);
- if (likely(!x))
- continue;
-
- /* Making sure this is not due to race with CPU offlining. */
- x = percpu_counter_sum_all(&mm->rss_stat[i]);
if (unlikely(x))
pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n",
mm, resident_page_types[i], x);
diff --git a/lib/find_bit.c b/lib/find_bit.c
index c10920e66788..32f99e9a670e 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -182,6 +182,15 @@ unsigned long _find_next_andnot_bit(const unsigned long *addr1, const unsigned l
EXPORT_SYMBOL(_find_next_andnot_bit);
#endif
+#ifndef find_next_or_bit
+unsigned long _find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2,
+ unsigned long nbits, unsigned long start)
+{
+ return FIND_NEXT_BIT(addr1[idx] | addr2[idx], /* nop */, nbits, start);
+}
+EXPORT_SYMBOL(_find_next_or_bit);
+#endif
+
#ifndef find_next_zero_bit
unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits,
unsigned long start)
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index dba56c5c1837..5004463c4f9f 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -122,8 +122,19 @@ void percpu_counter_sync(struct percpu_counter *fbc)
}
EXPORT_SYMBOL(percpu_counter_sync);
-static s64 __percpu_counter_sum_mask(struct percpu_counter *fbc,
- const struct cpumask *cpu_mask)
+/*
+ * Add up all the per-cpu counts, return the result. This is a more accurate
+ * but much slower version of percpu_counter_read_positive().
+ *
+ * We use the cpu mask of (cpu_online_mask | cpu_dying_mask) to capture sums
+ * from CPUs that are in the process of being taken offline. Dying cpus have
+ * been removed from the online mask, but may not have had the hotplug dead
+ * notifier called to fold the percpu count back into the global counter sum.
+ * By including dying CPUs in the iteration mask, we avoid this race condition
+ * so __percpu_counter_sum() just does the right thing when CPUs are being taken
+ * offline.
+ */
+s64 __percpu_counter_sum(struct percpu_counter *fbc)
{
s64 ret;
int cpu;
@@ -131,35 +142,15 @@ static s64 __percpu_counter_sum_mask(struct percpu_counter *fbc,
raw_spin_lock_irqsave(&fbc->lock, flags);
ret = fbc->count;
- for_each_cpu(cpu, cpu_mask) {
+ for_each_cpu_or(cpu, cpu_online_mask, cpu_dying_mask) {
s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
ret += *pcount;
}
raw_spin_unlock_irqrestore(&fbc->lock, flags);
return ret;
}
-
-/*
- * Add up all the per-cpu counts, return the result. This is a more accurate
- * but much slower version of percpu_counter_read_positive()
- */
-s64 __percpu_counter_sum(struct percpu_counter *fbc)
-{
- return __percpu_counter_sum_mask(fbc, cpu_online_mask);
-}
EXPORT_SYMBOL(__percpu_counter_sum);
-/*
- * This is slower version of percpu_counter_sum as it traverses all possible
- * cpus. Use this only in the cases where accurate data is needed in the
- * presense of CPUs getting offlined.
- */
-s64 percpu_counter_sum_all(struct percpu_counter *fbc)
-{
- return __percpu_counter_sum_mask(fbc, cpu_possible_mask);
-}
-EXPORT_SYMBOL(percpu_counter_sum_all);
-
int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,
struct lock_class_key *key)
{