summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- include/linux/futex.h 26
-rw-r--r-- include/linux/mm_types.h 5
-rw-r--r-- include/uapi/linux/prctl.h 5
-rw-r--r-- init/Kconfig 5
-rw-r--r-- kernel/fork.c 2
-rw-r--r-- kernel/futex/core.c 208
-rw-r--r-- kernel/futex/futex.h 10
-rw-r--r-- kernel/sys.c 4
8 files changed, 244 insertions, 21 deletions
diff --git a/include/linux/futex.h b/include/linux/futex.h
index b70df27d7e85..8f1be08bef18 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -4,11 +4,11 @@
#include <linux/sched.h>
#include <linux/ktime.h>
+#include <linux/mm_types.h>
#include <uapi/linux/futex.h>
struct inode;
-struct mm_struct;
struct task_struct;
/*
@@ -77,7 +77,22 @@ void futex_exec_release(struct task_struct *tsk);
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
u32 __user *uaddr2, u32 val2, u32 val3);
-#else
+int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4);
+
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
+void futex_hash_free(struct mm_struct *mm);
+
+static inline void futex_mm_init(struct mm_struct *mm)
+{
+ mm->futex_phash = NULL;
+}
+
+#else /* !CONFIG_FUTEX_PRIVATE_HASH */
+static inline void futex_hash_free(struct mm_struct *mm) { }
+static inline void futex_mm_init(struct mm_struct *mm) { }
+#endif /* CONFIG_FUTEX_PRIVATE_HASH */
+
+#else /* !CONFIG_FUTEX */
static inline void futex_init_task(struct task_struct *tsk) { }
static inline void futex_exit_recursive(struct task_struct *tsk) { }
static inline void futex_exit_release(struct task_struct *tsk) { }
@@ -88,6 +103,13 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 val,
{
return -EINVAL;
}
+static inline int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4)
+{
+ return -EINVAL;
+}
+static inline void futex_hash_free(struct mm_struct *mm) { }
+static inline void futex_mm_init(struct mm_struct *mm) { }
+
#endif
#endif
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 56d07edd01f9..a4b5661e4177 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -31,6 +31,7 @@
#define INIT_PASID 0
struct address_space;
+struct futex_private_hash;
struct mem_cgroup;
/*
@@ -1031,7 +1032,9 @@ struct mm_struct {
*/
seqcount_t mm_lock_seq;
#endif
-
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
+ struct futex_private_hash *futex_phash;
+#endif
unsigned long hiwater_rss; /* High-watermark of RSS usage */
unsigned long hiwater_vm; /* High-water virtual memory usage */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 15c18ef4eb11..3b93fb906e3c 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -364,4 +364,9 @@ struct prctl_mm_map {
# define PR_TIMER_CREATE_RESTORE_IDS_ON 1
# define PR_TIMER_CREATE_RESTORE_IDS_GET 2
+/* FUTEX hash management */
+#define PR_FUTEX_HASH 78
+# define PR_FUTEX_HASH_SET_SLOTS 1
+# define PR_FUTEX_HASH_GET_SLOTS 2
+
#endif /* _LINUX_PRCTL_H */
diff --git a/init/Kconfig b/init/Kconfig
index 63f5974b9fa6..4b84da2b2ec4 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1699,6 +1699,11 @@ config FUTEX_PI
depends on FUTEX && RT_MUTEXES
default y
+config FUTEX_PRIVATE_HASH
+ bool
+ depends on FUTEX && !BASE_SMALL && MMU
+ default y
+
config EPOLL
bool "Enable eventpoll support" if EXPERT
default y
diff --git a/kernel/fork.c b/kernel/fork.c
index c4b26cd8998b..831dfec45054 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1305,6 +1305,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
RCU_INIT_POINTER(mm->exe_file, NULL);
mmu_notifier_subscriptions_init(mm);
init_tlb_flush_pending(mm);
+ futex_mm_init(mm);
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !defined(CONFIG_SPLIT_PMD_PTLOCKS)
mm->pmd_huge_pte = NULL;
#endif
@@ -1387,6 +1388,7 @@ static inline void __mmput(struct mm_struct *mm)
if (mm->binfmt)
module_put(mm->binfmt->module);
lru_gen_del_mm(mm);
+ futex_hash_free(mm);
mmdrop(mm);
}
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index afc66780f84f..818df7420a1a 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -39,6 +39,7 @@
#include <linux/memblock.h>
#include <linux/fault-inject.h>
#include <linux/slab.h>
+#include <linux/prctl.h>
#include "futex.h"
#include "../locking/rtmutex_common.h"
@@ -55,6 +56,12 @@ static struct {
#define futex_queues (__futex_data.queues)
#define futex_hashmask (__futex_data.hashmask)
+/*
+ * Per-mm futex hash for process-private futexes, referenced from
+ * mm_struct::futex_phash.
+ *
+ * @hash_mask: number of slots - 1, or 0 when allocated with zero slots; a
+ *             zero mask makes lookups fall back to the global hash
+ * @mm:        the mm this hash was allocated for
+ * @custom:    value of the @custom argument given at allocation time; the
+ *             prctl(PR_FUTEX_HASH) path passes true
+ * @queues:    the hash buckets
+ */
+struct futex_private_hash {
+ unsigned int hash_mask;
+ void *mm;
+ bool custom;
+ struct futex_hash_bucket queues[];
+};
/*
* Fault injections for futexes.
@@ -107,9 +114,17 @@ late_initcall(fail_futex_debugfs);
#endif /* CONFIG_FAIL_FUTEX */
-struct futex_private_hash *futex_private_hash(void)
+static struct futex_hash_bucket *
+__futex_hash(union futex_key *key, struct futex_private_hash *fph);
+
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
+static inline bool futex_key_is_private(union futex_key *key)
{
- return NULL;
+ /*
+ * Relies on get_futex_key() to set either bit for shared
+ * futexes -- see comment with union futex_key.
+ */
+ return !(key->both.offset & (FUT_OFF_INODE | FUT_OFF_MMSHARED));
}
bool futex_private_hash_get(struct futex_private_hash *fph)
@@ -117,21 +132,8 @@ bool futex_private_hash_get(struct futex_private_hash *fph)
return false;
}
-void futex_private_hash_put(struct futex_private_hash *fph) { }
-
-/**
- * futex_hash - Return the hash bucket in the global hash
- * @key: Pointer to the futex key for which the hash is calculated
- *
- * We hash on the keys returned from get_futex_key (see below) and return the
- * corresponding hash bucket in the global hash.
- */
-struct futex_hash_bucket *futex_hash(union futex_key *key)
+void futex_private_hash_put(struct futex_private_hash *fph)
{
- u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
- key->both.offset);
-
- return &futex_queues[hash & futex_hashmask];
}
/**
@@ -144,6 +146,84 @@ struct futex_hash_bucket *futex_hash(union futex_key *key)
void futex_hash_get(struct futex_hash_bucket *hb) { }
void futex_hash_put(struct futex_hash_bucket *hb) { }
+/*
+ * Look up @key in the private hash.
+ *
+ * Returns the matching bucket, or NULL when @key is not a private key, when
+ * no private hash is available, or when the private hash has no slots
+ * (hash_mask == 0). On NULL the caller, __futex_hash(), uses the global hash
+ * instead.
+ */
+static struct futex_hash_bucket *
+__futex_hash_private(union futex_key *key, struct futex_private_hash *fph)
+{
+ u32 hash;
+
+ if (!futex_key_is_private(key))
+ return NULL;
+
+ /* No hash supplied by the caller: use the one attached to the key's mm. */
+ if (!fph)
+ fph = key->private.mm->futex_phash;
+ if (!fph || !fph->hash_mask)
+ return NULL;
+
+ /* Only the address words are hashed, mixed with both.offset. */
+ hash = jhash2((void *)&key->private.address,
+ sizeof(key->private.address) / 4,
+ key->both.offset);
+ return &fph->queues[hash & fph->hash_mask];
+}
+
+/*
+ * Return the private hash attached to the current task's mm, or NULL if
+ * none has been installed.
+ */
+struct futex_private_hash *futex_private_hash(void)
+{
+ struct mm_struct *mm = current->mm;
+ struct futex_private_hash *fph;
+
+ fph = mm->futex_phash;
+ return fph;
+}
+
+struct futex_hash_bucket *futex_hash(union futex_key *key)
+{
+ struct futex_hash_bucket *hb;
+
+ hb = __futex_hash(key, NULL);
+ return hb;
+}
+
+#else /* !CONFIG_FUTEX_PRIVATE_HASH */
+
+static struct futex_hash_bucket *
+__futex_hash_private(union futex_key *key, struct futex_private_hash *fph)
+{
+ return NULL;
+}
+
+struct futex_hash_bucket *futex_hash(union futex_key *key)
+{
+ return __futex_hash(key, NULL);
+}
+
+#endif /* CONFIG_FUTEX_PRIVATE_HASH */
+
+/**
+ * __futex_hash - Return the hash bucket
+ * @key: Pointer to the futex key for which the hash is calculated
+ * @fph: Pointer to private hash if known
+ *
+ * We hash on the keys returned from get_futex_key (see below) and return the
+ * corresponding hash bucket.
+ * If the FUTEX is PROCESS_PRIVATE then a per-process hash bucket (from the
+ * private hash) is returned if existing. Otherwise a hash bucket from the
+ * global hash is returned.
+ */
+static struct futex_hash_bucket *
+__futex_hash(union futex_key *key, struct futex_private_hash *fph)
+{
+ struct futex_hash_bucket *hb;
+ u32 hash;
+
+ hb = __futex_hash_private(key, fph);
+ if (hb)
+ return hb;
+
+ hash = jhash2((u32 *)key,
+ offsetof(typeof(*key), both.offset) / 4,
+ key->both.offset);
+ return &futex_queues[hash & futex_hashmask];
+}
+
/**
* futex_setup_timer - set up the sleeping hrtimer.
* @time: ptr to the given timeout value
@@ -986,6 +1066,13 @@ static void exit_pi_state_list(struct task_struct *curr)
union futex_key key = FUTEX_KEY_INIT;
/*
+ * Ensure the hash remains stable (no resize) during the while loop
+ * below. The hb pointer is acquired under the pi_lock so we can't block
+ * on the mutex.
+ */
+ WARN_ON(curr != current);
+ guard(private_hash)();
+ /*
* We are a ZOMBIE and nobody can enqueue itself on
* pi_state_list anymore, but we have to be careful
* versus waiters unqueueing themselves:
@@ -1160,13 +1247,98 @@ void futex_exit_release(struct task_struct *tsk)
futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
}
-static void futex_hash_bucket_init(struct futex_hash_bucket *fhb)
+static void futex_hash_bucket_init(struct futex_hash_bucket *fhb,
+ struct futex_private_hash *fph)
{
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
+ fhb->priv = fph;
+#endif
atomic_set(&fhb->waiters, 0);
plist_head_init(&fhb->chain);
spin_lock_init(&fhb->lock);
}
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
+/*
+ * Release the private hash attached to @mm. Called from __mmput().
+ * mm->futex_phash is NULL when no private hash was ever allocated, since
+ * futex_mm_init() sets it to NULL.
+ */
+void futex_hash_free(struct mm_struct *mm)
+{
+ kvfree(mm->futex_phash);
+}
+
+/*
+ * Allocate a private hash for the current task's mm and install it.
+ *
+ * @hash_slots: number of buckets; 0 or a power of two >= 2. With 0 the
+ *              structure is installed without buckets and lookups keep
+ *              using the global hash.
+ * @custom:     stored in the new hash; the prctl() path passes true.
+ *
+ * Returns 0 on success, -EINVAL for an invalid slot count or when the
+ * caller's thread group is not empty, -EALREADY when the mm already has a
+ * private hash, -ENOMEM when the allocation fails.
+ */
+static int futex_hash_allocate(unsigned int hash_slots, bool custom)
+{
+ struct mm_struct *mm = current->mm;
+ struct futex_private_hash *fph;
+ unsigned int i;
+
+ if (hash_slots && (hash_slots == 1 || !is_power_of_2(hash_slots)))
+ return -EINVAL;
+
+ if (mm->futex_phash)
+ return -EALREADY;
+
+ if (!thread_group_empty(current))
+ return -EINVAL;
+
+ fph = kvzalloc(struct_size(fph, queues, hash_slots), GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
+ if (!fph)
+ return -ENOMEM;
+
+ fph->hash_mask = hash_slots ? hash_slots - 1 : 0;
+ fph->custom = custom;
+ fph->mm = mm;
+
+ /* The index has the same type as hash_slots to avoid a mixed-sign compare. */
+ for (i = 0; i < hash_slots; i++)
+ futex_hash_bucket_init(&fph->queues[i], fph);
+
+ mm->futex_phash = fph;
+ return 0;
+}
+
+/*
+ * Return the number of buckets in the current mm's private hash, or 0 when
+ * there is no private hash or it was allocated with zero slots.
+ */
+static int futex_hash_get_slots(void)
+{
+ struct futex_private_hash *fph;
+
+ fph = current->mm->futex_phash;
+ if (fph && fph->hash_mask)
+ return fph->hash_mask + 1;
+ return 0;
+}
+
+#else
+
+static int futex_hash_allocate(unsigned int hash_slots, bool custom)
+{
+ return -EINVAL;
+}
+
+static int futex_hash_get_slots(void)
+{
+ return 0;
+}
+#endif
+
+/*
+ * futex_hash_prctl - handle prctl(PR_FUTEX_HASH, ...)
+ * @arg2: sub-command, PR_FUTEX_HASH_SET_SLOTS or PR_FUTEX_HASH_GET_SLOTS
+ * @arg3: for SET_SLOTS, the requested number of slots
+ * @arg4: must be 0 for SET_SLOTS
+ *
+ * Returns 0 for a successful SET_SLOTS, the slot count for GET_SLOTS, or a
+ * negative errno.
+ */
+int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4)
+{
+ int ret;
+
+ switch (arg2) {
+ case PR_FUTEX_HASH_SET_SLOTS:
+ /*
+ * futex_hash_allocate() takes an unsigned int: reject values that
+ * would otherwise be silently truncated to a different slot count.
+ */
+ if (arg4 != 0 || arg3 > UINT_MAX)
+ return -EINVAL;
+ ret = futex_hash_allocate(arg3, true);
+ break;
+
+ case PR_FUTEX_HASH_GET_SLOTS:
+ ret = futex_hash_get_slots();
+ break;
+
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ return ret;
+}
+
static int __init futex_init(void)
{
unsigned long hashsize, i;
@@ -1185,7 +1357,7 @@ static int __init futex_init(void)
hashsize = 1UL << futex_shift;
for (i = 0; i < hashsize; i++)
- futex_hash_bucket_init(&futex_queues[i]);
+ futex_hash_bucket_init(&futex_queues[i], NULL);
futex_hashmask = hashsize - 1;
return 0;
diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h
index 26e69333cb74..899aed5acde1 100644
--- a/kernel/futex/futex.h
+++ b/kernel/futex/futex.h
@@ -118,6 +118,7 @@ struct futex_hash_bucket {
atomic_t waiters;
spinlock_t lock;
struct plist_head chain;
+ struct futex_private_hash *priv;
} ____cacheline_aligned_in_smp;
/*
@@ -204,6 +205,7 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
int flags, u64 range_ns);
extern struct futex_hash_bucket *futex_hash(union futex_key *key);
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
extern void futex_hash_get(struct futex_hash_bucket *hb);
extern void futex_hash_put(struct futex_hash_bucket *hb);
@@ -211,6 +213,14 @@ extern struct futex_private_hash *futex_private_hash(void);
extern bool futex_private_hash_get(struct futex_private_hash *fph);
extern void futex_private_hash_put(struct futex_private_hash *fph);
+#else /* !CONFIG_FUTEX_PRIVATE_HASH */
+static inline void futex_hash_get(struct futex_hash_bucket *hb) { }
+static inline void futex_hash_put(struct futex_hash_bucket *hb) { }
+static inline struct futex_private_hash *futex_private_hash(void) { return NULL; }
+static inline bool futex_private_hash_get(struct futex_private_hash *fph) { return false; }
+static inline void futex_private_hash_put(struct futex_private_hash *fph) { }
+#endif
+
DEFINE_CLASS(hb, struct futex_hash_bucket *,
if (_T) futex_hash_put(_T),
futex_hash(key), union futex_key *key);
diff --git a/kernel/sys.c b/kernel/sys.c
index c434968e9f5d..adc0de0aa364 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -52,6 +52,7 @@
#include <linux/user_namespace.h>
#include <linux/time_namespace.h>
#include <linux/binfmts.h>
+#include <linux/futex.h>
#include <linux/sched.h>
#include <linux/sched/autogroup.h>
@@ -2820,6 +2821,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
return -EINVAL;
error = posixtimer_create_prctl(arg2);
break;
+ case PR_FUTEX_HASH:
+ error = futex_hash_prctl(arg2, arg3, arg4);
+ break;
default:
trace_task_prctl_unknown(option, arg2, arg3, arg4, arg5);
error = -EINVAL;