summaryrefslogtreecommitdiff
path: root/mm/mmu_notifier.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/mmu_notifier.c')
-rw-r--r--mm/mmu_notifier.c263
1 files changed, 202 insertions, 61 deletions
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index b5670620aea0..7fde88695f35 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -21,17 +21,11 @@
/* global SRCU for all MMs */
DEFINE_STATIC_SRCU(srcu);
-/*
- * This function allows mmu_notifier::release callback to delay a call to
- * a function that will free appropriate resources. The function must be
- * quick and must not block.
- */
-void mmu_notifier_call_srcu(struct rcu_head *rcu,
- void (*func)(struct rcu_head *rcu))
-{
- call_srcu(&srcu, rcu, func);
-}
-EXPORT_SYMBOL_GPL(mmu_notifier_call_srcu);
+#ifdef CONFIG_LOCKDEP
+struct lockdep_map __mmu_notifier_invalidate_range_start_map = {
+ .name = "mmu_notifier_invalidate_range_start"
+};
+#endif
/*
* This function can't run concurrently against mmu_notifier_register
@@ -174,11 +168,19 @@ int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(mn, &range->mm->mmu_notifier_mm->list, hlist) {
if (mn->ops->invalidate_range_start) {
- int _ret = mn->ops->invalidate_range_start(mn, range);
+ int _ret;
+
+ if (!mmu_notifier_range_blockable(range))
+ non_block_start();
+ _ret = mn->ops->invalidate_range_start(mn, range);
+ if (!mmu_notifier_range_blockable(range))
+ non_block_end();
if (_ret) {
pr_info("%pS callback failed with %d in %sblockable context.\n",
mn->ops->invalidate_range_start, _ret,
!mmu_notifier_range_blockable(range) ? "non-" : "");
+ WARN_ON(mmu_notifier_range_blockable(range) ||
+ ret != -EAGAIN);
ret = _ret;
}
}
@@ -187,7 +189,6 @@ int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
return ret;
}
-EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_start);
void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range,
bool only_end)
@@ -195,6 +196,7 @@ void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range,
struct mmu_notifier *mn;
int id;
+ lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(mn, &range->mm->mmu_notifier_mm->list, hlist) {
/*
@@ -214,12 +216,17 @@ void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range,
mn->ops->invalidate_range(mn, range->mm,
range->start,
range->end);
- if (mn->ops->invalidate_range_end)
+ if (mn->ops->invalidate_range_end) {
+ if (!mmu_notifier_range_blockable(range))
+ non_block_start();
mn->ops->invalidate_range_end(mn, range);
+ if (!mmu_notifier_range_blockable(range))
+ non_block_end();
+ }
}
srcu_read_unlock(&srcu, id);
+ lock_map_release(&__mmu_notifier_invalidate_range_start_map);
}
-EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_end);
void __mmu_notifier_invalidate_range(struct mm_struct *mm,
unsigned long start, unsigned long end)
@@ -234,35 +241,49 @@ void __mmu_notifier_invalidate_range(struct mm_struct *mm,
}
srcu_read_unlock(&srcu, id);
}
-EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range);
-static int do_mmu_notifier_register(struct mmu_notifier *mn,
- struct mm_struct *mm,
- int take_mmap_sem)
+/*
+ * Same as mmu_notifier_register but here the caller must hold the
+ * mmap_sem in write mode.
+ */
+int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
{
- struct mmu_notifier_mm *mmu_notifier_mm;
+ struct mmu_notifier_mm *mmu_notifier_mm = NULL;
int ret;
+ lockdep_assert_held_write(&mm->mmap_sem);
BUG_ON(atomic_read(&mm->mm_users) <= 0);
- ret = -ENOMEM;
- mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
- if (unlikely(!mmu_notifier_mm))
- goto out;
+ if (IS_ENABLED(CONFIG_LOCKDEP)) {
+ fs_reclaim_acquire(GFP_KERNEL);
+ lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
+ lock_map_release(&__mmu_notifier_invalidate_range_start_map);
+ fs_reclaim_release(GFP_KERNEL);
+ }
- if (take_mmap_sem)
- down_write(&mm->mmap_sem);
- ret = mm_take_all_locks(mm);
- if (unlikely(ret))
- goto out_clean;
+ mn->mm = mm;
+ mn->users = 1;
+
+ if (!mm->mmu_notifier_mm) {
+ /*
+ * kmalloc cannot be called under mm_take_all_locks(), but we
+ * know that mm->mmu_notifier_mm can't change while we hold
+ * the write side of the mmap_sem.
+ */
+ mmu_notifier_mm =
+ kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
+ if (!mmu_notifier_mm)
+ return -ENOMEM;
- if (!mm_has_notifiers(mm)) {
INIT_HLIST_HEAD(&mmu_notifier_mm->list);
spin_lock_init(&mmu_notifier_mm->lock);
-
- mm->mmu_notifier_mm = mmu_notifier_mm;
- mmu_notifier_mm = NULL;
}
+
+ ret = mm_take_all_locks(mm);
+ if (unlikely(ret))
+ goto out_clean;
+
+ /* Pairs with the mmdrop in mmu_notifier_unregister_* */
mmgrab(mm);
/*
@@ -273,48 +294,118 @@ static int do_mmu_notifier_register(struct mmu_notifier *mn,
* We can't race against any other mmu notifier method either
* thanks to mm_take_all_locks().
*/
+ if (mmu_notifier_mm)
+ mm->mmu_notifier_mm = mmu_notifier_mm;
+
spin_lock(&mm->mmu_notifier_mm->lock);
hlist_add_head_rcu(&mn->hlist, &mm->mmu_notifier_mm->list);
spin_unlock(&mm->mmu_notifier_mm->lock);
mm_drop_all_locks(mm);
+ BUG_ON(atomic_read(&mm->mm_users) <= 0);
+ return 0;
+
out_clean:
- if (take_mmap_sem)
- up_write(&mm->mmap_sem);
kfree(mmu_notifier_mm);
-out:
- BUG_ON(atomic_read(&mm->mm_users) <= 0);
return ret;
}
+EXPORT_SYMBOL_GPL(__mmu_notifier_register);
-/*
+/**
+ * mmu_notifier_register - Register a notifier on a mm
+ * @mn: The notifier to attach
+ * @mm: The mm to attach the notifier to
+ *
* Must not hold mmap_sem nor any other VM related lock when calling
* this registration function. Must also ensure mm_users can't go down
* to zero while this runs to avoid races with mmu_notifier_release,
* so mm has to be current->mm or the mm should be pinned safely such
* as with get_task_mm(). If the mm is not current->mm, the mm_users
* pin should be released by calling mmput after mmu_notifier_register
- * returns. mmu_notifier_unregister must be always called to
- * unregister the notifier. mm_count is automatically pinned to allow
- * mmu_notifier_unregister to safely run at any time later, before or
- * after exit_mmap. ->release will always be called before exit_mmap
- * frees the pages.
+ * returns.
+ *
+ * mmu_notifier_unregister() or mmu_notifier_put() must be always called to
+ * unregister the notifier.
+ *
+ * While the caller has a mmu_notifier get the mn->mm pointer will remain
+ * valid, and can be converted to an active mm pointer via mmget_not_zero().
*/
int mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
{
- return do_mmu_notifier_register(mn, mm, 1);
+ int ret;
+
+ down_write(&mm->mmap_sem);
+ ret = __mmu_notifier_register(mn, mm);
+ up_write(&mm->mmap_sem);
+ return ret;
}
EXPORT_SYMBOL_GPL(mmu_notifier_register);
-/*
- * Same as mmu_notifier_register but here the caller must hold the
- * mmap_sem in write mode.
+static struct mmu_notifier *
+find_get_mmu_notifier(struct mm_struct *mm, const struct mmu_notifier_ops *ops)
+{
+ struct mmu_notifier *mn;
+
+ spin_lock(&mm->mmu_notifier_mm->lock);
+ hlist_for_each_entry_rcu (mn, &mm->mmu_notifier_mm->list, hlist) {
+ if (mn->ops != ops)
+ continue;
+
+ if (likely(mn->users != UINT_MAX))
+ mn->users++;
+ else
+ mn = ERR_PTR(-EOVERFLOW);
+ spin_unlock(&mm->mmu_notifier_mm->lock);
+ return mn;
+ }
+ spin_unlock(&mm->mmu_notifier_mm->lock);
+ return NULL;
+}
+
+/**
+ * mmu_notifier_get_locked - Return the single struct mmu_notifier for
+ * the mm & ops
+ * @ops: The operations struct being subscribe with
+ * @mm : The mm to attach notifiers too
+ *
+ * This function either allocates a new mmu_notifier via
+ * ops->alloc_notifier(), or returns an already existing notifier on the
+ * list. The value of the ops pointer is used to determine when two notifiers
+ * are the same.
+ *
+ * Each call to mmu_notifier_get() must be paired with a call to
+ * mmu_notifier_put(). The caller must hold the write side of mm->mmap_sem.
+ *
+ * While the caller has a mmu_notifier get the mm pointer will remain valid,
+ * and can be converted to an active mm pointer via mmget_not_zero().
*/
-int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
+struct mmu_notifier *mmu_notifier_get_locked(const struct mmu_notifier_ops *ops,
+ struct mm_struct *mm)
{
- return do_mmu_notifier_register(mn, mm, 0);
+ struct mmu_notifier *mn;
+ int ret;
+
+ lockdep_assert_held_write(&mm->mmap_sem);
+
+ if (mm->mmu_notifier_mm) {
+ mn = find_get_mmu_notifier(mm, ops);
+ if (mn)
+ return mn;
+ }
+
+ mn = ops->alloc_notifier(mm);
+ if (IS_ERR(mn))
+ return mn;
+ mn->ops = ops;
+ ret = __mmu_notifier_register(mn, mm);
+ if (ret)
+ goto out_free;
+ return mn;
+out_free:
+ mn->ops->free_notifier(mn);
+ return ERR_PTR(ret);
}
-EXPORT_SYMBOL_GPL(__mmu_notifier_register);
+EXPORT_SYMBOL_GPL(mmu_notifier_get_locked);
/* this is called after the last mmu_notifier_unregister() returned */
void __mmu_notifier_mm_destroy(struct mm_struct *mm)
@@ -375,24 +466,74 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
}
EXPORT_SYMBOL_GPL(mmu_notifier_unregister);
-/*
- * Same as mmu_notifier_unregister but no callback and no srcu synchronization.
+static void mmu_notifier_free_rcu(struct rcu_head *rcu)
+{
+ struct mmu_notifier *mn = container_of(rcu, struct mmu_notifier, rcu);
+ struct mm_struct *mm = mn->mm;
+
+ mn->ops->free_notifier(mn);
+ /* Pairs with the get in __mmu_notifier_register() */
+ mmdrop(mm);
+}
+
+/**
+ * mmu_notifier_put - Release the reference on the notifier
+ * @mn: The notifier to act on
+ *
+ * This function must be paired with each mmu_notifier_get(), it releases the
+ * reference obtained by the get. If this is the last reference then process
+ * to free the notifier will be run asynchronously.
+ *
+ * Unlike mmu_notifier_unregister() the get/put flow only calls ops->release
+ * when the mm_struct is destroyed. Instead free_notifier is always called to
+ * release any resources held by the user.
+ *
+ * As ops->release is not guaranteed to be called, the user must ensure that
+ * all sptes are dropped, and no new sptes can be established before
+ * mmu_notifier_put() is called.
+ *
+ * This function can be called from the ops->release callback, however the
+ * caller must still ensure it is called pairwise with mmu_notifier_get().
+ *
+ * Modules calling this function must call mmu_notifier_synchronize() in
+ * their __exit functions to ensure the async work is completed.
*/
-void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
- struct mm_struct *mm)
+void mmu_notifier_put(struct mmu_notifier *mn)
{
+ struct mm_struct *mm = mn->mm;
+
spin_lock(&mm->mmu_notifier_mm->lock);
- /*
- * Can not use list_del_rcu() since __mmu_notifier_release
- * can delete it before we hold the lock.
- */
+ if (WARN_ON(!mn->users) || --mn->users)
+ goto out_unlock;
hlist_del_init_rcu(&mn->hlist);
spin_unlock(&mm->mmu_notifier_mm->lock);
- BUG_ON(atomic_read(&mm->mm_count) <= 0);
- mmdrop(mm);
+ call_srcu(&srcu, &mn->rcu, mmu_notifier_free_rcu);
+ return;
+
+out_unlock:
+ spin_unlock(&mm->mmu_notifier_mm->lock);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_put);
+
+/**
+ * mmu_notifier_synchronize - Ensure all mmu_notifiers are freed
+ *
+ * This function ensures that all outstanding async SRU work from
+ * mmu_notifier_put() is completed. After it returns any mmu_notifier_ops
+ * associated with an unused mmu_notifier will no longer be called.
+ *
+ * Before using the caller must ensure that all of its mmu_notifiers have been
+ * fully released via mmu_notifier_put().
+ *
+ * Modules using the mmu_notifier_put() API should call this in their __exit
+ * function to avoid module unloading races.
+ */
+void mmu_notifier_synchronize(void)
+{
+ synchronize_srcu(&srcu);
}
-EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release);
+EXPORT_SYMBOL_GPL(mmu_notifier_synchronize);
bool
mmu_notifier_range_update_to_read_only(const struct mmu_notifier_range *range)