-rw-r--r--  fs/notify/dnotify/dnotify.c           |  5
-rw-r--r--  fs/notify/fanotify/fanotify.c         | 14
-rw-r--r--  fs/notify/fanotify/fanotify_user.c    |  5
-rw-r--r--  fs/notify/group.c                     |  3
-rw-r--r--  fs/notify/inotify/inotify_fsnotify.c  |  7
-rw-r--r--  fs/notify/inotify/inotify_user.c      |  5
-rw-r--r--  include/linux/fsnotify_backend.h      | 12
-rw-r--r--  include/linux/memcontrol.h            | 10
-rw-r--r--  include/linux/sched.h                 |  3
-rw-r--r--  include/linux/sched/mm.h              | 37
-rw-r--r--  kernel/fork.c                         |  3
-rw-r--r--  mm/memcontrol.c                       | 37
12 files changed, 123 insertions, 18 deletions
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index e2bea2ac5dfb..a6365e6bc047 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -384,8 +384,9 @@ out_err:
static int __init dnotify_init(void)
{
- dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC);
- dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC);
+ dnotify_struct_cache = KMEM_CACHE(dnotify_struct,
+ SLAB_PANIC|SLAB_ACCOUNT);
+ dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT);
dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops);
if (IS_ERR(dnotify_group))
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index f90842efea13..eb4e75175cfb 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -11,6 +11,7 @@
#include <linux/types.h>
#include <linux/wait.h>
#include <linux/audit.h>
+#include <linux/sched/mm.h>
#include "fanotify.h"
@@ -140,8 +141,8 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group,
struct inode *inode, u32 mask,
const struct path *path)
{
- struct fanotify_event_info *event;
- gfp_t gfp = GFP_KERNEL;
+ struct fanotify_event_info *event = NULL;
+ gfp_t gfp = GFP_KERNEL_ACCOUNT;
/*
* For queues with unlimited length lost events are not expected and
@@ -151,19 +152,22 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group,
if (group->max_events == UINT_MAX)
gfp |= __GFP_NOFAIL;
+ /* Whoever is interested in the event, pays for the allocation. */
+ memalloc_use_memcg(group->memcg);
+
if (fanotify_is_perm_event(mask)) {
struct fanotify_perm_event_info *pevent;
pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp);
if (!pevent)
- return NULL;
+ goto out;
event = &pevent->fae;
pevent->response = 0;
goto init;
}
event = kmem_cache_alloc(fanotify_event_cachep, gfp);
if (!event)
- return NULL;
+ goto out;
init: __maybe_unused
fsnotify_init_event(&event->fse, inode, mask);
event->tgid = get_pid(task_tgid(current));
@@ -174,6 +178,8 @@ init: __maybe_unused
event->path.mnt = NULL;
event->path.dentry = NULL;
}
+out:
+ memalloc_unuse_memcg();
return event;
}
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index ec4d8c59d0e3..0cf45041dc32 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -16,6 +16,7 @@
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <linux/sched/signal.h>
+#include <linux/memcontrol.h>
#include <asm/ioctls.h>
@@ -756,6 +757,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
group->fanotify_data.user = user;
atomic_inc(&user->fanotify_listeners);
+ group->memcg = get_mem_cgroup_from_mm(current->mm);
oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL);
if (unlikely(!oevent)) {
@@ -957,7 +959,8 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark,
*/
static int __init fanotify_user_setup(void)
{
- fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC);
+ fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
+ SLAB_PANIC|SLAB_ACCOUNT);
fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC);
if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) {
fanotify_perm_event_cachep =
diff --git a/fs/notify/group.c b/fs/notify/group.c
index aa5468f23e45..c03b83662876 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -22,6 +22,7 @@
#include <linux/srcu.h>
#include <linux/rculist.h>
#include <linux/wait.h>
+#include <linux/memcontrol.h>
#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
@@ -36,6 +37,8 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group)
if (group->ops->free_group_priv)
group->ops->free_group_priv(group);
+ mem_cgroup_put(group->memcg);
+
kfree(group);
}
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 9ab6dde38a14..f4184b4f3815 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -31,6 +31,7 @@
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/sched/user.h>
+#include <linux/sched/mm.h>
#include "inotify.h"
@@ -98,7 +99,11 @@ int inotify_handle_event(struct fsnotify_group *group,
i_mark = container_of(inode_mark, struct inotify_inode_mark,
fsn_mark);
- event = kmalloc(alloc_len, GFP_KERNEL);
+ /* Whoever is interested in the event, pays for the allocation. */
+ memalloc_use_memcg(group->memcg);
+ event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT);
+ memalloc_unuse_memcg();
+
if (unlikely(!event)) {
/*
* Treat lost event due to ENOMEM the same way as queue
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 1cf5b779d862..749c46ababa0 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -38,6 +38,7 @@
#include <linux/uaccess.h>
#include <linux/poll.h>
#include <linux/wait.h>
+#include <linux/memcontrol.h>
#include "inotify.h"
#include "../fdinfo.h"
@@ -636,6 +637,7 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
oevent->name_len = 0;
group->max_events = max_events;
+ group->memcg = get_mem_cgroup_from_mm(current->mm);
spin_lock_init(&group->inotify_data.idr_lock);
idr_init(&group->inotify_data.idr);
@@ -808,7 +810,8 @@ static int __init inotify_user_setup(void)
BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
- inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
+ inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark,
+ SLAB_PANIC|SLAB_ACCOUNT);
inotify_max_queued_events = 16384;
init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128;
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index b38964a7a521..a0c4790c5302 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -84,6 +84,8 @@ struct fsnotify_event_private_data;
struct fsnotify_fname;
struct fsnotify_iter_info;
+struct mem_cgroup;
+
/*
* Each group must define these ops. The fsnotify infrastructure will call
* these operations for each relevant group.
@@ -127,6 +129,8 @@ struct fsnotify_event {
* everything will be cleaned up.
*/
struct fsnotify_group {
+ const struct fsnotify_ops *ops; /* how this group handles things */
+
/*
* How the refcnt is used is up to each group. When the refcnt hits 0
* fsnotify will clean up all of the resources associated with this group.
@@ -137,8 +141,6 @@ struct fsnotify_group {
*/
refcount_t refcnt; /* things with interest in this group */
- const struct fsnotify_ops *ops; /* how this group handles things */
-
/* needed to send notification to userspace */
spinlock_t notification_lock; /* protect the notification_list */
struct list_head notification_list; /* list of event_holder this group needs to send to userspace */
@@ -160,6 +162,8 @@ struct fsnotify_group {
atomic_t num_marks; /* 1 for each mark and 1 for not being
* past the point of no return when freeing
* a group */
+ atomic_t user_waits; /* Number of tasks waiting for user
+ * response */
struct list_head marks_list; /* all inode marks for this group */
struct fasync_struct *fsn_fa; /* async notification */
@@ -167,8 +171,8 @@ struct fsnotify_group {
struct fsnotify_event *overflow_event; /* Event we queue when the
* notification list is too
* full */
- atomic_t user_waits; /* Number of tasks waiting for user
- * response */
+
+ struct mem_cgroup *memcg; /* memcg to charge allocations */
/* groups can define private fields here or use the void *private */
union {
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 42f4719def32..121e218d2a21 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -373,6 +373,8 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *);
bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg);
struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
+struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
+
static inline
struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
return css ? container_of(css, struct mem_cgroup, css) : NULL;
@@ -380,7 +382,8 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
static inline void mem_cgroup_put(struct mem_cgroup *memcg)
{
- css_put(&memcg->css);
+ if (memcg)
+ css_put(&memcg->css);
}
#define mem_cgroup_from_counter(counter, member) \
@@ -855,6 +858,11 @@ static inline bool task_in_mem_cgroup(struct task_struct *task,
return true;
}
+static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
+{
+ return NULL;
+}
+
static inline void mem_cgroup_put(struct mem_cgroup *memcg)
{
}
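
For context on the memcontrol.h changes above: making mem_cgroup_put() tolerate NULL (together with the !CONFIG_MEMCG stubs) lets a long-lived object pin the creating task's memcg via get_mem_cgroup_from_mm() and drop the reference unconditionally at teardown, which is how the fsnotify group uses it in the fanotify_user.c, inotify_user.c and group.c hunks. A minimal sketch of that pairing follows; the bar_group name and helpers are hypothetical and not part of this patch.

/* Hypothetical illustration of the memcg get/put pairing; not part of the patch. */
#include <linux/memcontrol.h>
#include <linux/sched.h>
#include <linux/slab.h>

struct bar_group {
	struct mem_cgroup *memcg;	/* memcg to charge allocations to */
};

static struct bar_group *bar_group_alloc(void)
{
	struct bar_group *group = kzalloc(sizeof(*group), GFP_KERNEL);

	if (!group)
		return NULL;
	/*
	 * May return NULL when memcg is disabled; mem_cgroup_put()
	 * below accepts that, so teardown needs no special case.
	 */
	group->memcg = get_mem_cgroup_from_mm(current->mm);
	return group;
}

static void bar_group_free(struct bar_group *group)
{
	mem_cgroup_put(group->memcg);	/* safe even if memcg is NULL */
	kfree(group);
}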
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 95a5018c338e..1827f4a7a6de 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1152,6 +1152,9 @@ struct task_struct {
/* Number of pages to reclaim on returning to userland: */
unsigned int memcg_nr_pages_over_high;
+
+ /* Used by memcontrol for targeted memcg charge: */
+ struct mem_cgroup *active_memcg;
#endif
#ifdef CONFIG_BLK_CGROUP
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 44d356f5e47c..aebb370a0006 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -248,6 +248,43 @@ static inline void memalloc_noreclaim_restore(unsigned int flags)
current->flags = (current->flags & ~PF_MEMALLOC) | flags;
}
+#ifdef CONFIG_MEMCG
+/**
+ * memalloc_use_memcg - Starts the remote memcg charging scope.
+ * @memcg: memcg to charge.
+ *
+ * This function marks the beginning of the remote memcg charging scope. All the
+ * __GFP_ACCOUNT allocations till the end of the scope will be charged to the
+ * given memcg.
+ *
+ * NOTE: This function is not nesting safe.
+ */
+static inline void memalloc_use_memcg(struct mem_cgroup *memcg)
+{
+ WARN_ON_ONCE(current->active_memcg);
+ current->active_memcg = memcg;
+}
+
+/**
+ * memalloc_unuse_memcg - Ends the remote memcg charging scope.
+ *
+ * This function marks the end of the remote memcg charging scope started by
+ * memalloc_use_memcg().
+ */
+static inline void memalloc_unuse_memcg(void)
+{
+ current->active_memcg = NULL;
+}
+#else
+static inline void memalloc_use_memcg(struct mem_cgroup *memcg)
+{
+}
+
+static inline void memalloc_unuse_memcg(void)
+{
+}
+#endif
+
#ifdef CONFIG_MEMBARRIER
enum {
MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY = (1U << 0),
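
To make the intended use of the new scope API concrete: a producer brackets its __GFP_ACCOUNT allocation with memalloc_use_memcg()/memalloc_unuse_memcg() so the charge lands on the remembered listener memcg rather than on whichever task happens to trigger the event, mirroring the fanotify and inotify hunks above. A minimal sketch, with the foo_* names purely hypothetical:

/* Hypothetical consumer of memalloc_use_memcg(); foo_* names are illustrative. */
#include <linux/memcontrol.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/types.h>

struct foo_group {
	struct mem_cgroup *memcg;	/* saved via get_mem_cgroup_from_mm() at init */
};

struct foo_event {
	u32 mask;
	char name[];
};

static struct foo_event *foo_alloc_event(struct foo_group *group, u32 mask,
					 size_t name_len)
{
	struct foo_event *event;

	/* Whoever listens on the group pays for the event, not the producer. */
	memalloc_use_memcg(group->memcg);
	event = kmalloc(sizeof(*event) + name_len, GFP_KERNEL_ACCOUNT);
	memalloc_unuse_memcg();

	if (event)
		event->mask = mask;
	return event;
}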
diff --git a/kernel/fork.c b/kernel/fork.c
index 33112315b5c0..5ee74c113381 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -871,6 +871,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->use_memdelay = 0;
#endif
+#ifdef CONFIG_MEMCG
+ tsk->active_memcg = NULL;
+#endif
return tsk;
free_stack:
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b836e7f00309..bf9cf738c836 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -678,9 +678,20 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
}
EXPORT_SYMBOL(mem_cgroup_from_task);
-static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
+/**
+ * get_mem_cgroup_from_mm: Obtain a reference on given mm_struct's memcg.
+ * @mm: mm from which memcg should be extracted. It can be NULL.
+ *
+ * Obtain a reference on mm->memcg and returns it if successful. Otherwise
+ * root_mem_cgroup is returned. However if mem_cgroup is disabled, NULL is
+ * returned.
+ */
+struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
{
- struct mem_cgroup *memcg = NULL;
+ struct mem_cgroup *memcg;
+
+ if (mem_cgroup_disabled())
+ return NULL;
rcu_read_lock();
do {
@@ -700,6 +711,24 @@ static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
rcu_read_unlock();
return memcg;
}
+EXPORT_SYMBOL(get_mem_cgroup_from_mm);
+
+/**
+ * If current->active_memcg is non-NULL, do not fall back to current->mm->memcg.
+ */
+static __always_inline struct mem_cgroup *get_mem_cgroup_from_current(void)
+{
+ if (unlikely(current->active_memcg)) {
+ struct mem_cgroup *memcg = root_mem_cgroup;
+
+ rcu_read_lock();
+ if (css_tryget_online(&current->active_memcg->css))
+ memcg = current->active_memcg;
+ rcu_read_unlock();
+ return memcg;
+ }
+ return get_mem_cgroup_from_mm(current->mm);
+}
/**
* mem_cgroup_iter - iterate over memory cgroup hierarchy
@@ -2261,7 +2290,7 @@ struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep)
if (current->memcg_kmem_skip_account)
return cachep;
- memcg = get_mem_cgroup_from_mm(current->mm);
+ memcg = get_mem_cgroup_from_current();
kmemcg_id = READ_ONCE(memcg->kmemcg_id);
if (kmemcg_id < 0)
goto out;
@@ -2345,7 +2374,7 @@ int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
if (memcg_kmem_bypass())
return 0;
- memcg = get_mem_cgroup_from_mm(current->mm);
+ memcg = get_mem_cgroup_from_current();
if (!mem_cgroup_is_root(memcg)) {
ret = memcg_kmem_charge_memcg(page, gfp, order, memcg);
if (!ret)