Diffstat (limited to 'mm/swap_cgroup.c')
-rw-r--r--  mm/swap_cgroup.c  235
1 file changed, 88 insertions(+), 147 deletions(-)
diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c
index db6c4a26cf59..be39078f255b 100644
--- a/mm/swap_cgroup.c
+++ b/mm/swap_cgroup.c
@@ -6,149 +6,106 @@
#include <linux/swapops.h> /* depends on mm.h include */
static DEFINE_MUTEX(swap_cgroup_mutex);
-struct swap_cgroup_ctrl {
- struct page **map;
- unsigned long length;
- spinlock_t lock;
-};
-
-static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
+/* Pack the cgroup ids (unsigned short) of two entries into one swap_cgroup (atomic_t) */
+#define ID_PER_SC (sizeof(struct swap_cgroup) / sizeof(unsigned short))
+#define ID_SHIFT (BITS_PER_TYPE(unsigned short))
+#define ID_MASK (BIT(ID_SHIFT) - 1)
struct swap_cgroup {
- unsigned short id;
+ atomic_t ids;
};
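A quick aside on the packing scheme (not part of the patch): entry offset / ID_PER_SC selects the atomic word, and (offset % ID_PER_SC) * ID_SHIFT selects the 16-bit half within it that holds the id. Below is a minimal standalone C11 sketch of the same arithmetic, with uint32_t standing in for atomic_t and invented ex_ names:

#include <stdint.h>
#include <stdio.h>

#define EX_ID_PER_SC 2		/* two 16-bit ids per 32-bit word */
#define EX_ID_SHIFT  16		/* BITS_PER_TYPE(unsigned short) */
#define EX_ID_MASK   0xffffu	/* BIT(EX_ID_SHIFT) - 1 */

/* Mirrors __swap_cgroup_id_lookup(), minus the atomics. */
static uint16_t ex_id_lookup(const uint32_t *map, unsigned long offset)
{
	unsigned int shift = (offset % EX_ID_PER_SC) * EX_ID_SHIFT;

	return (map[offset / EX_ID_PER_SC] >> shift) & EX_ID_MASK;
}

int main(void)
{
	uint32_t map[2] = { 0 };

	/* Record id 42 for entry offset 3: word 1, upper 16 bits. */
	map[3 / EX_ID_PER_SC] |= 42u << ((3 % EX_ID_PER_SC) * EX_ID_SHIFT);
	printf("%u\n", ex_id_lookup(map, 3));	/* prints 42 */
	return 0;
}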
-#define SC_PER_PAGE (PAGE_SIZE/sizeof(struct swap_cgroup))
-/*
- * SwapCgroup implements "lookup" and "exchange" operations.
- * In typical usage, this swap_cgroup is accessed via memcg's charge/uncharge
- * against SwapCache. At swap_free(), this is accessed directly from swap.
- *
- * This means,
- * - we have no race in "exchange" when we're accessed via SwapCache because
- * SwapCache(and its swp_entry) is under lock.
- * - When called via swap_free(), there is no user of this entry and no race.
- * Then, we don't need lock around "exchange".
- *
- * TODO: we can push these buffers out to HIGHMEM.
- */
-
-/*
- * allocate buffer for swap_cgroup.
- */
-static int swap_cgroup_prepare(int type)
-{
- struct page *page;
- struct swap_cgroup_ctrl *ctrl;
- unsigned long idx, max;
-
- ctrl = &swap_cgroup_ctrl[type];
-
- for (idx = 0; idx < ctrl->length; idx++) {
- page = alloc_page(GFP_KERNEL | __GFP_ZERO);
- if (!page)
- goto not_enough_page;
- ctrl->map[idx] = page;
-
- if (!(idx % SWAP_CLUSTER_MAX))
- cond_resched();
- }
- return 0;
-not_enough_page:
- max = idx;
- for (idx = 0; idx < max; idx++)
- __free_page(ctrl->map[idx]);
+struct swap_cgroup_ctrl {
+ struct swap_cgroup *map;
+};
- return -ENOMEM;
-}
+static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
-static struct swap_cgroup *__lookup_swap_cgroup(struct swap_cgroup_ctrl *ctrl,
- pgoff_t offset)
+static unsigned short __swap_cgroup_id_lookup(struct swap_cgroup *map,
+ pgoff_t offset)
{
- struct page *mappage;
- struct swap_cgroup *sc;
+ unsigned int shift = (offset % ID_PER_SC) * ID_SHIFT;
+ unsigned int old_ids = atomic_read(&map[offset / ID_PER_SC].ids);
- mappage = ctrl->map[offset / SC_PER_PAGE];
- sc = page_address(mappage);
- return sc + offset % SC_PER_PAGE;
+ BUILD_BUG_ON(!is_power_of_2(ID_PER_SC));
+ BUILD_BUG_ON(sizeof(struct swap_cgroup) != sizeof(atomic_t));
+
+ return (old_ids >> shift) & ID_MASK;
}
-static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent,
- struct swap_cgroup_ctrl **ctrlp)
+static unsigned short __swap_cgroup_id_xchg(struct swap_cgroup *map,
+ pgoff_t offset,
+ unsigned short new_id)
{
- pgoff_t offset = swp_offset(ent);
- struct swap_cgroup_ctrl *ctrl;
-
- ctrl = &swap_cgroup_ctrl[swp_type(ent)];
- if (ctrlp)
- *ctrlp = ctrl;
- return __lookup_swap_cgroup(ctrl, offset);
+ unsigned short old_id;
+ struct swap_cgroup *sc = &map[offset / ID_PER_SC];
+ unsigned int shift = (offset % ID_PER_SC) * ID_SHIFT;
+ unsigned int new_ids, old_ids = atomic_read(&sc->ids);
+
+ do {
+ old_id = (old_ids >> shift) & ID_MASK;
+ new_ids = (old_ids & ~(ID_MASK << shift));
+ new_ids |= ((unsigned int)new_id) << shift;
+ } while (!atomic_try_cmpxchg(&sc->ids, &old_ids, new_ids));
+
+ return old_id;
}
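__swap_cgroup_id_xchg() is the standard lock-free read-modify-write loop: read the word, splice the new 16-bit id into its half, and publish with atomic_try_cmpxchg(), which refreshes old_ids and retries on a racing update. Here is a minimal userspace sketch of the same technique, assuming C11 <stdatomic.h> (atomic_compare_exchange_weak() likewise rewrites old_ids on failure; ex_ names invented):

#include <stdatomic.h>
#include <stdint.h>

/* Exchange the 16-bit id at "shift" for new_id; return the old id. */
static uint16_t ex_id_xchg(_Atomic uint32_t *word, unsigned int shift,
			   uint16_t new_id)
{
	uint32_t old_ids = atomic_load(word);
	uint32_t new_ids;

	do {
		new_ids = old_ids & ~((uint32_t)0xffff << shift);
		new_ids |= (uint32_t)new_id << shift;
		/* On failure, old_ids is reloaded and the loop retries. */
	} while (!atomic_compare_exchange_weak(word, &old_ids, new_ids));

	return (old_ids >> shift) & 0xffff;
}

This is what lets the patch drop the per-type spinlock: both ids sharing a word are updated in one atomic step.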
/**
- * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
- * @ent: swap entry to be cmpxchged
- * @old: old id
- * @new: new id
+ * swap_cgroup_record - record mem_cgroup for a set of swap entries.
+ * The entries must all belong to a single folio, and that folio
+ * must be in the process of being charged for swap space (swap out),
+ * and the entries must not have been charged yet.
*
- * Returns old id at success, 0 at failure.
- * (There is no mem_cgroup using 0 as its id)
+ * @folio: the folio that the swap entry belongs to
+ * @ent: the first swap entry to be recorded
*/
-unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
- unsigned short old, unsigned short new)
+void swap_cgroup_record(struct folio *folio, swp_entry_t ent)
{
- struct swap_cgroup_ctrl *ctrl;
- struct swap_cgroup *sc;
- unsigned long flags;
- unsigned short retval;
-
- sc = lookup_swap_cgroup(ent, &ctrl);
-
- spin_lock_irqsave(&ctrl->lock, flags);
- retval = sc->id;
- if (retval == old)
- sc->id = new;
- else
- retval = 0;
- spin_unlock_irqrestore(&ctrl->lock, flags);
- return retval;
+ unsigned int nr_ents = folio_nr_pages(folio);
+ struct swap_cgroup *map;
+ pgoff_t offset, end;
+ unsigned short old;
+
+ offset = swp_offset(ent);
+ end = offset + nr_ents;
+ map = swap_cgroup_ctrl[swp_type(ent)].map;
+
+ do {
+ old = __swap_cgroup_id_xchg(map, offset,
+ mem_cgroup_id(folio_memcg(folio)));
+ VM_BUG_ON(old);
+ } while (++offset != end);
}
/**
- * swap_cgroup_record - record mem_cgroup for a set of swap entries
+ * swap_cgroup_clear - clear mem_cgroup for a set of swap entries.
+ * The entries must be in the process of being uncharged from swap.
+ * They either belong to a single folio in the swap cache (swap in
+ * for cgroup v1), or no longer have any users (slot freeing).
+ *
* @ent: the first swap entry to be cleared
- * @id: mem_cgroup to be recorded
* @nr_ents: number of swap entries to be cleared
*
- * Returns old value at success, 0 at failure.
- * (Of course, old value can be 0.)
+ * Returns the existing old value.
*/
-unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id,
- unsigned int nr_ents)
+unsigned short swap_cgroup_clear(swp_entry_t ent, unsigned int nr_ents)
{
- struct swap_cgroup_ctrl *ctrl;
- struct swap_cgroup *sc;
- unsigned short old;
- unsigned long flags;
pgoff_t offset = swp_offset(ent);
pgoff_t end = offset + nr_ents;
+ struct swap_cgroup *map;
+ unsigned short old, iter = 0;
+
+ map = swap_cgroup_ctrl[swp_type(ent)].map;
- sc = lookup_swap_cgroup(ent, &ctrl);
-
- spin_lock_irqsave(&ctrl->lock, flags);
- old = sc->id;
- for (;;) {
- VM_BUG_ON(sc->id != old);
- sc->id = id;
- offset++;
- if (offset == end)
- break;
- if (offset % SC_PER_PAGE)
- sc++;
- else
- sc = __lookup_swap_cgroup(ctrl, offset);
- }
- spin_unlock_irqrestore(&ctrl->lock, flags);
+ do {
+ old = __swap_cgroup_id_xchg(map, offset, 0);
+ if (!iter)
+ iter = old;
+ VM_BUG_ON(iter != old);
+ } while (++offset != end);
return old;
}
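Together the two helpers form a small charge/uncharge protocol. A hypothetical trace, with invented numbers: an order-2 folio (4 pages) owned by a memcg with id 7 is swapped out to offsets 8..11; swap_cgroup_record() xchgs 7 into entries 8 through 11, and VM_BUG_ON(old) fires if any slot was already charged. Later, swap_cgroup_clear(ent, 4) xchgs 0 back in, uses iter to assert that all four slots still held the same id, and returns 7 so the caller knows which memcg to uncharge.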
@@ -161,37 +118,33 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id,
*/
unsigned short lookup_swap_cgroup_id(swp_entry_t ent)
{
- return lookup_swap_cgroup(ent, NULL)->id;
+ struct swap_cgroup_ctrl *ctrl;
+
+ if (mem_cgroup_disabled())
+ return 0;
+
+ ctrl = &swap_cgroup_ctrl[swp_type(ent)];
+ return __swap_cgroup_id_lookup(ctrl->map, swp_offset(ent));
}
int swap_cgroup_swapon(int type, unsigned long max_pages)
{
- void *array;
- unsigned long length;
+ struct swap_cgroup *map;
struct swap_cgroup_ctrl *ctrl;
if (mem_cgroup_disabled())
return 0;
- length = DIV_ROUND_UP(max_pages, SC_PER_PAGE);
-
- array = vcalloc(length, sizeof(void *));
- if (!array)
+ BUILD_BUG_ON(sizeof(unsigned short) * ID_PER_SC !=
+ sizeof(struct swap_cgroup));
+ map = vzalloc(DIV_ROUND_UP(max_pages, ID_PER_SC) *
+ sizeof(struct swap_cgroup));
+ if (!map)
goto nomem;
ctrl = &swap_cgroup_ctrl[type];
mutex_lock(&swap_cgroup_mutex);
- ctrl->length = length;
- ctrl->map = array;
- spin_lock_init(&ctrl->lock);
- if (swap_cgroup_prepare(type)) {
- /* memory shortage */
- ctrl->map = NULL;
- ctrl->length = 0;
- mutex_unlock(&swap_cgroup_mutex);
- vfree(array);
- goto nomem;
- }
+ ctrl->map = map;
mutex_unlock(&swap_cgroup_mutex);
return 0;
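The allocation works out to two bytes of map per page of swap, assuming a 4-byte atomic_t. For a hypothetical 8 GiB swap device with 4 KiB pages, max_pages = 2097152, so vzalloc() gets DIV_ROUND_UP(2097152, 2) * 4 bytes = 4 MiB, half of what one atomic_t per entry would cost.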
@@ -203,8 +156,7 @@ nomem:
void swap_cgroup_swapoff(int type)
{
- struct page **map;
- unsigned long i, length;
+ struct swap_cgroup *map;
struct swap_cgroup_ctrl *ctrl;
if (mem_cgroup_disabled())
@@ -213,19 +165,8 @@ void swap_cgroup_swapoff(int type)
mutex_lock(&swap_cgroup_mutex);
ctrl = &swap_cgroup_ctrl[type];
map = ctrl->map;
- length = ctrl->length;
ctrl->map = NULL;
- ctrl->length = 0;
mutex_unlock(&swap_cgroup_mutex);
- if (map) {
- for (i = 0; i < length; i++) {
- struct page *page = map[i];
- if (page)
- __free_page(page);
- if (!(i % SWAP_CLUSTER_MAX))
- cond_resched();
- }
- vfree(map);
- }
+ vfree(map);
}