author | Olof Johansson <olof@lixom.net> | 2019-11-16 15:38:57 -0800 |
---|---|---|
committer | Olof Johansson <olof@lixom.net> | 2019-11-16 15:38:59 -0800 |
commit | db6efda85437d783dd921da5d67ec818826adb12 (patch) | |
tree | 466886d533eb39f2c795b287b8bf57f65d50c397 /mm/vmscan.c | |
parent | 002d3c65ee81a604430da61e20de7a5b32a0afd5 (diff) | |
parent | 03212e347f9443e524d6383c6806ac08295c1fb0 (diff) | |
Merge tag 'tee-fixes-for-v5.4' of git://git.linaro.org/people/jens.wiklander/linux-tee into arm/fixes
Two OP-TEE driver fixes:
- Add proper cleanup on optee_enumerate_devices() failure
- Make sure to register kernel allocations of dynamic shared memory
* tag 'tee-fixes-for-v5.4' of git://git.linaro.org/people/jens.wiklander/linux-tee: (591 commits)
tee: optee: fix device enumeration error handling
tee: optee: Fix dynamic shm pool allocations
Linux 5.4-rc3
tracing: Initialize iter->seq after zeroing in tracing_read_pipe()
tracing/hwlat: Don't ignore outer-loop duration when calculating max_latency
tracing/hwlat: Report total time spent in all NMIs during the sample
recordmcount: Fix nop_mcount() function
tracing: Do not create tracefs files if tracefs lockdown is in effect
tracing: Add locked_down checks to the open calls of files created for tracefs
tracing: Add tracing_check_open_get_tr()
tracing: Have trace events system open call tracing_open_generic_tr()
tracing: Get trace_array reference for available_tracers files
ftrace: Get a reference counter for the trace_array on filter files
tracefs: Revert ccbd54ff54e8 ("tracefs: Restrict tracefs when the kernel is locked down")
perf/x86/cstate: Add Tiger Lake CPU support
perf/x86/msr: Add Tiger Lake CPU support
perf/x86/intel: Add Tiger Lake CPU support
perf/x86/cstate: Update C-state counters for Ice Lake
perf/x86/msr: Add new CPU model numbers for Ice Lake
perf/x86/cstate: Add Comet Lake CPU support
...
Link: https://lore.kernel.org/r/20191115105353.GA26176@jax
Signed-off-by: Olof Johansson <olof@lixom.net>
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- | mm/vmscan.c | 72 |
1 file changed, 66 insertions, 6 deletions
```diff
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e5d52d6a24af..c6659bb758a4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2459,17 +2459,70 @@ out:
 	*lru_pages = 0;
 	for_each_evictable_lru(lru) {
 		int file = is_file_lru(lru);
-		unsigned long size;
+		unsigned long lruvec_size;
 		unsigned long scan;
+		unsigned long protection;
+
+		lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
+		protection = mem_cgroup_protection(memcg,
+						   sc->memcg_low_reclaim);
+
+		if (protection) {
+			/*
+			 * Scale a cgroup's reclaim pressure by proportioning
+			 * its current usage to its memory.low or memory.min
+			 * setting.
+			 *
+			 * This is important, as otherwise scanning aggression
+			 * becomes extremely binary -- from nothing as we
+			 * approach the memory protection threshold, to totally
+			 * nominal as we exceed it. This results in requiring
+			 * setting extremely liberal protection thresholds. It
+			 * also means we simply get no protection at all if we
+			 * set it too low, which is not ideal.
+			 *
+			 * If there is any protection in place, we reduce scan
+			 * pressure by how much of the total memory used is
+			 * within protection thresholds.
+			 *
+			 * There is one special case: in the first reclaim pass,
+			 * we skip over all groups that are within their low
+			 * protection. If that fails to reclaim enough pages to
+			 * satisfy the reclaim goal, we come back and override
+			 * the best-effort low protection. However, we still
+			 * ideally want to honor how well-behaved groups are in
+			 * that case instead of simply punishing them all
+			 * equally. As such, we reclaim them based on how much
+			 * memory they are using, reducing the scan pressure
+			 * again by how much of the total memory used is under
+			 * hard protection.
+			 */
+			unsigned long cgroup_size = mem_cgroup_size(memcg);
+
+			/* Avoid TOCTOU with earlier protection check */
+			cgroup_size = max(cgroup_size, protection);
+
+			scan = lruvec_size - lruvec_size * protection /
+				cgroup_size;
+
+			/*
+			 * Minimally target SWAP_CLUSTER_MAX pages to keep
+			 * reclaim moving forwards, avoiding decremeting
+			 * sc->priority further than desirable.
+			 */
+			scan = max(scan, SWAP_CLUSTER_MAX);
+		} else {
+			scan = lruvec_size;
+		}
+
+		scan >>= sc->priority;
 
-		size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
-		scan = size >> sc->priority;
 		/*
 		 * If the cgroup's already been deleted, make sure to
 		 * scrape out the remaining cache.
 		 */
 		if (!scan && !mem_cgroup_online(memcg))
-			scan = min(size, SWAP_CLUSTER_MAX);
+			scan = min(lruvec_size, SWAP_CLUSTER_MAX);
 
 		switch (scan_balance) {
 		case SCAN_EQUAL:
@@ -2489,7 +2542,7 @@ out:
 		case SCAN_ANON:
 			/* Scan one type exclusively */
 			if ((scan_balance == SCAN_FILE) != file) {
-				size = 0;
+				lruvec_size = 0;
 				scan = 0;
 			}
 			break;
@@ -2498,7 +2551,7 @@ out:
 			BUG();
 		}
 
-		*lru_pages += size;
+		*lru_pages += lruvec_size;
 		nr[lru] = scan;
 	}
 }
@@ -2742,6 +2795,13 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 				memcg_memory_event(memcg, MEMCG_LOW);
 				break;
 			case MEMCG_PROT_NONE:
+				/*
+				 * All protection thresholds breached. We may
+				 * still choose to vary the scan pressure
+				 * applied based on by how much the cgroup in
+				 * question has exceeded its protection
+				 * thresholds (see get_scan_count).
+				 */
 				break;
 			}
 
```
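For intuition, here is a minimal user-space sketch of the proportional formula added in the first hunk. `scan_target()`, `max_ul()` and the sample page counts are invented for illustration and are not kernel APIs; only the arithmetic mirrors the new get_scan_count() logic (cut scan pressure by the protected share of current usage, floor the result at SWAP_CLUSTER_MAX, and apply the sc->priority shift afterwards at the call site).

```c
/*
 * Illustrative sketch only: scan_target(), max_ul() and the sample values
 * below are made up for this example; the arithmetic follows the hunk above.
 * The returned value is the pre-"scan >>= sc->priority" target.
 */
#include <stdio.h>

#define SWAP_CLUSTER_MAX 32UL	/* same constant the kernel uses */

static unsigned long max_ul(unsigned long a, unsigned long b)
{
	return a > b ? a : b;
}

static unsigned long scan_target(unsigned long lruvec_size,
				 unsigned long protection,
				 unsigned long cgroup_size)
{
	unsigned long scan;

	if (!protection)
		return lruvec_size;	/* no memory.low/min: full pressure */

	/* mirror the TOCTOU guard: usage may have dropped below protection */
	cgroup_size = max_ul(cgroup_size, protection);

	/* reduce pressure by the protected share of current usage */
	scan = lruvec_size - lruvec_size * protection / cgroup_size;

	/* keep reclaim moving even for heavily protected groups */
	return max_ul(scan, SWAP_CLUSTER_MAX);
}

int main(void)
{
	/* cgroup using 1 GiB (262144 pages), memory.low = 512 MiB (131072) */
	unsigned long lruvec_size = 25600, low = 131072, usage = 262144;

	/* half the usage is protected, so the scan target is halved: 12800 */
	printf("scan target: %lu of %lu pages\n",
	       scan_target(lruvec_size, low, usage), lruvec_size);
	return 0;
}
```

With protection equal to usage the target collapses to the SWAP_CLUSTER_MAX floor, and with no protection it stays at lruvec_size; in between it scales smoothly, which is exactly the binary-to-proportional change the long comment in the hunk describes.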