From 18f41fa616ee4d66c67033eb46b951bf6e1b4a12 Mon Sep 17 00:00:00 2001 From: Jiaqi Yan Date: Fri, 20 Jan 2023 03:46:21 +0000 Subject: mm: memory-failure: bump memory failure stats to pglist_data Right before memory_failure finishes its handling, accumulate poisoned page's resolution counters to pglist_data's memory_failure_stats, so as to update the corresponding sysfs entries. Tested: 1) Start an application to allocate memory buffer chunks 2) Convert random memory buffer addresses to physical addresses 3) Inject memory errors using EINJ at chosen physical addresses 4) Access poisoned memory buffer and recover from SIGBUS 5) Check counter values under /sys/devices/system/node/node*/memory_failure/* Link: https://lkml.kernel.org/r/20230120034622.2698268-3-jiaqiyan@google.com Signed-off-by: Jiaqi Yan Acked-by: David Rientjes Acked-by: Naoya Horiguchi Cc: Kefeng Wang Cc: Tony Luck Cc: Yang Shi Signed-off-by: Andrew Morton --- mm/memory-failure.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'mm/memory-failure.c') diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 44eec2e93a0b..b4b30d9b0782 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1227,6 +1227,39 @@ static struct page_state error_states[] = { #undef slab #undef reserved +static void update_per_node_mf_stats(unsigned long pfn, + enum mf_result result) +{ + int nid = MAX_NUMNODES; + struct memory_failure_stats *mf_stats = NULL; + + nid = pfn_to_nid(pfn); + if (unlikely(nid < 0 || nid >= MAX_NUMNODES)) { + WARN_ONCE(1, "Memory failure: pfn=%#lx, invalid nid=%d", pfn, nid); + return; + } + + mf_stats = &NODE_DATA(nid)->mf_stats; + switch (result) { + case MF_IGNORED: + ++mf_stats->ignored; + break; + case MF_FAILED: + ++mf_stats->failed; + break; + case MF_DELAYED: + ++mf_stats->delayed; + break; + case MF_RECOVERED: + ++mf_stats->recovered; + break; + default: + WARN_ONCE(1, "Memory failure: mf_result=%d is not properly handled", result); + break; + } + ++mf_stats->total; +} + /* * "Dirty/Clean" indication is not 100% accurate due to the possibility of * setting PG_dirty outside page lock. See also comment above set_page_dirty(). @@ -1237,6 +1270,9 @@ static int action_result(unsigned long pfn, enum mf_action_page_type type, trace_memory_failure_event(pfn, type, result); num_poisoned_pages_inc(pfn); + + update_per_node_mf_stats(pfn, result); + pr_err("%#lx: recovery action for %s: %s\n", pfn, action_page_types[type], action_name[result]); -- cgit