summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaolo Abeni <pabeni@redhat.com>2025-03-04 15:28:30 +0100
committerPaolo Abeni <pabeni@redhat.com>2025-03-04 15:28:31 +0100
commit5b62996184ca5bb86660bcd11d6c4560ce127df9 (patch)
tree8904af2193e237e503b694b4f7966c00b6a12fd7
parent188fa9b9e20a2579ed8f4088969158fb55059fa0 (diff)
parentd7a2522426e86036f40fde6ba055aa20de1f3d8a (diff)
Merge branch 'netconsole-add-taskname-sysdata-support'
Breno Leitao says: ==================== netconsole: Add taskname sysdata support This patchset introduces a new feature to the netconsole extradata subsystem that enables the inclusion of the current task's name in the sysdata output of netconsole messages. This enhancement is particularly valuable for large-scale deployments, such as Meta's, where netconsole collects messages from millions of servers and stores them in a data warehouse for analysis. Engineers often rely on these messages to investigate issues and assess kernel health. One common challenge we face is determining the context in which a particular message was generated. By including the task name (task->comm) with each message, this feature provides a direct answer to the frequently asked question: "What was running when this message was generated?" This added context will significantly improve our ability to diagnose and troubleshoot issues, making it easier to interpret the output of netconsole. The patchset consists of eight patches that implement the following changes: * Refactor CPU number formatting into a separate function * Prefix CPU_NR sysdata feature with SYSDATA_ * Patch to convert a bitwise operation into boolean * Add configfs controls for taskname sysdata feature * Add taskname to extradata entry count * Add support for including task name in netconsole's extra data output * Document the task name feature in Documentation/networking/netconsole.rst * Add test coverage for the task name feature to the existing sysdata selftest script These changes allow users to enable or disable the task name feature via configfs and provide additional context for kernel messages by showing which task generated each console message. I have tested these patches on some servers and they seem to work as expected. 
v1: https://lore.kernel.org/r/20250221-netcons_current-v1-0-21c86ae8fc0d@debian.org Signed-off-by: Breno Leitao <leitao@debian.org> ==================== Link: https://patch.msgid.link/20250228-netcons_current-v2-0-f53ff79a0db2@debian.org Signed-off-by: Paolo Abeni <pabeni@redhat.com>
-rw-r--r--Documentation/networking/netconsole.rst28
-rw-r--r--drivers/net/netconsole.c95
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_sysdata.sh51
3 files changed, 153 insertions, 21 deletions
diff --git a/Documentation/networking/netconsole.rst b/Documentation/networking/netconsole.rst
index 84803c59968a..ae82a6337a8d 100644
--- a/Documentation/networking/netconsole.rst
+++ b/Documentation/networking/netconsole.rst
@@ -240,6 +240,34 @@ Delete `userdata` entries with `rmdir`::
It is recommended to not write user data values with newlines.
+Task name auto population in userdata
+-------------------------------------
+
+Inside the netconsole configfs hierarchy, there is a file called
+`taskname_enabled` under the `userdata` directory. This file is used to enable
+or disable the automatic task name population feature. This feature
+automatically populates the current task name that is scheduled in the CPU
+sending the message.
+
+To enable task name auto-population::
+
+ echo 1 > /sys/kernel/config/netconsole/target1/userdata/taskname_enabled
+
+When this option is enabled, the netconsole messages will include an additional
+line in the userdata field with the format `taskname=<task name>`. This allows
+the receiver of the netconsole messages to easily find which application was
+currently scheduled when that message was generated, providing extra context
+for kernel messages and helping to categorize them.
+
+Example::
+
+ echo "This is a message" > /dev/kmsg
+ 12,607,22085407756,-;This is a message
+ taskname=echo
+
+In this example, the message was generated while "echo" was the current
+scheduled process.
+
CPU number auto population in userdata
--------------------------------------
diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index f77eddf22185..098ea9eb0237 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -103,7 +103,9 @@ struct netconsole_target_stats {
*/
enum sysdata_feature {
/* Populate the CPU that sends the message */
- CPU_NR = BIT(0),
+ SYSDATA_CPU_NR = BIT(0),
+ /* Populate the task name (as in current->comm) in sysdata */
+ SYSDATA_TASKNAME = BIT(1),
};
/**
@@ -418,12 +420,26 @@ static ssize_t sysdata_cpu_nr_enabled_show(struct config_item *item, char *buf)
bool cpu_nr_enabled;
mutex_lock(&dynamic_netconsole_mutex);
- cpu_nr_enabled = !!(nt->sysdata_fields & CPU_NR);
+ cpu_nr_enabled = !!(nt->sysdata_fields & SYSDATA_CPU_NR);
mutex_unlock(&dynamic_netconsole_mutex);
return sysfs_emit(buf, "%d\n", cpu_nr_enabled);
}
+/* configfs helper to display if taskname sysdata feature is enabled */
+static ssize_t sysdata_taskname_enabled_show(struct config_item *item,
+ char *buf)
+{
+ struct netconsole_target *nt = to_target(item->ci_parent);
+ bool taskname_enabled;
+
+ mutex_lock(&dynamic_netconsole_mutex);
+ taskname_enabled = !!(nt->sysdata_fields & SYSDATA_TASKNAME);
+ mutex_unlock(&dynamic_netconsole_mutex);
+
+ return sysfs_emit(buf, "%d\n", taskname_enabled);
+}
+
/*
* This one is special -- targets created through the configfs interface
* are not enabled (and the corresponding netpoll activated) by default.
@@ -699,7 +715,9 @@ static size_t count_extradata_entries(struct netconsole_target *nt)
/* Userdata entries */
entries = list_count_nodes(&nt->userdata_group.cg_children);
/* Plus sysdata entries */
- if (nt->sysdata_fields & CPU_NR)
+ if (nt->sysdata_fields & SYSDATA_CPU_NR)
+ entries += 1;
+ if (nt->sysdata_fields & SYSDATA_TASKNAME)
entries += 1;
return entries;
@@ -837,6 +855,40 @@ static void disable_sysdata_feature(struct netconsole_target *nt,
nt->extradata_complete[nt->userdata_length] = 0;
}
+static ssize_t sysdata_taskname_enabled_store(struct config_item *item,
+ const char *buf, size_t count)
+{
+ struct netconsole_target *nt = to_target(item->ci_parent);
+ bool taskname_enabled, curr;
+ ssize_t ret;
+
+ ret = kstrtobool(buf, &taskname_enabled);
+ if (ret)
+ return ret;
+
+ mutex_lock(&dynamic_netconsole_mutex);
+ curr = !!(nt->sysdata_fields & SYSDATA_TASKNAME);
+ if (taskname_enabled == curr)
+ goto unlock_ok;
+
+ if (taskname_enabled &&
+ count_extradata_entries(nt) >= MAX_EXTRADATA_ITEMS) {
+ ret = -ENOSPC;
+ goto unlock;
+ }
+
+ if (taskname_enabled)
+ nt->sysdata_fields |= SYSDATA_TASKNAME;
+ else
+ disable_sysdata_feature(nt, SYSDATA_TASKNAME);
+
+unlock_ok:
+ ret = strnlen(buf, count);
+unlock:
+ mutex_unlock(&dynamic_netconsole_mutex);
+ return ret;
+}
+
/* configfs helper to sysdata cpu_nr feature */
static ssize_t sysdata_cpu_nr_enabled_store(struct config_item *item,
const char *buf, size_t count)
@@ -850,7 +902,7 @@ static ssize_t sysdata_cpu_nr_enabled_store(struct config_item *item,
return ret;
mutex_lock(&dynamic_netconsole_mutex);
- curr = nt->sysdata_fields & CPU_NR;
+ curr = !!(nt->sysdata_fields & SYSDATA_CPU_NR);
if (cpu_nr_enabled == curr)
/* no change requested */
goto unlock_ok;
@@ -865,13 +917,13 @@ static ssize_t sysdata_cpu_nr_enabled_store(struct config_item *item,
}
if (cpu_nr_enabled)
- nt->sysdata_fields |= CPU_NR;
+ nt->sysdata_fields |= SYSDATA_CPU_NR;
else
/* This is special because extradata_complete might have
* remaining data from previous sysdata, and it needs to be
* cleaned.
*/
- disable_sysdata_feature(nt, CPU_NR);
+ disable_sysdata_feature(nt, SYSDATA_CPU_NR);
unlock_ok:
ret = strnlen(buf, count);
@@ -882,6 +934,7 @@ unlock:
CONFIGFS_ATTR(userdatum_, value);
CONFIGFS_ATTR(sysdata_, cpu_nr_enabled);
+CONFIGFS_ATTR(sysdata_, taskname_enabled);
static struct configfs_attribute *userdatum_attrs[] = {
&userdatum_attr_value,
@@ -942,6 +995,7 @@ static void userdatum_drop(struct config_group *group, struct config_item *item)
static struct configfs_attribute *userdata_attrs[] = {
&sysdata_attr_cpu_nr_enabled,
+ &sysdata_attr_taskname_enabled,
NULL,
};
@@ -1117,28 +1171,41 @@ static void populate_configfs_item(struct netconsole_target *nt,
init_target_config_group(nt, target_name);
}
+static int append_cpu_nr(struct netconsole_target *nt, int offset)
+{
+ /* Append cpu=%d at extradata_complete after userdata str */
+ return scnprintf(&nt->extradata_complete[offset],
+ MAX_EXTRADATA_ENTRY_LEN, " cpu=%u\n",
+ raw_smp_processor_id());
+}
+
+static int append_taskname(struct netconsole_target *nt, int offset)
+{
+ return scnprintf(&nt->extradata_complete[offset],
+ MAX_EXTRADATA_ENTRY_LEN, " taskname=%s\n",
+ current->comm);
+}
/*
* prepare_extradata - append sysdata at extradata_complete in runtime
* @nt: target to send message to
*/
static int prepare_extradata(struct netconsole_target *nt)
{
- int sysdata_len, extradata_len;
+ u32 fields = SYSDATA_CPU_NR | SYSDATA_TASKNAME;
+ int extradata_len;
/* userdata was appended when configfs write helper was called
* by update_userdata().
*/
extradata_len = nt->userdata_length;
- if (!(nt->sysdata_fields & CPU_NR))
+ if (!(nt->sysdata_fields & fields))
goto out;
- /* Append cpu=%d at extradata_complete after userdata str */
- sysdata_len = scnprintf(&nt->extradata_complete[nt->userdata_length],
- MAX_EXTRADATA_ENTRY_LEN, " cpu=%u\n",
- raw_smp_processor_id());
-
- extradata_len += sysdata_len;
+ if (nt->sysdata_fields & SYSDATA_CPU_NR)
+ extradata_len += append_cpu_nr(nt, extradata_len);
+ if (nt->sysdata_fields & SYSDATA_TASKNAME)
+ extradata_len += append_taskname(nt, extradata_len);
WARN_ON_ONCE(extradata_len >
MAX_EXTRADATA_ENTRY_LEN * MAX_EXTRADATA_ITEMS);
diff --git a/tools/testing/selftests/drivers/net/netcons_sysdata.sh b/tools/testing/selftests/drivers/net/netcons_sysdata.sh
index 2b78fd1f5982..f351206ed1bd 100755
--- a/tools/testing/selftests/drivers/net/netcons_sysdata.sh
+++ b/tools/testing/selftests/drivers/net/netcons_sysdata.sh
@@ -31,17 +31,38 @@ function set_cpu_nr() {
echo 1 > "${NETCONS_PATH}/userdata/cpu_nr_enabled"
}
+# Enable the taskname to be appended to sysdata
+function set_taskname() {
+ if [[ ! -f "${NETCONS_PATH}/userdata/taskname_enabled" ]]
+ then
+ echo "Not able to enable taskname sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/taskname_enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ echo 1 > "${NETCONS_PATH}/userdata/taskname_enabled"
+}
+
# Disable the sysdata cpu_nr feature
function unset_cpu_nr() {
echo 0 > "${NETCONS_PATH}/userdata/cpu_nr_enabled"
}
-# Test if MSG content and `cpu=${CPU}` exists in OUTPUT_FILE
-function validate_sysdata_cpu_exists() {
+# Once called, taskname=<..> will not be appended anymore
+function unset_taskname() {
+ echo 0 > "${NETCONS_PATH}/userdata/taskname_enabled"
+}
+
+# Test if MSG contains sysdata
+function validate_sysdata() {
# OUTPUT_FILE will contain something like:
# 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM
# userdatakey=userdatavalue
# cpu=X
+ # taskname=<taskname>
+
+ # Echo is what this test uses to create the message. See runtest()
+ # function
+ SENDER="echo"
if [ ! -f "$OUTPUT_FILE" ]; then
echo "FAIL: File was not generated." >&2
@@ -62,12 +83,19 @@ function validate_sysdata_cpu_exists() {
exit "${ksft_fail}"
fi
+ if ! grep -q "taskname=${SENDER}" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'taskname=echo' not found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
rm "${OUTPUT_FILE}"
pkill_socat
}
-# Test if MSG content exists in OUTPUT_FILE but no `cpu=` string
-function validate_sysdata_no_cpu() {
+# Test if MSG content exists in OUTPUT_FILE but no `cpu=` and `taskname=`
+# strings
+function validate_no_sysdata() {
if [ ! -f "$OUTPUT_FILE" ]; then
echo "FAIL: File was not generated." >&2
exit "${ksft_fail}"
@@ -85,6 +113,12 @@ function validate_sysdata_no_cpu() {
exit "${ksft_fail}"
fi
+ if grep -q "taskname=" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'taskname= found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
rm "${OUTPUT_FILE}"
}
@@ -133,10 +167,12 @@ OUTPUT_FILE="/tmp/${TARGET}_1"
MSG="Test #1 from CPU${CPU}"
# Enable the auto population of cpu_nr
set_cpu_nr
+# Enable taskname to be appended to sysdata
+set_taskname
runtest
# Make sure the message was received in the dst part
# and exit
-validate_sysdata_cpu_exists
+validate_sysdata
#====================================================
# TEST #2
@@ -148,7 +184,7 @@ OUTPUT_FILE="/tmp/${TARGET}_2"
MSG="Test #2 from CPU${CPU}"
set_user_data
runtest
-validate_sysdata_cpu_exists
+validate_sysdata
# ===================================================
# TEST #3
@@ -160,8 +196,9 @@ OUTPUT_FILE="/tmp/${TARGET}_3"
MSG="Test #3 from CPU${CPU}"
# Disable the auto population of cpu_nr
unset_cpu_nr
+unset_taskname
runtest
# At this time, cpu= shouldn't be present in the msg
-validate_sysdata_no_cpu
+validate_no_sysdata
exit "${ksft_pass}"