summaryrefslogtreecommitdiff
path: root/tools/workqueue
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2023-08-07 15:57:24 -1000
committerTejun Heo <tj@kernel.org>2023-08-07 15:57:24 -1000
commit63c5484e74952f60f5810256bd69814d167b8d22 (patch)
treee045e2d922b5f462856ffb2058621d8e63e8a59e /tools/workqueue
parent025e16845877e80cb169274b330c236056ba553c (diff)
workqueue: Add multiple affinity scopes and interface to select them
Add three more affinity scopes - WQ_AFFN_CPU, SMT and CACHE - and make CACHE the default. The code changes to actually add the additional scopes are trivial. Also add module parameter "workqueue.default_affinity_scope" to override the default scope and "affinity_scope" sysfs file to configure it per workqueue. wq_dump.py and documentations are updated accordingly. This enables significant flexibility in configuring how unbound workqueues behave. If affinity scope is set to "cpu", it'll behave close to a per-cpu workqueue. On the other hand, "system" removes all locality boundaries. Many modern machines have multiple L3 caches often while being mostly uniform in terms of memory access. Thus, workqueue's previous behavior of spreading work items in each NUMA node had negative performance implications from unncessarily crossing L3 boundaries between issue and execution. However, picking a finer grained affinity scope also has a downside in that an issuer in one group can't utilize CPUs in other groups. While dependent on the specifics of workload, there's usually a noticeable penalty in crossing L3 boundaries, so let's default to CACHE. This issue will be further addressed and documented with examples in future patches. Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'tools/workqueue')
-rw-r--r--tools/workqueue/wq_dump.py15
1 files changed, 9 insertions, 6 deletions
diff --git a/tools/workqueue/wq_dump.py b/tools/workqueue/wq_dump.py
index ddd0bb4395ea..43ab71a193b8 100644
--- a/tools/workqueue/wq_dump.py
+++ b/tools/workqueue/wq_dump.py
@@ -78,11 +78,16 @@ worker_pool_idr = prog['worker_pool_idr']
workqueues = prog['workqueues']
wq_unbound_cpumask = prog['wq_unbound_cpumask']
wq_pod_types = prog['wq_pod_types']
+wq_affn_dfl = prog['wq_affn_dfl']
+wq_affn_names = prog['wq_affn_names']
WQ_UNBOUND = prog['WQ_UNBOUND']
WQ_ORDERED = prog['__WQ_ORDERED']
WQ_MEM_RECLAIM = prog['WQ_MEM_RECLAIM']
+WQ_AFFN_CPU = prog['WQ_AFFN_CPU']
+WQ_AFFN_SMT = prog['WQ_AFFN_SMT']
+WQ_AFFN_CACHE = prog['WQ_AFFN_CACHE']
WQ_AFFN_NUMA = prog['WQ_AFFN_NUMA']
WQ_AFFN_SYSTEM = prog['WQ_AFFN_SYSTEM']
@@ -109,12 +114,10 @@ def print_pod_type(pt):
print(f' [{cpu}]={pt.cpu_pod[cpu].value_()}', end='')
print('')
-print('')
-print('NUMA')
-print_pod_type(wq_pod_types[WQ_AFFN_NUMA])
-print('')
-print('SYSTEM')
-print_pod_type(wq_pod_types[WQ_AFFN_SYSTEM])
+for affn in [WQ_AFFN_CPU, WQ_AFFN_SMT, WQ_AFFN_CACHE, WQ_AFFN_NUMA, WQ_AFFN_SYSTEM]:
+ print('')
+ print(f'{wq_affn_names[affn].string_().decode().upper()}{" (default)" if affn == wq_affn_dfl else ""}')
+ print_pod_type(wq_pod_types[affn])
print('')
print('Worker Pools')