summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.mailmap9
-rw-r--r--CREDITS9
-rw-r--r--Documentation/ABI/obsolete/sysfs-kernel-kexec-kdump71
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-kexec-kdump61
-rw-r--r--Documentation/admin-guide/dynamic-debug-howto.rst5
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt20
-rw-r--r--Documentation/admin-guide/sysctl/kernel.rst32
-rw-r--r--Documentation/core-api/index.rst1
-rw-r--r--Documentation/core-api/kho/concepts.rst2
-rw-r--r--Documentation/core-api/liveupdate.rst61
-rw-r--r--Documentation/dev-tools/checkpatch.rst10
-rw-r--r--Documentation/driver-api/hw-recoverable-errors.rst60
-rw-r--r--Documentation/driver-api/index.rst1
-rw-r--r--Documentation/mm/index.rst1
-rw-r--r--Documentation/mm/memfd_preservation.rst23
-rw-r--r--Documentation/userspace-api/index.rst1
-rw-r--r--Documentation/userspace-api/ioctl/ioctl-number.rst2
-rw-r--r--Documentation/userspace-api/liveupdate.rst20
-rw-r--r--MAINTAINERS34
-rw-r--r--arch/Kconfig19
-rw-r--r--arch/arm/Kconfig2
-rw-r--r--arch/arm/configs/aspeed_g5_defconfig2
-rw-r--r--arch/powerpc/include/asm/crash_reserve.h8
-rw-r--r--arch/x86/include/asm/div64.h39
-rw-r--r--arch/x86/kernel/cpu/mce/core.c4
-rw-r--r--drivers/acpi/apei/ghes.c36
-rw-r--r--drivers/nvme/common/auth.c4
-rw-r--r--drivers/pci/pcie/aer.c2
-rw-r--r--fs/ceph/crypto.c60
-rw-r--r--fs/ceph/crypto.h6
-rw-r--r--fs/ceph/dir.c5
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/crypto/fname.c89
-rw-r--r--fs/nilfs2/ioctl.c35
-rw-r--r--fs/ocfs2/dir.c42
-rw-r--r--fs/ocfs2/inode.c49
-rw-r--r--fs/ocfs2/move_extents.c14
-rw-r--r--fs/ocfs2/ocfs2_fs.h22
-rw-r--r--fs/ocfs2/refcounttree.c5
-rw-r--r--fs/ocfs2/xattr.c2
-rw-r--r--fs/proc/page.c1
-rw-r--r--include/linux/base64.h10
-rw-r--r--include/linux/compiler.h6
-rw-r--r--include/linux/crash_reserve.h6
-rw-r--r--include/linux/dynamic_debug.h17
-rw-r--r--include/linux/kexec_handover.h57
-rw-r--r--include/linux/kho/abi/luo.h166
-rw-r--r--include/linux/kho/abi/memfd.h77
-rw-r--r--include/linux/liveupdate.h138
-rw-r--r--include/linux/math.h13
-rw-r--r--include/linux/math64.h59
-rw-r--r--include/linux/once_lite.h2
-rw-r--r--include/linux/panic.h1
-rw-r--r--include/linux/rbtree.h32
-rw-r--r--include/linux/shmem_fs.h23
-rw-r--r--include/linux/sys_info.h2
-rw-r--r--include/linux/uaccess.h6
-rw-r--r--include/linux/util_macros.h4
-rw-r--r--include/linux/vmcore_info.h8
-rw-r--r--include/linux/xxhash.h46
-rw-r--r--include/uapi/linux/liveupdate.h216
-rw-r--r--include/uapi/linux/vmcore.h9
-rw-r--r--init/Kconfig20
-rw-r--r--init/calibrate.c13
-rw-r--r--init/main.c97
-rw-r--r--ipc/namespace.c9
-rw-r--r--kernel/Kconfig.kexec24
-rw-r--r--kernel/Makefile3
-rw-r--r--kernel/configs/debug.config2
-rw-r--r--kernel/crash_reserve.c3
-rw-r--r--kernel/exit.c4
-rw-r--r--kernel/fork.c63
-rw-r--r--kernel/hung_task.c56
-rw-r--r--kernel/kexec_core.c161
-rw-r--r--kernel/kexec_handover_internal.h20
-rw-r--r--kernel/ksysfs.c68
-rw-r--r--kernel/liveupdate/Kconfig75
-rw-r--r--kernel/liveupdate/Makefile12
-rw-r--r--kernel/liveupdate/kexec_handover.c (renamed from kernel/kexec_handover.c)716
-rw-r--r--kernel/liveupdate/kexec_handover_debug.c (renamed from kernel/kexec_handover_debug.c)0
-rw-r--r--kernel/liveupdate/kexec_handover_debugfs.c221
-rw-r--r--kernel/liveupdate/kexec_handover_internal.h55
-rw-r--r--kernel/liveupdate/luo_core.c450
-rw-r--r--kernel/liveupdate/luo_file.c889
-rw-r--r--kernel/liveupdate/luo_internal.h110
-rw-r--r--kernel/liveupdate/luo_session.c646
-rw-r--r--kernel/module/main.c2
-rw-r--r--kernel/panic.c52
-rw-r--r--kernel/resource.c10
-rw-r--r--kernel/scs.c2
-rw-r--r--kernel/vmcore_info.c17
-rw-r--r--kernel/watchdog.c44
-rw-r--r--lib/Kconfig.debug63
-rw-r--r--lib/base64.c189
-rw-r--r--lib/dynamic_debug.c1
-rw-r--r--lib/math/div64.c185
-rw-r--r--lib/math/test_mul_u64_u64_div_u64.c191
-rw-r--r--lib/plist.c4
-rw-r--r--lib/ratelimit.c2
-rw-r--r--lib/rbtree.c29
-rw-r--r--lib/sys_info.c169
-rw-r--r--lib/test_kho.c140
-rw-r--r--lib/tests/Makefile1
-rw-r--r--lib/tests/base64_kunit.c294
-rw-r--r--lib/usercopy.c4
-rw-r--r--lib/xxhash.c29
-rw-r--r--lib/xz/xz_dec_bcj.c95
-rw-r--r--lib/xz/xz_private.h4
-rw-r--r--mm/Makefile1
-rw-r--r--mm/internal.h6
-rw-r--r--mm/memblock.c93
-rw-r--r--mm/memfd_luo.c516
-rw-r--r--mm/shmem.c49
-rw-r--r--rust/helpers/rbtree.c10
-rw-r--r--rust/helpers/uaccess.c12
-rw-r--r--samples/Kconfig22
-rw-r--r--samples/vfs/Makefile1
-rwxr-xr-xscripts/checkpatch.pl11
-rw-r--r--scripts/gdb/linux/bpf.py253
-rw-r--r--scripts/gdb/linux/constants.py.in3
-rw-r--r--scripts/gdb/linux/radixtree.py139
-rw-r--r--scripts/gdb/linux/symbols.py105
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/acct/acct_syscall.c2
-rw-r--r--tools/testing/selftests/alsa/conf.c2
-rw-r--r--tools/testing/selftests/alsa/mixer-test.c2
-rw-r--r--tools/testing/selftests/alsa/pcm-test.c2
-rw-r--r--tools/testing/selftests/alsa/test-pcmtest-driver.c2
-rw-r--r--tools/testing/selftests/alsa/utimer-test.c2
-rw-r--r--tools/testing/selftests/arm64/abi/hwcap.c2
-rw-r--r--tools/testing/selftests/arm64/abi/ptrace.c2
-rw-r--r--tools/testing/selftests/arm64/abi/syscall-abi.c2
-rw-r--r--tools/testing/selftests/arm64/fp/fp-ptrace.c2
-rw-r--r--tools/testing/selftests/arm64/fp/fp-stress.c2
-rw-r--r--tools/testing/selftests/arm64/fp/sve-probe-vls.c2
-rw-r--r--tools/testing/selftests/arm64/fp/sve-ptrace.c2
-rw-r--r--tools/testing/selftests/arm64/fp/vec-syscfg.c2
-rw-r--r--tools/testing/selftests/arm64/fp/za-ptrace.c2
-rw-r--r--tools/testing/selftests/arm64/fp/zt-ptrace.c2
-rw-r--r--tools/testing/selftests/arm64/gcs/gcs-stress.c2
-rw-r--r--tools/testing/selftests/arm64/pauth/pac.c2
-rw-r--r--tools/testing/selftests/arm64/tags/tags_test.c2
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c2
-rw-r--r--tools/testing/selftests/breakpoints/breakpoint_test.c2
-rw-r--r--tools/testing/selftests/breakpoints/breakpoint_test_arm64.c2
-rw-r--r--tools/testing/selftests/breakpoints/step_after_suspend_test.c2
-rw-r--r--tools/testing/selftests/cachestat/test_cachestat.c2
-rw-r--r--tools/testing/selftests/capabilities/test_execve.c2
-rw-r--r--tools/testing/selftests/capabilities/validate_cap.c2
-rw-r--r--tools/testing/selftests/cgroup/test_core.c2
-rw-r--r--tools/testing/selftests/cgroup/test_cpu.c2
-rw-r--r--tools/testing/selftests/cgroup/test_cpuset.c2
-rw-r--r--tools/testing/selftests/cgroup/test_freezer.c2
-rw-r--r--tools/testing/selftests/cgroup/test_hugetlb_memcg.c2
-rw-r--r--tools/testing/selftests/cgroup/test_kill.c2
-rw-r--r--tools/testing/selftests/cgroup/test_kmem.c2
-rw-r--r--tools/testing/selftests/cgroup/test_memcontrol.c2
-rw-r--r--tools/testing/selftests/cgroup/test_pids.c2
-rw-r--r--tools/testing/selftests/cgroup/test_zswap.c2
-rw-r--r--tools/testing/selftests/clone3/clone3.c2
-rw-r--r--tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c2
-rw-r--r--tools/testing/selftests/clone3/clone3_clear_sighand.c2
-rw-r--r--tools/testing/selftests/clone3/clone3_selftests.h2
-rw-r--r--tools/testing/selftests/clone3/clone3_set_tid.c2
-rw-r--r--tools/testing/selftests/connector/proc_filter.c2
-rw-r--r--tools/testing/selftests/core/close_range_test.c2
-rw-r--r--tools/testing/selftests/core/unshare_test.c2
-rw-r--r--tools/testing/selftests/coredump/stackdump_test.c2
-rw-r--r--tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c2
-rw-r--r--tools/testing/selftests/drivers/dma-buf/udmabuf.c2
-rw-r--r--tools/testing/selftests/drivers/net/gro.c2
-rw-r--r--tools/testing/selftests/drivers/net/hw/toeplitz.c2
-rw-r--r--tools/testing/selftests/drivers/ntsync/ntsync.c2
-rw-r--r--tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c2
-rw-r--r--tools/testing/selftests/exec/check-exec.c2
-rw-r--r--tools/testing/selftests/exec/execveat.c2
-rw-r--r--tools/testing/selftests/exec/load_address.c2
-rw-r--r--tools/testing/selftests/exec/non-regular.c2
-rw-r--r--tools/testing/selftests/exec/null-argv.c2
-rw-r--r--tools/testing/selftests/exec/recursion-depth.c2
-rw-r--r--tools/testing/selftests/fchmodat2/fchmodat2_test.c2
-rw-r--r--tools/testing/selftests/filelock/ofdlocks.c2
-rw-r--r--tools/testing/selftests/filesystems/anon_inode_test.c2
-rw-r--r--tools/testing/selftests/filesystems/binderfs/binderfs_test.c2
-rw-r--r--tools/testing/selftests/filesystems/devpts_pts.c2
-rw-r--r--tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c2
-rw-r--r--tools/testing/selftests/filesystems/eventfd/eventfd_test.c2
-rw-r--r--tools/testing/selftests/filesystems/fclog.c2
-rw-r--r--tools/testing/selftests/filesystems/file_stressor.c2
-rw-r--r--tools/testing/selftests/filesystems/fuse/fusectl_test.c2
-rw-r--r--tools/testing/selftests/filesystems/kernfs_test.c2
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c2
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c2
-rw-r--r--tools/testing/selftests/filesystems/nsfs/iterate_mntns.c2
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c2
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c2
-rw-r--r--tools/testing/selftests/filesystems/statmount/listmount_test.c2
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test.c2
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test_ns.c2
-rw-r--r--tools/testing/selftests/filesystems/utils.c2
-rw-r--r--tools/testing/selftests/futex/functional/futex_numa_mpol.c2
-rw-r--r--tools/testing/selftests/futex/functional/futex_priv_hash.c2
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue.c2
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi.c2
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c2
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c2
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait.c2
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c2
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_timeout.c2
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c2
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_wouldblock.c2
-rw-r--r--tools/testing/selftests/futex/functional/futex_waitv.c2
-rw-r--r--tools/testing/selftests/hid/hid_common.h2
-rw-r--r--tools/testing/selftests/intel_pstate/aperf.c2
-rw-r--r--tools/testing/selftests/iommu/iommufd_utils.h2
-rw-r--r--tools/testing/selftests/ipc/msgque.c2
-rw-r--r--tools/testing/selftests/ir/ir_loopback.c2
-rw-r--r--tools/testing/selftests/kcmp/kcmp_test.c2
-rwxr-xr-xtools/testing/selftests/kho/vmtest.sh1
-rw-r--r--tools/testing/selftests/kselftest_harness.h2
-rw-r--r--tools/testing/selftests/kselftest_harness/harness-selftest.c2
-rw-r--r--tools/testing/selftests/landlock/audit.h2
-rw-r--r--tools/testing/selftests/landlock/common.h2
-rw-r--r--tools/testing/selftests/lib.mk3
-rw-r--r--tools/testing/selftests/liveupdate/.gitignore9
-rw-r--r--tools/testing/selftests/liveupdate/Makefile34
-rw-r--r--tools/testing/selftests/liveupdate/config11
-rwxr-xr-xtools/testing/selftests/liveupdate/do_kexec.sh16
-rw-r--r--tools/testing/selftests/liveupdate/liveupdate.c348
-rw-r--r--tools/testing/selftests/liveupdate/luo_kexec_simple.c89
-rw-r--r--tools/testing/selftests/liveupdate/luo_multi_session.c162
-rw-r--r--tools/testing/selftests/liveupdate/luo_test_utils.c266
-rw-r--r--tools/testing/selftests/liveupdate/luo_test_utils.h44
-rw-r--r--tools/testing/selftests/lsm/lsm_get_self_attr_test.c2
-rw-r--r--tools/testing/selftests/lsm/lsm_list_modules_test.c2
-rw-r--r--tools/testing/selftests/lsm/lsm_set_self_attr_test.c2
-rw-r--r--tools/testing/selftests/media_tests/media_device_open.c2
-rw-r--r--tools/testing/selftests/media_tests/media_device_test.c2
-rw-r--r--tools/testing/selftests/membarrier/membarrier_test_impl.h2
-rw-r--r--tools/testing/selftests/mincore/mincore_selftest.c4
-rw-r--r--tools/testing/selftests/mm/compaction_test.c2
-rw-r--r--tools/testing/selftests/mm/cow.c2
-rw-r--r--tools/testing/selftests/mm/droppable.c2
-rw-r--r--tools/testing/selftests/mm/guard-regions.c2
-rw-r--r--tools/testing/selftests/mm/gup_longterm.c2
-rw-r--r--tools/testing/selftests/mm/gup_test.c2
-rw-r--r--tools/testing/selftests/mm/hmm-tests.c2
-rw-r--r--tools/testing/selftests/mm/hugepage-mmap.c2
-rw-r--r--tools/testing/selftests/mm/hugepage-mremap.c2
-rw-r--r--tools/testing/selftests/mm/hugetlb-madvise.c2
-rw-r--r--tools/testing/selftests/mm/hugetlb-read-hwpoison.c2
-rw-r--r--tools/testing/selftests/mm/hugetlb-soft-offline.c2
-rw-r--r--tools/testing/selftests/mm/hugetlb_dio.c2
-rw-r--r--tools/testing/selftests/mm/hugetlb_fault_after_madv.c2
-rw-r--r--tools/testing/selftests/mm/hugetlb_madv_vs_map.c2
-rw-r--r--tools/testing/selftests/mm/ksm_functional_tests.c2
-rw-r--r--tools/testing/selftests/mm/ksm_tests.c2
-rw-r--r--tools/testing/selftests/mm/madv_populate.c2
-rw-r--r--tools/testing/selftests/mm/map_fixed_noreplace.c2
-rw-r--r--tools/testing/selftests/mm/map_hugetlb.c2
-rw-r--r--tools/testing/selftests/mm/map_populate.c2
-rw-r--r--tools/testing/selftests/mm/mdwe_test.c2
-rw-r--r--tools/testing/selftests/mm/memfd_secret.c2
-rw-r--r--tools/testing/selftests/mm/merge.c2
-rw-r--r--tools/testing/selftests/mm/migration.c2
-rw-r--r--tools/testing/selftests/mm/mkdirty.c2
-rw-r--r--tools/testing/selftests/mm/mlock-random-test.c2
-rw-r--r--tools/testing/selftests/mm/mlock2-tests.c2
-rw-r--r--tools/testing/selftests/mm/mrelease_test.c2
-rw-r--r--tools/testing/selftests/mm/mremap_dontunmap.c2
-rw-r--r--tools/testing/selftests/mm/mremap_test.c2
-rw-r--r--tools/testing/selftests/mm/mseal_test.c2
-rw-r--r--tools/testing/selftests/mm/on-fault-limit.c2
-rw-r--r--tools/testing/selftests/mm/pagemap_ioctl.c2
-rw-r--r--tools/testing/selftests/mm/pfnmap.c2
-rw-r--r--tools/testing/selftests/mm/pkey-helpers.h2
-rw-r--r--tools/testing/selftests/mm/prctl_thp_disable.c2
-rw-r--r--tools/testing/selftests/mm/process_madv.c2
-rw-r--r--tools/testing/selftests/mm/rmap.c2
-rw-r--r--tools/testing/selftests/mm/soft-dirty.c2
-rw-r--r--tools/testing/selftests/mm/split_huge_page_test.c2
-rw-r--r--tools/testing/selftests/mm/thuge-gen.c2
-rw-r--r--tools/testing/selftests/mm/transhuge-stress.c2
-rw-r--r--tools/testing/selftests/mm/uffd-common.h2
-rw-r--r--tools/testing/selftests/mm/uffd-wp-mremap.c2
-rw-r--r--tools/testing/selftests/mm/va_high_addr_switch.c2
-rw-r--r--tools/testing/selftests/mm/virtual_address_range.c2
-rw-r--r--tools/testing/selftests/mm/vm_util.c2
-rw-r--r--tools/testing/selftests/mm/vm_util.h2
-rw-r--r--tools/testing/selftests/mount_setattr/mount_setattr_test.c2
-rw-r--r--tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c2
-rw-r--r--tools/testing/selftests/mqueue/mq_open_tests.c2
-rw-r--r--tools/testing/selftests/mqueue/mq_perf_tests.c2
-rw-r--r--tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c4
-rw-r--r--tools/testing/selftests/namespaces/file_handle_test.c2
-rw-r--r--tools/testing/selftests/namespaces/init_ino_test.c2
-rw-r--r--tools/testing/selftests/namespaces/nsid_test.c2
-rw-r--r--tools/testing/selftests/nci/nci_dev.c2
-rw-r--r--tools/testing/selftests/net/af_unix/diag_uid.c2
-rw-r--r--tools/testing/selftests/net/af_unix/msg_oob.c2
-rw-r--r--tools/testing/selftests/net/af_unix/scm_inq.c2
-rw-r--r--tools/testing/selftests/net/af_unix/scm_pidfd.c2
-rw-r--r--tools/testing/selftests/net/af_unix/scm_rights.c2
-rw-r--r--tools/testing/selftests/net/af_unix/unix_connect.c2
-rw-r--r--tools/testing/selftests/net/bind_timewait.c2
-rw-r--r--tools/testing/selftests/net/bind_wildcard.c2
-rw-r--r--tools/testing/selftests/net/can/test_raw_filter.c2
-rw-r--r--tools/testing/selftests/net/cmsg_sender.c2
-rw-r--r--tools/testing/selftests/net/epoll_busy_poll.c2
-rw-r--r--tools/testing/selftests/net/ip_local_port_range.c2
-rw-r--r--tools/testing/selftests/net/ipsec.c2
-rw-r--r--tools/testing/selftests/net/ipv6_fragmentation.c2
-rw-r--r--tools/testing/selftests/net/netfilter/conntrack_dump_flush.c2
-rw-r--r--tools/testing/selftests/net/netlink-dumps.c2
-rw-r--r--tools/testing/selftests/net/ovpn/ovpn-cli.c2
-rw-r--r--tools/testing/selftests/net/proc_net_pktgen.c2
-rw-r--r--tools/testing/selftests/net/psock_fanout.c2
-rw-r--r--tools/testing/selftests/net/psock_tpacket.c2
-rw-r--r--tools/testing/selftests/net/reuseaddr_ports_exhausted.c2
-rw-r--r--tools/testing/selftests/net/reuseport_bpf.c2
-rw-r--r--tools/testing/selftests/net/reuseport_bpf_numa.c2
-rw-r--r--tools/testing/selftests/net/rxtimestamp.c2
-rw-r--r--tools/testing/selftests/net/sk_so_peek_off.c2
-rw-r--r--tools/testing/selftests/net/so_incoming_cpu.c2
-rw-r--r--tools/testing/selftests/net/socket.c2
-rw-r--r--tools/testing/selftests/net/tap.c2
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/setup.c2
-rw-r--r--tools/testing/selftests/net/tcp_fastopen_backup_key.c2
-rw-r--r--tools/testing/selftests/net/tcp_port_share.c2
-rw-r--r--tools/testing/selftests/net/tls.c2
-rw-r--r--tools/testing/selftests/net/tun.c2
-rw-r--r--tools/testing/selftests/net/udpgso_bench_tx.c2
-rw-r--r--tools/testing/selftests/openat2/helpers.h2
-rw-r--r--tools/testing/selftests/openat2/openat2_test.c2
-rw-r--r--tools/testing/selftests/openat2/rename_attack_test.c2
-rw-r--r--tools/testing/selftests/openat2/resolve_test.c2
-rw-r--r--tools/testing/selftests/pci_endpoint/pci_endpoint_test.c2
-rw-r--r--tools/testing/selftests/perf_events/mmap.c2
-rw-r--r--tools/testing/selftests/perf_events/remove_on_exec.c2
-rw-r--r--tools/testing/selftests/perf_events/sigtrap_threads.c2
-rw-r--r--tools/testing/selftests/perf_events/watermark_signal.c2
-rw-r--r--tools/testing/selftests/pid_namespace/pid_max.c2
-rw-r--r--tools/testing/selftests/pid_namespace/regression_enomem.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd.h2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_bind_mount.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_fdinfo_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_file_handle_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_getfd_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_info_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_open_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_poll_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_setattr_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_setns_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_wait.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_xattr_test.c2
-rw-r--r--tools/testing/selftests/prctl/set-anon-vma-name-test.c2
-rw-r--r--tools/testing/selftests/prctl/set-process-name.c2
-rw-r--r--tools/testing/selftests/proc/proc-maps-race.c2
-rw-r--r--tools/testing/selftests/proc/proc-pid-vm.c2
-rw-r--r--tools/testing/selftests/proc/proc-pidns.c2
-rw-r--r--tools/testing/selftests/ptrace/get_set_sud.c2
-rw-r--r--tools/testing/selftests/ptrace/get_syscall_info.c2
-rw-r--r--tools/testing/selftests/ptrace/set_syscall_info.c2
-rw-r--r--tools/testing/selftests/ptrace/vmaccess.c2
-rw-r--r--tools/testing/selftests/resctrl/resctrl.h2
-rw-r--r--tools/testing/selftests/ring-buffer/map_test.c2
-rw-r--r--tools/testing/selftests/riscv/abi/pointer_masking.c2
-rw-r--r--tools/testing/selftests/riscv/hwprobe/cbo.c2
-rw-r--r--tools/testing/selftests/riscv/hwprobe/hwprobe.c2
-rw-r--r--tools/testing/selftests/riscv/hwprobe/which-cpus.c2
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_bottomup.c2
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_default.c2
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_test.h2
-rw-r--r--tools/testing/selftests/riscv/sigreturn/sigreturn.c2
-rw-r--r--tools/testing/selftests/riscv/vector/v_initval.c2
-rw-r--r--tools/testing/selftests/riscv/vector/vstate_prctl.c2
-rw-r--r--tools/testing/selftests/rseq/basic_percpu_ops_test.c2
-rw-r--r--tools/testing/selftests/rseq/rseq.c2
-rw-r--r--tools/testing/selftests/rtc/rtctest.c2
-rw-r--r--tools/testing/selftests/seccomp/seccomp_benchmark.c2
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c2
-rw-r--r--tools/testing/selftests/sgx/main.c2
-rw-r--r--tools/testing/selftests/signal/mangle_uc_sigmask.c2
-rw-r--r--tools/testing/selftests/signal/sas.c2
-rw-r--r--tools/testing/selftests/sparc64/drivers/adi-test.c2
-rw-r--r--tools/testing/selftests/sync/sync_test.c2
-rw-r--r--tools/testing/selftests/syscall_user_dispatch/sud_test.c2
-rw-r--r--tools/testing/selftests/tdx/tdx_guest_test.c2
-rw-r--r--tools/testing/selftests/timens/timens.h2
-rw-r--r--tools/testing/selftests/timers/adjtick.c2
-rw-r--r--tools/testing/selftests/timers/alarmtimer-suspend.c2
-rw-r--r--tools/testing/selftests/timers/change_skew.c2
-rw-r--r--tools/testing/selftests/timers/clocksource-switch.c2
-rw-r--r--tools/testing/selftests/timers/freq-step.c2
-rw-r--r--tools/testing/selftests/timers/inconsistency-check.c2
-rw-r--r--tools/testing/selftests/timers/leap-a-day.c2
-rw-r--r--tools/testing/selftests/timers/leapcrash.c2
-rw-r--r--tools/testing/selftests/timers/mqueue-lat.c2
-rw-r--r--tools/testing/selftests/timers/nanosleep.c2
-rw-r--r--tools/testing/selftests/timers/nsleep-lat.c2
-rw-r--r--tools/testing/selftests/timers/posix_timers.c2
-rw-r--r--tools/testing/selftests/timers/raw_skew.c2
-rw-r--r--tools/testing/selftests/timers/rtcpie.c2
-rw-r--r--tools/testing/selftests/timers/set-2038.c2
-rw-r--r--tools/testing/selftests/timers/set-tai.c2
-rw-r--r--tools/testing/selftests/timers/set-timer-lat.c2
-rw-r--r--tools/testing/selftests/timers/set-tz.c2
-rw-r--r--tools/testing/selftests/timers/skew_consistency.c2
-rw-r--r--tools/testing/selftests/timers/threadtest.c2
-rw-r--r--tools/testing/selftests/timers/valid-adjtimex.c2
-rw-r--r--tools/testing/selftests/tmpfs/bug-link-o-tmpfile.c2
-rw-r--r--tools/testing/selftests/tty/tty_tstamp_update.c2
-rw-r--r--tools/testing/selftests/uevent/uevent_filtering.c2
-rw-r--r--tools/testing/selftests/user_events/abi_test.c2
-rw-r--r--tools/testing/selftests/user_events/dyn_test.c2
-rw-r--r--tools/testing/selftests/user_events/ftrace_test.c2
-rw-r--r--tools/testing/selftests/user_events/perf_test.c2
-rw-r--r--tools/testing/selftests/user_events/user_events_selftests.h2
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_abi.c2
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_chacha.c2
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_correctness.c2
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_getcpu.c2
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_getrandom.c2
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_gettimeofday.c2
-rw-r--r--tools/testing/selftests/vfio/lib/vfio_pci_device.c2
-rw-r--r--tools/testing/selftests/vfio/lib/vfio_pci_driver.c2
-rw-r--r--tools/testing/selftests/vfio/vfio_dma_mapping_test.c2
-rw-r--r--tools/testing/selftests/vfio/vfio_iommufd_setup_test.c2
-rw-r--r--tools/testing/selftests/vfio/vfio_pci_device_test.c2
-rw-r--r--tools/testing/selftests/vfio/vfio_pci_driver_test.c2
-rw-r--r--tools/testing/selftests/wireguard/qemu/kernel.config2
-rw-r--r--tools/testing/selftests/x86/corrupt_xstate_header.c2
-rw-r--r--tools/testing/selftests/x86/helpers.h2
-rw-r--r--tools/testing/selftests/x86/lam.c2
-rw-r--r--tools/testing/selftests/x86/syscall_numbering.c2
-rw-r--r--tools/testing/selftests/x86/test_mremap_vdso.c2
-rw-r--r--tools/testing/selftests/x86/test_vsyscall.c2
-rw-r--r--tools/testing/selftests/x86/xstate.h2
439 files changed, 8031 insertions, 1801 deletions
diff --git a/.mailmap b/.mailmap
index 85307aeed9da..84309a39d329 100644
--- a/.mailmap
+++ b/.mailmap
@@ -303,6 +303,7 @@ Hans de Goede <hansg@kernel.org> <hdegoede@redhat.com>
Hans Verkuil <hverkuil@kernel.org> <hverkuil@xs4all.nl>
Hans Verkuil <hverkuil@kernel.org> <hverkuil-cisco@xs4all.nl>
Hans Verkuil <hverkuil@kernel.org> <hansverk@cisco.com>
+Hao Ge <hao.ge@linux.dev> <gehao@kylinos.cn>
Harry Yoo <harry.yoo@oracle.com> <42.hyeyoo@gmail.com>
Heiko Carstens <hca@linux.ibm.com> <h.carstens@de.ibm.com>
Heiko Carstens <hca@linux.ibm.com> <heiko.carstens@de.ibm.com>
@@ -503,9 +504,7 @@ Mark Brown <broonie@sirena.org.uk>
Mark Starovoytov <mstarovo@pm.me> <mstarovoitov@marvell.com>
Markus Schneider-Pargmann <msp@baylibre.com> <mpa@pengutronix.de>
Mark Yao <markyao0591@gmail.com> <mark.yao@rock-chips.com>
-Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
-Martin Kepplinger <martink@posteo.de> <martin.kepplinger@puri.sm>
-Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
+Martin Kepplinger-Novakovic <martink@posteo.de> <martin.kepplinger-novakovic@ginzinger.com>
Martyna Szapar-Mudlaw <martyna.szapar-mudlaw@linux.intel.com> <martyna.szapar-mudlaw@intel.com>
Mathieu Othacehe <othacehe@gnu.org> <m.othacehe@gmail.com>
Mat Martineau <martineau@kernel.org> <mathew.j.martineau@linux.intel.com>
@@ -856,6 +855,9 @@ Vivien Didelot <vivien.didelot@gmail.com> <vivien.didelot@savoirfairelinux.com>
Vlad Dogaru <ddvlad@gmail.com> <vlad.dogaru@intel.com>
Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@parallels.com>
Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@virtuozzo.com>
+WangYuli <wangyuli@aosc.io> <wangyl5933@chinaunicom.cn>
+WangYuli <wangyuli@aosc.io> <wangyuli@deepin.org>
+WangYuli <wangyuli@aosc.io> <wangyuli@uniontech.com>
Weiwen Hu <huweiwen@linux.alibaba.com> <sehuww@mail.scut.edu.cn>
WeiXiong Liao <gmpy.liaowx@gmail.com> <liaoweixiong@allwinnertech.com>
Wen Gong <quic_wgong@quicinc.com> <wgong@codeaurora.org>
@@ -867,6 +869,7 @@ Yakir Yang <kuankuan.y@gmail.com> <ykk@rock-chips.com>
Yanteng Si <si.yanteng@linux.dev> <siyanteng@loongson.cn>
Ying Huang <huang.ying.caritas@gmail.com> <ying.huang@intel.com>
Yosry Ahmed <yosry.ahmed@linux.dev> <yosryahmed@google.com>
+Yu-Chun Lin <eleanor.lin@realtek.com> <eleanor15x@gmail.com>
Yusuke Goda <goda.yusuke@renesas.com>
Zack Rusin <zack.rusin@broadcom.com> <zackr@vmware.com>
Zhu Yanjun <zyjzyj2000@gmail.com> <yanjunz@nvidia.com>
diff --git a/CREDITS b/CREDITS
index fa5397f4ebcd..85bdc8828734 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2056,16 +2056,15 @@ S: Korte Heul 95
S: 1403 ND BUSSUM
S: The Netherlands
-N: Martin Kepplinger
+N: Martin Kepplinger-Novakovic
E: martink@posteo.de
-E: martin.kepplinger@puri.sm
-W: http://www.martinkepplinger.com
P: 4096R/5AB387D3 F208 2B88 0F9E 4239 3468 6E3F 5003 98DF 5AB3 87D3
D: mma8452 accelerators iio driver
D: pegasus_notetaker input driver
+D: imx8m media and hi846 sensor driver
D: Kernel fixes and cleanups
-S: Garnisonstraße 26
-S: 4020 Linz
+S: Keplerstr. 6
+S: 4050 Traun
S: Austria
N: Karl Keyte
diff --git a/Documentation/ABI/obsolete/sysfs-kernel-kexec-kdump b/Documentation/ABI/obsolete/sysfs-kernel-kexec-kdump
new file mode 100644
index 000000000000..ba26a6a1d2be
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-kernel-kexec-kdump
@@ -0,0 +1,71 @@
+NOTE: all the ABIs listed in this file are deprecated and will be removed after 2028.
+
+Here are the alternative ABIs:
++------------------------------------+-----------------------------------------+
+| Deprecated | Alternative |
++------------------------------------+-----------------------------------------+
+| /sys/kernel/kexec_loaded | /sys/kernel/kexec/loaded |
++------------------------------------+-----------------------------------------+
+| /sys/kernel/kexec_crash_loaded | /sys/kernel/kexec/crash_loaded |
++------------------------------------+-----------------------------------------+
+| /sys/kernel/kexec_crash_size | /sys/kernel/kexec/crash_size |
++------------------------------------+-----------------------------------------+
+| /sys/kernel/crash_elfcorehdr_size | /sys/kernel/kexec/crash_elfcorehdr_size |
++------------------------------------+-----------------------------------------+
+| /sys/kernel/kexec_crash_cma_ranges | /sys/kernel/kexec/crash_cma_ranges |
++------------------------------------+-----------------------------------------+
+
+
+What: /sys/kernel/kexec_loaded
+Date: Jun 2006
+Contact: kexec@lists.infradead.org
+Description: read only
+ Indicates whether a new kernel image has been loaded
+ into memory using the kexec system call. It shows 1 if
+ a kexec image is present and ready to boot, or 0 if none
+ is loaded.
+User: kexec tools, kdump service
+
+What: /sys/kernel/kexec_crash_loaded
+Date: Jun 2006
+Contact: kexec@lists.infradead.org
+Description: read only
+ Indicates whether a crash (kdump) kernel is currently
+ loaded into memory. It shows 1 if a crash kernel has been
+ successfully loaded for panic handling, or 0 if no crash
+ kernel is present.
+User: Kexec tools, Kdump service
+
+What: /sys/kernel/kexec_crash_size
+Date: Dec 2009
+Contact: kexec@lists.infradead.org
+Description: read/write
+ Shows the amount of memory reserved for loading the crash
+ (kdump) kernel. It reports the size, in bytes, of the
+ crash kernel area defined by the crashkernel= parameter.
+ This interface also allows reducing the crashkernel
+ reservation by writing a smaller value, and the reclaimed
+ space is added back to the system RAM.
+User: Kdump service
+
+What: /sys/kernel/crash_elfcorehdr_size
+Date: Aug 2023
+Contact: kexec@lists.infradead.org
+Description: read only
+ Indicates the preferred size of the memory buffer for the
+ ELF core header used by the crash (kdump) kernel. It defines
+ how much space is needed to hold metadata about the crashed
+ system, including CPU and memory information. This information
+ is used by the user space utility kexec to support updating the
+ in-kernel kdump image during hotplug operations.
+User: Kexec tools
+
+What: /sys/kernel/kexec_crash_cma_ranges
+Date: Nov 2025
+Contact: kexec@lists.infradead.org
+Description: read only
+ Provides information about the memory ranges reserved from
+ the Contiguous Memory Allocator (CMA) area that are allocated
+ to the crash (kdump) kernel. It lists the start and end physical
+ addresses of CMA regions assigned for crashkernel use.
+User: kdump service
diff --git a/Documentation/ABI/testing/sysfs-kernel-kexec-kdump b/Documentation/ABI/testing/sysfs-kernel-kexec-kdump
new file mode 100644
index 000000000000..f59051b5d96d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-kexec-kdump
@@ -0,0 +1,61 @@
+What: /sys/kernel/kexec/*
+Date: Nov 2025
+Contact: kexec@lists.infradead.org
+Description:
+ The /sys/kernel/kexec/* directory contains sysfs files
+ that provide information about the configuration status
+ of kexec and kdump.
+
+What: /sys/kernel/kexec/loaded
+Date: Nov 2025
+Contact: kexec@lists.infradead.org
+Description: read only
+ Indicates whether a new kernel image has been loaded
+ into memory using the kexec system call. It shows 1 if
+ a kexec image is present and ready to boot, or 0 if none
+ is loaded.
+User: kexec tools, kdump service
+
+What: /sys/kernel/kexec/crash_loaded
+Date: Nov 2025
+Contact: kexec@lists.infradead.org
+Description: read only
+ Indicates whether a crash (kdump) kernel is currently
+ loaded into memory. It shows 1 if a crash kernel has been
+ successfully loaded for panic handling, or 0 if no crash
+ kernel is present.
+User: Kexec tools, Kdump service
+
+What: /sys/kernel/kexec/crash_size
+Date: Nov 2025
+Contact: kexec@lists.infradead.org
+Description: read/write
+ Shows the amount of memory reserved for loading the crash
+ (kdump) kernel. It reports the size, in bytes, of the
+ crash kernel area defined by the crashkernel= parameter.
+ This interface also allows reducing the crashkernel
+ reservation by writing a smaller value, and the reclaimed
+ space is added back to the system RAM.
+User: Kdump service
+
+What: /sys/kernel/kexec/crash_elfcorehdr_size
+Date: Nov 2025
+Contact: kexec@lists.infradead.org
+Description: read only
+ Indicates the preferred size of the memory buffer for the
+ ELF core header used by the crash (kdump) kernel. It defines
+ how much space is needed to hold metadata about the crashed
+ system, including CPU and memory information. This information
+ is used by the user space utility kexec to support updating the
+ in-kernel kdump image during hotplug operations.
+User: Kexec tools
+
+What: /sys/kernel/kexec/crash_cma_ranges
+Date: Nov 2025
+Contact: kexec@lists.infradead.org
+Description: read only
+ Provides information about the memory ranges reserved from
+ the Contiguous Memory Allocator (CMA) area that are allocated
+ to the crash (kdump) kernel. It lists the start and end physical
+ addresses of CMA regions assigned for crashkernel use.
+User: kdump service
diff --git a/Documentation/admin-guide/dynamic-debug-howto.rst b/Documentation/admin-guide/dynamic-debug-howto.rst
index 7c036590cd07..095a63892257 100644
--- a/Documentation/admin-guide/dynamic-debug-howto.rst
+++ b/Documentation/admin-guide/dynamic-debug-howto.rst
@@ -223,12 +223,13 @@ The flags are::
f Include the function name
s Include the source file name
l Include line number
+ d Include call trace
For ``print_hex_dump_debug()`` and ``print_hex_dump_bytes()``, only
the ``p`` flag has meaning, other flags are ignored.
-Note the regexp ``^[-+=][fslmpt_]+$`` matches a flags specification.
-To clear all flags at once, use ``=_`` or ``-fslmpt``.
+Note the regexp ``^[-+=][fslmptd_]+$`` matches a flags specification.
+To clear all flags at once, use ``=_`` or ``-fslmptd``.
Debug messages during Boot Process
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index b242519f57da..b86b6d946d88 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2114,14 +2114,20 @@ Kernel parameters
the added memory block itself do not be affected.
hung_task_panic=
- [KNL] Should the hung task detector generate panics.
- Format: 0 | 1
+ [KNL] Number of hung tasks to trigger kernel panic.
+ Format: <int>
+
+ When set to a non-zero value, a kernel panic will be triggered if
+ the number of detected hung tasks reaches this value.
+
+ 0: don't panic
+ 1: panic immediately on first hung task
+ N: panic after N hung tasks are detected in a single scan
- A value of 1 instructs the kernel to panic when a
- hung task is detected. The default value is controlled
- by the CONFIG_BOOTPARAM_HUNG_TASK_PANIC build-time
- option. The value selected by this boot parameter can
- be changed later by the kernel.hung_task_panic sysctl.
+ The default value is controlled by the
+ CONFIG_BOOTPARAM_HUNG_TASK_PANIC build-time option. The value
+ selected by this boot parameter can be changed later by the
+ kernel.hung_task_panic sysctl.
hvc_iucv= [S390] Number of z/VM IUCV hypervisor console (HVC)
terminal devices. Valid values: 0..8
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index f3ee807b5d8b..239da22c4e28 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -397,13 +397,14 @@ a hung task is detected.
hung_task_panic
===============
-Controls the kernel's behavior when a hung task is detected.
+When set to a non-zero value, a kernel panic will be triggered if the
+number of hung tasks found during a single scan reaches this value.
This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
-= =================================================
+= =======================================================
0 Continue operation. This is the default behavior.
-1 Panic immediately.
-= =================================================
+N Panic when N hung tasks are found during a single scan.
+= =======================================================
hung_task_check_count
@@ -421,6 +422,11 @@ the system boot.
This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
+hung_task_sys_info
+==================
+A comma separated list of extra system information to be dumped when
+hung task is detected, for example, "tasks,mem,timers,locks,...".
+Refer 'panic_sys_info' section below for more details.
hung_task_timeout_secs
======================
@@ -515,6 +521,15 @@ default), only processes with the CAP_SYS_ADMIN capability may create
io_uring instances.
+kernel_sys_info
+===============
+A comma separated list of extra system information to be dumped when
+soft/hard lockup is detected, for example, "tasks,mem,timers,locks,...".
+Refer 'panic_sys_info' section below for more details.
+
+It serves as the default kernel control knob, which will take effect
+when a kernel module calls sys_info() with parameter==0.
+
kexec_load_disabled
===================
@@ -576,6 +591,11 @@ if leaking kernel pointer values to unprivileged users is a concern.
When ``kptr_restrict`` is set to 2, kernel pointers printed using
%pK will be replaced with 0s regardless of privileges.
+softlockup_sys_info & hardlockup_sys_info
+=========================================
+A comma separated list of extra system information to be dumped when
+soft/hard lockup is detected, for example, "tasks,mem,timers,locks,...".
+Refer 'panic_sys_info' section below for more details.
modprobe
========
@@ -910,8 +930,8 @@ to 'panic_print'. Possible values are:
============= ===================================================
tasks print all tasks info
mem print system memory info
-timer print timers info
-lock print locks info if CONFIG_LOCKDEP is on
+timers print timers info
+locks print locks info if CONFIG_LOCKDEP is on
ftrace print ftrace buffer
all_bt print all CPUs backtrace (if available in the arch)
blocked_tasks print only tasks in uninterruptible (blocked) state
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index 6cbdcbfa79c3..5eb0fbbbc323 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -138,6 +138,7 @@ Documents that don't fit elsewhere or which have yet to be categorized.
:maxdepth: 1
librs
+ liveupdate
netlink
.. only:: subproject and html
diff --git a/Documentation/core-api/kho/concepts.rst b/Documentation/core-api/kho/concepts.rst
index 36d5c05cfb30..d626d1dbd678 100644
--- a/Documentation/core-api/kho/concepts.rst
+++ b/Documentation/core-api/kho/concepts.rst
@@ -70,5 +70,5 @@ in the FDT. That state is called the KHO finalization phase.
Public API
==========
-.. kernel-doc:: kernel/kexec_handover.c
+.. kernel-doc:: kernel/liveupdate/kexec_handover.c
:export:
diff --git a/Documentation/core-api/liveupdate.rst b/Documentation/core-api/liveupdate.rst
new file mode 100644
index 000000000000..7960eb15a81f
--- /dev/null
+++ b/Documentation/core-api/liveupdate.rst
@@ -0,0 +1,61 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================
+Live Update Orchestrator
+========================
+:Author: Pasha Tatashin <pasha.tatashin@soleen.com>
+
+.. kernel-doc:: kernel/liveupdate/luo_core.c
+ :doc: Live Update Orchestrator (LUO)
+
+LUO Sessions
+============
+.. kernel-doc:: kernel/liveupdate/luo_session.c
+ :doc: LUO Sessions
+
+LUO Preserving File Descriptors
+===============================
+.. kernel-doc:: kernel/liveupdate/luo_file.c
+ :doc: LUO File Descriptors
+
+Live Update Orchestrator ABI
+============================
+.. kernel-doc:: include/linux/kho/abi/luo.h
+ :doc: Live Update Orchestrator ABI
+
+The following types of file descriptors can be preserved
+
+.. toctree::
+ :maxdepth: 1
+
+ ../mm/memfd_preservation
+
+Public API
+==========
+.. kernel-doc:: include/linux/liveupdate.h
+
+.. kernel-doc:: include/linux/kho/abi/luo.h
+ :functions:
+
+.. kernel-doc:: kernel/liveupdate/luo_core.c
+ :export:
+
+.. kernel-doc:: kernel/liveupdate/luo_file.c
+ :export:
+
+Internal API
+============
+.. kernel-doc:: kernel/liveupdate/luo_core.c
+ :internal:
+
+.. kernel-doc:: kernel/liveupdate/luo_session.c
+ :internal:
+
+.. kernel-doc:: kernel/liveupdate/luo_file.c
+ :internal:
+
+See Also
+========
+
+- :doc:`Live Update uAPI </userspace-api/liveupdate>`
+- :doc:`/core-api/kho/concepts`
diff --git a/Documentation/dev-tools/checkpatch.rst b/Documentation/dev-tools/checkpatch.rst
index dfaad0a279ff..fa2988dd4657 100644
--- a/Documentation/dev-tools/checkpatch.rst
+++ b/Documentation/dev-tools/checkpatch.rst
@@ -1238,6 +1238,16 @@ Others
The patch file does not appear to be in unified-diff format. Please
regenerate the patch file before sending it to the maintainer.
+ **PLACEHOLDER_USE**
+ Detects unhandled placeholder text left in cover letters or commit headers/logs.
+ Common placeholders include lines like::
+
+ *** SUBJECT HERE ***
+ *** BLURB HERE ***
+
+ These typically come from autogenerated templates. Replace them with a proper
+ subject and description before sending.
+
**PRINTF_0XDECIMAL**
Prefixing 0x with decimal output is defective and should be corrected.
diff --git a/Documentation/driver-api/hw-recoverable-errors.rst b/Documentation/driver-api/hw-recoverable-errors.rst
new file mode 100644
index 000000000000..fc526c3454bd
--- /dev/null
+++ b/Documentation/driver-api/hw-recoverable-errors.rst
@@ -0,0 +1,60 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================================================
+Recoverable Hardware Error Tracking in vmcoreinfo
+=================================================
+
+Overview
+--------
+
+This feature provides a generic infrastructure within the Linux kernel to track
+and log recoverable hardware errors. These are hardware recoverable errors
+visible that might not cause immediate panics but may influence health, mainly
+because new code path will be executed in the kernel.
+
+By recording counts and timestamps of recoverable errors into the vmcoreinfo
+crash dump notes, this infrastructure aids post-mortem crash analysis tools in
+correlating hardware events with kernel failures. This enables faster triage
+and better understanding of root causes, especially in large-scale cloud
+environments where hardware issues are common.
+
+Benefits
+--------
+
+- Facilitates correlation of hardware recoverable errors with kernel panics or
+ unusual code paths that lead to system crashes.
+- Provides operators and cloud providers quick insights, improving reliability
+ and reducing troubleshooting time.
+- Complements existing full hardware diagnostics without replacing them.
+
+Data Exposure and Consumption
+-----------------------------
+
+- The tracked error data consists of per-error-type counts and timestamps of
+ last occurrence.
+- This data is stored in the `hwerror_data` array, categorized by error source
+ types like CPU, memory, PCI, CXL, and others.
+- It is exposed via vmcoreinfo crash dump notes and can be read using tools
+ like `crash`, `drgn`, or other kernel crash analysis utilities.
+- There is no other way to read these data other than from crash dumps.
+- These errors are divided by area, which includes CPU, Memory, PCI, CXL and
+ others.
+
+Typical usage example (in drgn REPL):
+
+.. code-block:: python
+
+ >>> prog['hwerror_data']
+ (struct hwerror_info[HWERR_RECOV_MAX]){
+ {
+ .count = (int)844,
+ .timestamp = (time64_t)1752852018,
+ },
+ ...
+ }
+
+Enabling
+--------
+
+- This feature is enabled when CONFIG_VMCORE_INFO is set.
+
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index baff96b5cf0b..1833e6a0687e 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -97,6 +97,7 @@ Subsystem-specific APIs
gpio/index
hsi
hte/index
+ hw-recoverable-errors
i2c
iio/index
infiniband
diff --git a/Documentation/mm/index.rst b/Documentation/mm/index.rst
index ba6a8872849b..7aa2a8886908 100644
--- a/Documentation/mm/index.rst
+++ b/Documentation/mm/index.rst
@@ -48,6 +48,7 @@ documentation, or deleted if it has served its purpose.
hugetlbfs_reserv
ksm
memory-model
+ memfd_preservation
mmu_notifier
multigen_lru
numa
diff --git a/Documentation/mm/memfd_preservation.rst b/Documentation/mm/memfd_preservation.rst
new file mode 100644
index 000000000000..66e0fb6d5ef0
--- /dev/null
+++ b/Documentation/mm/memfd_preservation.rst
@@ -0,0 +1,23 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+==========================
+Memfd Preservation via LUO
+==========================
+
+.. kernel-doc:: mm/memfd_luo.c
+ :doc: Memfd Preservation via LUO
+
+Memfd Preservation ABI
+======================
+
+.. kernel-doc:: include/linux/kho/abi/memfd.h
+ :doc: DOC: memfd Live Update ABI
+
+.. kernel-doc:: include/linux/kho/abi/memfd.h
+ :internal:
+
+See Also
+========
+
+- :doc:`/core-api/liveupdate`
+- :doc:`/core-api/kho/concepts`
diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
index b8c73be4fb11..8a61ac4c1bf1 100644
--- a/Documentation/userspace-api/index.rst
+++ b/Documentation/userspace-api/index.rst
@@ -61,6 +61,7 @@ Everything else
:maxdepth: 1
ELF
+ liveupdate
netlink/index
sysfs-platform_profile
vduse
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 7c527a01d1cf..7232b3544cec 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -385,6 +385,8 @@ Code Seq# Include File Comments
0xB8 01-02 uapi/misc/mrvl_cn10k_dpi.h Marvell CN10K DPI driver
0xB8 all uapi/linux/mshv.h Microsoft Hyper-V /dev/mshv driver
<mailto:linux-hyperv@vger.kernel.org>
+0xBA 00-0F uapi/linux/liveupdate.h Pasha Tatashin
+ <mailto:pasha.tatashin@soleen.com>
0xC0 00-0F linux/usb/iowarrior.h
0xCA 00-0F uapi/misc/cxl.h Dead since 6.15
0xCA 10-2F uapi/misc/ocxl.h
diff --git a/Documentation/userspace-api/liveupdate.rst b/Documentation/userspace-api/liveupdate.rst
new file mode 100644
index 000000000000..41c0473e4f16
--- /dev/null
+++ b/Documentation/userspace-api/liveupdate.rst
@@ -0,0 +1,20 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================
+Live Update uAPI
+================
+:Author: Pasha Tatashin <pasha.tatashin@soleen.com>
+
+ioctl interface
+===============
+.. kernel-doc:: kernel/liveupdate/luo_core.c
+ :doc: LUO ioctl Interface
+
+ioctl uAPI
+===========
+.. kernel-doc:: include/uapi/linux/liveupdate.h
+
+See Also
+========
+
+- :doc:`Live Update Orchestrator </core-api/liveupdate>`
diff --git a/MAINTAINERS b/MAINTAINERS
index 78b32a60849a..9a3657a40000 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11659,7 +11659,7 @@ T: git git://linuxtv.org/media.git
F: drivers/media/i2c/hi556.c
HYNIX HI846 SENSOR DRIVER
-M: Martin Kepplinger <martin.kepplinger@puri.sm>
+M: Martin Kepplinger-Novakovic <martink@posteo.de>
L: linux-media@vger.kernel.org
S: Maintained
F: drivers/media/i2c/hi846.c
@@ -11744,6 +11744,7 @@ HUNG TASK DETECTOR
M: Andrew Morton <akpm@linux-foundation.org>
R: Lance Yang <lance.yang@linux.dev>
R: Masami Hiramatsu <mhiramat@kernel.org>
+R: Petr Mladek <pmladek@suse.com>
L: linux-kernel@vger.kernel.org
S: Maintained
F: include/linux/hung_task.h
@@ -13891,14 +13892,15 @@ F: kernel/kexec*
KEXEC HANDOVER (KHO)
M: Alexander Graf <graf@amazon.com>
M: Mike Rapoport <rppt@kernel.org>
-M: Changyuan Lyu <changyuanl@google.com>
+M: Pasha Tatashin <pasha.tatashin@soleen.com>
+R: Pratyush Yadav <pratyush@kernel.org>
L: kexec@lists.infradead.org
L: linux-mm@kvack.org
S: Maintained
F: Documentation/admin-guide/mm/kho.rst
F: Documentation/core-api/kho/*
F: include/linux/kexec_handover.h
-F: kernel/kexec_handover.c
+F: kernel/liveupdate/kexec_handover*
F: lib/test_kho.c
F: tools/testing/selftests/kho/
@@ -14567,6 +14569,22 @@ F: samples/livepatch/
F: scripts/livepatch/
F: tools/testing/selftests/livepatch/
+LIVE UPDATE
+M: Pasha Tatashin <pasha.tatashin@soleen.com>
+M: Mike Rapoport <rppt@kernel.org>
+R: Pratyush Yadav <pratyush@kernel.org>
+L: linux-kernel@vger.kernel.org
+S: Maintained
+F: Documentation/core-api/liveupdate.rst
+F: Documentation/mm/memfd_preservation.rst
+F: Documentation/userspace-api/liveupdate.rst
+F: include/linux/liveupdate.h
+F: include/linux/liveupdate/
+F: include/uapi/linux/liveupdate.h
+F: kernel/liveupdate/
+F: mm/memfd_luo.c
+F: tools/testing/selftests/liveupdate/
+
LLC (802.2)
L: netdev@vger.kernel.org
S: Odd fixes
@@ -15668,7 +15686,7 @@ F: include/media/imx.h
MEDIA DRIVERS FOR FREESCALE IMX7/8
M: Rui Miguel Silva <rmfrfs@gmail.com>
M: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
-M: Martin Kepplinger <martin.kepplinger@puri.sm>
+M: Martin Kepplinger-Novakovic <martink@posteo.de>
R: Purism Kernel Team <kernel@puri.sm>
R: Frank Li <Frank.Li@nxp.com>
L: imx@lists.linux.dev
@@ -18420,10 +18438,11 @@ F: net/sunrpc/
NILFS2 FILESYSTEM
M: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+M: Viacheslav Dubeyko <slava@dubeyko.com>
L: linux-nilfs@vger.kernel.org
-S: Supported
+S: Maintained
W: https://nilfs.sourceforge.io/
-T: git https://github.com/konis/nilfs2.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/vdubeyko/nilfs2.git
F: Documentation/filesystems/nilfs2.rst
F: fs/nilfs2/
F: include/trace/events/nilfs2.h
@@ -25103,7 +25122,6 @@ F: drivers/regulator/sy8106a-regulator.c
SYNC FILE FRAMEWORK
M: Sumit Semwal <sumit.semwal@linaro.org>
-R: Gustavo Padovan <gustavo@padovan.org>
L: linux-media@vger.kernel.org
L: dri-devel@lists.freedesktop.org
S: Maintained
@@ -26308,7 +26326,7 @@ M: Jarkko Sakkinen <jarkko@kernel.org>
R: Jason Gunthorpe <jgg@ziepe.ca>
L: linux-integrity@vger.kernel.org
S: Maintained
-W: https://codeberg.org/jarkko/linux-tpmdd-test
+W: https://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-tpmdd-test.git/about/
Q: https://patchwork.kernel.org/project/linux-integrity/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-tpmdd.git
F: Documentation/devicetree/bindings/tpm/
diff --git a/arch/Kconfig b/arch/Kconfig
index 61130b88964b..31220f512b16 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -232,17 +232,14 @@ config HAVE_EFFICIENT_UNALIGNED_ACCESS
config ARCH_USE_BUILTIN_BSWAP
bool
help
- Modern versions of GCC (since 4.4) have builtin functions
- for handling byte-swapping. Using these, instead of the old
- inline assembler that the architecture code provides in the
- __arch_bswapXX() macros, allows the compiler to see what's
- happening and offers more opportunity for optimisation. In
- particular, the compiler will be able to combine the byteswap
- with a nearby load or store and use load-and-swap or
- store-and-swap instructions if the architecture has them. It
- should almost *never* result in code which is worse than the
- hand-coded assembler in <asm/swab.h>. But just in case it
- does, the use of the builtins is optional.
+ GCC and Clang have builtin functions for handling byte-swapping.
+ Using these allows the compiler to see what's happening and
+ offers more opportunity for optimisation. In particular, the
+ compiler will be able to combine the byteswap with a nearby load
+ or store and use load-and-swap or store-and-swap instructions if
+ the architecture has them. It should almost *never* result in code
+ which is worse than the hand-coded assembler in <asm/swab.h>.
+ But just in case it does, the use of the builtins is optional.
Any architecture with load-and-swap or store-and-swap
instructions should set this. And it shouldn't hurt to set it
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4fb985b76e97..ff61891abe53 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1161,8 +1161,6 @@ config AEABI
disambiguate both ABIs and allow for backward compatibility support
(selected with CONFIG_OABI_COMPAT).
- To use this you need GCC version 4.0.0 or later.
-
config OABI_COMPAT
bool "Allow old ABI binaries to run with this kernel (EXPERIMENTAL)"
depends on AEABI && !THUMB2_KERNEL
diff --git a/arch/arm/configs/aspeed_g5_defconfig b/arch/arm/configs/aspeed_g5_defconfig
index 61cee1e7ebea..c3b0d5f06889 100644
--- a/arch/arm/configs/aspeed_g5_defconfig
+++ b/arch/arm/configs/aspeed_g5_defconfig
@@ -308,7 +308,7 @@ CONFIG_PANIC_ON_OOPS=y
CONFIG_PANIC_TIMEOUT=-1
CONFIG_SOFTLOCKUP_DETECTOR=y
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
-CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC=1
CONFIG_WQ_WATCHDOG=y
# CONFIG_SCHED_DEBUG is not set
CONFIG_FUNCTION_TRACER=y
diff --git a/arch/powerpc/include/asm/crash_reserve.h b/arch/powerpc/include/asm/crash_reserve.h
index 6467ce29b1fa..d1b570ddbf98 100644
--- a/arch/powerpc/include/asm/crash_reserve.h
+++ b/arch/powerpc/include/asm/crash_reserve.h
@@ -5,4 +5,12 @@
/* crash kernel regions are Page size agliged */
#define CRASH_ALIGN PAGE_SIZE
+#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
+static inline bool arch_add_crash_res_to_iomem(void)
+{
+ return false;
+}
+#define arch_add_crash_res_to_iomem arch_add_crash_res_to_iomem
+#endif
+
#endif /* _ASM_POWERPC_CRASH_RESERVE_H */
diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h
index 9931e4c7d73f..30fd06ede751 100644
--- a/arch/x86/include/asm/div64.h
+++ b/arch/x86/include/asm/div64.h
@@ -60,6 +60,12 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
}
#define div_u64_rem div_u64_rem
+/*
+ * gcc tends to zero extend 32bit values and do full 64bit maths.
+ * Define asm functions that avoid this.
+ * (clang generates better code for the C versions.)
+ */
+#ifndef __clang__
static inline u64 mul_u32_u32(u32 a, u32 b)
{
u32 high, low;
@@ -71,6 +77,19 @@ static inline u64 mul_u32_u32(u32 a, u32 b)
}
#define mul_u32_u32 mul_u32_u32
+static inline u64 add_u64_u32(u64 a, u32 b)
+{
+ u32 high = a >> 32, low = a;
+
+ asm ("addl %[b], %[low]; adcl $0, %[high]"
+ : [low] "+r" (low), [high] "+r" (high)
+ : [b] "rm" (b) );
+
+ return low | (u64)high << 32;
+}
+#define add_u64_u32 add_u64_u32
+#endif
+
/*
* __div64_32() is never called on x86, so prevent the
* generic definition from getting built.
@@ -84,21 +103,25 @@ static inline u64 mul_u32_u32(u32 a, u32 b)
* Will generate an #DE when the result doesn't fit u64, could fix with an
* __ex_table[] entry when it becomes an issue.
*/
-static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div)
+static inline u64 mul_u64_add_u64_div_u64(u64 rax, u64 mul, u64 add, u64 div)
{
- u64 q;
+ u64 rdx;
+
+ asm ("mulq %[mul]" : "+a" (rax), "=d" (rdx) : [mul] "rm" (mul));
+
+ if (!statically_true(!add))
+ asm ("addq %[add], %[lo]; adcq $0, %[hi]" :
+ [lo] "+r" (rax), [hi] "+r" (rdx) : [add] "irm" (add));
- asm ("mulq %2; divq %3" : "=a" (q)
- : "a" (a), "rm" (mul), "rm" (div)
- : "rdx");
+ asm ("divq %[div]" : "+a" (rax), "+d" (rdx) : [div] "rm" (div));
- return q;
+ return rax;
}
-#define mul_u64_u64_div_u64 mul_u64_u64_div_u64
+#define mul_u64_add_u64_div_u64 mul_u64_add_u64_div_u64
static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div)
{
- return mul_u64_u64_div_u64(a, mul, div);
+ return mul_u64_add_u64_div_u64(a, mul, 0, div);
}
#define mul_u64_u32_div mul_u64_u32_div
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 6297416647ed..34440021e8cf 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -45,6 +45,7 @@
#include <linux/task_work.h>
#include <linux/hardirq.h>
#include <linux/kexec.h>
+#include <linux/vmcore_info.h>
#include <asm/fred.h>
#include <asm/cpu_device_id.h>
@@ -1729,6 +1730,9 @@ noinstr void do_machine_check(struct pt_regs *regs)
}
out:
+ /* Given it didn't panic, mark it as recoverable */
+ hwerr_log_error_type(HWERR_RECOV_OTHERS);
+
instrumentation_end();
clear:
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 626908491d8f..0dc767392a6c 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -44,6 +44,7 @@
#include <linux/uuid.h>
#include <linux/ras.h>
#include <linux/task_work.h>
+#include <linux/vmcore_info.h>
#include <acpi/actbl1.h>
#include <acpi/ghes.h>
@@ -864,6 +865,40 @@ int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd)
}
EXPORT_SYMBOL_NS_GPL(cxl_cper_kfifo_get, "CXL");
+static void ghes_log_hwerr(int sev, guid_t *sec_type)
+{
+ if (sev != CPER_SEV_RECOVERABLE)
+ return;
+
+ if (guid_equal(sec_type, &CPER_SEC_PROC_ARM) ||
+ guid_equal(sec_type, &CPER_SEC_PROC_GENERIC) ||
+ guid_equal(sec_type, &CPER_SEC_PROC_IA)) {
+ hwerr_log_error_type(HWERR_RECOV_CPU);
+ return;
+ }
+
+ if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR) ||
+ guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID) ||
+ guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID) ||
+ guid_equal(sec_type, &CPER_SEC_CXL_MEM_MODULE_GUID)) {
+ hwerr_log_error_type(HWERR_RECOV_CXL);
+ return;
+ }
+
+ if (guid_equal(sec_type, &CPER_SEC_PCIE) ||
+ guid_equal(sec_type, &CPER_SEC_PCI_X_BUS)) {
+ hwerr_log_error_type(HWERR_RECOV_PCI);
+ return;
+ }
+
+ if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
+ hwerr_log_error_type(HWERR_RECOV_MEMORY);
+ return;
+ }
+
+ hwerr_log_error_type(HWERR_RECOV_OTHERS);
+}
+
static void ghes_do_proc(struct ghes *ghes,
const struct acpi_hest_generic_status *estatus)
{
@@ -885,6 +920,7 @@ static void ghes_do_proc(struct ghes *ghes,
if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
fru_text = gdata->fru_text;
+ ghes_log_hwerr(sev, sec_type);
if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
diff --git a/drivers/nvme/common/auth.c b/drivers/nvme/common/auth.c
index 1f51fbebd9fa..e07e7d4bf8b6 100644
--- a/drivers/nvme/common/auth.c
+++ b/drivers/nvme/common/auth.c
@@ -178,7 +178,7 @@ struct nvme_dhchap_key *nvme_auth_extract_key(unsigned char *secret,
if (!key)
return ERR_PTR(-ENOMEM);
- key_len = base64_decode(secret, allocated_len, key->key);
+ key_len = base64_decode(secret, allocated_len, key->key, true, BASE64_STD);
if (key_len < 0) {
pr_debug("base64 key decoding error %d\n",
key_len);
@@ -663,7 +663,7 @@ int nvme_auth_generate_digest(u8 hmac_id, u8 *psk, size_t psk_len,
if (ret)
goto out_free_digest;
- ret = base64_encode(digest, digest_len, enc);
+ ret = base64_encode(digest, digest_len, enc, true, BASE64_STD);
if (ret < hmac_len) {
ret = -ENOKEY;
goto out_free_digest;
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 0b5ed4722ac3..e0bcaa896803 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -30,6 +30,7 @@
#include <linux/kfifo.h>
#include <linux/ratelimit.h>
#include <linux/slab.h>
+#include <linux/vmcore_info.h>
#include <acpi/apei.h>
#include <acpi/ghes.h>
#include <ras/ras_event.h>
@@ -765,6 +766,7 @@ static void pci_dev_aer_stats_incr(struct pci_dev *pdev,
break;
case AER_NONFATAL:
aer_info->dev_total_nonfatal_errs++;
+ hwerr_log_error_type(HWERR_RECOV_PCI);
counter = &aer_info->dev_nonfatal_errs[0];
max = AER_MAX_TYPEOF_UNCOR_ERRS;
break;
diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c
index 928746b92512..0ea4db650f85 100644
--- a/fs/ceph/crypto.c
+++ b/fs/ceph/crypto.c
@@ -15,59 +15,6 @@
#include "mds_client.h"
#include "crypto.h"
-/*
- * The base64url encoding used by fscrypt includes the '_' character, which may
- * cause problems in snapshot names (which can not start with '_'). Thus, we
- * used the base64 encoding defined for IMAP mailbox names (RFC 3501) instead,
- * which replaces '-' and '_' by '+' and ','.
- */
-static const char base64_table[65] =
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
-
-int ceph_base64_encode(const u8 *src, int srclen, char *dst)
-{
- u32 ac = 0;
- int bits = 0;
- int i;
- char *cp = dst;
-
- for (i = 0; i < srclen; i++) {
- ac = (ac << 8) | src[i];
- bits += 8;
- do {
- bits -= 6;
- *cp++ = base64_table[(ac >> bits) & 0x3f];
- } while (bits >= 6);
- }
- if (bits)
- *cp++ = base64_table[(ac << (6 - bits)) & 0x3f];
- return cp - dst;
-}
-
-int ceph_base64_decode(const char *src, int srclen, u8 *dst)
-{
- u32 ac = 0;
- int bits = 0;
- int i;
- u8 *bp = dst;
-
- for (i = 0; i < srclen; i++) {
- const char *p = strchr(base64_table, src[i]);
-
- if (p == NULL || src[i] == 0)
- return -1;
- ac = (ac << 6) | (p - base64_table);
- bits += 6;
- if (bits >= 8) {
- bits -= 8;
- *bp++ = (u8)(ac >> bits);
- }
- }
- if (ac & ((1 << bits) - 1))
- return -1;
- return bp - dst;
-}
-
static int ceph_crypt_get_context(struct inode *inode, void *ctx, size_t len)
{
struct ceph_inode_info *ci = ceph_inode(inode);
@@ -318,7 +265,7 @@ int ceph_encode_encrypted_dname(struct inode *parent, char *buf, int elen)
}
/* base64 encode the encrypted name */
- elen = ceph_base64_encode(cryptbuf, len, p);
+ elen = base64_encode(cryptbuf, len, p, false, BASE64_IMAP);
doutc(cl, "base64-encoded ciphertext name = %.*s\n", elen, p);
/* To understand the 240 limit, see CEPH_NOHASH_NAME_MAX comments */
@@ -412,7 +359,8 @@ int ceph_fname_to_usr(const struct ceph_fname *fname, struct fscrypt_str *tname,
tname = &_tname;
}
- declen = ceph_base64_decode(name, name_len, tname->name);
+ declen = base64_decode(name, name_len,
+ tname->name, false, BASE64_IMAP);
if (declen <= 0) {
ret = -EIO;
goto out;
@@ -426,7 +374,7 @@ int ceph_fname_to_usr(const struct ceph_fname *fname, struct fscrypt_str *tname,
ret = fscrypt_fname_disk_to_usr(dir, 0, 0, &iname, oname);
if (!ret && (dir != fname->dir)) {
- char tmp_buf[CEPH_BASE64_CHARS(NAME_MAX)];
+ char tmp_buf[BASE64_CHARS(NAME_MAX)];
name_len = snprintf(tmp_buf, sizeof(tmp_buf), "_%.*s_%ld",
oname->len, oname->name, dir->i_ino);
diff --git a/fs/ceph/crypto.h b/fs/ceph/crypto.h
index 23612b2e9837..b748e2060bc9 100644
--- a/fs/ceph/crypto.h
+++ b/fs/ceph/crypto.h
@@ -8,6 +8,7 @@
#include <crypto/sha2.h>
#include <linux/fscrypt.h>
+#include <linux/base64.h>
#define CEPH_FSCRYPT_BLOCK_SHIFT 12
#define CEPH_FSCRYPT_BLOCK_SIZE (_AC(1, UL) << CEPH_FSCRYPT_BLOCK_SHIFT)
@@ -89,11 +90,6 @@ static inline u32 ceph_fscrypt_auth_len(struct ceph_fscrypt_auth *fa)
*/
#define CEPH_NOHASH_NAME_MAX (180 - SHA256_DIGEST_SIZE)
-#define CEPH_BASE64_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3)
-
-int ceph_base64_encode(const u8 *src, int srclen, char *dst);
-int ceph_base64_decode(const char *src, int srclen, u8 *dst);
-
void ceph_fscrypt_set_ops(struct super_block *sb);
void ceph_fscrypt_free_dummy_policy(struct ceph_fs_client *fsc);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index bf50c6e7a029..86d7aa594ea9 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -998,13 +998,14 @@ static int prep_encrypted_symlink_target(struct ceph_mds_request *req,
if (err)
goto out;
- req->r_path2 = kmalloc(CEPH_BASE64_CHARS(osd_link.len) + 1, GFP_KERNEL);
+ req->r_path2 = kmalloc(BASE64_CHARS(osd_link.len) + 1, GFP_KERNEL);
if (!req->r_path2) {
err = -ENOMEM;
goto out;
}
- len = ceph_base64_encode(osd_link.name, osd_link.len, req->r_path2);
+ len = base64_encode(osd_link.name, osd_link.len,
+ req->r_path2, false, BASE64_IMAP);
req->r_path2[len] = '\0';
out:
fscrypt_fname_free_buffer(&osd_link);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index a596cb53f1ac..2966f88310e3 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -947,7 +947,7 @@ static int decode_encrypted_symlink(struct ceph_mds_client *mdsc,
if (!sym)
return -ENOMEM;
- declen = ceph_base64_decode(encsym, enclen, sym);
+ declen = base64_decode(encsym, enclen, sym, false, BASE64_IMAP);
if (declen < 0) {
pr_err_client(cl,
"can't decode symlink (%d). Content: %.*s\n",
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index 8e4c213d418b..a9a4432d12ba 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -16,6 +16,7 @@
#include <linux/export.h>
#include <linux/namei.h>
#include <linux/scatterlist.h>
+#include <linux/base64.h>
#include "fscrypt_private.h"
@@ -71,7 +72,7 @@ struct fscrypt_nokey_name {
/* Encoded size of max-size no-key name */
#define FSCRYPT_NOKEY_NAME_MAX_ENCODED \
- FSCRYPT_BASE64URL_CHARS(FSCRYPT_NOKEY_NAME_MAX)
+ BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX)
static inline bool fscrypt_is_dot_dotdot(const struct qstr *str)
{
@@ -162,84 +163,6 @@ static int fname_decrypt(const struct inode *inode,
return 0;
}
-static const char base64url_table[65] =
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
-
-#define FSCRYPT_BASE64URL_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3)
-
-/**
- * fscrypt_base64url_encode() - base64url-encode some binary data
- * @src: the binary data to encode
- * @srclen: the length of @src in bytes
- * @dst: (output) the base64url-encoded string. Not NUL-terminated.
- *
- * Encodes data using base64url encoding, i.e. the "Base 64 Encoding with URL
- * and Filename Safe Alphabet" specified by RFC 4648. '='-padding isn't used,
- * as it's unneeded and not required by the RFC. base64url is used instead of
- * base64 to avoid the '/' character, which isn't allowed in filenames.
- *
- * Return: the length of the resulting base64url-encoded string in bytes.
- * This will be equal to FSCRYPT_BASE64URL_CHARS(srclen).
- */
-static int fscrypt_base64url_encode(const u8 *src, int srclen, char *dst)
-{
- u32 ac = 0;
- int bits = 0;
- int i;
- char *cp = dst;
-
- for (i = 0; i < srclen; i++) {
- ac = (ac << 8) | src[i];
- bits += 8;
- do {
- bits -= 6;
- *cp++ = base64url_table[(ac >> bits) & 0x3f];
- } while (bits >= 6);
- }
- if (bits)
- *cp++ = base64url_table[(ac << (6 - bits)) & 0x3f];
- return cp - dst;
-}
-
-/**
- * fscrypt_base64url_decode() - base64url-decode a string
- * @src: the string to decode. Doesn't need to be NUL-terminated.
- * @srclen: the length of @src in bytes
- * @dst: (output) the decoded binary data
- *
- * Decodes a string using base64url encoding, i.e. the "Base 64 Encoding with
- * URL and Filename Safe Alphabet" specified by RFC 4648. '='-padding isn't
- * accepted, nor are non-encoding characters such as whitespace.
- *
- * This implementation hasn't been optimized for performance.
- *
- * Return: the length of the resulting decoded binary data in bytes,
- * or -1 if the string isn't a valid base64url string.
- */
-static int fscrypt_base64url_decode(const char *src, int srclen, u8 *dst)
-{
- u32 ac = 0;
- int bits = 0;
- int i;
- u8 *bp = dst;
-
- for (i = 0; i < srclen; i++) {
- const char *p = strchr(base64url_table, src[i]);
-
- if (p == NULL || src[i] == 0)
- return -1;
- ac = (ac << 6) | (p - base64url_table);
- bits += 6;
- if (bits >= 8) {
- bits -= 8;
- *bp++ = (u8)(ac >> bits);
- }
- }
- if (ac & ((1 << bits) - 1))
- return -1;
- return bp - dst;
-}
-
bool __fscrypt_fname_encrypted_size(const union fscrypt_policy *policy,
u32 orig_len, u32 max_len,
u32 *encrypted_len_ret)
@@ -387,8 +310,8 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode,
nokey_name.sha256);
size = FSCRYPT_NOKEY_NAME_MAX;
}
- oname->len = fscrypt_base64url_encode((const u8 *)&nokey_name, size,
- oname->name);
+ oname->len = base64_encode((const u8 *)&nokey_name, size,
+ oname->name, false, BASE64_URLSAFE);
return 0;
}
EXPORT_SYMBOL(fscrypt_fname_disk_to_usr);
@@ -467,8 +390,8 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
if (fname->crypto_buf.name == NULL)
return -ENOMEM;
- ret = fscrypt_base64url_decode(iname->name, iname->len,
- fname->crypto_buf.name);
+ ret = base64_decode(iname->name, iname->len,
+ fname->crypto_buf.name, false, BASE64_URLSAFE);
if (ret < (int)offsetof(struct fscrypt_nokey_name, bytes[1]) ||
(ret > offsetof(struct fscrypt_nokey_name, sha256) &&
ret != FSCRYPT_NOKEY_NAME_MAX)) {
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 3288c3b4be9e..e17b8da66491 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -49,7 +49,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs,
void *, size_t, size_t))
{
void *buf;
- void __user *base = (void __user *)(unsigned long)argv->v_base;
+ void __user *base = u64_to_user_ptr(argv->v_base);
size_t maxmembs, total, n;
ssize_t nr;
int ret, i;
@@ -836,7 +836,6 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
sizeof(struct nilfs_bdesc),
sizeof(__u64),
};
- void __user *base;
void *kbufs[5];
struct the_nilfs *nilfs;
size_t len, nsegs;
@@ -863,7 +862,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
* use kmalloc() for its buffer because the memory used for the
* segment numbers is small enough.
*/
- kbufs[4] = memdup_array_user((void __user *)(unsigned long)argv[4].v_base,
+ kbufs[4] = memdup_array_user(u64_to_user_ptr(argv[4].v_base),
nsegs, sizeof(__u64));
if (IS_ERR(kbufs[4])) {
ret = PTR_ERR(kbufs[4]);
@@ -883,20 +882,14 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
goto out_free;
len = argv[n].v_size * argv[n].v_nmembs;
- base = (void __user *)(unsigned long)argv[n].v_base;
if (len == 0) {
kbufs[n] = NULL;
continue;
}
- kbufs[n] = vmalloc(len);
- if (!kbufs[n]) {
- ret = -ENOMEM;
- goto out_free;
- }
- if (copy_from_user(kbufs[n], base, len)) {
- ret = -EFAULT;
- vfree(kbufs[n]);
+ kbufs[n] = vmemdup_user(u64_to_user_ptr(argv[n].v_base), len);
+ if (IS_ERR(kbufs[n])) {
+ ret = PTR_ERR(kbufs[n]);
goto out_free;
}
}
@@ -928,7 +921,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
out_free:
while (--n >= 0)
- vfree(kbufs[n]);
+ kvfree(kbufs[n]);
kfree(kbufs[4]);
out:
mnt_drop_write_file(filp);
@@ -1181,7 +1174,6 @@ static int nilfs_ioctl_set_suinfo(struct inode *inode, struct file *filp,
struct nilfs_transaction_info ti;
struct nilfs_argv argv;
size_t len;
- void __user *base;
void *kbuf;
int ret;
@@ -1212,18 +1204,12 @@ static int nilfs_ioctl_set_suinfo(struct inode *inode, struct file *filp,
goto out;
}
- base = (void __user *)(unsigned long)argv.v_base;
- kbuf = vmalloc(len);
- if (!kbuf) {
- ret = -ENOMEM;
+ kbuf = vmemdup_user(u64_to_user_ptr(argv.v_base), len);
+ if (IS_ERR(kbuf)) {
+ ret = PTR_ERR(kbuf);
goto out;
}
- if (copy_from_user(kbuf, base, len)) {
- ret = -EFAULT;
- goto out_free;
- }
-
nilfs_transaction_begin(inode->i_sb, &ti, 0);
ret = nilfs_sufile_set_suinfo(nilfs->ns_sufile, kbuf, argv.v_size,
argv.v_nmembs);
@@ -1232,8 +1218,7 @@ static int nilfs_ioctl_set_suinfo(struct inode *inode, struct file *filp,
else
nilfs_transaction_commit(inode->i_sb); /* never fails */
-out_free:
- vfree(kbuf);
+ kvfree(kbuf);
out:
mnt_drop_write_file(filp);
return ret;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 8c9c4825f984..2785ff245e79 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -302,8 +302,21 @@ static int ocfs2_check_dir_entry(struct inode *dir,
unsigned long offset)
{
const char *error_msg = NULL;
- const int rlen = le16_to_cpu(de->rec_len);
- const unsigned long next_offset = ((char *) de - buf) + rlen;
+ unsigned long next_offset;
+ int rlen;
+
+ if (offset > size - OCFS2_DIR_REC_LEN(1)) {
+ /* Dirent is (maybe partially) beyond the buffer
+ * boundaries so touching 'de' members is unsafe.
+ */
+ mlog(ML_ERROR, "directory entry (#%llu: offset=%lu) "
+ "too close to end or out-of-bounds",
+ (unsigned long long)OCFS2_I(dir)->ip_blkno, offset);
+ return 0;
+ }
+
+ rlen = le16_to_cpu(de->rec_len);
+ next_offset = ((char *) de - buf) + rlen;
if (unlikely(rlen < OCFS2_DIR_REC_LEN(1)))
error_msg = "rec_len is smaller than minimal";
@@ -778,6 +791,14 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode,
struct ocfs2_extent_block *eb;
struct ocfs2_extent_rec *rec = NULL;
+ if (le16_to_cpu(el->l_count) !=
+ ocfs2_extent_recs_per_dx_root(inode->i_sb)) {
+ ret = ocfs2_error(inode->i_sb,
+ "Inode %lu has invalid extent list length %u\n",
+ inode->i_ino, le16_to_cpu(el->l_count));
+ goto out;
+ }
+
if (el->l_tree_depth) {
ret = ocfs2_find_leaf(INODE_CACHE(inode), el, major_hash,
&eb_bh);
@@ -3423,6 +3444,14 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
offset += le16_to_cpu(de->rec_len);
}
+ if (!last_de) {
+ ret = ocfs2_error(sb, "Directory entry (#%llu: size=%lld) "
+ "is unexpectedly short",
+ (unsigned long long)OCFS2_I(dir)->ip_blkno,
+ i_size_read(dir));
+ goto out;
+ }
+
/*
* We're going to require expansion of the directory - figure
* out how many blocks we'll need so that a place for the
@@ -4104,10 +4133,15 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir,
}
dx_root->dr_flags &= ~OCFS2_DX_FLAG_INLINE;
- memset(&dx_root->dr_list, 0, osb->sb->s_blocksize -
- offsetof(struct ocfs2_dx_root_block, dr_list));
+
+ dx_root->dr_list.l_tree_depth = 0;
dx_root->dr_list.l_count =
cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));
+ dx_root->dr_list.l_next_free_rec = 0;
+ memset(&dx_root->dr_list.l_recs, 0,
+ osb->sb->s_blocksize -
+ (offsetof(struct ocfs2_dx_root_block, dr_list) +
+ offsetof(struct ocfs2_extent_list, l_recs)));
/* This should never fail considering we start with an empty
* dx_root. */
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 78f81950c9ee..8340525e5589 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -201,13 +201,15 @@ bail:
static int ocfs2_dinode_has_extents(struct ocfs2_dinode *di)
{
/* inodes flagged with other stuff in id2 */
- if (di->i_flags & (OCFS2_SUPER_BLOCK_FL | OCFS2_LOCAL_ALLOC_FL |
- OCFS2_CHAIN_FL | OCFS2_DEALLOC_FL))
+ if (le32_to_cpu(di->i_flags) &
+ (OCFS2_SUPER_BLOCK_FL | OCFS2_LOCAL_ALLOC_FL | OCFS2_CHAIN_FL |
+ OCFS2_DEALLOC_FL))
return 0;
/* i_flags doesn't indicate when id2 is a fast symlink */
- if (S_ISLNK(di->i_mode) && di->i_size && di->i_clusters == 0)
+ if (S_ISLNK(le16_to_cpu(di->i_mode)) && le64_to_cpu(di->i_size) &&
+ !le32_to_cpu(di->i_clusters))
return 0;
- if (di->i_dyn_features & OCFS2_INLINE_DATA_FL)
+ if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)
return 0;
return 1;
@@ -1460,7 +1462,7 @@ int ocfs2_validate_inode_block(struct super_block *sb,
goto bail;
}
- if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
+ if (!(le32_to_cpu(di->i_flags) & OCFS2_VALID_FL)) {
rc = ocfs2_error(sb,
"Invalid dinode #%llu: OCFS2_VALID_FL not set\n",
(unsigned long long)bh->b_blocknr);
@@ -1484,6 +1486,41 @@ int ocfs2_validate_inode_block(struct super_block *sb,
goto bail;
}
+ if ((le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) &&
+ le32_to_cpu(di->i_clusters)) {
+ rc = ocfs2_error(sb, "Invalid dinode %llu: %u clusters\n",
+ (unsigned long long)bh->b_blocknr,
+ le32_to_cpu(di->i_clusters));
+ goto bail;
+ }
+
+ if (le32_to_cpu(di->i_flags) & OCFS2_CHAIN_FL) {
+ struct ocfs2_chain_list *cl = &di->id2.i_chain;
+ u16 bpc = 1 << (OCFS2_SB(sb)->s_clustersize_bits -
+ sb->s_blocksize_bits);
+
+ if (le16_to_cpu(cl->cl_count) != ocfs2_chain_recs_per_inode(sb)) {
+ rc = ocfs2_error(sb, "Invalid dinode %llu: chain list count %u\n",
+ (unsigned long long)bh->b_blocknr,
+ le16_to_cpu(cl->cl_count));
+ goto bail;
+ }
+ if (le16_to_cpu(cl->cl_next_free_rec) > le16_to_cpu(cl->cl_count)) {
+ rc = ocfs2_error(sb, "Invalid dinode %llu: chain list index %u\n",
+ (unsigned long long)bh->b_blocknr,
+ le16_to_cpu(cl->cl_next_free_rec));
+ goto bail;
+ }
+ if (OCFS2_SB(sb)->bitmap_blkno &&
+ OCFS2_SB(sb)->bitmap_blkno != le64_to_cpu(di->i_blkno) &&
+ le16_to_cpu(cl->cl_bpc) != bpc) {
+ rc = ocfs2_error(sb, "Invalid dinode %llu: bits per cluster %u\n",
+ (unsigned long long)bh->b_blocknr,
+ le16_to_cpu(cl->cl_bpc));
+ goto bail;
+ }
+ }
+
rc = 0;
bail:
@@ -1671,6 +1708,8 @@ int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
rc = ocfs2_read_blocks(INODE_CACHE(inode), OCFS2_I(inode)->ip_blkno,
1, &tmp, flags, ocfs2_validate_inode_block);
+ if (rc < 0)
+ make_bad_inode(inode);
/* If ocfs2_read_blocks() got us a new bh, pass it up. */
if (!rc && !*bh)
*bh = tmp;
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 10923bf7c8b8..ce978a2497d9 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -98,7 +98,13 @@ static int __ocfs2_move_extent(handle_t *handle,
rec = &el->l_recs[index];
- BUG_ON(ext_flags != rec->e_flags);
+ if (ext_flags != rec->e_flags) {
+ ret = ocfs2_error(inode->i_sb,
+ "Inode %llu has corrupted extent %d with flags 0x%x at cpos %u\n",
+ (unsigned long long)ino, index, rec->e_flags, cpos);
+ goto out;
+ }
+
/*
* after moving/defraging to new location, the extent is not going
* to be refcounted anymore.
@@ -1036,6 +1042,12 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
if (range.me_threshold > i_size_read(inode))
range.me_threshold = i_size_read(inode);
+ if (range.me_flags & ~(OCFS2_MOVE_EXT_FL_AUTO_DEFRAG |
+ OCFS2_MOVE_EXT_FL_PART_DEFRAG)) {
+ status = -EINVAL;
+ goto out_free;
+ }
+
if (range.me_flags & OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) {
context->auto_defrag = 1;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index ae0e44e5f2ad..f7763da5c4a2 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -468,7 +468,8 @@ struct ocfs2_extent_list {
__le16 l_reserved1;
__le64 l_reserved2; /* Pad to
sizeof(ocfs2_extent_rec) */
-/*10*/ struct ocfs2_extent_rec l_recs[]; /* Extent records */
+ /* Extent records */
+/*10*/ struct ocfs2_extent_rec l_recs[] __counted_by_le(l_count);
};
/*
@@ -482,7 +483,8 @@ struct ocfs2_chain_list {
__le16 cl_count; /* Total chains in this list */
__le16 cl_next_free_rec; /* Next unused chain slot */
__le64 cl_reserved1;
-/*10*/ struct ocfs2_chain_rec cl_recs[]; /* Chain records */
+ /* Chain records */
+/*10*/ struct ocfs2_chain_rec cl_recs[] __counted_by_le(cl_count);
};
/*
@@ -494,7 +496,8 @@ struct ocfs2_truncate_log {
/*00*/ __le16 tl_count; /* Total records in this log */
__le16 tl_used; /* Number of records in use */
__le32 tl_reserved1;
-/*08*/ struct ocfs2_truncate_rec tl_recs[]; /* Truncate records */
+ /* Truncate records */
+/*08*/ struct ocfs2_truncate_rec tl_recs[] __counted_by_le(tl_count);
};
/*
@@ -796,9 +799,10 @@ struct ocfs2_dx_entry_list {
* possible in de_entries */
__le16 de_num_used; /* Current number of
* de_entries entries */
- struct ocfs2_dx_entry de_entries[]; /* Indexed dir entries
- * in a packed array of
- * length de_num_used */
+ /* Indexed dir entries in a packed
+ * array of length de_num_used.
+ */
+ struct ocfs2_dx_entry de_entries[] __counted_by_le(de_count);
};
#define OCFS2_DX_FLAG_INLINE 0x01
@@ -934,7 +938,8 @@ struct ocfs2_refcount_list {
__le16 rl_used; /* Current number of used records */
__le32 rl_reserved2;
__le64 rl_reserved1; /* Pad to sizeof(ocfs2_refcount_record) */
-/*10*/ struct ocfs2_refcount_rec rl_recs[]; /* Refcount records */
+ /* Refcount records */
+/*10*/ struct ocfs2_refcount_rec rl_recs[] __counted_by_le(rl_count);
};
@@ -1020,7 +1025,8 @@ struct ocfs2_xattr_header {
buckets. A block uses
xb_check and sets
this field to zero.) */
- struct ocfs2_xattr_entry xh_entries[]; /* xattr entry list. */
+ /* xattr entry list. */
+ struct ocfs2_xattr_entry xh_entries[] __counted_by_le(xh_count);
};
/*
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 267b50e8e42e..c92e0ea85bca 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -34,6 +34,7 @@
#include <linux/pagevec.h>
#include <linux/swap.h>
#include <linux/security.h>
+#include <linux/string.h>
#include <linux/fsnotify.h>
#include <linux/quotaops.h>
#include <linux/namei.h>
@@ -621,7 +622,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
/* Initialize ocfs2_refcount_block. */
rb = (struct ocfs2_refcount_block *)new_bh->b_data;
memset(rb, 0, inode->i_sb->s_blocksize);
- strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
+ strscpy(rb->rf_signature, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
@@ -1562,7 +1563,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle,
/* Initialize ocfs2_refcount_block. */
new_rb = (struct ocfs2_refcount_block *)new_bh->b_data;
memset(new_rb, 0, sb->s_blocksize);
- strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
+ strscpy(new_rb->rf_signature, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
new_rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index d70a20d29e3e..73c028f452ac 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -2908,7 +2908,7 @@ static int ocfs2_create_xattr_block(struct inode *inode,
/* Initialize ocfs2_xattr_block */
xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
memset(xblk, 0, inode->i_sb->s_blocksize);
- strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
+ strscpy(xblk->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE);
xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot);
xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc);
xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
diff --git a/fs/proc/page.c b/fs/proc/page.c
index fc64f23e05e5..f9b2c2c906cd 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -20,7 +20,6 @@
#define KPMSIZE sizeof(u64)
#define KPMMASK (KPMSIZE - 1)
-#define KPMBITS (KPMSIZE * BITS_PER_BYTE)
enum kpage_operation {
KPAGE_FLAGS,
diff --git a/include/linux/base64.h b/include/linux/base64.h
index 660d4cb1ef31..a2c6c9222da3 100644
--- a/include/linux/base64.h
+++ b/include/linux/base64.h
@@ -8,9 +8,15 @@
#include <linux/types.h>
+enum base64_variant {
+ BASE64_STD, /* RFC 4648 (standard) */
+ BASE64_URLSAFE, /* RFC 4648 (base64url) */
+ BASE64_IMAP, /* RFC 3501 */
+};
+
#define BASE64_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3)
-int base64_encode(const u8 *src, int len, char *dst);
-int base64_decode(const char *src, int len, u8 *dst);
+int base64_encode(const u8 *src, int len, char *dst, bool padding, enum base64_variant variant);
+int base64_decode(const char *src, int len, u8 *dst, bool padding, enum base64_variant variant);
#endif /* _LINUX_BASE64_H */
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index ab181d87d71d..ff71bebe56f5 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -273,12 +273,6 @@ static inline void *offset_to_ptr(const int *off)
#endif /* __ASSEMBLY__ */
-#ifdef CONFIG_64BIT
-#define ARCH_SEL(a,b) a
-#else
-#define ARCH_SEL(a,b) b
-#endif
-
/*
* Force the compiler to emit 'sym' as a symbol, so that we can reference
* it from inline assembler. Necessary in case 'sym' could be inlined
diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h
index 7b44b41d0a20..f0dc03d94ca2 100644
--- a/include/linux/crash_reserve.h
+++ b/include/linux/crash_reserve.h
@@ -32,6 +32,12 @@ int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
void __init reserve_crashkernel_cma(unsigned long long cma_size);
#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
+#ifndef arch_add_crash_res_to_iomem
+static inline bool arch_add_crash_res_to_iomem(void)
+{
+ return true;
+}
+#endif
#ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE
#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20)
#endif
diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index ff44ec346162..05743900a116 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -38,11 +38,12 @@ struct _ddebug {
#define _DPRINTK_FLAGS_INCL_LINENO (1<<3)
#define _DPRINTK_FLAGS_INCL_TID (1<<4)
#define _DPRINTK_FLAGS_INCL_SOURCENAME (1<<5)
+#define _DPRINTK_FLAGS_INCL_STACK (1<<6)
#define _DPRINTK_FLAGS_INCL_ANY \
(_DPRINTK_FLAGS_INCL_MODNAME | _DPRINTK_FLAGS_INCL_FUNCNAME |\
_DPRINTK_FLAGS_INCL_LINENO | _DPRINTK_FLAGS_INCL_TID |\
- _DPRINTK_FLAGS_INCL_SOURCENAME)
+ _DPRINTK_FLAGS_INCL_SOURCENAME | _DPRINTK_FLAGS_INCL_STACK)
#if defined DEBUG
#define _DPRINTK_FLAGS_DEFAULT _DPRINTK_FLAGS_PRINT
@@ -160,6 +161,12 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
const struct ib_device *ibdev,
const char *fmt, ...);
+#define __dynamic_dump_stack(desc) \
+{ \
+ if (desc.flags & _DPRINTK_FLAGS_INCL_STACK) \
+ dump_stack(); \
+}
+
#define DEFINE_DYNAMIC_DEBUG_METADATA_CLS(name, cls, fmt) \
static struct _ddebug __aligned(8) \
__section("__dyndbg") name = { \
@@ -220,8 +227,10 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
*/
#define __dynamic_func_call_cls(id, cls, fmt, func, ...) do { \
DEFINE_DYNAMIC_DEBUG_METADATA_CLS(id, cls, fmt); \
- if (DYNAMIC_DEBUG_BRANCH(id)) \
+ if (DYNAMIC_DEBUG_BRANCH(id)) { \
func(&id, ##__VA_ARGS__); \
+ __dynamic_dump_stack(id); \
+ } \
} while (0)
#define __dynamic_func_call(id, fmt, func, ...) \
__dynamic_func_call_cls(id, _DPRINTK_CLASS_DFLT, fmt, \
@@ -229,8 +238,10 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
#define __dynamic_func_call_cls_no_desc(id, cls, fmt, func, ...) do { \
DEFINE_DYNAMIC_DEBUG_METADATA_CLS(id, cls, fmt); \
- if (DYNAMIC_DEBUG_BRANCH(id)) \
+ if (DYNAMIC_DEBUG_BRANCH(id)) { \
func(__VA_ARGS__); \
+ __dynamic_dump_stack(id); \
+ } \
} while (0)
#define __dynamic_func_call_no_desc(id, fmt, func, ...) \
__dynamic_func_call_cls_no_desc(id, _DPRINTK_CLASS_DFLT, \
diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h
index 25042c1d8d54..5f7b9de97e8d 100644
--- a/include/linux/kexec_handover.h
+++ b/include/linux/kexec_handover.h
@@ -2,22 +2,16 @@
#ifndef LINUX_KEXEC_HANDOVER_H
#define LINUX_KEXEC_HANDOVER_H
-#include <linux/types.h>
+#include <linux/err.h>
#include <linux/errno.h>
+#include <linux/types.h>
struct kho_scratch {
phys_addr_t addr;
phys_addr_t size;
};
-/* KHO Notifier index */
-enum kho_event {
- KEXEC_KHO_FINALIZE = 0,
- KEXEC_KHO_ABORT = 1,
-};
-
struct folio;
-struct notifier_block;
struct page;
#define DECLARE_KHOSER_PTR(name, type) \
@@ -37,8 +31,6 @@ struct page;
(typeof((s).ptr))((s).phys ? phys_to_virt((s).phys) : NULL); \
})
-struct kho_serialization;
-
struct kho_vmalloc_chunk;
struct kho_vmalloc {
DECLARE_KHOSER_PTR(first, struct kho_vmalloc_chunk *);
@@ -52,17 +44,21 @@ bool kho_is_enabled(void);
bool is_kho_boot(void);
int kho_preserve_folio(struct folio *folio);
+void kho_unpreserve_folio(struct folio *folio);
int kho_preserve_pages(struct page *page, unsigned int nr_pages);
+void kho_unpreserve_pages(struct page *page, unsigned int nr_pages);
int kho_preserve_vmalloc(void *ptr, struct kho_vmalloc *preservation);
+void kho_unpreserve_vmalloc(struct kho_vmalloc *preservation);
+void *kho_alloc_preserve(size_t size);
+void kho_unpreserve_free(void *mem);
+void kho_restore_free(void *mem);
struct folio *kho_restore_folio(phys_addr_t phys);
struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages);
void *kho_restore_vmalloc(const struct kho_vmalloc *preservation);
-int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt);
+int kho_add_subtree(const char *name, void *fdt);
+void kho_remove_subtree(void *fdt);
int kho_retrieve_subtree(const char *name, phys_addr_t *phys);
-int register_kho_notifier(struct notifier_block *nb);
-int unregister_kho_notifier(struct notifier_block *nb);
-
void kho_memory_init(void);
void kho_populate(phys_addr_t fdt_phys, u64 fdt_len, phys_addr_t scratch_phys,
@@ -83,17 +79,31 @@ static inline int kho_preserve_folio(struct folio *folio)
return -EOPNOTSUPP;
}
+static inline void kho_unpreserve_folio(struct folio *folio) { }
+
static inline int kho_preserve_pages(struct page *page, unsigned int nr_pages)
{
return -EOPNOTSUPP;
}
+static inline void kho_unpreserve_pages(struct page *page, unsigned int nr_pages) { }
+
static inline int kho_preserve_vmalloc(void *ptr,
struct kho_vmalloc *preservation)
{
return -EOPNOTSUPP;
}
+static inline void kho_unpreserve_vmalloc(struct kho_vmalloc *preservation) { }
+
+static inline void *kho_alloc_preserve(size_t size)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void kho_unpreserve_free(void *mem) { }
+static inline void kho_restore_free(void *mem) { }
+
static inline struct folio *kho_restore_folio(phys_addr_t phys)
{
return NULL;
@@ -110,30 +120,19 @@ static inline void *kho_restore_vmalloc(const struct kho_vmalloc *preservation)
return NULL;
}
-static inline int kho_add_subtree(struct kho_serialization *ser,
- const char *name, void *fdt)
-{
- return -EOPNOTSUPP;
-}
-
-static inline int kho_retrieve_subtree(const char *name, phys_addr_t *phys)
+static inline int kho_add_subtree(const char *name, void *fdt)
{
return -EOPNOTSUPP;
}
-static inline int register_kho_notifier(struct notifier_block *nb)
-{
- return -EOPNOTSUPP;
-}
+static inline void kho_remove_subtree(void *fdt) { }
-static inline int unregister_kho_notifier(struct notifier_block *nb)
+static inline int kho_retrieve_subtree(const char *name, phys_addr_t *phys)
{
return -EOPNOTSUPP;
}
-static inline void kho_memory_init(void)
-{
-}
+static inline void kho_memory_init(void) { }
static inline void kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
phys_addr_t scratch_phys, u64 scratch_len)
diff --git a/include/linux/kho/abi/luo.h b/include/linux/kho/abi/luo.h
new file mode 100644
index 000000000000..bb099c92e469
--- /dev/null
+++ b/include/linux/kho/abi/luo.h
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+
+/**
+ * DOC: Live Update Orchestrator ABI
+ *
+ * This header defines the stable Application Binary Interface used by the
+ * Live Update Orchestrator to pass state from a pre-update kernel to a
+ * post-update kernel. The ABI is built upon the Kexec HandOver framework
+ * and uses a Flattened Device Tree to describe the preserved data.
+ *
+ * This interface is a contract. Any modification to the FDT structure, node
+ * properties, compatible strings, or the layout of the `__packed` serialization
+ * structures defined here constitutes a breaking change. Such changes require
+ * incrementing the version number in the relevant `_COMPATIBLE` string to
+ * prevent a new kernel from misinterpreting data from an old kernel.
+ *
+ * Changes are allowed provided the compatibility version is incremented;
+ * however, backward/forward compatibility is only guaranteed for kernels
+ * supporting the same ABI version.
+ *
+ * FDT Structure Overview:
+ * The entire LUO state is encapsulated within a single KHO entry named "LUO".
+ * This entry contains an FDT with the following layout:
+ *
+ * .. code-block:: none
+ *
+ * / {
+ * compatible = "luo-v1";
+ * liveupdate-number = <...>;
+ *
+ * luo-session {
+ * compatible = "luo-session-v1";
+ * luo-session-header = <phys_addr_of_session_header_ser>;
+ * };
+ * };
+ *
+ * Main LUO Node (/):
+ *
+ * - compatible: "luo-v1"
+ * Identifies the overall LUO ABI version.
+ * - liveupdate-number: u64
+ * A counter tracking the number of successful live updates performed.
+ *
+ * Session Node (luo-session):
+ * This node describes all preserved user-space sessions.
+ *
+ * - compatible: "luo-session-v1"
+ * Identifies the session ABI version.
+ * - luo-session-header: u64
+ * The physical address of a `struct luo_session_header_ser`. This structure
+ * is the header for a contiguous block of memory containing an array of
+ * `struct luo_session_ser`, one for each preserved session.
+ *
+ * Serialization Structures:
+ * The FDT properties point to memory regions containing arrays of simple,
+ * `__packed` structures. These structures contain the actual preserved state.
+ *
+ * - struct luo_session_header_ser:
+ * Header for the session array. Contains the total page count of the
+ * preserved memory block and the number of `struct luo_session_ser`
+ * entries that follow.
+ *
+ * - struct luo_session_ser:
+ * Metadata for a single session, including its name and a physical pointer
+ * to another preserved memory block containing an array of
+ * `struct luo_file_ser` for all files in that session.
+ *
+ * - struct luo_file_ser:
+ * Metadata for a single preserved file. Contains the `compatible` string to
+ * find the correct handler in the new kernel, a user-provided `token` for
+ * identification, and an opaque `data` handle for the handler to use.
+ */
+
+#ifndef _LINUX_KHO_ABI_LUO_H
+#define _LINUX_KHO_ABI_LUO_H
+
+#include <uapi/linux/liveupdate.h>
+
+/*
+ * The LUO FDT hooks all LUO state for sessions, fds, etc.
+ * In the root it also carries "liveupdate-number" 64-bit property that
+ * corresponds to the number of live-updates performed on this machine.
+ */
+#define LUO_FDT_SIZE PAGE_SIZE
+#define LUO_FDT_KHO_ENTRY_NAME "LUO"
+#define LUO_FDT_COMPATIBLE "luo-v1"
+#define LUO_FDT_LIVEUPDATE_NUM "liveupdate-number"
+
+#define LIVEUPDATE_HNDL_COMPAT_LENGTH 48
+
+/**
+ * struct luo_file_ser - Represents the serialized preserves files.
+ * @compatible: File handler compatible string.
+ * @data: Private data
+ * @token: User provided token for this file
+ *
+ * If this structure is modified, LUO_SESSION_COMPATIBLE must be updated.
+ */
+struct luo_file_ser {
+ char compatible[LIVEUPDATE_HNDL_COMPAT_LENGTH];
+ u64 data;
+ u64 token;
+} __packed;
+
+/**
+ * struct luo_file_set_ser - Represents the serialized metadata for file set
+ * @files: The physical address of a contiguous memory block that holds
+ * the serialized state of files (array of luo_file_ser) in this file
+ * set.
+ * @count: The total number of files that were part of this session during
+ * serialization. Used for iteration and validation during
+ * restoration.
+ */
+struct luo_file_set_ser {
+ u64 files;
+ u64 count;
+} __packed;
+
+/*
+ * LUO FDT session node
+ * LUO_FDT_SESSION_HEADER: is a u64 physical address of struct
+ * luo_session_header_ser
+ */
+#define LUO_FDT_SESSION_NODE_NAME "luo-session"
+#define LUO_FDT_SESSION_COMPATIBLE "luo-session-v2"
+#define LUO_FDT_SESSION_HEADER "luo-session-header"
+
+/**
+ * struct luo_session_header_ser - Header for the serialized session data block.
+ * @count: The number of `struct luo_session_ser` entries that immediately
+ * follow this header in the memory block.
+ *
+ * This structure is located at the beginning of a contiguous block of
+ * physical memory preserved across the kexec. It provides the necessary
+ * metadata to interpret the array of session entries that follow.
+ *
+ * If this structure is modified, `LUO_FDT_SESSION_COMPATIBLE` must be updated.
+ */
+struct luo_session_header_ser {
+ u64 count;
+} __packed;
+
+/**
+ * struct luo_session_ser - Represents the serialized metadata for a LUO session.
+ * @name: The unique name of the session, provided by the userspace at
+ * the time of session creation.
+ * @file_set_ser: Serialized files belonging to this session,
+ *
+ * This structure is used to package session-specific metadata for transfer
+ * between kernels via Kexec Handover. An array of these structures (one per
+ * session) is created and passed to the new kernel, allowing it to reconstruct
+ * the session context.
+ *
+ * If this structure is modified, `LUO_FDT_SESSION_COMPATIBLE` must be updated.
+ */
+struct luo_session_ser {
+ char name[LIVEUPDATE_SESSION_NAME_LENGTH];
+ struct luo_file_set_ser file_set_ser;
+} __packed;
+
+#endif /* _LINUX_KHO_ABI_LUO_H */
diff --git a/include/linux/kho/abi/memfd.h b/include/linux/kho/abi/memfd.h
new file mode 100644
index 000000000000..da7d063474a1
--- /dev/null
+++ b/include/linux/kho/abi/memfd.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ *
+ * Copyright (C) 2025 Amazon.com Inc. or its affiliates.
+ * Pratyush Yadav <ptyadav@amazon.de>
+ */
+
+#ifndef _LINUX_KHO_ABI_MEMFD_H
+#define _LINUX_KHO_ABI_MEMFD_H
+
+#include <linux/types.h>
+#include <linux/kexec_handover.h>
+
+/**
+ * DOC: memfd Live Update ABI
+ *
+ * This header defines the ABI for preserving the state of a memfd across a
+ * kexec reboot using the LUO.
+ *
+ * The state is serialized into a packed structure `struct memfd_luo_ser`
+ * which is handed over to the next kernel via the KHO mechanism.
+ *
+ * This interface is a contract. Any modification to the structure layout
+ * constitutes a breaking change. Such changes require incrementing the
+ * version number in the MEMFD_LUO_FH_COMPATIBLE string.
+ */
+
+/**
+ * MEMFD_LUO_FOLIO_DIRTY - The folio is dirty.
+ *
+ * This flag indicates the folio contains data from user. A non-dirty folio is
+ * one that was allocated (say using fallocate(2)) but not written to.
+ */
+#define MEMFD_LUO_FOLIO_DIRTY BIT(0)
+
+/**
+ * MEMFD_LUO_FOLIO_UPTODATE - The folio is up-to-date.
+ *
+ * An up-to-date folio has been zeroed out. shmem zeroes out folios on first
+ * use. This flag tracks which folios need zeroing.
+ */
+#define MEMFD_LUO_FOLIO_UPTODATE BIT(1)
+
+/**
+ * struct memfd_luo_folio_ser - Serialized state of a single folio.
+ * @pfn: The page frame number of the folio.
+ * @flags: Flags to describe the state of the folio.
+ * @index: The page offset (pgoff_t) of the folio within the original file.
+ */
+struct memfd_luo_folio_ser {
+ u64 pfn:52;
+ u64 flags:12;
+ u64 index;
+} __packed;
+
+/**
+ * struct memfd_luo_ser - Main serialization structure for a memfd.
+ * @pos: The file's current position (f_pos).
+ * @size: The total size of the file in bytes (i_size).
+ * @nr_folios: Number of folios in the folios array.
+ * @folios: KHO vmalloc descriptor pointing to the array of
+ * struct memfd_luo_folio_ser.
+ */
+struct memfd_luo_ser {
+ u64 pos;
+ u64 size;
+ u64 nr_folios;
+ struct kho_vmalloc folios;
+} __packed;
+
+/* The compatibility string for memfd file handler */
+#define MEMFD_LUO_FH_COMPATIBLE "memfd-v1"
+
+#endif /* _LINUX_KHO_ABI_MEMFD_H */
diff --git a/include/linux/liveupdate.h b/include/linux/liveupdate.h
new file mode 100644
index 000000000000..a7f6ee5b6771
--- /dev/null
+++ b/include/linux/liveupdate.h
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+#ifndef _LINUX_LIVEUPDATE_H
+#define _LINUX_LIVEUPDATE_H
+
+#include <linux/bug.h>
+#include <linux/compiler.h>
+#include <linux/kho/abi/luo.h>
+#include <linux/list.h>
+#include <linux/types.h>
+#include <uapi/linux/liveupdate.h>
+
+struct liveupdate_file_handler;
+struct file;
+
+/**
+ * struct liveupdate_file_op_args - Arguments for file operation callbacks.
+ * @handler: The file handler being called.
+ * @retrieved: The retrieve status for the 'can_finish / finish'
+ * operation.
+ * @file: The file object. For retrieve: [OUT] The callback sets
+ * this to the new file. For other ops: [IN] The caller sets
+ * this to the file being operated on.
+ * @serialized_data: The opaque u64 handle, preserve/prepare/freeze may update
+ * this field.
+ * @private_data: Private data for the file used to hold runtime state that
+ * is not preserved. Set by the handler's .preserve()
+ * callback, and must be freed in the handler's
+ * .unpreserve() callback.
+ *
+ * This structure bundles all parameters for the file operation callbacks.
+ * The 'data' and 'file' fields are used for both input and output.
+ */
+struct liveupdate_file_op_args {
+ struct liveupdate_file_handler *handler;
+ bool retrieved;
+ struct file *file;
+ u64 serialized_data;
+ void *private_data;
+};
+
+/**
+ * struct liveupdate_file_ops - Callbacks for live-updatable files.
+ * @can_preserve: Required. Lightweight check to see if this handler is
+ * compatible with the given file.
+ * @preserve: Required. Performs state-saving for the file.
+ * @unpreserve: Required. Cleans up any resources allocated by @preserve.
+ * @freeze: Optional. Final actions just before kernel transition.
+ * @unfreeze: Optional. Undo freeze operations.
+ * @retrieve: Required. Restores the file in the new kernel.
+ * @can_finish: Optional. Check if this FD can finish, i.e. all restoration
+ * pre-requirements for this FD are satisfied. Called prior to
+ * finish, in order to do successful finish calls for all
+ * resources in the session.
+ * @finish: Required. Final cleanup in the new kernel.
+ * @owner: Module reference
+ *
+ * All operations (except can_preserve) receive a pointer to a
+ * 'struct liveupdate_file_op_args' containing the necessary context.
+ */
+struct liveupdate_file_ops {
+ bool (*can_preserve)(struct liveupdate_file_handler *handler,
+ struct file *file);
+ int (*preserve)(struct liveupdate_file_op_args *args);
+ void (*unpreserve)(struct liveupdate_file_op_args *args);
+ int (*freeze)(struct liveupdate_file_op_args *args);
+ void (*unfreeze)(struct liveupdate_file_op_args *args);
+ int (*retrieve)(struct liveupdate_file_op_args *args);
+ bool (*can_finish)(struct liveupdate_file_op_args *args);
+ void (*finish)(struct liveupdate_file_op_args *args);
+ struct module *owner;
+};
+
+/**
+ * struct liveupdate_file_handler - Represents a handler for a live-updatable file type.
+ * @ops: Callback functions
+ * @compatible: The compatibility string (e.g., "memfd-v1", "vfiofd-v1")
+ * that uniquely identifies the file type this handler
+ * supports. This is matched against the compatible string
+ * associated with individual &struct file instances.
+ *
+ * Modules that want to support live update for specific file types should
+ * register an instance of this structure. LUO uses this registration to
+ * determine if a given file can be preserved and to find the appropriate
+ * operations to manage its state across the update.
+ */
+struct liveupdate_file_handler {
+ const struct liveupdate_file_ops *ops;
+ const char compatible[LIVEUPDATE_HNDL_COMPAT_LENGTH];
+
+ /* private: */
+
+ /*
+ * Used for linking this handler instance into a global list of
+ * registered file handlers.
+ */
+ struct list_head __private list;
+};
+
+#ifdef CONFIG_LIVEUPDATE
+
+/* Return true if live update orchestrator is enabled */
+bool liveupdate_enabled(void);
+
+/* Called during kexec to tell LUO that entered into reboot */
+int liveupdate_reboot(void);
+
+int liveupdate_register_file_handler(struct liveupdate_file_handler *fh);
+int liveupdate_unregister_file_handler(struct liveupdate_file_handler *fh);
+
+#else /* CONFIG_LIVEUPDATE */
+
+static inline bool liveupdate_enabled(void)
+{
+ return false;
+}
+
+static inline int liveupdate_reboot(void)
+{
+ return 0;
+}
+
+static inline int liveupdate_register_file_handler(struct liveupdate_file_handler *fh)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int liveupdate_unregister_file_handler(struct liveupdate_file_handler *fh)
+{
+ return -EOPNOTSUPP;
+}
+
+#endif /* CONFIG_LIVEUPDATE */
+#endif /* _LINUX_LIVEUPDATE_H */
diff --git a/include/linux/math.h b/include/linux/math.h
index 0198c92cbe3e..6dc1d1d32fbc 100644
--- a/include/linux/math.h
+++ b/include/linux/math.h
@@ -148,11 +148,16 @@ __STRUCT_FRACT(u32)
/**
* abs - return absolute value of an argument
- * @x: the value. If it is unsigned type, it is converted to signed type first.
- * char is treated as if it was signed (regardless of whether it really is)
- * but the macro's return type is preserved as char.
+ * @x: the value.
*
- * Return: an absolute value of x.
+ * If it is unsigned type, @x is converted to signed type first.
+ * char is treated as if it was signed (regardless of whether it really is)
+ * but the macro's return type is preserved as char.
+ *
+ * NOTE, for signed type if @x is the minimum, the returned result is undefined
+ * as there is not enough bits to represent it as a positive number.
+ *
+ * Return: an absolute value of @x.
*/
#define abs(x) __abs_choose_expr(x, long long, \
__abs_choose_expr(x, long, \
diff --git a/include/linux/math64.h b/include/linux/math64.h
index 6aaccc1626ab..cc305206d89f 100644
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -158,6 +158,17 @@ static inline u64 mul_u32_u32(u32 a, u32 b)
}
#endif
+#ifndef add_u64_u32
+/*
+ * Many a GCC version also messes this up.
+ * Zero extending b and then spilling everything to stack.
+ */
+static inline u64 add_u64_u32(u64 a, u32 b)
+{
+ return a + b;
+}
+#endif
+
#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
#ifndef mul_u64_u32_shr
@@ -282,7 +293,53 @@ static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor)
}
#endif /* mul_u64_u32_div */
-u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div);
+/**
+ * mul_u64_add_u64_div_u64 - unsigned 64bit multiply, add, and divide
+ * @a: first unsigned 64bit multiplicand
+ * @b: second unsigned 64bit multiplicand
+ * @c: unsigned 64bit addend
+ * @d: unsigned 64bit divisor
+ *
+ * Multiply two 64bit values together to generate a 128bit product
+ * add a third value and then divide by a fourth.
+ * The Generic code divides by 0 if @d is zero and returns ~0 on overflow.
+ * Architecture specific code may trap on zero or overflow.
+ *
+ * Return: (@a * @b + @c) / @d
+ */
+u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d);
+
+/**
+ * mul_u64_u64_div_u64 - unsigned 64bit multiply and divide
+ * @a: first unsigned 64bit multiplicand
+ * @b: second unsigned 64bit multiplicand
+ * @d: unsigned 64bit divisor
+ *
+ * Multiply two 64bit values together to generate a 128bit product
+ * and then divide by a third value.
+ * The Generic code divides by 0 if @d is zero and returns ~0 on overflow.
+ * Architecture specific code may trap on zero or overflow.
+ *
+ * Return: @a * @b / @d
+ */
+#define mul_u64_u64_div_u64(a, b, d) mul_u64_add_u64_div_u64(a, b, 0, d)
+
+/**
+ * mul_u64_u64_div_u64_roundup - unsigned 64bit multiply and divide rounded up
+ * @a: first unsigned 64bit multiplicand
+ * @b: second unsigned 64bit multiplicand
+ * @d: unsigned 64bit divisor
+ *
+ * Multiply two 64bit values together to generate a 128bit product
+ * and then divide and round up.
+ * The Generic code divides by 0 if @d is zero and returns ~0 on overflow.
+ * Architecture specific code may trap on zero or overflow.
+ *
+ * Return: (@a * @b + @d - 1) / @d
+ */
+#define mul_u64_u64_div_u64_roundup(a, b, d) \
+ ({ u64 _tmp = (d); mul_u64_add_u64_div_u64(a, b, _tmp - 1, _tmp); })
+
/**
* DIV64_U64_ROUND_UP - unsigned 64bit divide with 64bit divisor rounded up
diff --git a/include/linux/once_lite.h b/include/linux/once_lite.h
index 27de7bc32a06..236592c4eeb1 100644
--- a/include/linux/once_lite.h
+++ b/include/linux/once_lite.h
@@ -16,7 +16,7 @@
bool __ret_cond = !!(condition); \
bool __ret_once = false; \
\
- if (unlikely(__ret_cond && !__already_done)) { \
+ if (unlikely(__ret_cond) && unlikely(!__already_done)) {\
__already_done = true; \
__ret_once = true; \
} \
diff --git a/include/linux/panic.h b/include/linux/panic.h
index 6f972a66c13e..a00bc0937698 100644
--- a/include/linux/panic.h
+++ b/include/linux/panic.h
@@ -86,7 +86,6 @@ static inline void set_arch_panic_timeout(int timeout, int arch_default_timeout)
struct taint_flag {
char c_true; /* character printed when tainted */
char c_false; /* character printed when not tainted */
- bool module; /* also show as a per-module taint flag */
const char *desc; /* verbose description of the set taint flag */
};
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 8d2ba3749866..4091e978aef2 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -43,8 +43,36 @@ extern void rb_erase(struct rb_node *, struct rb_root *);
/* Find logical next and previous nodes in a tree */
extern struct rb_node *rb_next(const struct rb_node *);
extern struct rb_node *rb_prev(const struct rb_node *);
-extern struct rb_node *rb_first(const struct rb_root *);
-extern struct rb_node *rb_last(const struct rb_root *);
+
+/*
+ * This function returns the first node (in sort order) of the tree.
+ */
+static inline struct rb_node *rb_first(const struct rb_root *root)
+{
+ struct rb_node *n;
+
+ n = root->rb_node;
+ if (!n)
+ return NULL;
+ while (n->rb_left)
+ n = n->rb_left;
+ return n;
+}
+
+/*
+ * This function returns the last node (in sort order) of the tree.
+ */
+static inline struct rb_node *rb_last(const struct rb_root *root)
+{
+ struct rb_node *n;
+
+ n = root->rb_node;
+ if (!n)
+ return NULL;
+ while (n->rb_right)
+ n = n->rb_right;
+ return n;
+}
/* Postorder iteration - always visit the parent after its children */
extern struct rb_node *rb_first_postorder(const struct rb_root *);
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 5e4b3c1ae5c2..e2069b3179c4 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -10,6 +10,7 @@
#include <linux/xattr.h>
#include <linux/fs_parser.h>
#include <linux/userfaultfd_k.h>
+#include <linux/bits.h>
struct swap_iocb;
@@ -19,6 +20,19 @@ struct swap_iocb;
#define SHMEM_MAXQUOTAS 2
#endif
+/* Suppress pre-accounting of the entire object size. */
+#define SHMEM_F_NORESERVE BIT(0)
+/* Disallow swapping. */
+#define SHMEM_F_LOCKED BIT(1)
+/*
+ * Disallow growing, shrinking, or hole punching in the inode. Combined with
+ * folio pinning, makes sure the inode's mapping stays fixed.
+ *
+ * In some ways similar to F_SEAL_GROW | F_SEAL_SHRINK, but can be removed and
+ * isn't directly visible to userspace.
+ */
+#define SHMEM_F_MAPPING_FROZEN BIT(2)
+
struct shmem_inode_info {
spinlock_t lock;
unsigned int seals; /* shmem seals */
@@ -186,6 +200,15 @@ static inline bool shmem_file(struct file *file)
return shmem_mapping(file->f_mapping);
}
+/* Must be called with inode lock taken exclusive. */
+static inline void shmem_freeze(struct inode *inode, bool freeze)
+{
+ if (freeze)
+ SHMEM_I(inode)->flags |= SHMEM_F_MAPPING_FROZEN;
+ else
+ SHMEM_I(inode)->flags &= ~SHMEM_F_MAPPING_FROZEN;
+}
+
/*
* If fallocate(FALLOC_FL_KEEP_SIZE) has been used, there may be pages
* beyond i_size's notion of EOF, which fallocate has committed to reserving:
diff --git a/include/linux/sys_info.h b/include/linux/sys_info.h
index 89d77dc4f2ed..a5bc3ea3d44b 100644
--- a/include/linux/sys_info.h
+++ b/include/linux/sys_info.h
@@ -14,7 +14,7 @@
#define SYS_INFO_LOCKS 0x00000008
#define SYS_INFO_FTRACE 0x00000010
#define SYS_INFO_PANIC_CONSOLE_REPLAY 0x00000020
-#define SYS_INFO_ALL_CPU_BT 0x00000040
+#define SYS_INFO_ALL_BT 0x00000040
#define SYS_INFO_BLOCKED_TASKS 0x00000080
void sys_info(unsigned long si_mask);
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index be395f5f7ee3..1f3804245c06 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -161,8 +161,6 @@ __copy_to_user(void __user *to, const void *from, unsigned long n)
* directly in the normal copy_to/from_user(), the other ones go
* through an extern _copy_to/from_user(), which expands the same code
* here.
- *
- * Rust code always uses the extern definition.
*/
static inline __must_check unsigned long
_inline_copy_from_user(void *to, const void __user *from, unsigned long n)
@@ -192,8 +190,10 @@ fail:
memset(to + (n - res), 0, res);
return res;
}
+#ifndef INLINE_COPY_FROM_USER
extern __must_check unsigned long
_copy_from_user(void *, const void __user *, unsigned long);
+#endif
static inline __must_check unsigned long
_inline_copy_to_user(void __user *to, const void *from, unsigned long n)
@@ -207,8 +207,10 @@ _inline_copy_to_user(void __user *to, const void *from, unsigned long n)
}
return n;
}
+#ifndef INLINE_COPY_TO_USER
extern __must_check unsigned long
_copy_to_user(void __user *, const void *, unsigned long);
+#endif
static __always_inline unsigned long __must_check
copy_from_user(void *to, const void __user *from, unsigned long n)
diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h
index 9373962aade9..2eb528058d0d 100644
--- a/include/linux/util_macros.h
+++ b/include/linux/util_macros.h
@@ -136,10 +136,10 @@
#define PTR_IF(cond, ptr) ((cond) ? (ptr) : NULL)
/**
- * to_user_ptr - cast a pointer passed as u64 from user space to void __user *
+ * u64_to_user_ptr - cast a pointer passed as u64 from user space to void __user *
* @x: The u64 value from user space, usually via IOCTL
*
- * to_user_ptr() simply casts a pointer passed as u64 from user space to void
+ * u64_to_user_ptr() simply casts a pointer passed as u64 from user space to void
* __user * correctly. Using this lets us get rid of all the tiresome casts.
*/
#define u64_to_user_ptr(x) \
diff --git a/include/linux/vmcore_info.h b/include/linux/vmcore_info.h
index 37e003ae5262..e71518caacdf 100644
--- a/include/linux/vmcore_info.h
+++ b/include/linux/vmcore_info.h
@@ -5,6 +5,7 @@
#include <linux/linkage.h>
#include <linux/elfcore.h>
#include <linux/elf.h>
+#include <uapi/linux/vmcore.h>
#define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4)
#define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(NN_PRSTATUS), 4)
@@ -77,4 +78,11 @@ extern u32 *vmcoreinfo_note;
Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
void *data, size_t data_len);
void final_note(Elf_Word *buf);
+
+#ifdef CONFIG_VMCORE_INFO
+void hwerr_log_error_type(enum hwerr_error_type src);
+#else
+static inline void hwerr_log_error_type(enum hwerr_error_type src) {};
+#endif
+
#endif /* LINUX_VMCORE_INFO_H */
diff --git a/include/linux/xxhash.h b/include/linux/xxhash.h
index 27f57eca8cb1..587122e2c29c 100644
--- a/include/linux/xxhash.h
+++ b/include/linux/xxhash.h
@@ -141,21 +141,7 @@ static inline unsigned long xxhash(const void *input, size_t length,
*/
/**
- * struct xxh32_state - private xxh32 state, do not use members directly
- */
-struct xxh32_state {
- uint32_t total_len_32;
- uint32_t large_len;
- uint32_t v1;
- uint32_t v2;
- uint32_t v3;
- uint32_t v4;
- uint32_t mem32[4];
- uint32_t memsize;
-};
-
-/**
- * struct xxh32_state - private xxh64 state, do not use members directly
+ * struct xxh64_state - private xxh64 state, do not use members directly
*/
struct xxh64_state {
uint64_t total_len;
@@ -168,16 +154,6 @@ struct xxh64_state {
};
/**
- * xxh32_reset() - reset the xxh32 state to start a new hashing operation
- *
- * @state: The xxh32 state to reset.
- * @seed: Initialize the hash state with this seed.
- *
- * Call this function on any xxh32_state to prepare for a new hashing operation.
- */
-void xxh32_reset(struct xxh32_state *state, uint32_t seed);
-
-/**
* xxh64_reset() - reset the xxh64 state to start a new hashing operation
*
* @state: The xxh64 state to reset.
@@ -210,24 +186,4 @@ int xxh64_update(struct xxh64_state *state, const void *input, size_t length);
*/
uint64_t xxh64_digest(const struct xxh64_state *state);
-/*-**************************
- * Utils
- ***************************/
-
-/**
- * xxh32_copy_state() - copy the source state into the destination state
- *
- * @src: The source xxh32 state.
- * @dst: The destination xxh32 state.
- */
-void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src);
-
-/**
- * xxh64_copy_state() - copy the source state into the destination state
- *
- * @src: The source xxh64 state.
- * @dst: The destination xxh64 state.
- */
-void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src);
-
#endif /* XXHASH_H */
diff --git a/include/uapi/linux/liveupdate.h b/include/uapi/linux/liveupdate.h
new file mode 100644
index 000000000000..30bc66ee9436
--- /dev/null
+++ b/include/uapi/linux/liveupdate.h
@@ -0,0 +1,216 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+/*
+ * Userspace interface for /dev/liveupdate
+ * Live Update Orchestrator
+ *
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+
+#ifndef _UAPI_LIVEUPDATE_H
+#define _UAPI_LIVEUPDATE_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/**
+ * DOC: General ioctl format
+ *
+ * The ioctl interface follows a general format to allow for extensibility. Each
+ * ioctl is passed in a structure pointer as the argument providing the size of
+ * the structure in the first u32. The kernel checks that any structure space
+ * beyond what it understands is 0. This allows userspace to use the backward
+ * compatible portion while consistently using the newer, larger, structures.
+ *
+ * ioctls use a standard meaning for common errnos:
+ *
+ * - ENOTTY: The IOCTL number itself is not supported at all
+ * - E2BIG: The IOCTL number is supported, but the provided structure has
+ * non-zero in a part the kernel does not understand.
+ * - EOPNOTSUPP: The IOCTL number is supported, and the structure is
+ * understood, however a known field has a value the kernel does not
+ * understand or support.
+ * - EINVAL: Everything about the IOCTL was understood, but a field is not
+ * correct.
+ * - ENOENT: A provided token does not exist.
+ * - ENOMEM: Out of memory.
+ * - EOVERFLOW: Mathematics overflowed.
+ *
+ * As well as additional errnos, within specific ioctls.
+ */
+
+/* The ioctl type, documented in ioctl-number.rst */
+#define LIVEUPDATE_IOCTL_TYPE 0xBA
+
+/* The maximum length of session name including null termination */
+#define LIVEUPDATE_SESSION_NAME_LENGTH 64
+
+/* The /dev/liveupdate ioctl commands */
+enum {
+ LIVEUPDATE_CMD_BASE = 0x00,
+ LIVEUPDATE_CMD_CREATE_SESSION = LIVEUPDATE_CMD_BASE,
+ LIVEUPDATE_CMD_RETRIEVE_SESSION = 0x01,
+};
+
+/* ioctl commands for session file descriptors */
+enum {
+ LIVEUPDATE_CMD_SESSION_BASE = 0x40,
+ LIVEUPDATE_CMD_SESSION_PRESERVE_FD = LIVEUPDATE_CMD_SESSION_BASE,
+ LIVEUPDATE_CMD_SESSION_RETRIEVE_FD = 0x41,
+ LIVEUPDATE_CMD_SESSION_FINISH = 0x42,
+};
+
+/**
+ * struct liveupdate_ioctl_create_session - ioctl(LIVEUPDATE_IOCTL_CREATE_SESSION)
+ * @size: Input; sizeof(struct liveupdate_ioctl_create_session)
+ * @fd: Output; The new file descriptor for the created session.
+ * @name: Input; A null-terminated string for the session name, max
+ * length %LIVEUPDATE_SESSION_NAME_LENGTH including termination
+ * character.
+ *
+ * Creates a new live update session for managing preserved resources.
+ * This ioctl can only be called on the main /dev/liveupdate device.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+struct liveupdate_ioctl_create_session {
+ __u32 size;
+ __s32 fd;
+ __u8 name[LIVEUPDATE_SESSION_NAME_LENGTH];
+};
+
+#define LIVEUPDATE_IOCTL_CREATE_SESSION \
+ _IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_CREATE_SESSION)
+
+/**
+ * struct liveupdate_ioctl_retrieve_session - ioctl(LIVEUPDATE_IOCTL_RETRIEVE_SESSION)
+ * @size: Input; sizeof(struct liveupdate_ioctl_retrieve_session)
+ * @fd: Output; The new file descriptor for the retrieved session.
+ * @name: Input; A null-terminated string identifying the session to retrieve.
+ * The name must exactly match the name used when the session was
+ * created in the previous kernel.
+ *
+ * Retrieves a handle (a new file descriptor) for a preserved session by its
+ * name. This is the primary mechanism for a userspace agent to regain control
+ * of its preserved resources after a live update.
+ *
+ * The userspace application provides the null-terminated `name` of a session
+ * it created before the live update. If a preserved session with a matching
+ * name is found, the kernel instantiates it and returns a new file descriptor
+ * in the `fd` field. This new session FD can then be used for all file-specific
+ * operations, such as restoring individual file descriptors with
+ * LIVEUPDATE_SESSION_RETRIEVE_FD.
+ *
+ * It is the responsibility of the userspace application to know the names of
+ * the sessions it needs to retrieve. If no session with the given name is
+ * found, the ioctl will fail with -ENOENT.
+ *
+ * This ioctl can only be called on the main /dev/liveupdate device when the
+ * system is in the LIVEUPDATE_STATE_UPDATED state.
+ */
+struct liveupdate_ioctl_retrieve_session {
+ __u32 size;
+ __s32 fd;
+ __u8 name[LIVEUPDATE_SESSION_NAME_LENGTH];
+};
+
+#define LIVEUPDATE_IOCTL_RETRIEVE_SESSION \
+ _IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_RETRIEVE_SESSION)
+
+/* Session specific IOCTLs */
+
+/**
+ * struct liveupdate_session_preserve_fd - ioctl(LIVEUPDATE_SESSION_PRESERVE_FD)
+ * @size: Input; sizeof(struct liveupdate_session_preserve_fd)
+ * @fd: Input; The user-space file descriptor to be preserved.
+ * @token: Input; An opaque, unique token for preserved resource.
+ *
+ * Holds parameters for preserving a file descriptor.
+ *
+ * User sets the @fd field identifying the file descriptor to preserve
+ * (e.g., memfd, kvm, iommufd, VFIO). The kernel validates if this FD type
+ * and its dependencies are supported for preservation. If validation passes,
+ * the kernel marks the FD internally and *initiates the process* of preparing
+ * its state for saving. The actual snapshotting of the state typically occurs
+ * during the subsequent %LIVEUPDATE_IOCTL_PREPARE execution phase, though
+ * some finalization might occur during freeze.
+ * On successful validation and initiation, the kernel uses the @token
+ * field with an opaque identifier representing the resource being preserved.
+ * This token confirms the FD is targeted for preservation and is required for
+ * the subsequent %LIVEUPDATE_SESSION_RETRIEVE_FD call after the live update.
+ *
+ * Return: 0 on success (validation passed, preservation initiated), negative
+ * error code on failure (e.g., unsupported FD type, dependency issue,
+ * validation failed).
+ */
+struct liveupdate_session_preserve_fd {
+ __u32 size;
+ __s32 fd;
+ __aligned_u64 token;
+};
+
+#define LIVEUPDATE_SESSION_PRESERVE_FD \
+ _IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_SESSION_PRESERVE_FD)
+
+/**
+ * struct liveupdate_session_retrieve_fd - ioctl(LIVEUPDATE_SESSION_RETRIEVE_FD)
+ * @size: Input; sizeof(struct liveupdate_session_retrieve_fd)
+ * @fd: Output; The new file descriptor representing the fully restored
+ * kernel resource.
+ * @token: Input; An opaque, token that was used to preserve the resource.
+ *
+ * Retrieve a previously preserved file descriptor.
+ *
+ * User sets the @token field to the value obtained from a successful
+ * %LIVEUPDATE_IOCTL_FD_PRESERVE call before the live update. On success,
+ * the kernel restores the state (saved during the PREPARE/FREEZE phases)
+ * associated with the token and populates the @fd field with a new file
+ * descriptor referencing the restored resource in the current (new) kernel.
+ * This operation must be performed *before* signaling completion via
+ * %LIVEUPDATE_IOCTL_FINISH.
+ *
+ * Return: 0 on success, negative error code on failure (e.g., invalid token).
+ */
+struct liveupdate_session_retrieve_fd {
+ __u32 size;
+ __s32 fd;
+ __aligned_u64 token;
+};
+
+#define LIVEUPDATE_SESSION_RETRIEVE_FD \
+ _IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_SESSION_RETRIEVE_FD)
+
+/**
+ * struct liveupdate_session_finish - ioctl(LIVEUPDATE_SESSION_FINISH)
+ * @size: Input; sizeof(struct liveupdate_session_finish)
+ * @reserved: Input; Must be zero. Reserved for future use.
+ *
+ * Signals the completion of the restoration process for a retrieved session.
+ * This is the final operation that should be performed on a session file
+ * descriptor after a live update.
+ *
+ * This ioctl must be called once all required file descriptors for the session
+ * have been successfully retrieved (using %LIVEUPDATE_SESSION_RETRIEVE_FD) and
+ * are fully restored from the userspace and kernel perspective.
+ *
+ * Upon success, the kernel releases its ownership of the preserved resources
+ * associated with this session. This allows internal resources to be freed,
+ * typically by decrementing reference counts on the underlying preserved
+ * objects.
+ *
+ * If this operation fails, the resources remain preserved in memory. Userspace
+ * may attempt to call finish again. The resources will otherwise be reset
+ * during the next live update cycle.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+struct liveupdate_session_finish {
+ __u32 size;
+ __u32 reserved;
+};
+
+#define LIVEUPDATE_SESSION_FINISH \
+ _IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_SESSION_FINISH)
+
+#endif /* _UAPI_LIVEUPDATE_H */
diff --git a/include/uapi/linux/vmcore.h b/include/uapi/linux/vmcore.h
index 3e9da91866ff..2ba89fafa518 100644
--- a/include/uapi/linux/vmcore.h
+++ b/include/uapi/linux/vmcore.h
@@ -15,4 +15,13 @@ struct vmcoredd_header {
__u8 dump_name[VMCOREDD_MAX_NAME_BYTES]; /* Device dump's name */
};
+enum hwerr_error_type {
+ HWERR_RECOV_CPU,
+ HWERR_RECOV_MEMORY,
+ HWERR_RECOV_PCI,
+ HWERR_RECOV_CXL,
+ HWERR_RECOV_OTHERS,
+ HWERR_RECOV_MAX,
+};
+
#endif /* _UAPI_VMCORE_H */
diff --git a/init/Kconfig b/init/Kconfig
index ad42558d842d..fa79feb8fe57 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1519,6 +1519,24 @@ config BOOT_CONFIG_EMBED_FILE
This bootconfig will be used if there is no initrd or no other
bootconfig in the initrd.
+config CMDLINE_LOG_WRAP_IDEAL_LEN
+ int "Length to try to wrap the cmdline when logged at boot"
+ default 1021
+ range 0 1021
+ help
+ At boot time, the kernel command line is logged to the console.
+ The log message will start with the prefix "Kernel command line: ".
+ The log message will attempt to be wrapped (split into multiple log
+ messages) at spaces based on CMDLINE_LOG_WRAP_IDEAL_LEN characters.
+ If wrapping happens, each log message will start with the prefix and
+ all but the last message will end with " \". Messages may exceed the
+ ideal length if a place to wrap isn't found before the specified
+ number of characters.
+
+ A value of 0 disables wrapping, though be warned that the maximum
+ length of a log message (1021 characters) may cause the cmdline to
+ be truncated.
+
config INITRAMFS_PRESERVE_MTIME
bool "Preserve cpio archive mtimes in initramfs"
depends on BLK_DEV_INITRD
@@ -2171,6 +2189,8 @@ config TRACEPOINTS
source "kernel/Kconfig.kexec"
+source "kernel/liveupdate/Kconfig"
+
endmenu # General setup
source "arch/Kconfig"
diff --git a/init/calibrate.c b/init/calibrate.c
index f3831272f113..63be4c65bc52 100644
--- a/init/calibrate.c
+++ b/init/calibrate.c
@@ -5,19 +5,22 @@
* Copyright (C) 1991, 1992 Linus Torvalds
*/
-#include <linux/jiffies.h>
#include <linux/delay.h>
#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/smp.h>
+#include <linux/jiffies.h>
+#include <linux/kstrtox.h>
#include <linux/percpu.h>
+#include <linux/printk.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/timex.h>
unsigned long lpj_fine;
unsigned long preset_lpj;
+
static int __init lpj_setup(char *str)
{
- preset_lpj = simple_strtoul(str,NULL,0);
- return 1;
+ return kstrtoul(str, 0, &preset_lpj) == 0;
}
__setup("lpj=", lpj_setup);
diff --git a/init/main.c b/init/main.c
index 07a3116811c5..b84818ad9685 100644
--- a/init/main.c
+++ b/init/main.c
@@ -906,6 +906,101 @@ static void __init early_numa_node_init(void)
#endif
}
+#define KERNEL_CMDLINE_PREFIX "Kernel command line: "
+#define KERNEL_CMDLINE_PREFIX_LEN (sizeof(KERNEL_CMDLINE_PREFIX) - 1)
+#define KERNEL_CMDLINE_CONTINUATION " \\"
+#define KERNEL_CMDLINE_CONTINUATION_LEN (sizeof(KERNEL_CMDLINE_CONTINUATION) - 1)
+
+#define MIN_CMDLINE_LOG_WRAP_IDEAL_LEN (KERNEL_CMDLINE_PREFIX_LEN + \
+ KERNEL_CMDLINE_CONTINUATION_LEN)
+#define CMDLINE_LOG_WRAP_IDEAL_LEN (CONFIG_CMDLINE_LOG_WRAP_IDEAL_LEN > \
+ MIN_CMDLINE_LOG_WRAP_IDEAL_LEN ? \
+ CONFIG_CMDLINE_LOG_WRAP_IDEAL_LEN : \
+ MIN_CMDLINE_LOG_WRAP_IDEAL_LEN)
+
+#define IDEAL_CMDLINE_LEN (CMDLINE_LOG_WRAP_IDEAL_LEN - KERNEL_CMDLINE_PREFIX_LEN)
+#define IDEAL_CMDLINE_SPLIT_LEN (IDEAL_CMDLINE_LEN - KERNEL_CMDLINE_CONTINUATION_LEN)
+
+/**
+ * print_kernel_cmdline() - Print the kernel cmdline with wrapping.
+ * @cmdline: The cmdline to print.
+ *
+ * Print the kernel command line, trying to wrap based on the Kconfig knob
+ * CONFIG_CMDLINE_LOG_WRAP_IDEAL_LEN.
+ *
+ * Wrapping is based on spaces, ignoring quotes. All lines are prefixed
+ * with "Kernel command line: " and lines that are not the last line have
+ * a " \" suffix added to them. The prefix and suffix count towards the
+ * line length for wrapping purposes. The ideal length will be exceeded
+ * if no appropriate place to wrap is found.
+ *
+ * Example output if CONFIG_CMDLINE_LOG_WRAP_IDEAL_LEN is 40:
+ * Kernel command line: loglevel=7 \
+ * Kernel command line: init=/sbin/init \
+ * Kernel command line: root=PARTUUID=8c3efc1a-768b-6642-8d0c-89eb782f19f0/PARTNROFF=1 \
+ * Kernel command line: rootwait ro \
+ * Kernel command line: my_quoted_arg="The \
+ * Kernel command line: quick brown fox \
+ * Kernel command line: jumps over the \
+ * Kernel command line: lazy dog."
+ */
+static void __init print_kernel_cmdline(const char *cmdline)
+{
+ size_t len;
+
+ /* Config option of 0 or anything longer than the max disables wrapping */
+ if (CONFIG_CMDLINE_LOG_WRAP_IDEAL_LEN == 0 ||
+ IDEAL_CMDLINE_LEN >= COMMAND_LINE_SIZE - 1) {
+ pr_notice("%s%s\n", KERNEL_CMDLINE_PREFIX, cmdline);
+ return;
+ }
+
+ len = strlen(cmdline);
+ while (len > IDEAL_CMDLINE_LEN) {
+ const char *first_space;
+ const char *prev_cutoff;
+ const char *cutoff;
+ int to_print;
+ size_t used;
+
+ /* Find the last ' ' that wouldn't make the line too long */
+ prev_cutoff = NULL;
+ cutoff = cmdline;
+ while (true) {
+ cutoff = strchr(cutoff + 1, ' ');
+ if (!cutoff || cutoff - cmdline > IDEAL_CMDLINE_SPLIT_LEN)
+ break;
+ prev_cutoff = cutoff;
+ }
+ if (prev_cutoff)
+ cutoff = prev_cutoff;
+ else if (!cutoff)
+ break;
+
+ /* Find the beginning and end of the string of spaces */
+ first_space = cutoff;
+ while (first_space > cmdline && first_space[-1] == ' ')
+ first_space--;
+ to_print = first_space - cmdline;
+ while (*cutoff == ' ')
+ cutoff++;
+ used = cutoff - cmdline;
+
+ /* If the whole string is used, break and do the final printout */
+ if (len == used)
+ break;
+
+ if (to_print)
+ pr_notice("%s%.*s%s\n", KERNEL_CMDLINE_PREFIX,
+ to_print, cmdline, KERNEL_CMDLINE_CONTINUATION);
+
+ len -= used;
+ cmdline += used;
+ }
+ if (len)
+ pr_notice("%s%s\n", KERNEL_CMDLINE_PREFIX, cmdline);
+}
+
asmlinkage __visible __init __no_sanitize_address __noreturn __no_stack_protector
void start_kernel(void)
{
@@ -942,7 +1037,7 @@ void start_kernel(void)
early_numa_node_init();
boot_cpu_hotplug_init();
- pr_notice("Kernel command line: %s\n", saved_command_line);
+ print_kernel_cmdline(saved_command_line);
/* parameters may set static keys */
parse_early_param();
after_dashes = parse_args("Booting kernel",
diff --git a/ipc/namespace.c b/ipc/namespace.c
index c0dbfdd9015f..535f16ea40e1 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -76,10 +76,10 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
err = -ENOMEM;
if (!setup_mq_sysctls(ns))
- goto fail_put;
+ goto fail_mq_mount;
if (!setup_ipc_sysctls(ns))
- goto fail_mq;
+ goto fail_mq_sysctls;
err = msg_init_ns(ns);
if (err)
@@ -93,9 +93,10 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
fail_ipc:
retire_ipc_sysctls(ns);
-fail_mq:
+fail_mq_sysctls:
retire_mq_sysctls(ns);
-
+fail_mq_mount:
+ mntput(ns->mq_mnt);
fail_put:
put_user_ns(ns->user_ns);
ns_common_free(ns);
diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec
index 54e581072617..15632358bcf7 100644
--- a/kernel/Kconfig.kexec
+++ b/kernel/Kconfig.kexec
@@ -94,30 +94,6 @@ config KEXEC_JUMP
Jump between original kernel and kexeced kernel and invoke
code in physical address mode via KEXEC
-config KEXEC_HANDOVER
- bool "kexec handover"
- depends on ARCH_SUPPORTS_KEXEC_HANDOVER && ARCH_SUPPORTS_KEXEC_FILE
- depends on !DEFERRED_STRUCT_PAGE_INIT
- select MEMBLOCK_KHO_SCRATCH
- select KEXEC_FILE
- select DEBUG_FS
- select LIBFDT
- select CMA
- help
- Allow kexec to hand over state across kernels by generating and
- passing additional metadata to the target kernel. This is useful
- to keep data or state alive across the kexec. For this to work,
- both source and target kernels need to have this option enabled.
-
-config KEXEC_HANDOVER_DEBUG
- bool "Enable Kexec Handover debug checks"
- depends on KEXEC_HANDOVER
- help
- This option enables extra sanity checks for the Kexec Handover
- subsystem. Since, KHO performance is crucial in live update
- scenarios and the extra code might be adding overhead it is
- only optionally enabled.
-
config CRASH_DUMP
bool "kernel crash dumps"
default ARCH_DEFAULT_CRASH_DUMP
diff --git a/kernel/Makefile b/kernel/Makefile
index 9fe722305c9b..e83669841b8c 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -52,6 +52,7 @@ obj-y += printk/
obj-y += irq/
obj-y += rcu/
obj-y += livepatch/
+obj-y += liveupdate/
obj-y += dma/
obj-y += entry/
obj-y += unwind/
@@ -82,8 +83,6 @@ obj-$(CONFIG_CRASH_DUMP_KUNIT_TEST) += crash_core_test.o
obj-$(CONFIG_KEXEC) += kexec.o
obj-$(CONFIG_KEXEC_FILE) += kexec_file.o
obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o
-obj-$(CONFIG_KEXEC_HANDOVER) += kexec_handover.o
-obj-$(CONFIG_KEXEC_HANDOVER_DEBUG) += kexec_handover_debug.o
obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
obj-$(CONFIG_COMPAT) += compat.o
obj-$(CONFIG_CGROUPS) += cgroup/
diff --git a/kernel/configs/debug.config b/kernel/configs/debug.config
index e81327d2cd63..9f6ab7dabf67 100644
--- a/kernel/configs/debug.config
+++ b/kernel/configs/debug.config
@@ -83,7 +83,7 @@ CONFIG_SLUB_DEBUG_ON=y
#
# Debug Oops, Lockups and Hangs
#
-# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC=0
# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
CONFIG_DEBUG_ATOMIC_SLEEP=y
CONFIG_DETECT_HUNG_TASK=y
diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c
index 87bf4d41eabb..62e60e0223cf 100644
--- a/kernel/crash_reserve.c
+++ b/kernel/crash_reserve.c
@@ -524,6 +524,9 @@ void __init reserve_crashkernel_cma(unsigned long long cma_size)
#ifndef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
static __init int insert_crashkernel_resources(void)
{
+ if (!arch_add_crash_res_to_iomem())
+ return 0;
+
if (crashk_res.start < crashk_res.end)
insert_resource(&iomem_resource, &crashk_res);
diff --git a/kernel/exit.c b/kernel/exit.c
index 4dc1918db67b..8a87021211ae 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -251,10 +251,8 @@ repeat:
memset(&post, 0, sizeof(post));
/* don't need to get the RCU readlock here - the process is dead and
- * can't be modifying its own credentials. But shut RCU-lockdep up */
- rcu_read_lock();
+ * can't be modifying its own credentials. */
dec_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
- rcu_read_unlock();
pidfs_exit(p);
cgroup_task_release(p);
diff --git a/kernel/fork.c b/kernel/fork.c
index 198e02e21e6e..b1f3915d5f8e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -208,15 +208,62 @@ struct vm_stack {
struct vm_struct *stack_vm_area;
};
+static struct vm_struct *alloc_thread_stack_node_from_cache(struct task_struct *tsk, int node)
+{
+ struct vm_struct *vm_area;
+ unsigned int i;
+
+ /*
+ * If the node has memory, we are guaranteed the stacks are backed by local pages.
+ * Otherwise the pages are arbitrary.
+ *
+ * Note that depending on cpuset it is possible we will get migrated to a different
+ * node immediately after allocating here, so this does *not* guarantee locality for
+ * arbitrary callers.
+ */
+ scoped_guard(preempt) {
+ if (node != NUMA_NO_NODE && numa_node_id() != node)
+ return NULL;
+
+ for (i = 0; i < NR_CACHED_STACKS; i++) {
+ vm_area = this_cpu_xchg(cached_stacks[i], NULL);
+ if (vm_area)
+ return vm_area;
+ }
+ }
+
+ return NULL;
+}
+
static bool try_release_thread_stack_to_cache(struct vm_struct *vm_area)
{
unsigned int i;
+ int nid;
- for (i = 0; i < NR_CACHED_STACKS; i++) {
- struct vm_struct *tmp = NULL;
+ /*
+ * Don't cache stacks if any of the pages don't match the local domain, unless
+ * there is no local memory to begin with.
+ *
+ * Note that lack of local memory does not automatically mean it makes no difference
+ * performance-wise which other domain backs the stack. In this case we are merely
+ * trying to avoid constantly going to vmalloc.
+ */
+ scoped_guard(preempt) {
+ nid = numa_node_id();
+ if (node_state(nid, N_MEMORY)) {
+ for (i = 0; i < vm_area->nr_pages; i++) {
+ struct page *page = vm_area->pages[i];
+ if (page_to_nid(page) != nid)
+ return false;
+ }
+ }
+
+ for (i = 0; i < NR_CACHED_STACKS; i++) {
+ struct vm_struct *tmp = NULL;
- if (this_cpu_try_cmpxchg(cached_stacks[i], &tmp, vm_area))
- return true;
+ if (this_cpu_try_cmpxchg(cached_stacks[i], &tmp, vm_area))
+ return true;
+ }
}
return false;
}
@@ -283,13 +330,9 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
struct vm_struct *vm_area;
void *stack;
- int i;
-
- for (i = 0; i < NR_CACHED_STACKS; i++) {
- vm_area = this_cpu_xchg(cached_stacks[i], NULL);
- if (!vm_area)
- continue;
+ vm_area = alloc_thread_stack_node_from_cache(tsk, node);
+ if (vm_area) {
if (memcg_charge_kernel_stack(vm_area)) {
vfree(vm_area->addr);
return -ENOMEM;
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index b2c1f14b8129..d2254c91450b 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -24,6 +24,7 @@
#include <linux/sched/sysctl.h>
#include <linux/hung_task.h>
#include <linux/rwsem.h>
+#include <linux/sys_info.h>
#include <trace/events/sched.h>
@@ -50,7 +51,6 @@ static unsigned long __read_mostly sysctl_hung_task_detect_count;
* Zero means infinite timeout - no checking done:
*/
unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;
-EXPORT_SYMBOL_GPL(sysctl_hung_task_timeout_secs);
/*
* Zero (default value) means use sysctl_hung_task_timeout_secs:
@@ -60,12 +60,17 @@ static unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
static int __read_mostly sysctl_hung_task_warnings = 10;
static int __read_mostly did_panic;
-static bool hung_task_show_lock;
static bool hung_task_call_panic;
-static bool hung_task_show_all_bt;
static struct task_struct *watchdog_task;
+/*
+ * A bitmask to control what kinds of system info to be printed when
+ * a hung task is detected, it could be task, memory, lock etc. Refer
+ * include/linux/sys_info.h for detailed bit definition.
+ */
+static unsigned long hung_task_si_mask;
+
#ifdef CONFIG_SMP
/*
* Should we dump all CPUs backtraces in a hung task event?
@@ -81,7 +86,7 @@ static unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace;
* hung task is detected:
*/
static unsigned int __read_mostly sysctl_hung_task_panic =
- IS_ENABLED(CONFIG_BOOTPARAM_HUNG_TASK_PANIC);
+ CONFIG_BOOTPARAM_HUNG_TASK_PANIC;
static int
hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
@@ -218,8 +223,11 @@ static inline void debug_show_blocker(struct task_struct *task, unsigned long ti
}
#endif
-static void check_hung_task(struct task_struct *t, unsigned long timeout)
+static void check_hung_task(struct task_struct *t, unsigned long timeout,
+ unsigned long prev_detect_count)
{
+ unsigned long total_hung_task;
+
if (!task_is_hung(t, timeout))
return;
@@ -229,11 +237,11 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
*/
sysctl_hung_task_detect_count++;
+ total_hung_task = sysctl_hung_task_detect_count - prev_detect_count;
trace_sched_process_hang(t);
- if (sysctl_hung_task_panic) {
+ if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
console_verbose();
- hung_task_show_lock = true;
hung_task_call_panic = true;
}
@@ -256,10 +264,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
" disables this message.\n");
sched_show_task(t);
debug_show_blocker(t, timeout);
- hung_task_show_lock = true;
- if (sysctl_hung_task_all_cpu_backtrace)
- hung_task_show_all_bt = true;
if (!sysctl_hung_task_warnings)
pr_info("Future hung task reports are suppressed, see sysctl kernel.hung_task_warnings\n");
}
@@ -300,6 +305,9 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
int max_count = sysctl_hung_task_check_count;
unsigned long last_break = jiffies;
struct task_struct *g, *t;
+ unsigned long prev_detect_count = sysctl_hung_task_detect_count;
+ int need_warning = sysctl_hung_task_warnings;
+ unsigned long si_mask = hung_task_si_mask;
/*
* If the system crashed already then all bets are off,
@@ -308,7 +316,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
if (test_taint(TAINT_DIE) || did_panic)
return;
- hung_task_show_lock = false;
+
rcu_read_lock();
for_each_process_thread(g, t) {
@@ -320,18 +328,23 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
last_break = jiffies;
}
- check_hung_task(t, timeout);
+ check_hung_task(t, timeout, prev_detect_count);
}
unlock:
rcu_read_unlock();
- if (hung_task_show_lock)
- debug_show_all_locks();
- if (hung_task_show_all_bt) {
- hung_task_show_all_bt = false;
- trigger_all_cpu_backtrace();
+ if (!(sysctl_hung_task_detect_count - prev_detect_count))
+ return;
+
+ if (need_warning || hung_task_call_panic) {
+ si_mask |= SYS_INFO_LOCKS;
+
+ if (sysctl_hung_task_all_cpu_backtrace)
+ si_mask |= SYS_INFO_ALL_BT;
}
+ sys_info(si_mask);
+
if (hung_task_call_panic)
panic("hung_task: blocked tasks");
}
@@ -389,7 +402,7 @@ static const struct ctl_table hung_task_sysctls[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
+ .extra2 = SYSCTL_INT_MAX,
},
{
.procname = "hung_task_check_count",
@@ -430,6 +443,13 @@ static const struct ctl_table hung_task_sysctls[] = {
.mode = 0444,
.proc_handler = proc_doulongvec_minmax,
},
+ {
+ .procname = "hung_task_sys_info",
+ .data = &hung_task_si_mask,
+ .maxlen = sizeof(hung_task_si_mask),
+ .mode = 0644,
+ .proc_handler = sysctl_sys_info_handler,
+ },
};
static void __init hung_task_sysctl_init(void)
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index fa00b239c5d9..0f92acdd354d 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -15,6 +15,7 @@
#include <linux/kexec.h>
#include <linux/mutex.h>
#include <linux/list.h>
+#include <linux/liveupdate.h>
#include <linux/highmem.h>
#include <linux/syscalls.h>
#include <linux/reboot.h>
@@ -41,6 +42,7 @@
#include <linux/objtool.h>
#include <linux/kmsg_dump.h>
#include <linux/dma-map-ops.h>
+#include <linux/sysfs.h>
#include <asm/page.h>
#include <asm/sections.h>
@@ -742,7 +744,6 @@ static int kimage_load_cma_segment(struct kimage *image, int idx)
struct kexec_segment *segment = &image->segment[idx];
struct page *cma = image->segment_cma[idx];
char *ptr = page_address(cma);
- unsigned long maddr;
size_t ubytes, mbytes;
int result = 0;
unsigned char __user *buf = NULL;
@@ -754,15 +755,12 @@ static int kimage_load_cma_segment(struct kimage *image, int idx)
buf = segment->buf;
ubytes = segment->bufsz;
mbytes = segment->memsz;
- maddr = segment->mem;
/* Then copy from source buffer to the CMA one */
while (mbytes) {
size_t uchunk, mchunk;
- ptr += maddr & ~PAGE_MASK;
- mchunk = min_t(size_t, mbytes,
- PAGE_SIZE - (maddr & ~PAGE_MASK));
+ mchunk = min_t(size_t, mbytes, PAGE_SIZE);
uchunk = min(ubytes, mchunk);
if (uchunk) {
@@ -784,7 +782,6 @@ static int kimage_load_cma_segment(struct kimage *image, int idx)
}
ptr += mchunk;
- maddr += mchunk;
mbytes -= mchunk;
cond_resched();
@@ -839,9 +836,7 @@ static int kimage_load_normal_segment(struct kimage *image, int idx)
ptr = kmap_local_page(page);
/* Start with a clear page */
clear_page(ptr);
- ptr += maddr & ~PAGE_MASK;
- mchunk = min_t(size_t, mbytes,
- PAGE_SIZE - (maddr & ~PAGE_MASK));
+ mchunk = min_t(size_t, mbytes, PAGE_SIZE);
uchunk = min(ubytes, mchunk);
if (uchunk) {
@@ -904,9 +899,7 @@ static int kimage_load_crash_segment(struct kimage *image, int idx)
}
arch_kexec_post_alloc_pages(page_address(page), 1, 0);
ptr = kmap_local_page(page);
- ptr += maddr & ~PAGE_MASK;
- mchunk = min_t(size_t, mbytes,
- PAGE_SIZE - (maddr & ~PAGE_MASK));
+ mchunk = min_t(size_t, mbytes, PAGE_SIZE);
uchunk = min(ubytes, mchunk);
if (mchunk > uchunk) {
/* Zero the trailing part of the page */
@@ -1146,6 +1139,10 @@ int kernel_kexec(void)
goto Unlock;
}
+ error = liveupdate_reboot();
+ if (error)
+ goto Unlock;
+
#ifdef CONFIG_KEXEC_JUMP
if (kexec_image->preserve_context) {
/*
@@ -1229,3 +1226,143 @@ int kernel_kexec(void)
kexec_unlock();
return error;
}
+
+static ssize_t loaded_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", !!kexec_image);
+}
+static struct kobj_attribute loaded_attr = __ATTR_RO(loaded);
+
+#ifdef CONFIG_CRASH_DUMP
+static ssize_t crash_loaded_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", kexec_crash_loaded());
+}
+static struct kobj_attribute crash_loaded_attr = __ATTR_RO(crash_loaded);
+
+#ifdef CONFIG_CRASH_RESERVE
+static ssize_t crash_cma_ranges_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+
+ ssize_t len = 0;
+ int i;
+
+ for (i = 0; i < crashk_cma_cnt; ++i) {
+ len += sysfs_emit_at(buf, len, "%08llx-%08llx\n",
+ crashk_cma_ranges[i].start,
+ crashk_cma_ranges[i].end);
+ }
+ return len;
+}
+static struct kobj_attribute crash_cma_ranges_attr = __ATTR_RO(crash_cma_ranges);
+#endif
+
+static ssize_t crash_size_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ ssize_t size = crash_get_memory_size();
+
+ if (size < 0)
+ return size;
+
+ return sysfs_emit(buf, "%zd\n", size);
+}
+static ssize_t crash_size_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long cnt;
+ int ret;
+
+ if (kstrtoul(buf, 0, &cnt))
+ return -EINVAL;
+
+ ret = crash_shrink_memory(cnt);
+ return ret < 0 ? ret : count;
+}
+static struct kobj_attribute crash_size_attr = __ATTR_RW(crash_size);
+
+#ifdef CONFIG_CRASH_HOTPLUG
+static ssize_t crash_elfcorehdr_size_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ unsigned int sz = crash_get_elfcorehdr_size();
+
+ return sysfs_emit(buf, "%u\n", sz);
+}
+static struct kobj_attribute crash_elfcorehdr_size_attr = __ATTR_RO(crash_elfcorehdr_size);
+
+#endif /* CONFIG_CRASH_HOTPLUG */
+#endif /* CONFIG_CRASH_DUMP */
+
+static struct attribute *kexec_attrs[] = {
+ &loaded_attr.attr,
+#ifdef CONFIG_CRASH_DUMP
+ &crash_loaded_attr.attr,
+ &crash_size_attr.attr,
+#ifdef CONFIG_CRASH_RESERVE
+ &crash_cma_ranges_attr.attr,
+#endif
+#ifdef CONFIG_CRASH_HOTPLUG
+ &crash_elfcorehdr_size_attr.attr,
+#endif
+#endif
+ NULL
+};
+
+struct kexec_link_entry {
+ const char *target;
+ const char *name;
+};
+
+static struct kexec_link_entry kexec_links[] = {
+ { "loaded", "kexec_loaded" },
+#ifdef CONFIG_CRASH_DUMP
+ { "crash_loaded", "kexec_crash_loaded" },
+ { "crash_size", "kexec_crash_size" },
+#ifdef CONFIG_CRASH_RESERVE
+ {"crash_cma_ranges", "kexec_crash_cma_ranges"},
+#endif
+#ifdef CONFIG_CRASH_HOTPLUG
+ { "crash_elfcorehdr_size", "crash_elfcorehdr_size" },
+#endif
+#endif
+};
+
+static struct kobject *kexec_kobj;
+ATTRIBUTE_GROUPS(kexec);
+
+static int __init init_kexec_sysctl(void)
+{
+ int error;
+ int i;
+
+ kexec_kobj = kobject_create_and_add("kexec", kernel_kobj);
+ if (!kexec_kobj) {
+ pr_err("failed to create kexec kobject\n");
+ return -ENOMEM;
+ }
+
+ error = sysfs_create_groups(kexec_kobj, kexec_groups);
+ if (error)
+ goto kset_exit;
+
+ for (i = 0; i < ARRAY_SIZE(kexec_links); i++) {
+ error = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, kexec_kobj,
+ kexec_links[i].target,
+ kexec_links[i].name);
+ if (error)
+ pr_err("Unable to create %s symlink (%d)", kexec_links[i].name, error);
+ }
+
+ return 0;
+
+kset_exit:
+ kobject_put(kexec_kobj);
+ return error;
+}
+
+subsys_initcall(init_kexec_sysctl);
diff --git a/kernel/kexec_handover_internal.h b/kernel/kexec_handover_internal.h
deleted file mode 100644
index 3c3c7148ceed..000000000000
--- a/kernel/kexec_handover_internal.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef LINUX_KEXEC_HANDOVER_INTERNAL_H
-#define LINUX_KEXEC_HANDOVER_INTERNAL_H
-
-#include <linux/kexec_handover.h>
-#include <linux/types.h>
-
-extern struct kho_scratch *kho_scratch;
-extern unsigned int kho_scratch_cnt;
-
-#ifdef CONFIG_KEXEC_HANDOVER_DEBUG
-bool kho_scratch_overlap(phys_addr_t phys, size_t size);
-#else
-static inline bool kho_scratch_overlap(phys_addr_t phys, size_t size)
-{
- return false;
-}
-#endif /* CONFIG_KEXEC_HANDOVER_DEBUG */
-
-#endif /* LINUX_KEXEC_HANDOVER_INTERNAL_H */
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index eefb67d9883c..a9e6354d9e25 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -12,7 +12,7 @@
#include <linux/sysfs.h>
#include <linux/export.h>
#include <linux/init.h>
-#include <linux/kexec.h>
+#include <linux/vmcore_info.h>
#include <linux/profile.h>
#include <linux/stat.h>
#include <linux/sched.h>
@@ -119,50 +119,6 @@ static ssize_t profiling_store(struct kobject *kobj,
KERNEL_ATTR_RW(profiling);
#endif
-#ifdef CONFIG_KEXEC_CORE
-static ssize_t kexec_loaded_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
-{
- return sysfs_emit(buf, "%d\n", !!kexec_image);
-}
-KERNEL_ATTR_RO(kexec_loaded);
-
-#ifdef CONFIG_CRASH_DUMP
-static ssize_t kexec_crash_loaded_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
-{
- return sysfs_emit(buf, "%d\n", kexec_crash_loaded());
-}
-KERNEL_ATTR_RO(kexec_crash_loaded);
-
-static ssize_t kexec_crash_size_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
-{
- ssize_t size = crash_get_memory_size();
-
- if (size < 0)
- return size;
-
- return sysfs_emit(buf, "%zd\n", size);
-}
-static ssize_t kexec_crash_size_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t count)
-{
- unsigned long cnt;
- int ret;
-
- if (kstrtoul(buf, 0, &cnt))
- return -EINVAL;
-
- ret = crash_shrink_memory(cnt);
- return ret < 0 ? ret : count;
-}
-KERNEL_ATTR_RW(kexec_crash_size);
-
-#endif /* CONFIG_CRASH_DUMP*/
-#endif /* CONFIG_KEXEC_CORE */
-
#ifdef CONFIG_VMCORE_INFO
static ssize_t vmcoreinfo_show(struct kobject *kobj,
@@ -174,18 +130,6 @@ static ssize_t vmcoreinfo_show(struct kobject *kobj,
}
KERNEL_ATTR_RO(vmcoreinfo);
-#ifdef CONFIG_CRASH_HOTPLUG
-static ssize_t crash_elfcorehdr_size_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
-{
- unsigned int sz = crash_get_elfcorehdr_size();
-
- return sysfs_emit(buf, "%u\n", sz);
-}
-KERNEL_ATTR_RO(crash_elfcorehdr_size);
-
-#endif
-
#endif /* CONFIG_VMCORE_INFO */
/* whether file capabilities are enabled */
@@ -255,18 +199,8 @@ static struct attribute * kernel_attrs[] = {
#ifdef CONFIG_PROFILING
&profiling_attr.attr,
#endif
-#ifdef CONFIG_KEXEC_CORE
- &kexec_loaded_attr.attr,
-#ifdef CONFIG_CRASH_DUMP
- &kexec_crash_loaded_attr.attr,
- &kexec_crash_size_attr.attr,
-#endif
-#endif
#ifdef CONFIG_VMCORE_INFO
&vmcoreinfo_attr.attr,
-#ifdef CONFIG_CRASH_HOTPLUG
- &crash_elfcorehdr_size_attr.attr,
-#endif
#endif
#ifndef CONFIG_TINY_RCU
&rcu_expedited_attr.attr,
diff --git a/kernel/liveupdate/Kconfig b/kernel/liveupdate/Kconfig
new file mode 100644
index 000000000000..9b2515f31afb
--- /dev/null
+++ b/kernel/liveupdate/Kconfig
@@ -0,0 +1,75 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+menu "Live Update and Kexec HandOver"
+ depends on !DEFERRED_STRUCT_PAGE_INIT
+
+config KEXEC_HANDOVER
+ bool "kexec handover"
+ depends on ARCH_SUPPORTS_KEXEC_HANDOVER && ARCH_SUPPORTS_KEXEC_FILE
+ depends on !DEFERRED_STRUCT_PAGE_INIT
+ select MEMBLOCK_KHO_SCRATCH
+ select KEXEC_FILE
+ select LIBFDT
+ select CMA
+ help
+ Allow kexec to hand over state across kernels by generating and
+ passing additional metadata to the target kernel. This is useful
+ to keep data or state alive across the kexec. For this to work,
+ both source and target kernels need to have this option enabled.
+
+config KEXEC_HANDOVER_DEBUG
+ bool "Enable Kexec Handover debug checks"
+ depends on KEXEC_HANDOVER
+ help
+ This option enables extra sanity checks for the Kexec Handover
+ subsystem. Since, KHO performance is crucial in live update
+ scenarios and the extra code might be adding overhead it is
+ only optionally enabled.
+
+config KEXEC_HANDOVER_DEBUGFS
+ bool "kexec handover debugfs interface"
+ default KEXEC_HANDOVER
+ depends on KEXEC_HANDOVER
+ select DEBUG_FS
+ help
+ Allow to control kexec handover device tree via debugfs
+ interface, i.e. finalize the state or aborting the finalization.
+ Also, enables inspecting the KHO fdt trees with the debugfs binary
+ blobs.
+
+config KEXEC_HANDOVER_ENABLE_DEFAULT
+ bool "Enable kexec handover by default"
+ depends on KEXEC_HANDOVER
+ help
+ Enable Kexec Handover by default. This avoids the need to
+ explicitly pass 'kho=on' on the kernel command line.
+
+ This is useful for systems where KHO is a prerequisite for other
+ features, such as Live Update, ensuring the mechanism is always
+ active.
+
+ The default behavior can still be overridden at boot time by
+ passing 'kho=off'.
+
+config LIVEUPDATE
+ bool "Live Update Orchestrator"
+ depends on KEXEC_HANDOVER
+ help
+ Enable the Live Update Orchestrator. Live Update is a mechanism,
+ typically based on kexec, that allows the kernel to be updated
+ while keeping selected devices operational across the transition.
+ These devices are intended to be reclaimed by the new kernel and
+ re-attached to their original workload without requiring a device
+ reset.
+
+ Ability to handover a device from current to the next kernel depends
+ on specific support within device drivers and related kernel
+ subsystems.
+
+ This feature primarily targets virtual machine hosts to quickly update
+ the kernel hypervisor with minimal disruption to the running virtual
+ machines.
+
+ If unsure, say N.
+
+endmenu
diff --git a/kernel/liveupdate/Makefile b/kernel/liveupdate/Makefile
new file mode 100644
index 000000000000..7cad2eece32d
--- /dev/null
+++ b/kernel/liveupdate/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+
+luo-y := \
+ luo_core.o \
+ luo_file.o \
+ luo_session.o
+
+obj-$(CONFIG_KEXEC_HANDOVER) += kexec_handover.o
+obj-$(CONFIG_KEXEC_HANDOVER_DEBUG) += kexec_handover_debug.o
+obj-$(CONFIG_KEXEC_HANDOVER_DEBUGFS) += kexec_handover_debugfs.o
+
+obj-$(CONFIG_LIVEUPDATE) += luo.o
diff --git a/kernel/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index 03d12e27189f..9dc51fab604f 100644
--- a/kernel/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -4,21 +4,22 @@
* Copyright (C) 2023 Alexander Graf <graf@amazon.com>
* Copyright (C) 2025 Microsoft Corporation, Mike Rapoport <rppt@kernel.org>
* Copyright (C) 2025 Google LLC, Changyuan Lyu <changyuanl@google.com>
+ * Copyright (C) 2025 Pasha Tatashin <pasha.tatashin@soleen.com>
*/
#define pr_fmt(fmt) "KHO: " fmt
#include <linux/cleanup.h>
#include <linux/cma.h>
+#include <linux/kmemleak.h>
#include <linux/count_zeros.h>
-#include <linux/debugfs.h>
#include <linux/kexec.h>
#include <linux/kexec_handover.h>
#include <linux/libfdt.h>
#include <linux/list.h>
#include <linux/memblock.h>
-#include <linux/notifier.h>
#include <linux/page-isolation.h>
+#include <linux/unaligned.h>
#include <linux/vmalloc.h>
#include <asm/early_ioremap.h>
@@ -28,8 +29,9 @@
* KHO is tightly coupled with mm init and needs access to some of mm
* internal APIs.
*/
-#include "../mm/internal.h"
-#include "kexec_internal.h"
+#include "../../mm/internal.h"
+#include "../kexec_internal.h"
+#include "kexec_handover_internal.h"
#define KHO_FDT_COMPATIBLE "kho-v1"
#define PROP_PRESERVED_MEMORY_MAP "preserved-memory-map"
@@ -51,7 +53,7 @@ union kho_page_info {
static_assert(sizeof(union kho_page_info) == sizeof(((struct page *)0)->private));
-static bool kho_enable __ro_after_init;
+static bool kho_enable __ro_after_init = IS_ENABLED(CONFIG_KEXEC_HANDOVER_ENABLE_DEFAULT);
bool kho_is_enabled(void)
{
@@ -103,34 +105,19 @@ struct kho_mem_track {
struct khoser_mem_chunk;
-struct kho_serialization {
- struct page *fdt;
- struct list_head fdt_list;
- struct dentry *sub_fdt_dir;
- struct kho_mem_track track;
- /* First chunk of serialized preserved memory map */
- struct khoser_mem_chunk *preserved_mem_map;
-};
-
struct kho_out {
- struct blocking_notifier_head chain_head;
-
- struct dentry *dir;
-
+ void *fdt;
+ bool finalized;
struct mutex lock; /* protects KHO FDT finalization */
- struct kho_serialization ser;
- bool finalized;
+ struct kho_mem_track track;
+ struct kho_debugfs dbg;
};
static struct kho_out kho_out = {
- .chain_head = BLOCKING_NOTIFIER_INIT(kho_out.chain_head),
.lock = __MUTEX_INITIALIZER(kho_out.lock),
- .ser = {
- .fdt_list = LIST_HEAD_INIT(kho_out.ser.fdt_list),
- .track = {
- .orders = XARRAY_INIT(kho_out.ser.track.orders, 0),
- },
+ .track = {
+ .orders = XARRAY_INIT(kho_out.track.orders, 0),
},
.finalized = false,
};
@@ -159,26 +146,33 @@ static void *xa_load_or_alloc(struct xarray *xa, unsigned long index)
return no_free_ptr(elm);
}
-static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn,
- unsigned long end_pfn)
+static void __kho_unpreserve_order(struct kho_mem_track *track, unsigned long pfn,
+ unsigned int order)
{
struct kho_mem_phys_bits *bits;
struct kho_mem_phys *physxa;
+ const unsigned long pfn_high = pfn >> order;
- while (pfn < end_pfn) {
- const unsigned int order =
- min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
- const unsigned long pfn_high = pfn >> order;
+ physxa = xa_load(&track->orders, order);
+ if (WARN_ON_ONCE(!physxa))
+ return;
- physxa = xa_load(&track->orders, order);
- if (WARN_ON_ONCE(!physxa))
- return;
+ bits = xa_load(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
+ if (WARN_ON_ONCE(!bits))
+ return;
- bits = xa_load(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
- if (WARN_ON_ONCE(!bits))
- return;
+ clear_bit(pfn_high % PRESERVE_BITS, bits->preserve);
+}
- clear_bit(pfn_high % PRESERVE_BITS, bits->preserve);
+static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn,
+ unsigned long end_pfn)
+{
+ unsigned int order;
+
+ while (pfn < end_pfn) {
+ order = min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
+
+ __kho_unpreserve_order(track, pfn, order);
pfn += 1 << order;
}
@@ -192,10 +186,6 @@ static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn,
const unsigned long pfn_high = pfn >> order;
might_sleep();
-
- if (kho_out.finalized)
- return -EBUSY;
-
physxa = xa_load(&track->orders, order);
if (!physxa) {
int err;
@@ -229,11 +219,11 @@ static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn,
return 0;
}
-static struct page *kho_restore_page(phys_addr_t phys)
+static struct page *kho_restore_page(phys_addr_t phys, bool is_folio)
{
struct page *page = pfn_to_online_page(PHYS_PFN(phys));
+ unsigned int nr_pages, ref_cnt;
union kho_page_info info;
- unsigned int nr_pages;
if (!page)
return NULL;
@@ -253,11 +243,16 @@ static struct page *kho_restore_page(phys_addr_t phys)
/* Head page gets refcount of 1. */
set_page_count(page, 1);
- /* For higher order folios, tail pages get a page count of zero. */
+ /*
+ * For higher order folios, tail pages get a page count of zero.
+ * For physically contiguous order-0 pages every pages gets a page
+ * count of 1
+ */
+ ref_cnt = is_folio ? 0 : 1;
for (unsigned int i = 1; i < nr_pages; i++)
- set_page_count(page + i, 0);
+ set_page_count(page + i, ref_cnt);
- if (info.order > 0)
+ if (is_folio && info.order)
prep_compound_page(page, info.order);
adjust_managed_page_count(page, nr_pages);
@@ -272,7 +267,7 @@ static struct page *kho_restore_page(phys_addr_t phys)
*/
struct folio *kho_restore_folio(phys_addr_t phys)
{
- struct page *page = kho_restore_page(phys);
+ struct page *page = kho_restore_page(phys, true);
return page ? page_folio(page) : NULL;
}
@@ -297,11 +292,10 @@ struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages)
while (pfn < end_pfn) {
const unsigned int order =
min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
- struct page *page = kho_restore_page(PFN_PHYS(pfn));
+ struct page *page = kho_restore_page(PFN_PHYS(pfn), false);
if (!page)
return NULL;
- split_page(page, order);
pfn += 1 << order;
}
@@ -371,11 +365,32 @@ static void kho_mem_ser_free(struct khoser_mem_chunk *first_chunk)
struct khoser_mem_chunk *tmp = chunk;
chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
- kfree(tmp);
+ free_page((unsigned long)tmp);
}
}
-static int kho_mem_serialize(struct kho_serialization *ser)
+/*
+ * Update memory map property, if old one is found discard it via
+ * kho_mem_ser_free().
+ */
+static void kho_update_memory_map(struct khoser_mem_chunk *first_chunk)
+{
+ void *ptr;
+ u64 phys;
+
+ ptr = fdt_getprop_w(kho_out.fdt, 0, PROP_PRESERVED_MEMORY_MAP, NULL);
+
+ /* Check and discard previous memory map */
+ phys = get_unaligned((u64 *)ptr);
+ if (phys)
+ kho_mem_ser_free((struct khoser_mem_chunk *)phys_to_virt(phys));
+
+ /* Update with the new value */
+ phys = first_chunk ? (u64)virt_to_phys(first_chunk) : 0;
+ put_unaligned(phys, (u64 *)ptr);
+}
+
+static int kho_mem_serialize(struct kho_out *kho_out)
{
struct khoser_mem_chunk *first_chunk = NULL;
struct khoser_mem_chunk *chunk = NULL;
@@ -383,7 +398,7 @@ static int kho_mem_serialize(struct kho_serialization *ser)
unsigned long order;
int err = -ENOMEM;
- xa_for_each(&ser->track.orders, order, physxa) {
+ xa_for_each(&kho_out->track.orders, order, physxa) {
struct kho_mem_phys_bits *bits;
unsigned long phys;
@@ -415,7 +430,7 @@ static int kho_mem_serialize(struct kho_serialization *ser)
}
}
- ser->preserved_mem_map = first_chunk;
+ kho_update_memory_map(first_chunk);
return 0;
@@ -445,20 +460,27 @@ static void __init deserialize_bitmap(unsigned int order,
}
}
-static void __init kho_mem_deserialize(const void *fdt)
+/* Return true if memory was deserizlied */
+static bool __init kho_mem_deserialize(const void *fdt)
{
struct khoser_mem_chunk *chunk;
- const phys_addr_t *mem;
+ const void *mem_ptr;
+ u64 mem;
int len;
- mem = fdt_getprop(fdt, 0, PROP_PRESERVED_MEMORY_MAP, &len);
-
- if (!mem || len != sizeof(*mem)) {
+ mem_ptr = fdt_getprop(fdt, 0, PROP_PRESERVED_MEMORY_MAP, &len);
+ if (!mem_ptr || len != sizeof(u64)) {
pr_err("failed to get preserved memory bitmaps\n");
- return;
+ return false;
}
- chunk = *mem ? phys_to_virt(*mem) : NULL;
+ mem = get_unaligned((const u64 *)mem_ptr);
+ chunk = mem ? phys_to_virt(mem) : NULL;
+
+ /* No preserved physical pages were passed, no deserialization */
+ if (!chunk)
+ return false;
+
while (chunk) {
unsigned int i;
@@ -467,6 +489,8 @@ static void __init kho_mem_deserialize(const void *fdt)
&chunk->bitmaps[i]);
chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
}
+
+ return true;
}
/*
@@ -674,40 +698,8 @@ err_disable_kho:
kho_enable = false;
}
-struct fdt_debugfs {
- struct list_head list;
- struct debugfs_blob_wrapper wrapper;
- struct dentry *file;
-};
-
-static int kho_debugfs_fdt_add(struct list_head *list, struct dentry *dir,
- const char *name, const void *fdt)
-{
- struct fdt_debugfs *f;
- struct dentry *file;
-
- f = kmalloc(sizeof(*f), GFP_KERNEL);
- if (!f)
- return -ENOMEM;
-
- f->wrapper.data = (void *)fdt;
- f->wrapper.size = fdt_totalsize(fdt);
-
- file = debugfs_create_blob(name, 0400, dir, &f->wrapper);
- if (IS_ERR(file)) {
- kfree(f);
- return PTR_ERR(file);
- }
-
- f->file = file;
- list_add(&f->list, list);
-
- return 0;
-}
-
/**
* kho_add_subtree - record the physical address of a sub FDT in KHO root tree.
- * @ser: serialization control object passed by KHO notifiers.
* @name: name of the sub tree.
* @fdt: the sub tree blob.
*
@@ -716,38 +708,76 @@ static int kho_debugfs_fdt_add(struct list_head *list, struct dentry *dir,
* by KHO for the new kernel to retrieve it after kexec.
*
* A debugfs blob entry is also created at
- * ``/sys/kernel/debug/kho/out/sub_fdts/@name``.
+ * ``/sys/kernel/debug/kho/out/sub_fdts/@name`` when kernel is configured with
+ * CONFIG_KEXEC_HANDOVER_DEBUGFS
*
* Return: 0 on success, error code on failure
*/
-int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt)
+int kho_add_subtree(const char *name, void *fdt)
{
- int err = 0;
- u64 phys = (u64)virt_to_phys(fdt);
- void *root = page_to_virt(ser->fdt);
+ phys_addr_t phys = virt_to_phys(fdt);
+ void *root_fdt = kho_out.fdt;
+ int err = -ENOMEM;
+ int off, fdt_err;
- err |= fdt_begin_node(root, name);
- err |= fdt_property(root, PROP_SUB_FDT, &phys, sizeof(phys));
- err |= fdt_end_node(root);
+ guard(mutex)(&kho_out.lock);
- if (err)
+ fdt_err = fdt_open_into(root_fdt, root_fdt, PAGE_SIZE);
+ if (fdt_err < 0)
return err;
- return kho_debugfs_fdt_add(&ser->fdt_list, ser->sub_fdt_dir, name, fdt);
+ off = fdt_add_subnode(root_fdt, 0, name);
+ if (off < 0) {
+ if (off == -FDT_ERR_EXISTS)
+ err = -EEXIST;
+ goto out_pack;
+ }
+
+ err = fdt_setprop(root_fdt, off, PROP_SUB_FDT, &phys, sizeof(phys));
+ if (err < 0)
+ goto out_pack;
+
+ WARN_ON_ONCE(kho_debugfs_fdt_add(&kho_out.dbg, name, fdt, false));
+
+out_pack:
+ fdt_pack(root_fdt);
+
+ return err;
}
EXPORT_SYMBOL_GPL(kho_add_subtree);
-int register_kho_notifier(struct notifier_block *nb)
+void kho_remove_subtree(void *fdt)
{
- return blocking_notifier_chain_register(&kho_out.chain_head, nb);
-}
-EXPORT_SYMBOL_GPL(register_kho_notifier);
+ phys_addr_t target_phys = virt_to_phys(fdt);
+ void *root_fdt = kho_out.fdt;
+ int off;
+ int err;
-int unregister_kho_notifier(struct notifier_block *nb)
-{
- return blocking_notifier_chain_unregister(&kho_out.chain_head, nb);
+ guard(mutex)(&kho_out.lock);
+
+ err = fdt_open_into(root_fdt, root_fdt, PAGE_SIZE);
+ if (err < 0)
+ return;
+
+ for (off = fdt_first_subnode(root_fdt, 0); off >= 0;
+ off = fdt_next_subnode(root_fdt, off)) {
+ const u64 *val;
+ int len;
+
+ val = fdt_getprop(root_fdt, off, PROP_SUB_FDT, &len);
+ if (!val || len != sizeof(phys_addr_t))
+ continue;
+
+ if ((phys_addr_t)*val == target_phys) {
+ fdt_del_node(root_fdt, off);
+ kho_debugfs_fdt_remove(&kho_out.dbg, fdt);
+ break;
+ }
+ }
+
+ fdt_pack(root_fdt);
}
-EXPORT_SYMBOL_GPL(unregister_kho_notifier);
+EXPORT_SYMBOL_GPL(kho_remove_subtree);
/**
* kho_preserve_folio - preserve a folio across kexec.
@@ -762,7 +792,7 @@ int kho_preserve_folio(struct folio *folio)
{
const unsigned long pfn = folio_pfn(folio);
const unsigned int order = folio_order(folio);
- struct kho_mem_track *track = &kho_out.ser.track;
+ struct kho_mem_track *track = &kho_out.track;
if (WARN_ON(kho_scratch_overlap(pfn << PAGE_SHIFT, PAGE_SIZE << order)))
return -EINVAL;
@@ -772,6 +802,24 @@ int kho_preserve_folio(struct folio *folio)
EXPORT_SYMBOL_GPL(kho_preserve_folio);
/**
+ * kho_unpreserve_folio - unpreserve a folio.
+ * @folio: folio to unpreserve.
+ *
+ * Instructs KHO to unpreserve a folio that was preserved by
+ * kho_preserve_folio() before. The provided @folio (pfn and order)
+ * must exactly match a previously preserved folio.
+ */
+void kho_unpreserve_folio(struct folio *folio)
+{
+ const unsigned long pfn = folio_pfn(folio);
+ const unsigned int order = folio_order(folio);
+ struct kho_mem_track *track = &kho_out.track;
+
+ __kho_unpreserve_order(track, pfn, order);
+}
+EXPORT_SYMBOL_GPL(kho_unpreserve_folio);
+
+/**
* kho_preserve_pages - preserve contiguous pages across kexec
* @page: first page in the list.
* @nr_pages: number of pages.
@@ -783,7 +831,7 @@ EXPORT_SYMBOL_GPL(kho_preserve_folio);
*/
int kho_preserve_pages(struct page *page, unsigned int nr_pages)
{
- struct kho_mem_track *track = &kho_out.ser.track;
+ struct kho_mem_track *track = &kho_out.track;
const unsigned long start_pfn = page_to_pfn(page);
const unsigned long end_pfn = start_pfn + nr_pages;
unsigned long pfn = start_pfn;
@@ -815,6 +863,26 @@ int kho_preserve_pages(struct page *page, unsigned int nr_pages)
}
EXPORT_SYMBOL_GPL(kho_preserve_pages);
+/**
+ * kho_unpreserve_pages - unpreserve contiguous pages.
+ * @page: first page in the list.
+ * @nr_pages: number of pages.
+ *
+ * Instructs KHO to unpreserve @nr_pages contiguous pages starting from @page.
+ * This must be called with the same @page and @nr_pages as the corresponding
+ * kho_preserve_pages() call. Unpreserving arbitrary sub-ranges of larger
+ * preserved blocks is not supported.
+ */
+void kho_unpreserve_pages(struct page *page, unsigned int nr_pages)
+{
+ struct kho_mem_track *track = &kho_out.track;
+ const unsigned long start_pfn = page_to_pfn(page);
+ const unsigned long end_pfn = start_pfn + nr_pages;
+
+ __kho_unpreserve(track, start_pfn, end_pfn);
+}
+EXPORT_SYMBOL_GPL(kho_unpreserve_pages);
+
struct kho_vmalloc_hdr {
DECLARE_KHOSER_PTR(next, struct kho_vmalloc_chunk *);
};
@@ -885,7 +953,7 @@ err_free:
static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk,
unsigned short order)
{
- struct kho_mem_track *track = &kho_out.ser.track;
+ struct kho_mem_track *track = &kho_out.track;
unsigned long pfn = PHYS_PFN(virt_to_phys(chunk));
__kho_unpreserve(track, pfn, pfn + 1);
@@ -896,20 +964,6 @@ static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk,
}
}
-static void kho_vmalloc_free_chunks(struct kho_vmalloc *kho_vmalloc)
-{
- struct kho_vmalloc_chunk *chunk = KHOSER_LOAD_PTR(kho_vmalloc->first);
-
- while (chunk) {
- struct kho_vmalloc_chunk *tmp = chunk;
-
- kho_vmalloc_unpreserve_chunk(chunk, kho_vmalloc->order);
-
- chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
- free_page((unsigned long)tmp);
- }
-}
-
/**
* kho_preserve_vmalloc - preserve memory allocated with vmalloc() across kexec
* @ptr: pointer to the area in vmalloc address space
@@ -971,12 +1025,34 @@ int kho_preserve_vmalloc(void *ptr, struct kho_vmalloc *preservation)
return 0;
err_free:
- kho_vmalloc_free_chunks(preservation);
+ kho_unpreserve_vmalloc(preservation);
return err;
}
EXPORT_SYMBOL_GPL(kho_preserve_vmalloc);
/**
+ * kho_unpreserve_vmalloc - unpreserve memory allocated with vmalloc()
+ * @preservation: preservation metadata returned by kho_preserve_vmalloc()
+ *
+ * Instructs KHO to unpreserve the area in vmalloc address space that was
+ * previously preserved with kho_preserve_vmalloc().
+ */
+void kho_unpreserve_vmalloc(struct kho_vmalloc *preservation)
+{
+ struct kho_vmalloc_chunk *chunk = KHOSER_LOAD_PTR(preservation->first);
+
+ while (chunk) {
+ struct kho_vmalloc_chunk *tmp = chunk;
+
+ kho_vmalloc_unpreserve_chunk(chunk, preservation->order);
+
+ chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
+ free_page((unsigned long)tmp);
+ }
+}
+EXPORT_SYMBOL_GPL(kho_unpreserve_vmalloc);
+
+/**
* kho_restore_vmalloc - recreates and populates an area in vmalloc address
* space from the preserved memory.
* @preservation: preservation metadata.
@@ -1024,7 +1100,7 @@ void *kho_restore_vmalloc(const struct kho_vmalloc *preservation)
goto err_free_pages_array;
for (int j = 0; j < contig_pages; j++)
- pages[idx++] = page;
+ pages[idx++] = page + j;
phys += contig_pages * PAGE_SIZE;
}
@@ -1065,217 +1141,122 @@ err_free_pages_array:
}
EXPORT_SYMBOL_GPL(kho_restore_vmalloc);
-/* Handling for debug/kho/out */
-
-static struct dentry *debugfs_root;
-
-static int kho_out_update_debugfs_fdt(void)
-{
- int err = 0;
- struct fdt_debugfs *ff, *tmp;
-
- if (kho_out.finalized) {
- err = kho_debugfs_fdt_add(&kho_out.ser.fdt_list, kho_out.dir,
- "fdt", page_to_virt(kho_out.ser.fdt));
- } else {
- list_for_each_entry_safe(ff, tmp, &kho_out.ser.fdt_list, list) {
- debugfs_remove(ff->file);
- list_del(&ff->list);
- kfree(ff);
- }
- }
-
- return err;
-}
-
-static int kho_abort(void)
+/**
+ * kho_alloc_preserve - Allocate, zero, and preserve memory.
+ * @size: The number of bytes to allocate.
+ *
+ * Allocates a physically contiguous block of zeroed pages that is large
+ * enough to hold @size bytes. The allocated memory is then registered with
+ * KHO for preservation across a kexec.
+ *
+ * Note: The actual allocated size will be rounded up to the nearest
+ * power-of-two page boundary.
+ *
+ * @return A virtual pointer to the allocated and preserved memory on success,
+ * or an ERR_PTR() encoded error on failure.
+ */
+void *kho_alloc_preserve(size_t size)
{
- int err;
- unsigned long order;
- struct kho_mem_phys *physxa;
+ struct folio *folio;
+ int order, ret;
- xa_for_each(&kho_out.ser.track.orders, order, physxa) {
- struct kho_mem_phys_bits *bits;
- unsigned long phys;
+ if (!size)
+ return ERR_PTR(-EINVAL);
- xa_for_each(&physxa->phys_bits, phys, bits)
- kfree(bits);
+ order = get_order(size);
+ if (order > MAX_PAGE_ORDER)
+ return ERR_PTR(-E2BIG);
- xa_destroy(&physxa->phys_bits);
- kfree(physxa);
- }
- xa_destroy(&kho_out.ser.track.orders);
+ folio = folio_alloc(GFP_KERNEL | __GFP_ZERO, order);
+ if (!folio)
+ return ERR_PTR(-ENOMEM);
- if (kho_out.ser.preserved_mem_map) {
- kho_mem_ser_free(kho_out.ser.preserved_mem_map);
- kho_out.ser.preserved_mem_map = NULL;
+ ret = kho_preserve_folio(folio);
+ if (ret) {
+ folio_put(folio);
+ return ERR_PTR(ret);
}
- err = blocking_notifier_call_chain(&kho_out.chain_head, KEXEC_KHO_ABORT,
- NULL);
- err = notifier_to_errno(err);
-
- if (err)
- pr_err("Failed to abort KHO finalization: %d\n", err);
-
- return err;
+ return folio_address(folio);
}
+EXPORT_SYMBOL_GPL(kho_alloc_preserve);
-static int kho_finalize(void)
+/**
+ * kho_unpreserve_free - Unpreserve and free memory.
+ * @mem: Pointer to the memory allocated by kho_alloc_preserve().
+ *
+ * Unregisters the memory from KHO preservation and frees the underlying
+ * pages back to the system. This function should be called to clean up
+ * memory allocated with kho_alloc_preserve().
+ */
+void kho_unpreserve_free(void *mem)
{
- int err = 0;
- u64 *preserved_mem_map;
- void *fdt = page_to_virt(kho_out.ser.fdt);
-
- err |= fdt_create(fdt, PAGE_SIZE);
- err |= fdt_finish_reservemap(fdt);
- err |= fdt_begin_node(fdt, "");
- err |= fdt_property_string(fdt, "compatible", KHO_FDT_COMPATIBLE);
- /**
- * Reserve the preserved-memory-map property in the root FDT, so
- * that all property definitions will precede subnodes created by
- * KHO callers.
- */
- err |= fdt_property_placeholder(fdt, PROP_PRESERVED_MEMORY_MAP,
- sizeof(*preserved_mem_map),
- (void **)&preserved_mem_map);
- if (err)
- goto abort;
-
- err = kho_preserve_folio(page_folio(kho_out.ser.fdt));
- if (err)
- goto abort;
-
- err = blocking_notifier_call_chain(&kho_out.chain_head,
- KEXEC_KHO_FINALIZE, &kho_out.ser);
- err = notifier_to_errno(err);
- if (err)
- goto abort;
-
- err = kho_mem_serialize(&kho_out.ser);
- if (err)
- goto abort;
-
- *preserved_mem_map = (u64)virt_to_phys(kho_out.ser.preserved_mem_map);
-
- err |= fdt_end_node(fdt);
- err |= fdt_finish(fdt);
+ struct folio *folio;
-abort:
- if (err) {
- pr_err("Failed to convert KHO state tree: %d\n", err);
- kho_abort();
- }
+ if (!mem)
+ return;
- return err;
+ folio = virt_to_folio(mem);
+ kho_unpreserve_folio(folio);
+ folio_put(folio);
}
+EXPORT_SYMBOL_GPL(kho_unpreserve_free);
-static int kho_out_finalize_get(void *data, u64 *val)
+/**
+ * kho_restore_free - Restore and free memory after kexec.
+ * @mem: Pointer to the memory (in the new kernel's address space)
+ * that was allocated by the old kernel.
+ *
+ * This function is intended to be called in the new kernel (post-kexec)
+ * to take ownership of and free a memory region that was preserved by the
+ * old kernel using kho_alloc_preserve().
+ *
+ * It first restores the pages from KHO (using their physical address)
+ * and then frees the pages back to the new kernel's page allocator.
+ */
+void kho_restore_free(void *mem)
{
- mutex_lock(&kho_out.lock);
- *val = kho_out.finalized;
- mutex_unlock(&kho_out.lock);
+ struct folio *folio;
- return 0;
+ if (!mem)
+ return;
+
+ folio = kho_restore_folio(__pa(mem));
+ if (!WARN_ON(!folio))
+ folio_put(folio);
}
+EXPORT_SYMBOL_GPL(kho_restore_free);
-static int kho_out_finalize_set(void *data, u64 _val)
+int kho_finalize(void)
{
- int ret = 0;
- bool val = !!_val;
-
- mutex_lock(&kho_out.lock);
-
- if (val == kho_out.finalized) {
- if (kho_out.finalized)
- ret = -EEXIST;
- else
- ret = -ENOENT;
- goto unlock;
- }
+ int ret;
- if (val)
- ret = kho_finalize();
- else
- ret = kho_abort();
+ if (!kho_enable)
+ return -EOPNOTSUPP;
+ guard(mutex)(&kho_out.lock);
+ ret = kho_mem_serialize(&kho_out);
if (ret)
- goto unlock;
-
- kho_out.finalized = val;
- ret = kho_out_update_debugfs_fdt();
-
-unlock:
- mutex_unlock(&kho_out.lock);
- return ret;
-}
-
-DEFINE_DEBUGFS_ATTRIBUTE(fops_kho_out_finalize, kho_out_finalize_get,
- kho_out_finalize_set, "%llu\n");
+ return ret;
-static int scratch_phys_show(struct seq_file *m, void *v)
-{
- for (int i = 0; i < kho_scratch_cnt; i++)
- seq_printf(m, "0x%llx\n", kho_scratch[i].addr);
+ kho_out.finalized = true;
return 0;
}
-DEFINE_SHOW_ATTRIBUTE(scratch_phys);
-static int scratch_len_show(struct seq_file *m, void *v)
+bool kho_finalized(void)
{
- for (int i = 0; i < kho_scratch_cnt; i++)
- seq_printf(m, "0x%llx\n", kho_scratch[i].size);
-
- return 0;
-}
-DEFINE_SHOW_ATTRIBUTE(scratch_len);
-
-static __init int kho_out_debugfs_init(void)
-{
- struct dentry *dir, *f, *sub_fdt_dir;
-
- dir = debugfs_create_dir("out", debugfs_root);
- if (IS_ERR(dir))
- return -ENOMEM;
-
- sub_fdt_dir = debugfs_create_dir("sub_fdts", dir);
- if (IS_ERR(sub_fdt_dir))
- goto err_rmdir;
-
- f = debugfs_create_file("scratch_phys", 0400, dir, NULL,
- &scratch_phys_fops);
- if (IS_ERR(f))
- goto err_rmdir;
-
- f = debugfs_create_file("scratch_len", 0400, dir, NULL,
- &scratch_len_fops);
- if (IS_ERR(f))
- goto err_rmdir;
-
- f = debugfs_create_file("finalize", 0600, dir, NULL,
- &fops_kho_out_finalize);
- if (IS_ERR(f))
- goto err_rmdir;
-
- kho_out.dir = dir;
- kho_out.ser.sub_fdt_dir = sub_fdt_dir;
- return 0;
-
-err_rmdir:
- debugfs_remove_recursive(dir);
- return -ENOENT;
+ guard(mutex)(&kho_out.lock);
+ return kho_out.finalized;
}
struct kho_in {
- struct dentry *dir;
phys_addr_t fdt_phys;
phys_addr_t scratch_phys;
- struct list_head fdt_list;
+ struct kho_debugfs dbg;
};
static struct kho_in kho_in = {
- .fdt_list = LIST_HEAD_INIT(kho_in.fdt_list),
};
static const void *kho_get_fdt(void)
@@ -1339,91 +1320,52 @@ int kho_retrieve_subtree(const char *name, phys_addr_t *phys)
}
EXPORT_SYMBOL_GPL(kho_retrieve_subtree);
-/* Handling for debugfs/kho/in */
-
-static __init int kho_in_debugfs_init(const void *fdt)
+static __init int kho_out_fdt_setup(void)
{
- struct dentry *sub_fdt_dir;
- int err, child;
-
- kho_in.dir = debugfs_create_dir("in", debugfs_root);
- if (IS_ERR(kho_in.dir))
- return PTR_ERR(kho_in.dir);
-
- sub_fdt_dir = debugfs_create_dir("sub_fdts", kho_in.dir);
- if (IS_ERR(sub_fdt_dir)) {
- err = PTR_ERR(sub_fdt_dir);
- goto err_rmdir;
- }
-
- err = kho_debugfs_fdt_add(&kho_in.fdt_list, kho_in.dir, "fdt", fdt);
- if (err)
- goto err_rmdir;
-
- fdt_for_each_subnode(child, fdt, 0) {
- int len = 0;
- const char *name = fdt_get_name(fdt, child, NULL);
- const u64 *fdt_phys;
-
- fdt_phys = fdt_getprop(fdt, child, "fdt", &len);
- if (!fdt_phys)
- continue;
- if (len != sizeof(*fdt_phys)) {
- pr_warn("node `%s`'s prop `fdt` has invalid length: %d\n",
- name, len);
- continue;
- }
- err = kho_debugfs_fdt_add(&kho_in.fdt_list, sub_fdt_dir, name,
- phys_to_virt(*fdt_phys));
- if (err) {
- pr_warn("failed to add fdt `%s` to debugfs: %d\n", name,
- err);
- continue;
- }
- }
+ void *root = kho_out.fdt;
+ u64 empty_mem_map = 0;
+ int err;
- return 0;
+ err = fdt_create(root, PAGE_SIZE);
+ err |= fdt_finish_reservemap(root);
+ err |= fdt_begin_node(root, "");
+ err |= fdt_property_string(root, "compatible", KHO_FDT_COMPATIBLE);
+ err |= fdt_property(root, PROP_PRESERVED_MEMORY_MAP, &empty_mem_map,
+ sizeof(empty_mem_map));
+ err |= fdt_end_node(root);
+ err |= fdt_finish(root);
-err_rmdir:
- debugfs_remove_recursive(kho_in.dir);
return err;
}
static __init int kho_init(void)
{
- int err = 0;
const void *fdt = kho_get_fdt();
+ int err = 0;
if (!kho_enable)
return 0;
- kho_out.ser.fdt = alloc_page(GFP_KERNEL);
- if (!kho_out.ser.fdt) {
- err = -ENOMEM;
+ kho_out.fdt = kho_alloc_preserve(PAGE_SIZE);
+ if (IS_ERR(kho_out.fdt)) {
+ err = PTR_ERR(kho_out.fdt);
goto err_free_scratch;
}
- debugfs_root = debugfs_create_dir("kho", NULL);
- if (IS_ERR(debugfs_root)) {
- err = -ENOENT;
+ err = kho_debugfs_init();
+ if (err)
goto err_free_fdt;
- }
- err = kho_out_debugfs_init();
+ err = kho_out_debugfs_init(&kho_out.dbg);
if (err)
goto err_free_fdt;
- if (fdt) {
- err = kho_in_debugfs_init(fdt);
- /*
- * Failure to create /sys/kernel/debug/kho/in does not prevent
- * reviving state from KHO and setting up KHO for the next
- * kexec.
- */
- if (err)
- pr_err("failed exposing handover FDT in debugfs: %d\n",
- err);
+ err = kho_out_fdt_setup();
+ if (err)
+ goto err_free_fdt;
+ if (fdt) {
+ kho_in_debugfs_init(&kho_in.dbg, fdt);
return 0;
}
@@ -1432,17 +1374,29 @@ static __init int kho_init(void)
unsigned long count = kho_scratch[i].size >> PAGE_SHIFT;
unsigned long pfn;
+ /*
+ * When debug_pagealloc is enabled, __free_pages() clears the
+ * corresponding PRESENT bit in the kernel page table.
+ * Subsequent kmemleak scans of these pages cause the
+ * non-PRESENT page faults.
+ * Mark scratch areas with kmemleak_ignore_phys() to exclude
+ * them from kmemleak scanning.
+ */
+ kmemleak_ignore_phys(kho_scratch[i].addr);
for (pfn = base_pfn; pfn < base_pfn + count;
pfn += pageblock_nr_pages)
init_cma_reserved_pageblock(pfn_to_page(pfn));
}
+ WARN_ON_ONCE(kho_debugfs_fdt_add(&kho_out.dbg, "fdt",
+ kho_out.fdt, true));
+
return 0;
err_free_fdt:
- put_page(kho_out.ser.fdt);
- kho_out.ser.fdt = NULL;
+ kho_unpreserve_free(kho_out.fdt);
err_free_scratch:
+ kho_out.fdt = NULL;
for (int i = 0; i < kho_scratch_cnt; i++) {
void *start = __va(kho_scratch[i].addr);
void *end = start + kho_scratch[i].size;
@@ -1452,7 +1406,7 @@ err_free_scratch:
kho_enable = false;
return err;
}
-late_initcall(kho_init);
+fs_initcall(kho_init);
static void __init kho_release_scratch(void)
{
@@ -1480,16 +1434,12 @@ static void __init kho_release_scratch(void)
void __init kho_memory_init(void)
{
- struct folio *folio;
-
if (kho_in.scratch_phys) {
kho_scratch = phys_to_virt(kho_in.scratch_phys);
kho_release_scratch();
- kho_mem_deserialize(kho_get_fdt());
- folio = kho_restore_folio(kho_in.fdt_phys);
- if (!folio)
- pr_warn("failed to restore folio for KHO fdt\n");
+ if (!kho_mem_deserialize(kho_get_fdt()))
+ kho_in.fdt_phys = 0;
} else {
kho_reserve_scratch();
}
@@ -1545,8 +1495,8 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
memblock_add(area->addr, size);
err = memblock_mark_kho_scratch(area->addr, size);
if (WARN_ON(err)) {
- pr_warn("failed to mark the scratch region 0x%pa+0x%pa: %d",
- &area->addr, &size, err);
+ pr_warn("failed to mark the scratch region 0x%pa+0x%pa: %pe",
+ &area->addr, &size, ERR_PTR(err));
goto out;
}
pr_debug("Marked 0x%pa+0x%pa as scratch", &area->addr, &size);
@@ -1566,7 +1516,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
kho_in.fdt_phys = fdt_phys;
kho_in.scratch_phys = scratch_phys;
kho_scratch_cnt = scratch_cnt;
- pr_info("found kexec handover data. Will skip init for some devices\n");
+ pr_info("found kexec handover data.\n");
out:
if (fdt)
@@ -1585,10 +1535,10 @@ int kho_fill_kimage(struct kimage *image)
int err = 0;
struct kexec_buf scratch;
- if (!kho_out.finalized)
+ if (!kho_enable)
return 0;
- image->kho.fdt = page_to_phys(kho_out.ser.fdt);
+ image->kho.fdt = virt_to_phys(kho_out.fdt);
scratch_size = sizeof(*kho_scratch) * kho_scratch_cnt;
scratch = (struct kexec_buf){
diff --git a/kernel/kexec_handover_debug.c b/kernel/liveupdate/kexec_handover_debug.c
index 6efb696f5426..6efb696f5426 100644
--- a/kernel/kexec_handover_debug.c
+++ b/kernel/liveupdate/kexec_handover_debug.c
diff --git a/kernel/liveupdate/kexec_handover_debugfs.c b/kernel/liveupdate/kexec_handover_debugfs.c
new file mode 100644
index 000000000000..2abbf62ba942
--- /dev/null
+++ b/kernel/liveupdate/kexec_handover_debugfs.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * kexec_handover_debugfs.c - kexec handover debugfs interfaces
+ * Copyright (C) 2023 Alexander Graf <graf@amazon.com>
+ * Copyright (C) 2025 Microsoft Corporation, Mike Rapoport <rppt@kernel.org>
+ * Copyright (C) 2025 Google LLC, Changyuan Lyu <changyuanl@google.com>
+ * Copyright (C) 2025 Google LLC, Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+
+#define pr_fmt(fmt) "KHO: " fmt
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/libfdt.h>
+#include <linux/mm.h>
+#include "kexec_handover_internal.h"
+
+static struct dentry *debugfs_root;
+
+struct fdt_debugfs {
+ struct list_head list;
+ struct debugfs_blob_wrapper wrapper;
+ struct dentry *file;
+};
+
+static int __kho_debugfs_fdt_add(struct list_head *list, struct dentry *dir,
+ const char *name, const void *fdt)
+{
+ struct fdt_debugfs *f;
+ struct dentry *file;
+
+ f = kmalloc(sizeof(*f), GFP_KERNEL);
+ if (!f)
+ return -ENOMEM;
+
+ f->wrapper.data = (void *)fdt;
+ f->wrapper.size = fdt_totalsize(fdt);
+
+ file = debugfs_create_blob(name, 0400, dir, &f->wrapper);
+ if (IS_ERR(file)) {
+ kfree(f);
+ return PTR_ERR(file);
+ }
+
+ f->file = file;
+ list_add(&f->list, list);
+
+ return 0;
+}
+
+int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name,
+ const void *fdt, bool root)
+{
+ struct dentry *dir;
+
+ if (root)
+ dir = dbg->dir;
+ else
+ dir = dbg->sub_fdt_dir;
+
+ return __kho_debugfs_fdt_add(&dbg->fdt_list, dir, name, fdt);
+}
+
+void kho_debugfs_fdt_remove(struct kho_debugfs *dbg, void *fdt)
+{
+ struct fdt_debugfs *ff;
+
+ list_for_each_entry(ff, &dbg->fdt_list, list) {
+ if (ff->wrapper.data == fdt) {
+ debugfs_remove(ff->file);
+ list_del(&ff->list);
+ kfree(ff);
+ break;
+ }
+ }
+}
+
+static int kho_out_finalize_get(void *data, u64 *val)
+{
+ *val = kho_finalized();
+
+ return 0;
+}
+
+static int kho_out_finalize_set(void *data, u64 val)
+{
+ if (val)
+ return kho_finalize();
+ else
+ return -EINVAL;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(kho_out_finalize_fops, kho_out_finalize_get,
+ kho_out_finalize_set, "%llu\n");
+
+static int scratch_phys_show(struct seq_file *m, void *v)
+{
+ for (int i = 0; i < kho_scratch_cnt; i++)
+ seq_printf(m, "0x%llx\n", kho_scratch[i].addr);
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(scratch_phys);
+
+static int scratch_len_show(struct seq_file *m, void *v)
+{
+ for (int i = 0; i < kho_scratch_cnt; i++)
+ seq_printf(m, "0x%llx\n", kho_scratch[i].size);
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(scratch_len);
+
+__init void kho_in_debugfs_init(struct kho_debugfs *dbg, const void *fdt)
+{
+ struct dentry *dir, *sub_fdt_dir;
+ int err, child;
+
+ INIT_LIST_HEAD(&dbg->fdt_list);
+
+ dir = debugfs_create_dir("in", debugfs_root);
+ if (IS_ERR(dir)) {
+ err = PTR_ERR(dir);
+ goto err_out;
+ }
+
+ sub_fdt_dir = debugfs_create_dir("sub_fdts", dir);
+ if (IS_ERR(sub_fdt_dir)) {
+ err = PTR_ERR(sub_fdt_dir);
+ goto err_rmdir;
+ }
+
+ err = __kho_debugfs_fdt_add(&dbg->fdt_list, dir, "fdt", fdt);
+ if (err)
+ goto err_rmdir;
+
+ fdt_for_each_subnode(child, fdt, 0) {
+ int len = 0;
+ const char *name = fdt_get_name(fdt, child, NULL);
+ const u64 *fdt_phys;
+
+ fdt_phys = fdt_getprop(fdt, child, "fdt", &len);
+ if (!fdt_phys)
+ continue;
+ if (len != sizeof(*fdt_phys)) {
+ pr_warn("node %s prop fdt has invalid length: %d\n",
+ name, len);
+ continue;
+ }
+ err = __kho_debugfs_fdt_add(&dbg->fdt_list, sub_fdt_dir, name,
+ phys_to_virt(*fdt_phys));
+ if (err) {
+ pr_warn("failed to add fdt %s to debugfs: %pe\n", name,
+ ERR_PTR(err));
+ continue;
+ }
+ }
+
+ dbg->dir = dir;
+ dbg->sub_fdt_dir = sub_fdt_dir;
+
+ return;
+err_rmdir:
+ debugfs_remove_recursive(dir);
+err_out:
+ /*
+ * Failure to create /sys/kernel/debug/kho/in does not prevent
+ * reviving state from KHO and setting up KHO for the next
+ * kexec.
+ */
+ if (err) {
+ pr_err("failed exposing handover FDT in debugfs: %pe\n",
+ ERR_PTR(err));
+ }
+}
+
+__init int kho_out_debugfs_init(struct kho_debugfs *dbg)
+{
+ struct dentry *dir, *f, *sub_fdt_dir;
+
+ INIT_LIST_HEAD(&dbg->fdt_list);
+
+ dir = debugfs_create_dir("out", debugfs_root);
+ if (IS_ERR(dir))
+ return -ENOMEM;
+
+ sub_fdt_dir = debugfs_create_dir("sub_fdts", dir);
+ if (IS_ERR(sub_fdt_dir))
+ goto err_rmdir;
+
+ f = debugfs_create_file("scratch_phys", 0400, dir, NULL,
+ &scratch_phys_fops);
+ if (IS_ERR(f))
+ goto err_rmdir;
+
+ f = debugfs_create_file("scratch_len", 0400, dir, NULL,
+ &scratch_len_fops);
+ if (IS_ERR(f))
+ goto err_rmdir;
+
+ f = debugfs_create_file("finalize", 0600, dir, NULL,
+ &kho_out_finalize_fops);
+ if (IS_ERR(f))
+ goto err_rmdir;
+
+ dbg->dir = dir;
+ dbg->sub_fdt_dir = sub_fdt_dir;
+ return 0;
+
+err_rmdir:
+ debugfs_remove_recursive(dir);
+ return -ENOENT;
+}
+
+__init int kho_debugfs_init(void)
+{
+ debugfs_root = debugfs_create_dir("kho", NULL);
+ if (IS_ERR(debugfs_root))
+ return -ENOENT;
+ return 0;
+}
diff --git a/kernel/liveupdate/kexec_handover_internal.h b/kernel/liveupdate/kexec_handover_internal.h
new file mode 100644
index 000000000000..0202c85ad14f
--- /dev/null
+++ b/kernel/liveupdate/kexec_handover_internal.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_KEXEC_HANDOVER_INTERNAL_H
+#define LINUX_KEXEC_HANDOVER_INTERNAL_H
+
+#include <linux/kexec_handover.h>
+#include <linux/list.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_KEXEC_HANDOVER_DEBUGFS
+#include <linux/debugfs.h>
+
+struct kho_debugfs {
+ struct dentry *dir;
+ struct dentry *sub_fdt_dir;
+ struct list_head fdt_list;
+};
+
+#else
+struct kho_debugfs {};
+#endif
+
+extern struct kho_scratch *kho_scratch;
+extern unsigned int kho_scratch_cnt;
+
+bool kho_finalized(void);
+int kho_finalize(void);
+
+#ifdef CONFIG_KEXEC_HANDOVER_DEBUGFS
+int kho_debugfs_init(void);
+void kho_in_debugfs_init(struct kho_debugfs *dbg, const void *fdt);
+int kho_out_debugfs_init(struct kho_debugfs *dbg);
+int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name,
+ const void *fdt, bool root);
+void kho_debugfs_fdt_remove(struct kho_debugfs *dbg, void *fdt);
+#else
+static inline int kho_debugfs_init(void) { return 0; }
+static inline void kho_in_debugfs_init(struct kho_debugfs *dbg,
+ const void *fdt) { }
+static inline int kho_out_debugfs_init(struct kho_debugfs *dbg) { return 0; }
+static inline int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name,
+ const void *fdt, bool root) { return 0; }
+static inline void kho_debugfs_fdt_remove(struct kho_debugfs *dbg,
+ void *fdt) { }
+#endif /* CONFIG_KEXEC_HANDOVER_DEBUGFS */
+
+#ifdef CONFIG_KEXEC_HANDOVER_DEBUG
+bool kho_scratch_overlap(phys_addr_t phys, size_t size);
+#else
+static inline bool kho_scratch_overlap(phys_addr_t phys, size_t size)
+{
+ return false;
+}
+#endif /* CONFIG_KEXEC_HANDOVER_DEBUG */
+
+#endif /* LINUX_KEXEC_HANDOVER_INTERNAL_H */
diff --git a/kernel/liveupdate/luo_core.c b/kernel/liveupdate/luo_core.c
new file mode 100644
index 000000000000..f7ecaf7740d1
--- /dev/null
+++ b/kernel/liveupdate/luo_core.c
@@ -0,0 +1,450 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+
+/**
+ * DOC: Live Update Orchestrator (LUO)
+ *
+ * Live Update is a specialized, kexec-based reboot process that allows a
+ * running kernel to be updated from one version to another while preserving
+ * the state of selected resources and keeping designated hardware devices
+ * operational. For these devices, DMA activity may continue throughout the
+ * kernel transition.
+ *
+ * While the primary use case driving this work is supporting live updates of
+ * the Linux kernel when it is used as a hypervisor in cloud environments, the
+ * LUO framework itself is designed to be workload-agnostic. Live Update
+ * facilitates a full kernel version upgrade for any type of system.
+ *
+ * For example, a non-hypervisor system running an in-memory cache like
+ * memcached with many gigabytes of data can use LUO. The userspace service
+ * can place its cache into a memfd, have its state preserved by LUO, and
+ * restore it immediately after the kernel kexec.
+ *
+ * Whether the system is running virtual machines, containers, a
+ * high-performance database, or networking services, LUO's primary goal is to
+ * enable a full kernel update by preserving critical userspace state and
+ * keeping essential devices operational.
+ *
+ * The core of LUO is a mechanism that tracks the progress of a live update,
+ * along with a callback API that allows other kernel subsystems to participate
+ * in the process. Example subsystems that can hook into LUO include: kvm,
+ * iommu, interrupts, vfio, participating filesystems, and memory management.
+ *
+ * LUO uses Kexec Handover to transfer memory state from the current kernel to
+ * the next kernel. For more details see
+ * Documentation/core-api/kho/concepts.rst.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/atomic.h>
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/kexec_handover.h>
+#include <linux/kho/abi/luo.h>
+#include <linux/kobject.h>
+#include <linux/libfdt.h>
+#include <linux/liveupdate.h>
+#include <linux/miscdevice.h>
+#include <linux/mm.h>
+#include <linux/sizes.h>
+#include <linux/string.h>
+#include <linux/unaligned.h>
+
+#include "kexec_handover_internal.h"
+#include "luo_internal.h"
+
+static struct {
+ bool enabled;
+ void *fdt_out;
+ void *fdt_in;
+ u64 liveupdate_num;
+} luo_global;
+
+static int __init early_liveupdate_param(char *buf)
+{
+ return kstrtobool(buf, &luo_global.enabled);
+}
+early_param("liveupdate", early_liveupdate_param);
+
+static int __init luo_early_startup(void)
+{
+ phys_addr_t fdt_phys;
+ int err, ln_size;
+ const void *ptr;
+
+ if (!kho_is_enabled()) {
+ if (liveupdate_enabled())
+ pr_warn("Disabling liveupdate because KHO is disabled\n");
+ luo_global.enabled = false;
+ return 0;
+ }
+
+ /* Retrieve LUO subtree, and verify its format. */
+ err = kho_retrieve_subtree(LUO_FDT_KHO_ENTRY_NAME, &fdt_phys);
+ if (err) {
+ if (err != -ENOENT) {
+ pr_err("failed to retrieve FDT '%s' from KHO: %pe\n",
+ LUO_FDT_KHO_ENTRY_NAME, ERR_PTR(err));
+ return err;
+ }
+
+ return 0;
+ }
+
+ luo_global.fdt_in = phys_to_virt(fdt_phys);
+ err = fdt_node_check_compatible(luo_global.fdt_in, 0,
+ LUO_FDT_COMPATIBLE);
+ if (err) {
+ pr_err("FDT '%s' is incompatible with '%s' [%d]\n",
+ LUO_FDT_KHO_ENTRY_NAME, LUO_FDT_COMPATIBLE, err);
+
+ return -EINVAL;
+ }
+
+ ln_size = 0;
+ ptr = fdt_getprop(luo_global.fdt_in, 0, LUO_FDT_LIVEUPDATE_NUM,
+ &ln_size);
+ if (!ptr || ln_size != sizeof(luo_global.liveupdate_num)) {
+ pr_err("Unable to get live update number '%s' [%d]\n",
+ LUO_FDT_LIVEUPDATE_NUM, ln_size);
+
+ return -EINVAL;
+ }
+
+ luo_global.liveupdate_num = get_unaligned((u64 *)ptr);
+ pr_info("Retrieved live update data, liveupdate number: %lld\n",
+ luo_global.liveupdate_num);
+
+ err = luo_session_setup_incoming(luo_global.fdt_in);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int __init liveupdate_early_init(void)
+{
+ int err;
+
+ err = luo_early_startup();
+ if (err) {
+ luo_global.enabled = false;
+ luo_restore_fail("The incoming tree failed to initialize properly [%pe], disabling live update\n",
+ ERR_PTR(err));
+ }
+
+ return err;
+}
+early_initcall(liveupdate_early_init);
+
+/* Called during boot to create outgoing LUO fdt tree */
+static int __init luo_fdt_setup(void)
+{
+ const u64 ln = luo_global.liveupdate_num + 1;
+ void *fdt_out;
+ int err;
+
+ fdt_out = kho_alloc_preserve(LUO_FDT_SIZE);
+ if (IS_ERR(fdt_out)) {
+ pr_err("failed to allocate/preserve FDT memory\n");
+ return PTR_ERR(fdt_out);
+ }
+
+ err = fdt_create(fdt_out, LUO_FDT_SIZE);
+ err |= fdt_finish_reservemap(fdt_out);
+ err |= fdt_begin_node(fdt_out, "");
+ err |= fdt_property_string(fdt_out, "compatible", LUO_FDT_COMPATIBLE);
+ err |= fdt_property(fdt_out, LUO_FDT_LIVEUPDATE_NUM, &ln, sizeof(ln));
+ err |= luo_session_setup_outgoing(fdt_out);
+ err |= fdt_end_node(fdt_out);
+ err |= fdt_finish(fdt_out);
+ if (err)
+ goto exit_free;
+
+ err = kho_add_subtree(LUO_FDT_KHO_ENTRY_NAME, fdt_out);
+ if (err)
+ goto exit_free;
+ luo_global.fdt_out = fdt_out;
+
+ return 0;
+
+exit_free:
+ kho_unpreserve_free(fdt_out);
+ pr_err("failed to prepare LUO FDT: %d\n", err);
+
+ return err;
+}
+
+/*
+ * late initcall because it initializes the outgoing tree that is needed only
+ * once userspace starts using /dev/liveupdate.
+ */
+static int __init luo_late_startup(void)
+{
+ int err;
+
+ if (!liveupdate_enabled())
+ return 0;
+
+ err = luo_fdt_setup();
+ if (err)
+ luo_global.enabled = false;
+
+ return err;
+}
+late_initcall(luo_late_startup);
+
+/* Public Functions */
+
+/**
+ * liveupdate_reboot() - Kernel reboot notifier for live update final
+ * serialization.
+ *
+ * This function is invoked directly from the reboot() syscall pathway
+ * if kexec is in progress.
+ *
+ * If any callback fails, this function aborts KHO, undoes the freeze()
+ * callbacks, and returns an error.
+ */
+int liveupdate_reboot(void)
+{
+ int err;
+
+ if (!liveupdate_enabled())
+ return 0;
+
+ err = luo_session_serialize();
+ if (err)
+ return err;
+
+ err = kho_finalize();
+ if (err) {
+ pr_err("kho_finalize failed %d\n", err);
+ /*
+ * kho_finalize() may return libfdt errors, to aboid passing to
+ * userspace unknown errors, change this to EAGAIN.
+ */
+ err = -EAGAIN;
+ }
+
+ return err;
+}
+
+/**
+ * liveupdate_enabled - Check if the live update feature is enabled.
+ *
+ * This function returns the state of the live update feature flag, which
+ * can be controlled via the ``liveupdate`` kernel command-line parameter.
+ *
+ * @return true if live update is enabled, false otherwise.
+ */
+bool liveupdate_enabled(void)
+{
+ return luo_global.enabled;
+}
+
+/**
+ * DOC: LUO ioctl Interface
+ *
+ * The IOCTL user-space control interface for the LUO subsystem.
+ * It registers a character device, typically found at ``/dev/liveupdate``,
+ * which allows a userspace agent to manage the LUO state machine and its
+ * associated resources, such as preservable file descriptors.
+ *
+ * To ensure that the state machine is controlled by a single entity, access
+ * to this device is exclusive: only one process is permitted to have
+ * ``/dev/liveupdate`` open at any given time. Subsequent open attempts will
+ * fail with -EBUSY until the first process closes its file descriptor.
+ * This singleton model simplifies state management by preventing conflicting
+ * commands from multiple userspace agents.
+ */
+
+struct luo_device_state {
+ struct miscdevice miscdev;
+ atomic_t in_use;
+};
+
+static int luo_ioctl_create_session(struct luo_ucmd *ucmd)
+{
+ struct liveupdate_ioctl_create_session *argp = ucmd->cmd;
+ struct file *file;
+ int err;
+
+ argp->fd = get_unused_fd_flags(O_CLOEXEC);
+ if (argp->fd < 0)
+ return argp->fd;
+
+ err = luo_session_create(argp->name, &file);
+ if (err)
+ goto err_put_fd;
+
+ err = luo_ucmd_respond(ucmd, sizeof(*argp));
+ if (err)
+ goto err_put_file;
+
+ fd_install(argp->fd, file);
+
+ return 0;
+
+err_put_file:
+ fput(file);
+err_put_fd:
+ put_unused_fd(argp->fd);
+
+ return err;
+}
+
+static int luo_ioctl_retrieve_session(struct luo_ucmd *ucmd)
+{
+ struct liveupdate_ioctl_retrieve_session *argp = ucmd->cmd;
+ struct file *file;
+ int err;
+
+ argp->fd = get_unused_fd_flags(O_CLOEXEC);
+ if (argp->fd < 0)
+ return argp->fd;
+
+ err = luo_session_retrieve(argp->name, &file);
+ if (err < 0)
+ goto err_put_fd;
+
+ err = luo_ucmd_respond(ucmd, sizeof(*argp));
+ if (err)
+ goto err_put_file;
+
+ fd_install(argp->fd, file);
+
+ return 0;
+
+err_put_file:
+ fput(file);
+err_put_fd:
+ put_unused_fd(argp->fd);
+
+ return err;
+}
+
+static int luo_open(struct inode *inodep, struct file *filep)
+{
+ struct luo_device_state *ldev = container_of(filep->private_data,
+ struct luo_device_state,
+ miscdev);
+
+ if (atomic_cmpxchg(&ldev->in_use, 0, 1))
+ return -EBUSY;
+
+ /* Always return -EIO to user if deserialization fail */
+ if (luo_session_deserialize()) {
+ atomic_set(&ldev->in_use, 0);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int luo_release(struct inode *inodep, struct file *filep)
+{
+ struct luo_device_state *ldev = container_of(filep->private_data,
+ struct luo_device_state,
+ miscdev);
+ atomic_set(&ldev->in_use, 0);
+
+ return 0;
+}
+
+union ucmd_buffer {
+ struct liveupdate_ioctl_create_session create;
+ struct liveupdate_ioctl_retrieve_session retrieve;
+};
+
+struct luo_ioctl_op {
+ unsigned int size;
+ unsigned int min_size;
+ unsigned int ioctl_num;
+ int (*execute)(struct luo_ucmd *ucmd);
+};
+
+#define IOCTL_OP(_ioctl, _fn, _struct, _last) \
+ [_IOC_NR(_ioctl) - LIVEUPDATE_CMD_BASE] = { \
+ .size = sizeof(_struct) + \
+ BUILD_BUG_ON_ZERO(sizeof(union ucmd_buffer) < \
+ sizeof(_struct)), \
+ .min_size = offsetofend(_struct, _last), \
+ .ioctl_num = _ioctl, \
+ .execute = _fn, \
+ }
+
+static const struct luo_ioctl_op luo_ioctl_ops[] = {
+ IOCTL_OP(LIVEUPDATE_IOCTL_CREATE_SESSION, luo_ioctl_create_session,
+ struct liveupdate_ioctl_create_session, name),
+ IOCTL_OP(LIVEUPDATE_IOCTL_RETRIEVE_SESSION, luo_ioctl_retrieve_session,
+ struct liveupdate_ioctl_retrieve_session, name),
+};
+
+static long luo_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
+{
+ const struct luo_ioctl_op *op;
+ struct luo_ucmd ucmd = {};
+ union ucmd_buffer buf;
+ unsigned int nr;
+ int err;
+
+ nr = _IOC_NR(cmd);
+ if (nr < LIVEUPDATE_CMD_BASE ||
+ (nr - LIVEUPDATE_CMD_BASE) >= ARRAY_SIZE(luo_ioctl_ops)) {
+ return -EINVAL;
+ }
+
+ ucmd.ubuffer = (void __user *)arg;
+ err = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer);
+ if (err)
+ return err;
+
+ op = &luo_ioctl_ops[nr - LIVEUPDATE_CMD_BASE];
+ if (op->ioctl_num != cmd)
+ return -ENOIOCTLCMD;
+ if (ucmd.user_size < op->min_size)
+ return -EINVAL;
+
+ ucmd.cmd = &buf;
+ err = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer,
+ ucmd.user_size);
+ if (err)
+ return err;
+
+ return op->execute(&ucmd);
+}
+
+static const struct file_operations luo_fops = {
+ .owner = THIS_MODULE,
+ .open = luo_open,
+ .release = luo_release,
+ .unlocked_ioctl = luo_ioctl,
+};
+
+static struct luo_device_state luo_dev = {
+ .miscdev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "liveupdate",
+ .fops = &luo_fops,
+ },
+ .in_use = ATOMIC_INIT(0),
+};
+
+static int __init liveupdate_ioctl_init(void)
+{
+ if (!liveupdate_enabled())
+ return 0;
+
+ return misc_register(&luo_dev.miscdev);
+}
+late_initcall(liveupdate_ioctl_init);
diff --git a/kernel/liveupdate/luo_file.c b/kernel/liveupdate/luo_file.c
new file mode 100644
index 000000000000..ddff87917b21
--- /dev/null
+++ b/kernel/liveupdate/luo_file.c
@@ -0,0 +1,889 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+
+/**
+ * DOC: LUO File Descriptors
+ *
+ * LUO provides the infrastructure to preserve specific, stateful file
+ * descriptors across a kexec-based live update. The primary goal is to allow
+ * workloads, such as virtual machines using vfio, memfd, or iommufd, to
+ * retain access to their essential resources without interruption.
+ *
+ * The framework is built around a callback-based handler model and a well-
+ * defined lifecycle for each preserved file.
+ *
+ * Handler Registration:
+ * Kernel modules responsible for a specific file type (e.g., memfd, vfio)
+ * register a &struct liveupdate_file_handler. This handler provides a set of
+ * callbacks that LUO invokes at different stages of the update process, most
+ * notably:
+ *
+ * - can_preserve(): A lightweight check to determine if the handler is
+ * compatible with a given 'struct file'.
+ * - preserve(): The heavyweight operation that saves the file's state and
+ * returns an opaque u64 handle. This is typically performed while the
+ * workload is still active to minimize the downtime during the
+ * actual reboot transition.
+ * - unpreserve(): Cleans up any resources allocated by .preserve(), called
+ * if the preservation process is aborted before the reboot (i.e. session is
+ * closed).
+ * - freeze(): A final pre-reboot opportunity to prepare the state for kexec.
+ * We are already in reboot syscall, and therefore userspace cannot mutate
+ * the file anymore.
+ * - unfreeze(): Undoes the actions of .freeze(), called if the live update
+ * is aborted after the freeze phase.
+ * - retrieve(): Reconstructs the file in the new kernel from the preserved
+ * handle.
+ * - finish(): Performs final check and cleanup in the new kernel. After
+ * succesul finish call, LUO gives up ownership to this file.
+ *
+ * File Preservation Lifecycle happy path:
+ *
+ * 1. Preserve (Normal Operation): A userspace agent preserves files one by one
+ * via an ioctl. For each file, luo_preserve_file() finds a compatible
+ * handler, calls its .preserve() operation, and creates an internal &struct
+ * luo_file to track the live state.
+ *
+ * 2. Freeze (Pre-Reboot): Just before the kexec, luo_file_freeze() is called.
+ * It iterates through all preserved files, calls their respective .freeze()
+ * operation, and serializes their final metadata (compatible string, token,
+ * and data handle) into a contiguous memory block for KHO.
+ *
+ * 3. Deserialize: After kexec, luo_file_deserialize() runs when session gets
+ * deserialized (which is when /dev/liveupdate is first opened). It reads the
+ * serialized data from the KHO memory region and reconstructs the in-memory
+ * list of &struct luo_file instances for the new kernel, linking them to
+ * their corresponding handlers.
+ *
+ * 4. Retrieve (New Kernel - Userspace Ready): The userspace agent can now
+ * restore file descriptors by providing a token. luo_retrieve_file()
+ * searches for the matching token, calls the handler's .retrieve() op to
+ * re-create the 'struct file', and returns a new FD. Files can be
+ * retrieved in ANY order.
+ *
+ * 5. Finish (New Kernel - Cleanup): Once a session retrival is complete,
+ * luo_file_finish() is called. It iterates through all files, invokes their
+ * .finish() operations for final cleanup, and releases all associated kernel
+ * resources.
+ *
+ * File Preservation Lifecycle unhappy paths:
+ *
+ * 1. Abort Before Reboot: If the userspace agent aborts the live update
+ * process before calling reboot (e.g., by closing the session file
+ * descriptor), the session's release handler calls
+ * luo_file_unpreserve_files(). This invokes the .unpreserve() callback on
+ * all preserved files, ensuring all allocated resources are cleaned up and
+ * returning the system to a clean state.
+ *
+ * 2. Freeze Failure: During the reboot() syscall, if any handler's .freeze()
+ * op fails, the .unfreeze() op is invoked on all previously *successful*
+ * freezes to roll back their state. The reboot() syscall then returns an
+ * error to userspace, canceling the live update.
+ *
+ * 3. Finish Failure: In the new kernel, if a handler's .finish() op fails,
+ * the luo_file_finish() operation is aborted. LUO retains ownership of
+ * all files within that session, including those that were not yet
+ * processed. The userspace agent can attempt to call the finish operation
+ * again later. If the issue cannot be resolved, these resources will be held
+ * by LUO until the next live update cycle, at which point they will be
+ * discarded.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cleanup.h>
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/io.h>
+#include <linux/kexec_handover.h>
+#include <linux/kho/abi/luo.h>
+#include <linux/liveupdate.h>
+#include <linux/module.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include "luo_internal.h"
+
+static LIST_HEAD(luo_file_handler_list);
+
+/* 2 4K pages, give space for 128 files per file_set */
+#define LUO_FILE_PGCNT 2ul
+#define LUO_FILE_MAX \
+ ((LUO_FILE_PGCNT << PAGE_SHIFT) / sizeof(struct luo_file_ser))
+
+/**
+ * struct luo_file - Represents a single preserved file instance.
+ * @fh: Pointer to the &struct liveupdate_file_handler that manages
+ * this type of file.
+ * @file: Pointer to the kernel's &struct file that is being preserved.
+ * This is NULL in the new kernel until the file is successfully
+ * retrieved.
+ * @serialized_data: The opaque u64 handle to the serialized state of the file.
+ * This handle is passed back to the handler's .freeze(),
+ * .retrieve(), and .finish() callbacks, allowing it to track
+ * and update its serialized state across phases.
+ * @private_data: Pointer to the private data for the file used to hold runtime
+ * state that is not preserved. Set by the handler's .preserve()
+ * callback, and must be freed in the handler's .unpreserve()
+ * callback.
+ * @retrieved: A flag indicating whether a user/kernel in the new kernel has
+ * successfully called retrieve() on this file. This prevents
+ * multiple retrieval attempts.
+ * @mutex: A mutex that protects the fields of this specific instance
+ * (e.g., @retrieved, @file), ensuring that operations like
+ * retrieving or finishing a file are atomic.
+ * @list: The list_head linking this instance into its parent
+ * file_set's list of preserved files.
+ * @token: The user-provided unique token used to identify this file.
+ *
+ * This structure is the core in-kernel representation of a single file being
+ * managed through a live update. An instance is created by luo_preserve_file()
+ * to link a 'struct file' to its corresponding handler, a user-provided token,
+ * and the serialized state handle returned by the handler's .preserve()
+ * operation.
+ *
+ * These instances are tracked in a per-file_set list. The @serialized_data
+ * field, which holds a handle to the file's serialized state, may be updated
+ * during the .freeze() callback before being serialized for the next kernel.
+ * After reboot, these structures are recreated by luo_file_deserialize() and
+ * are finally cleaned up by luo_file_finish().
+ */
+struct luo_file {
+ struct liveupdate_file_handler *fh;
+ struct file *file;
+ u64 serialized_data;
+ void *private_data;
+ bool retrieved;
+ struct mutex mutex;
+ struct list_head list;
+ u64 token;
+};
+
+static int luo_alloc_files_mem(struct luo_file_set *file_set)
+{
+ size_t size;
+ void *mem;
+
+ if (file_set->files)
+ return 0;
+
+ WARN_ON_ONCE(file_set->count);
+
+ size = LUO_FILE_PGCNT << PAGE_SHIFT;
+ mem = kho_alloc_preserve(size);
+ if (IS_ERR(mem))
+ return PTR_ERR(mem);
+
+ file_set->files = mem;
+
+ return 0;
+}
+
+static void luo_free_files_mem(struct luo_file_set *file_set)
+{
+ /* If file_set has files, no need to free preservation memory */
+ if (file_set->count)
+ return;
+
+ if (!file_set->files)
+ return;
+
+ kho_unpreserve_free(file_set->files);
+ file_set->files = NULL;
+}
+
+static bool luo_token_is_used(struct luo_file_set *file_set, u64 token)
+{
+ struct luo_file *iter;
+
+ list_for_each_entry(iter, &file_set->files_list, list) {
+ if (iter->token == token)
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * luo_preserve_file - Initiate the preservation of a file descriptor.
+ * @file_set: The file_set to which the preserved file will be added.
+ * @token: A unique, user-provided identifier for the file.
+ * @fd: The file descriptor to be preserved.
+ *
+ * This function orchestrates the first phase of preserving a file. Upon entry,
+ * it takes a reference to the 'struct file' via fget(), effectively making LUO
+ * a co-owner of the file. This reference is held until the file is either
+ * unpreserved or successfully finished in the next kernel, preventing the file
+ * from being prematurely destroyed.
+ *
+ * This function orchestrates the first phase of preserving a file. It performs
+ * the following steps:
+ *
+ * 1. Validates that the @token is not already in use within the file_set.
+ * 2. Ensures the file_set's memory for files serialization is allocated
+ * (allocates if needed).
+ * 3. Iterates through registered handlers, calling can_preserve() to find one
+ * compatible with the given @fd.
+ * 4. Calls the handler's .preserve() operation, which saves the file's state
+ * and returns an opaque private data handle.
+ * 5. Adds the new instance to the file_set's internal list.
+ *
+ * On success, LUO takes a reference to the 'struct file' and considers it
+ * under its management until it is unpreserved or finished.
+ *
+ * In case of any failure, all intermediate allocations (file reference, memory
+ * for the 'luo_file' struct, etc.) are cleaned up before returning an error.
+ *
+ * Context: Can be called from an ioctl handler during normal system operation.
+ * Return: 0 on success. Returns a negative errno on failure:
+ * -EEXIST if the token is already used.
+ * -EBADF if the file descriptor is invalid.
+ * -ENOSPC if the file_set is full.
+ * -ENOENT if no compatible handler is found.
+ * -ENOMEM on memory allocation failure.
+ * Other erros might be returned by .preserve().
+ */
+int luo_preserve_file(struct luo_file_set *file_set, u64 token, int fd)
+{
+ struct liveupdate_file_op_args args = {0};
+ struct liveupdate_file_handler *fh;
+ struct luo_file *luo_file;
+ struct file *file;
+ int err;
+
+ if (luo_token_is_used(file_set, token))
+ return -EEXIST;
+
+ if (file_set->count == LUO_FILE_MAX)
+ return -ENOSPC;
+
+ file = fget(fd);
+ if (!file)
+ return -EBADF;
+
+ err = luo_alloc_files_mem(file_set);
+ if (err)
+ goto err_fput;
+
+ err = -ENOENT;
+ luo_list_for_each_private(fh, &luo_file_handler_list, list) {
+ if (fh->ops->can_preserve(fh, file)) {
+ err = 0;
+ break;
+ }
+ }
+
+ /* err is still -ENOENT if no handler was found */
+ if (err)
+ goto err_free_files_mem;
+
+ luo_file = kzalloc(sizeof(*luo_file), GFP_KERNEL);
+ if (!luo_file) {
+ err = -ENOMEM;
+ goto err_free_files_mem;
+ }
+
+ luo_file->file = file;
+ luo_file->fh = fh;
+ luo_file->token = token;
+ luo_file->retrieved = false;
+ mutex_init(&luo_file->mutex);
+
+ args.handler = fh;
+ args.file = file;
+ err = fh->ops->preserve(&args);
+ if (err)
+ goto err_kfree;
+
+ luo_file->serialized_data = args.serialized_data;
+ luo_file->private_data = args.private_data;
+ list_add_tail(&luo_file->list, &file_set->files_list);
+ file_set->count++;
+
+ return 0;
+
+err_kfree:
+ kfree(luo_file);
+err_free_files_mem:
+ luo_free_files_mem(file_set);
+err_fput:
+ fput(file);
+
+ return err;
+}
+
+/**
+ * luo_file_unpreserve_files - Unpreserves all files from a file_set.
+ * @file_set: The files to be cleaned up.
+ *
+ * This function serves as the primary cleanup path for a file_set. It is
+ * invoked when the userspace agent closes the file_set's file descriptor.
+ *
+ * For each file, it performs the following cleanup actions:
+ * 1. Calls the handler's .unpreserve() callback to allow the handler to
+ * release any resources it allocated.
+ * 2. Removes the file from the file_set's internal tracking list.
+ * 3. Releases the reference to the 'struct file' that was taken by
+ * luo_preserve_file() via fput(), returning ownership.
+ * 4. Frees the memory associated with the internal 'struct luo_file'.
+ *
+ * After all individual files are unpreserved, it frees the contiguous memory
+ * block that was allocated to hold their serialization data.
+ */
+void luo_file_unpreserve_files(struct luo_file_set *file_set)
+{
+ struct luo_file *luo_file;
+
+ while (!list_empty(&file_set->files_list)) {
+ struct liveupdate_file_op_args args = {0};
+
+ luo_file = list_last_entry(&file_set->files_list,
+ struct luo_file, list);
+
+ args.handler = luo_file->fh;
+ args.file = luo_file->file;
+ args.serialized_data = luo_file->serialized_data;
+ args.private_data = luo_file->private_data;
+ luo_file->fh->ops->unpreserve(&args);
+
+ list_del(&luo_file->list);
+ file_set->count--;
+
+ fput(luo_file->file);
+ mutex_destroy(&luo_file->mutex);
+ kfree(luo_file);
+ }
+
+ luo_free_files_mem(file_set);
+}
+
+static int luo_file_freeze_one(struct luo_file_set *file_set,
+ struct luo_file *luo_file)
+{
+ int err = 0;
+
+ guard(mutex)(&luo_file->mutex);
+
+ if (luo_file->fh->ops->freeze) {
+ struct liveupdate_file_op_args args = {0};
+
+ args.handler = luo_file->fh;
+ args.file = luo_file->file;
+ args.serialized_data = luo_file->serialized_data;
+ args.private_data = luo_file->private_data;
+
+ err = luo_file->fh->ops->freeze(&args);
+ if (!err)
+ luo_file->serialized_data = args.serialized_data;
+ }
+
+ return err;
+}
+
+static void luo_file_unfreeze_one(struct luo_file_set *file_set,
+ struct luo_file *luo_file)
+{
+ guard(mutex)(&luo_file->mutex);
+
+ if (luo_file->fh->ops->unfreeze) {
+ struct liveupdate_file_op_args args = {0};
+
+ args.handler = luo_file->fh;
+ args.file = luo_file->file;
+ args.serialized_data = luo_file->serialized_data;
+ args.private_data = luo_file->private_data;
+
+ luo_file->fh->ops->unfreeze(&args);
+ }
+
+ luo_file->serialized_data = 0;
+}
+
+static void __luo_file_unfreeze(struct luo_file_set *file_set,
+ struct luo_file *failed_entry)
+{
+ struct list_head *files_list = &file_set->files_list;
+ struct luo_file *luo_file;
+
+ list_for_each_entry(luo_file, files_list, list) {
+ if (luo_file == failed_entry)
+ break;
+
+ luo_file_unfreeze_one(file_set, luo_file);
+ }
+
+ memset(file_set->files, 0, LUO_FILE_PGCNT << PAGE_SHIFT);
+}
+
+/**
+ * luo_file_freeze - Freezes all preserved files and serializes their metadata.
+ * @file_set: The file_set whose files are to be frozen.
+ * @file_set_ser: Where to put the serialized file_set.
+ *
+ * This function is called from the reboot() syscall path, just before the
+ * kernel transitions to the new image via kexec. Its purpose is to perform the
+ * final preparation and serialization of all preserved files in the file_set.
+ *
+ * It iterates through each preserved file in FIFO order (the order of
+ * preservation) and performs two main actions:
+ *
+ * 1. Freezes the File: It calls the handler's .freeze() callback for each
+ * file. This gives the handler a final opportunity to quiesce the device or
+ * prepare its state for the upcoming reboot. The handler may update its
+ * private data handle during this step.
+ *
+ * 2. Serializes Metadata: After a successful freeze, it copies the final file
+ * metadata—the handler's compatible string, the user token, and the final
+ * private data handle—into the pre-allocated contiguous memory buffer
+ * (file_set->files) that will be handed over to the next kernel via KHO.
+ *
+ * Error Handling (Rollback):
+ * This function is atomic. If any handler's .freeze() operation fails, the
+ * entire live update is aborted. The __luo_file_unfreeze() helper is
+ * immediately called to invoke the .unfreeze() op on all files that were
+ * successfully frozen before the point of failure, rolling them back to a
+ * running state. The function then returns an error, causing the reboot()
+ * syscall to fail.
+ *
+ * Context: Called only from the liveupdate_reboot() path.
+ * Return: 0 on success, or a negative errno on failure.
+ */
+int luo_file_freeze(struct luo_file_set *file_set,
+ struct luo_file_set_ser *file_set_ser)
+{
+ struct luo_file_ser *file_ser = file_set->files;
+ struct luo_file *luo_file;
+ int err;
+ int i;
+
+ if (!file_set->count)
+ return 0;
+
+ if (WARN_ON(!file_ser))
+ return -EINVAL;
+
+ i = 0;
+ list_for_each_entry(luo_file, &file_set->files_list, list) {
+ err = luo_file_freeze_one(file_set, luo_file);
+ if (err < 0) {
+ pr_warn("Freeze failed for token[%#0llx] handler[%s] err[%pe]\n",
+ luo_file->token, luo_file->fh->compatible,
+ ERR_PTR(err));
+ goto err_unfreeze;
+ }
+
+ strscpy(file_ser[i].compatible, luo_file->fh->compatible,
+ sizeof(file_ser[i].compatible));
+ file_ser[i].data = luo_file->serialized_data;
+ file_ser[i].token = luo_file->token;
+ i++;
+ }
+
+ file_set_ser->count = file_set->count;
+ if (file_set->files)
+ file_set_ser->files = virt_to_phys(file_set->files);
+
+ return 0;
+
+err_unfreeze:
+ __luo_file_unfreeze(file_set, luo_file);
+
+ return err;
+}
+
+/**
+ * luo_file_unfreeze - Unfreezes all files in a file_set and clear serialization
+ * @file_set: The file_set whose files are to be unfrozen.
+ * @file_set_ser: Serialized file_set.
+ *
+ * This function rolls back the state of all files in a file_set after the
+ * freeze phase has begun but must be aborted. It is the counterpart to
+ * luo_file_freeze().
+ *
+ * It invokes the __luo_file_unfreeze() helper with a NULL argument, which
+ * signals the helper to iterate through all files in the file_set and call
+ * their respective .unfreeze() handler callbacks.
+ *
+ * Context: This is called when the live update is aborted during
+ * the reboot() syscall, after luo_file_freeze() has been called.
+ */
+void luo_file_unfreeze(struct luo_file_set *file_set,
+ struct luo_file_set_ser *file_set_ser)
+{
+ if (!file_set->count)
+ return;
+
+ __luo_file_unfreeze(file_set, NULL);
+ memset(file_set_ser, 0, sizeof(*file_set_ser));
+}
+
+/**
+ * luo_retrieve_file - Restores a preserved file from a file_set by its token.
+ * @file_set: The file_set from which to retrieve the file.
+ * @token: The unique token identifying the file to be restored.
+ * @filep: Output parameter; on success, this is populated with a pointer
+ * to the newly retrieved 'struct file'.
+ *
+ * This function is the primary mechanism for recreating a file in the new
+ * kernel after a live update. It searches the file_set's list of deserialized
+ * files for an entry matching the provided @token.
+ *
+ * The operation is idempotent: if a file has already been successfully
+ * retrieved, this function will simply return a pointer to the existing
+ * 'struct file' and report success without re-executing the retrieve
+ * operation. This is handled by checking the 'retrieved' flag under a lock.
+ *
+ * File retrieval can happen in any order; it is not bound by the order of
+ * preservation.
+ *
+ * Context: Can be called from an ioctl or other in-kernel code in the new
+ * kernel.
+ * Return: 0 on success. Returns a negative errno on failure:
+ * -ENOENT if no file with the matching token is found.
+ * Any error code returned by the handler's .retrieve() op.
+ */
+int luo_retrieve_file(struct luo_file_set *file_set, u64 token,
+ struct file **filep)
+{
+ struct liveupdate_file_op_args args = {0};
+ struct luo_file *luo_file;
+ int err;
+
+ if (list_empty(&file_set->files_list))
+ return -ENOENT;
+
+ list_for_each_entry(luo_file, &file_set->files_list, list) {
+ if (luo_file->token == token)
+ break;
+ }
+
+ if (luo_file->token != token)
+ return -ENOENT;
+
+ guard(mutex)(&luo_file->mutex);
+ if (luo_file->retrieved) {
+ /*
+ * Someone is asking for this file again, so get a reference
+ * for them.
+ */
+ get_file(luo_file->file);
+ *filep = luo_file->file;
+ return 0;
+ }
+
+ args.handler = luo_file->fh;
+ args.serialized_data = luo_file->serialized_data;
+ err = luo_file->fh->ops->retrieve(&args);
+ if (!err) {
+ luo_file->file = args.file;
+
+ /* Get reference so we can keep this file in LUO until finish */
+ get_file(luo_file->file);
+ *filep = luo_file->file;
+ luo_file->retrieved = true;
+ }
+
+ return err;
+}
+
+static int luo_file_can_finish_one(struct luo_file_set *file_set,
+ struct luo_file *luo_file)
+{
+ bool can_finish = true;
+
+ guard(mutex)(&luo_file->mutex);
+
+ if (luo_file->fh->ops->can_finish) {
+ struct liveupdate_file_op_args args = {0};
+
+ args.handler = luo_file->fh;
+ args.file = luo_file->file;
+ args.serialized_data = luo_file->serialized_data;
+ args.retrieved = luo_file->retrieved;
+ can_finish = luo_file->fh->ops->can_finish(&args);
+ }
+
+ return can_finish ? 0 : -EBUSY;
+}
+
+static void luo_file_finish_one(struct luo_file_set *file_set,
+ struct luo_file *luo_file)
+{
+ struct liveupdate_file_op_args args = {0};
+
+ guard(mutex)(&luo_file->mutex);
+
+ args.handler = luo_file->fh;
+ args.file = luo_file->file;
+ args.serialized_data = luo_file->serialized_data;
+ args.retrieved = luo_file->retrieved;
+
+ luo_file->fh->ops->finish(&args);
+}
+
+/**
+ * luo_file_finish - Completes the lifecycle for all files in a file_set.
+ * @file_set: The file_set to be finalized.
+ *
+ * This function orchestrates the final teardown of a live update file_set in
+ * the new kernel. It should be called after all necessary files have been
+ * retrieved and the userspace agent is ready to release the preserved state.
+ *
+ * The function iterates through all tracked files. For each file, it performs
+ * the following sequence of cleanup actions:
+ *
+ * 1. If file is not yet retrieved, retrieves it, and calls can_finish() on
+ * every file in the file_set. If all can_finish return true, continue to
+ * finish.
+ * 2. Calls the handler's .finish() callback (via luo_file_finish_one) to
+ * allow for final resource cleanup within the handler.
+ * 3. Releases LUO's ownership reference on the 'struct file' via fput(). This
+ * is the counterpart to the get_file() call in luo_retrieve_file().
+ * 4. Removes the 'struct luo_file' from the file_set's internal list.
+ * 5. Frees the memory for the 'struct luo_file' instance itself.
+ *
+ * After successfully finishing all individual files, it frees the
+ * contiguous memory block that was used to transfer the serialized metadata
+ * from the previous kernel.
+ *
+ * Error Handling (Atomic Failure):
+ * This operation is atomic. If any handler's .can_finish() op fails, the entire
+ * function aborts immediately and returns an error.
+ *
+ * Context: Can be called from an ioctl handler in the new kernel.
+ * Return: 0 on success, or a negative errno on failure.
+ */
+int luo_file_finish(struct luo_file_set *file_set)
+{
+ struct list_head *files_list = &file_set->files_list;
+ struct luo_file *luo_file;
+ int err;
+
+ if (!file_set->count)
+ return 0;
+
+ list_for_each_entry(luo_file, files_list, list) {
+ err = luo_file_can_finish_one(file_set, luo_file);
+ if (err)
+ return err;
+ }
+
+ while (!list_empty(&file_set->files_list)) {
+ luo_file = list_last_entry(&file_set->files_list,
+ struct luo_file, list);
+
+ luo_file_finish_one(file_set, luo_file);
+
+ if (luo_file->file)
+ fput(luo_file->file);
+ list_del(&luo_file->list);
+ file_set->count--;
+ mutex_destroy(&luo_file->mutex);
+ kfree(luo_file);
+ }
+
+ if (file_set->files) {
+ kho_restore_free(file_set->files);
+ file_set->files = NULL;
+ }
+
+ return 0;
+}
+
+/**
+ * luo_file_deserialize - Reconstructs the list of preserved files in the new kernel.
+ * @file_set: The incoming file_set to fill with deserialized data.
+ * @file_set_ser: Serialized KHO file_set data from the previous kernel.
+ *
+ * This function is called during the early boot process of the new kernel. It
+ * takes the raw, contiguous memory block of 'struct luo_file_ser' entries,
+ * provided by the previous kernel, and transforms it back into a live,
+ * in-memory linked list of 'struct luo_file' instances.
+ *
+ * For each serialized entry, it performs the following steps:
+ * 1. Reads the 'compatible' string.
+ * 2. Searches the global list of registered file handlers for one that
+ * matches the compatible string.
+ * 3. Allocates a new 'struct luo_file'.
+ * 4. Populates the new structure with the deserialized data (token, private
+ * data handle) and links it to the found handler. The 'file' pointer is
+ * initialized to NULL, as the file has not been retrieved yet.
+ * 5. Adds the new 'struct luo_file' to the file_set's files_list.
+ *
+ * This prepares the file_set for userspace, which can later call
+ * luo_retrieve_file() to restore the actual file descriptors.
+ *
+ * Context: Called from session deserialization.
+ */
+int luo_file_deserialize(struct luo_file_set *file_set,
+ struct luo_file_set_ser *file_set_ser)
+{
+ struct luo_file_ser *file_ser;
+ u64 i;
+
+ if (!file_set_ser->files) {
+ WARN_ON(file_set_ser->count);
+ return 0;
+ }
+
+ file_set->count = file_set_ser->count;
+ file_set->files = phys_to_virt(file_set_ser->files);
+
+ /*
+ * Note on error handling:
+ *
+ * If deserialization fails (e.g., allocation failure or corrupt data),
+ * we intentionally skip cleanup of files that were already restored.
+ *
+ * A partial failure leaves the preserved state inconsistent.
+ * Implementing a safe "undo" to unwind complex dependencies (sessions,
+ * files, hardware state) is error-prone and provides little value, as
+ * the system is effectively in a broken state.
+ *
+ * We treat these resources as leaked. The expected recovery path is for
+ * userspace to detect the failure and trigger a reboot, which will
+ * reliably reset devices and reclaim memory.
+ */
+ file_ser = file_set->files;
+ for (i = 0; i < file_set->count; i++) {
+ struct liveupdate_file_handler *fh;
+ bool handler_found = false;
+ struct luo_file *luo_file;
+
+ luo_list_for_each_private(fh, &luo_file_handler_list, list) {
+ if (!strcmp(fh->compatible, file_ser[i].compatible)) {
+ handler_found = true;
+ break;
+ }
+ }
+
+ if (!handler_found) {
+ pr_warn("No registered handler for compatible '%s'\n",
+ file_ser[i].compatible);
+ return -ENOENT;
+ }
+
+ luo_file = kzalloc(sizeof(*luo_file), GFP_KERNEL);
+ if (!luo_file)
+ return -ENOMEM;
+
+ luo_file->fh = fh;
+ luo_file->file = NULL;
+ luo_file->serialized_data = file_ser[i].data;
+ luo_file->token = file_ser[i].token;
+ luo_file->retrieved = false;
+ mutex_init(&luo_file->mutex);
+ list_add_tail(&luo_file->list, &file_set->files_list);
+ }
+
+ return 0;
+}
+
+void luo_file_set_init(struct luo_file_set *file_set)
+{
+ INIT_LIST_HEAD(&file_set->files_list);
+}
+
+void luo_file_set_destroy(struct luo_file_set *file_set)
+{
+ WARN_ON(file_set->count);
+ WARN_ON(!list_empty(&file_set->files_list));
+}
+
+/**
+ * liveupdate_register_file_handler - Register a file handler with LUO.
+ * @fh: Pointer to a caller-allocated &struct liveupdate_file_handler.
+ * The caller must initialize this structure, including a unique
+ * 'compatible' string and a valid 'fh' callbacks. This function adds the
+ * handler to the global list of supported file handlers.
+ *
+ * Context: Typically called during module initialization for file types that
+ * support live update preservation.
+ *
+ * Return: 0 on success. Negative errno on failure.
+ */
+int liveupdate_register_file_handler(struct liveupdate_file_handler *fh)
+{
+ struct liveupdate_file_handler *fh_iter;
+ int err;
+
+ if (!liveupdate_enabled())
+ return -EOPNOTSUPP;
+
+ /* Sanity check that all required callbacks are set */
+ if (!fh->ops->preserve || !fh->ops->unpreserve || !fh->ops->retrieve ||
+ !fh->ops->finish || !fh->ops->can_preserve) {
+ return -EINVAL;
+ }
+
+ /*
+ * Ensure the system is quiescent (no active sessions).
+ * This prevents registering new handlers while sessions are active or
+ * while deserialization is in progress.
+ */
+ if (!luo_session_quiesce())
+ return -EBUSY;
+
+ /* Check for duplicate compatible strings */
+ luo_list_for_each_private(fh_iter, &luo_file_handler_list, list) {
+ if (!strcmp(fh_iter->compatible, fh->compatible)) {
+ pr_err("File handler registration failed: Compatible string '%s' already registered.\n",
+ fh->compatible);
+ err = -EEXIST;
+ goto err_resume;
+ }
+ }
+
+ /* Pin the module implementing the handler */
+ if (!try_module_get(fh->ops->owner)) {
+ err = -EAGAIN;
+ goto err_resume;
+ }
+
+ INIT_LIST_HEAD(&ACCESS_PRIVATE(fh, list));
+ list_add_tail(&ACCESS_PRIVATE(fh, list), &luo_file_handler_list);
+ luo_session_resume();
+
+ return 0;
+
+err_resume:
+ luo_session_resume();
+ return err;
+}
+
+/**
+ * liveupdate_unregister_file_handler - Unregister a liveupdate file handler
+ * @fh: The file handler to unregister
+ *
+ * Unregisters the file handler from the liveupdate core. This function
+ * reverses the operations of liveupdate_register_file_handler().
+ *
+ * It ensures safe removal by checking that:
+ * No live update session is currently in progress.
+ *
+ * If the unregistration fails, the internal test state is reverted.
+ *
+ * Return: 0 Success. -EOPNOTSUPP when live update is not enabled. -EBUSY A live
+ * update is in progress, can't quiesce live update.
+ */
+int liveupdate_unregister_file_handler(struct liveupdate_file_handler *fh)
+{
+ if (!liveupdate_enabled())
+ return -EOPNOTSUPP;
+
+ if (!luo_session_quiesce())
+ return -EBUSY;
+
+ list_del(&ACCESS_PRIVATE(fh, list));
+ module_put(fh->ops->owner);
+ luo_session_resume();
+
+ return 0;
+}
diff --git a/kernel/liveupdate/luo_internal.h b/kernel/liveupdate/luo_internal.h
new file mode 100644
index 000000000000..c8973b543d1d
--- /dev/null
+++ b/kernel/liveupdate/luo_internal.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+
+#ifndef _LINUX_LUO_INTERNAL_H
+#define _LINUX_LUO_INTERNAL_H
+
+#include <linux/liveupdate.h>
+#include <linux/uaccess.h>
+
+struct luo_ucmd {
+ void __user *ubuffer;
+ u32 user_size;
+ void *cmd;
+};
+
+static inline int luo_ucmd_respond(struct luo_ucmd *ucmd,
+ size_t kernel_cmd_size)
+{
+ /*
+ * Copy the minimum of what the user provided and what we actually
+ * have.
+ */
+ if (copy_to_user(ucmd->ubuffer, ucmd->cmd,
+ min_t(size_t, ucmd->user_size, kernel_cmd_size))) {
+ return -EFAULT;
+ }
+ return 0;
+}
+
+/*
+ * Handles a deserialization failure: devices and memory is in unpredictable
+ * state.
+ *
+ * Continuing the boot process after a failure is dangerous because it could
+ * lead to leaks of private data.
+ */
+#define luo_restore_fail(__fmt, ...) panic(__fmt, ##__VA_ARGS__)
+
+/* Mimics list_for_each_entry() but for private list head entries */
+#define luo_list_for_each_private(pos, head, member) \
+ for (struct list_head *__iter = (head)->next; \
+ __iter != (head) && \
+ ({ pos = container_of(__iter, typeof(*(pos)), member); 1; }); \
+ __iter = __iter->next)
+
+/**
+ * struct luo_file_set - A set of files that belong to the same sessions.
+ * @files_list: An ordered list of files associated with this session, it is
+ * ordered by preservation time.
+ * @files: The physically contiguous memory block that holds the serialized
+ * state of files.
+ * @count: A counter tracking the number of files currently stored in the
+ * @files_list for this session.
+ */
+struct luo_file_set {
+ struct list_head files_list;
+ struct luo_file_ser *files;
+ long count;
+};
+
+/**
+ * struct luo_session - Represents an active or incoming Live Update session.
+ * @name: A unique name for this session, used for identification and
+ * retrieval.
+ * @ser: Pointer to the serialized data for this session.
+ * @list: A list_head member used to link this session into a global list
+ * of either outgoing (to be preserved) or incoming (restored from
+ * previous kernel) sessions.
+ * @retrieved: A boolean flag indicating whether this session has been
+ * retrieved by a consumer in the new kernel.
+ * @file_set: A set of files that belong to this session.
+ * @mutex: protects fields in the luo_session.
+ */
+struct luo_session {
+ char name[LIVEUPDATE_SESSION_NAME_LENGTH];
+ struct luo_session_ser *ser;
+ struct list_head list;
+ bool retrieved;
+ struct luo_file_set file_set;
+ struct mutex mutex;
+};
+
+int luo_session_create(const char *name, struct file **filep);
+int luo_session_retrieve(const char *name, struct file **filep);
+int __init luo_session_setup_outgoing(void *fdt);
+int __init luo_session_setup_incoming(void *fdt);
+int luo_session_serialize(void);
+int luo_session_deserialize(void);
+bool luo_session_quiesce(void);
+void luo_session_resume(void);
+
+int luo_preserve_file(struct luo_file_set *file_set, u64 token, int fd);
+void luo_file_unpreserve_files(struct luo_file_set *file_set);
+int luo_file_freeze(struct luo_file_set *file_set,
+ struct luo_file_set_ser *file_set_ser);
+void luo_file_unfreeze(struct luo_file_set *file_set,
+ struct luo_file_set_ser *file_set_ser);
+int luo_retrieve_file(struct luo_file_set *file_set, u64 token,
+ struct file **filep);
+int luo_file_finish(struct luo_file_set *file_set);
+int luo_file_deserialize(struct luo_file_set *file_set,
+ struct luo_file_set_ser *file_set_ser);
+void luo_file_set_init(struct luo_file_set *file_set);
+void luo_file_set_destroy(struct luo_file_set *file_set);
+
+#endif /* _LINUX_LUO_INTERNAL_H */
diff --git a/kernel/liveupdate/luo_session.c b/kernel/liveupdate/luo_session.c
new file mode 100644
index 000000000000..dbdbc3bd7929
--- /dev/null
+++ b/kernel/liveupdate/luo_session.c
@@ -0,0 +1,646 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+
+/**
+ * DOC: LUO Sessions
+ *
+ * LUO Sessions provide the core mechanism for grouping and managing `struct
+ * file *` instances that need to be preserved across a kexec-based live
+ * update. Each session acts as a named container for a set of file objects,
+ * allowing a userspace agent to manage the lifecycle of resources critical to a
+ * workload.
+ *
+ * Core Concepts:
+ *
+ * - Named Containers: Sessions are identified by a unique, user-provided name,
+ * which is used for both creation in the current kernel and retrieval in the
+ * next kernel.
+ *
+ * - Userspace Interface: Session management is driven from userspace via
+ * ioctls on /dev/liveupdate.
+ *
+ * - Serialization: Session metadata is preserved using the KHO framework. When
+ * a live update is triggered via kexec, an array of `struct luo_session_ser`
+ * is populated and placed in a preserved memory region. An FDT node is also
+ * created, containing the count of sessions and the physical address of this
+ * array.
+ *
+ * Session Lifecycle:
+ *
+ * 1. Creation: A userspace agent calls `luo_session_create()` to create a
+ * new, empty session and receives a file descriptor for it.
+ *
+ * 2. Serialization: When the `reboot(LINUX_REBOOT_CMD_KEXEC)` syscall is
+ * made, `luo_session_serialize()` is called. It iterates through all
+ * active sessions and writes their metadata into a memory area preserved
+ * by KHO.
+ *
+ * 3. Deserialization (in new kernel): After kexec, `luo_session_deserialize()`
+ * runs, reading the serialized data and creating a list of `struct
+ * luo_session` objects representing the preserved sessions.
+ *
+ * 4. Retrieval: A userspace agent in the new kernel can then call
+ * `luo_session_retrieve()` with a session name to get a new file
+ * descriptor and access the preserved state.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/anon_inodes.h>
+#include <linux/cleanup.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/io.h>
+#include <linux/kexec_handover.h>
+#include <linux/kho/abi/luo.h>
+#include <linux/libfdt.h>
+#include <linux/list.h>
+#include <linux/liveupdate.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+#include <linux/unaligned.h>
+#include <uapi/linux/liveupdate.h>
+#include "luo_internal.h"
+
+/* 16 4K pages, give space for 744 sessions */
+#define LUO_SESSION_PGCNT 16ul
+#define LUO_SESSION_MAX (((LUO_SESSION_PGCNT << PAGE_SHIFT) - \
+ sizeof(struct luo_session_header_ser)) / \
+ sizeof(struct luo_session_ser))
+
+/**
+ * struct luo_session_header - Header struct for managing LUO sessions.
+ * @count: The number of sessions currently tracked in the @list.
+ * @list: The head of the linked list of `struct luo_session` instances.
+ * @rwsem: A read-write semaphore providing synchronized access to the
+ * session list and other fields in this structure.
+ * @header_ser: The header data of serialization array.
+ * @ser: The serialized session data (an array of
+ * `struct luo_session_ser`).
+ * @active: Set to true when first initialized. If previous kernel did not
+ * send session data, active stays false for incoming.
+ */
+struct luo_session_header {
+ long count;
+ struct list_head list;
+ struct rw_semaphore rwsem;
+ struct luo_session_header_ser *header_ser;
+ struct luo_session_ser *ser;
+ bool active;
+};
+
+/**
+ * struct luo_session_global - Global container for managing LUO sessions.
+ * @incoming: The sessions passed from the previous kernel.
+ * @outgoing: The sessions that are going to be passed to the next kernel.
+ */
+struct luo_session_global {
+ struct luo_session_header incoming;
+ struct luo_session_header outgoing;
+};
+
+static struct luo_session_global luo_session_global = {
+ .incoming = {
+ .list = LIST_HEAD_INIT(luo_session_global.incoming.list),
+ .rwsem = __RWSEM_INITIALIZER(luo_session_global.incoming.rwsem),
+ },
+ .outgoing = {
+ .list = LIST_HEAD_INIT(luo_session_global.outgoing.list),
+ .rwsem = __RWSEM_INITIALIZER(luo_session_global.outgoing.rwsem),
+ },
+};
+
+static struct luo_session *luo_session_alloc(const char *name)
+{
+ struct luo_session *session = kzalloc(sizeof(*session), GFP_KERNEL);
+
+ if (!session)
+ return ERR_PTR(-ENOMEM);
+
+ strscpy(session->name, name, sizeof(session->name));
+ INIT_LIST_HEAD(&session->file_set.files_list);
+ luo_file_set_init(&session->file_set);
+ INIT_LIST_HEAD(&session->list);
+ mutex_init(&session->mutex);
+
+ return session;
+}
+
+static void luo_session_free(struct luo_session *session)
+{
+ luo_file_set_destroy(&session->file_set);
+ mutex_destroy(&session->mutex);
+ kfree(session);
+}
+
+static int luo_session_insert(struct luo_session_header *sh,
+ struct luo_session *session)
+{
+ struct luo_session *it;
+
+ guard(rwsem_write)(&sh->rwsem);
+
+ /*
+ * For outgoing we should make sure there is room in serialization array
+ * for new session.
+ */
+ if (sh == &luo_session_global.outgoing) {
+ if (sh->count == LUO_SESSION_MAX)
+ return -ENOMEM;
+ }
+
+ /*
+ * For small number of sessions this loop won't hurt performance
+ * but if we ever start using a lot of sessions, this might
+ * become a bottle neck during deserialization time, as it would
+ * cause O(n*n) complexity.
+ */
+ list_for_each_entry(it, &sh->list, list) {
+ if (!strncmp(it->name, session->name, sizeof(it->name)))
+ return -EEXIST;
+ }
+ list_add_tail(&session->list, &sh->list);
+ sh->count++;
+
+ return 0;
+}
+
+static void luo_session_remove(struct luo_session_header *sh,
+ struct luo_session *session)
+{
+ guard(rwsem_write)(&sh->rwsem);
+ list_del(&session->list);
+ sh->count--;
+}
+
+static int luo_session_finish_one(struct luo_session *session)
+{
+ guard(mutex)(&session->mutex);
+ return luo_file_finish(&session->file_set);
+}
+
+static void luo_session_unfreeze_one(struct luo_session *session,
+ struct luo_session_ser *ser)
+{
+ guard(mutex)(&session->mutex);
+ luo_file_unfreeze(&session->file_set, &ser->file_set_ser);
+}
+
+static int luo_session_freeze_one(struct luo_session *session,
+ struct luo_session_ser *ser)
+{
+ guard(mutex)(&session->mutex);
+ return luo_file_freeze(&session->file_set, &ser->file_set_ser);
+}
+
+static int luo_session_release(struct inode *inodep, struct file *filep)
+{
+ struct luo_session *session = filep->private_data;
+ struct luo_session_header *sh;
+
+ /* If retrieved is set, it means this session is from incoming list */
+ if (session->retrieved) {
+ int err = luo_session_finish_one(session);
+
+ if (err) {
+ pr_warn("Unable to finish session [%s] on release\n",
+ session->name);
+ return err;
+ }
+ sh = &luo_session_global.incoming;
+ } else {
+ scoped_guard(mutex, &session->mutex)
+ luo_file_unpreserve_files(&session->file_set);
+ sh = &luo_session_global.outgoing;
+ }
+
+ luo_session_remove(sh, session);
+ luo_session_free(session);
+
+ return 0;
+}
+
+static int luo_session_preserve_fd(struct luo_session *session,
+ struct luo_ucmd *ucmd)
+{
+ struct liveupdate_session_preserve_fd *argp = ucmd->cmd;
+ int err;
+
+ guard(mutex)(&session->mutex);
+ err = luo_preserve_file(&session->file_set, argp->token, argp->fd);
+ if (err)
+ return err;
+
+ err = luo_ucmd_respond(ucmd, sizeof(*argp));
+ if (err)
+ pr_warn("The file was successfully preserved, but response to user failed\n");
+
+ return err;
+}
+
+static int luo_session_retrieve_fd(struct luo_session *session,
+ struct luo_ucmd *ucmd)
+{
+ struct liveupdate_session_retrieve_fd *argp = ucmd->cmd;
+ struct file *file;
+ int err;
+
+ argp->fd = get_unused_fd_flags(O_CLOEXEC);
+ if (argp->fd < 0)
+ return argp->fd;
+
+ guard(mutex)(&session->mutex);
+ err = luo_retrieve_file(&session->file_set, argp->token, &file);
+ if (err < 0)
+ goto err_put_fd;
+
+ err = luo_ucmd_respond(ucmd, sizeof(*argp));
+ if (err)
+ goto err_put_file;
+
+ fd_install(argp->fd, file);
+
+ return 0;
+
+err_put_file:
+ fput(file);
+err_put_fd:
+ put_unused_fd(argp->fd);
+
+ return err;
+}
+
+static int luo_session_finish(struct luo_session *session,
+ struct luo_ucmd *ucmd)
+{
+ struct liveupdate_session_finish *argp = ucmd->cmd;
+ int err = luo_session_finish_one(session);
+
+ if (err)
+ return err;
+
+ return luo_ucmd_respond(ucmd, sizeof(*argp));
+}
+
+union ucmd_buffer {
+ struct liveupdate_session_finish finish;
+ struct liveupdate_session_preserve_fd preserve;
+ struct liveupdate_session_retrieve_fd retrieve;
+};
+
+struct luo_ioctl_op {
+ unsigned int size;
+ unsigned int min_size;
+ unsigned int ioctl_num;
+ int (*execute)(struct luo_session *session, struct luo_ucmd *ucmd);
+};
+
+#define IOCTL_OP(_ioctl, _fn, _struct, _last) \
+ [_IOC_NR(_ioctl) - LIVEUPDATE_CMD_SESSION_BASE] = { \
+ .size = sizeof(_struct) + \
+ BUILD_BUG_ON_ZERO(sizeof(union ucmd_buffer) < \
+ sizeof(_struct)), \
+ .min_size = offsetofend(_struct, _last), \
+ .ioctl_num = _ioctl, \
+ .execute = _fn, \
+ }
+
+static const struct luo_ioctl_op luo_session_ioctl_ops[] = {
+ IOCTL_OP(LIVEUPDATE_SESSION_FINISH, luo_session_finish,
+ struct liveupdate_session_finish, reserved),
+ IOCTL_OP(LIVEUPDATE_SESSION_PRESERVE_FD, luo_session_preserve_fd,
+ struct liveupdate_session_preserve_fd, token),
+ IOCTL_OP(LIVEUPDATE_SESSION_RETRIEVE_FD, luo_session_retrieve_fd,
+ struct liveupdate_session_retrieve_fd, token),
+};
+
+static long luo_session_ioctl(struct file *filep, unsigned int cmd,
+ unsigned long arg)
+{
+ struct luo_session *session = filep->private_data;
+ const struct luo_ioctl_op *op;
+ struct luo_ucmd ucmd = {};
+ union ucmd_buffer buf;
+ unsigned int nr;
+ int ret;
+
+ nr = _IOC_NR(cmd);
+ if (nr < LIVEUPDATE_CMD_SESSION_BASE || (nr - LIVEUPDATE_CMD_SESSION_BASE) >=
+ ARRAY_SIZE(luo_session_ioctl_ops)) {
+ return -EINVAL;
+ }
+
+ ucmd.ubuffer = (void __user *)arg;
+ ret = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer);
+ if (ret)
+ return ret;
+
+ op = &luo_session_ioctl_ops[nr - LIVEUPDATE_CMD_SESSION_BASE];
+ if (op->ioctl_num != cmd)
+ return -ENOIOCTLCMD;
+ if (ucmd.user_size < op->min_size)
+ return -EINVAL;
+
+ ucmd.cmd = &buf;
+ ret = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer,
+ ucmd.user_size);
+ if (ret)
+ return ret;
+
+ return op->execute(session, &ucmd);
+}
+
+static const struct file_operations luo_session_fops = {
+ .owner = THIS_MODULE,
+ .release = luo_session_release,
+ .unlocked_ioctl = luo_session_ioctl,
+};
+
+/* Create a "struct file" for session */
+static int luo_session_getfile(struct luo_session *session, struct file **filep)
+{
+ char name_buf[128];
+ struct file *file;
+
+ lockdep_assert_held(&session->mutex);
+ snprintf(name_buf, sizeof(name_buf), "[luo_session] %s", session->name);
+ file = anon_inode_getfile(name_buf, &luo_session_fops, session, O_RDWR);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ *filep = file;
+
+ return 0;
+}
+
+int luo_session_create(const char *name, struct file **filep)
+{
+ struct luo_session *session;
+ int err;
+
+ session = luo_session_alloc(name);
+ if (IS_ERR(session))
+ return PTR_ERR(session);
+
+ err = luo_session_insert(&luo_session_global.outgoing, session);
+ if (err)
+ goto err_free;
+
+ scoped_guard(mutex, &session->mutex)
+ err = luo_session_getfile(session, filep);
+ if (err)
+ goto err_remove;
+
+ return 0;
+
+err_remove:
+ luo_session_remove(&luo_session_global.outgoing, session);
+err_free:
+ luo_session_free(session);
+
+ return err;
+}
+
+int luo_session_retrieve(const char *name, struct file **filep)
+{
+ struct luo_session_header *sh = &luo_session_global.incoming;
+ struct luo_session *session = NULL;
+ struct luo_session *it;
+ int err;
+
+ scoped_guard(rwsem_read, &sh->rwsem) {
+ list_for_each_entry(it, &sh->list, list) {
+ if (!strncmp(it->name, name, sizeof(it->name))) {
+ session = it;
+ break;
+ }
+ }
+ }
+
+ if (!session)
+ return -ENOENT;
+
+ guard(mutex)(&session->mutex);
+ if (session->retrieved)
+ return -EINVAL;
+
+ err = luo_session_getfile(session, filep);
+ if (!err)
+ session->retrieved = true;
+
+ return err;
+}
+
+int __init luo_session_setup_outgoing(void *fdt_out)
+{
+ struct luo_session_header_ser *header_ser;
+ u64 header_ser_pa;
+ int err;
+
+ header_ser = kho_alloc_preserve(LUO_SESSION_PGCNT << PAGE_SHIFT);
+ if (IS_ERR(header_ser))
+ return PTR_ERR(header_ser);
+ header_ser_pa = virt_to_phys(header_ser);
+
+ err = fdt_begin_node(fdt_out, LUO_FDT_SESSION_NODE_NAME);
+ err |= fdt_property_string(fdt_out, "compatible",
+ LUO_FDT_SESSION_COMPATIBLE);
+ err |= fdt_property(fdt_out, LUO_FDT_SESSION_HEADER, &header_ser_pa,
+ sizeof(header_ser_pa));
+ err |= fdt_end_node(fdt_out);
+
+ if (err)
+ goto err_unpreserve;
+
+ luo_session_global.outgoing.header_ser = header_ser;
+ luo_session_global.outgoing.ser = (void *)(header_ser + 1);
+ luo_session_global.outgoing.active = true;
+
+ return 0;
+
+err_unpreserve:
+ kho_unpreserve_free(header_ser);
+ return err;
+}
+
+int __init luo_session_setup_incoming(void *fdt_in)
+{
+ struct luo_session_header_ser *header_ser;
+ int err, header_size, offset;
+ u64 header_ser_pa;
+ const void *ptr;
+
+ offset = fdt_subnode_offset(fdt_in, 0, LUO_FDT_SESSION_NODE_NAME);
+ if (offset < 0) {
+ pr_err("Unable to get session node: [%s]\n",
+ LUO_FDT_SESSION_NODE_NAME);
+ return -EINVAL;
+ }
+
+ err = fdt_node_check_compatible(fdt_in, offset,
+ LUO_FDT_SESSION_COMPATIBLE);
+ if (err) {
+ pr_err("Session node incompatible [%s]\n",
+ LUO_FDT_SESSION_COMPATIBLE);
+ return -EINVAL;
+ }
+
+ header_size = 0;
+ ptr = fdt_getprop(fdt_in, offset, LUO_FDT_SESSION_HEADER, &header_size);
+ if (!ptr || header_size != sizeof(u64)) {
+ pr_err("Unable to get session header '%s' [%d]\n",
+ LUO_FDT_SESSION_HEADER, header_size);
+ return -EINVAL;
+ }
+
+ header_ser_pa = get_unaligned((u64 *)ptr);
+ header_ser = phys_to_virt(header_ser_pa);
+
+ luo_session_global.incoming.header_ser = header_ser;
+ luo_session_global.incoming.ser = (void *)(header_ser + 1);
+ luo_session_global.incoming.active = true;
+
+ return 0;
+}
+
+int luo_session_deserialize(void)
+{
+ struct luo_session_header *sh = &luo_session_global.incoming;
+ static bool is_deserialized;
+ static int err;
+
+ /* If has been deserialized, always return the same error code */
+ if (is_deserialized)
+ return err;
+
+ is_deserialized = true;
+ if (!sh->active)
+ return 0;
+
+ /*
+ * Note on error handling:
+ *
+ * If deserialization fails (e.g., allocation failure or corrupt data),
+ * we intentionally skip cleanup of sessions that were already restored.
+ *
+ * A partial failure leaves the preserved state inconsistent.
+ * Implementing a safe "undo" to unwind complex dependencies (sessions,
+ * files, hardware state) is error-prone and provides little value, as
+ * the system is effectively in a broken state.
+ *
+ * We treat these resources as leaked. The expected recovery path is for
+ * userspace to detect the failure and trigger a reboot, which will
+ * reliably reset devices and reclaim memory.
+ */
+ for (int i = 0; i < sh->header_ser->count; i++) {
+ struct luo_session *session;
+
+ session = luo_session_alloc(sh->ser[i].name);
+ if (IS_ERR(session)) {
+ pr_warn("Failed to allocate session [%s] during deserialization %pe\n",
+ sh->ser[i].name, session);
+ return PTR_ERR(session);
+ }
+
+ err = luo_session_insert(sh, session);
+ if (err) {
+ pr_warn("Failed to insert session [%s] %pe\n",
+ session->name, ERR_PTR(err));
+ luo_session_free(session);
+ return err;
+ }
+
+ scoped_guard(mutex, &session->mutex) {
+ luo_file_deserialize(&session->file_set,
+ &sh->ser[i].file_set_ser);
+ }
+ }
+
+ kho_restore_free(sh->header_ser);
+ sh->header_ser = NULL;
+ sh->ser = NULL;
+
+ return 0;
+}
+
+int luo_session_serialize(void)
+{
+ struct luo_session_header *sh = &luo_session_global.outgoing;
+ struct luo_session *session;
+ int i = 0;
+ int err;
+
+ guard(rwsem_write)(&sh->rwsem);
+ list_for_each_entry(session, &sh->list, list) {
+ err = luo_session_freeze_one(session, &sh->ser[i]);
+ if (err)
+ goto err_undo;
+
+ strscpy(sh->ser[i].name, session->name,
+ sizeof(sh->ser[i].name));
+ i++;
+ }
+ sh->header_ser->count = sh->count;
+
+ return 0;
+
+err_undo:
+ list_for_each_entry_continue_reverse(session, &sh->list, list) {
+ i--;
+ luo_session_unfreeze_one(session, &sh->ser[i]);
+ memset(sh->ser[i].name, 0, sizeof(sh->ser[i].name));
+ }
+
+ return err;
+}
+
+/**
+ * luo_session_quiesce - Ensure no active sessions exist and lock session lists.
+ *
+ * Acquires exclusive write locks on both incoming and outgoing session lists.
+ * It then validates no sessions exist in either list.
+ *
+ * This mechanism is used during file handler un/registration to ensure that no
+ * sessions are currently using the handler, and no new sessions can be created
+ * while un/registration is in progress.
+ *
+ * This prevents registering new handlers while sessions are active or
+ * while deserialization is in progress.
+ *
+ * Return:
+ * true - System is quiescent (0 sessions) and locked.
+ * false - Active sessions exist. The locks are released internally.
+ */
+bool luo_session_quiesce(void)
+{
+ down_write(&luo_session_global.incoming.rwsem);
+ down_write(&luo_session_global.outgoing.rwsem);
+
+ if (luo_session_global.incoming.count ||
+ luo_session_global.outgoing.count) {
+ up_write(&luo_session_global.outgoing.rwsem);
+ up_write(&luo_session_global.incoming.rwsem);
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * luo_session_resume - Unlock session lists and resume normal activity.
+ *
+ * Releases the exclusive locks acquired by a successful call to
+ * luo_session_quiesce().
+ */
+void luo_session_resume(void)
+{
+ up_write(&luo_session_global.outgoing.rwsem);
+ up_write(&luo_session_global.incoming.rwsem);
+}
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 7b3ec2fa6e7c..710ee30b3bea 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -954,7 +954,7 @@ size_t module_flags_taint(unsigned long taints, char *buf)
int i;
for (i = 0; i < TAINT_FLAGS_COUNT; i++) {
- if (taint_flags[i].module && test_bit(i, &taints))
+ if (test_bit(i, &taints))
buf[l++] = taint_flags[i].c_true;
}
diff --git a/kernel/panic.c b/kernel/panic.c
index b2f2470af7e5..0d52210a9e2b 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -401,7 +401,7 @@ static void panic_trigger_all_cpu_backtrace(void)
*/
static void panic_other_cpus_shutdown(bool crash_kexec)
{
- if (panic_print & SYS_INFO_ALL_CPU_BT)
+ if (panic_print & SYS_INFO_ALL_BT)
panic_trigger_all_cpu_backtrace();
/*
@@ -628,38 +628,40 @@ void panic(const char *fmt, ...)
}
EXPORT_SYMBOL(panic);
-#define TAINT_FLAG(taint, _c_true, _c_false, _module) \
+#define TAINT_FLAG(taint, _c_true, _c_false) \
[ TAINT_##taint ] = { \
.c_true = _c_true, .c_false = _c_false, \
- .module = _module, \
.desc = #taint, \
}
/*
- * TAINT_FORCED_RMMOD could be a per-module flag but the module
- * is being removed anyway.
+ * NOTE: if you modify the taint_flags or TAINT_FLAGS_COUNT,
+ * please also modify tools/debugging/kernel-chktaint and
+ * Documentation/admin-guide/tainted-kernels.rst, including its
+ * small shell script that prints the TAINT_FLAGS_COUNT bits of
+ * /proc/sys/kernel/tainted.
*/
const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = {
- TAINT_FLAG(PROPRIETARY_MODULE, 'P', 'G', true),
- TAINT_FLAG(FORCED_MODULE, 'F', ' ', true),
- TAINT_FLAG(CPU_OUT_OF_SPEC, 'S', ' ', false),
- TAINT_FLAG(FORCED_RMMOD, 'R', ' ', false),
- TAINT_FLAG(MACHINE_CHECK, 'M', ' ', false),
- TAINT_FLAG(BAD_PAGE, 'B', ' ', false),
- TAINT_FLAG(USER, 'U', ' ', false),
- TAINT_FLAG(DIE, 'D', ' ', false),
- TAINT_FLAG(OVERRIDDEN_ACPI_TABLE, 'A', ' ', false),
- TAINT_FLAG(WARN, 'W', ' ', false),
- TAINT_FLAG(CRAP, 'C', ' ', true),
- TAINT_FLAG(FIRMWARE_WORKAROUND, 'I', ' ', false),
- TAINT_FLAG(OOT_MODULE, 'O', ' ', true),
- TAINT_FLAG(UNSIGNED_MODULE, 'E', ' ', true),
- TAINT_FLAG(SOFTLOCKUP, 'L', ' ', false),
- TAINT_FLAG(LIVEPATCH, 'K', ' ', true),
- TAINT_FLAG(AUX, 'X', ' ', true),
- TAINT_FLAG(RANDSTRUCT, 'T', ' ', true),
- TAINT_FLAG(TEST, 'N', ' ', true),
- TAINT_FLAG(FWCTL, 'J', ' ', true),
+ TAINT_FLAG(PROPRIETARY_MODULE, 'P', 'G'),
+ TAINT_FLAG(FORCED_MODULE, 'F', ' '),
+ TAINT_FLAG(CPU_OUT_OF_SPEC, 'S', ' '),
+ TAINT_FLAG(FORCED_RMMOD, 'R', ' '),
+ TAINT_FLAG(MACHINE_CHECK, 'M', ' '),
+ TAINT_FLAG(BAD_PAGE, 'B', ' '),
+ TAINT_FLAG(USER, 'U', ' '),
+ TAINT_FLAG(DIE, 'D', ' '),
+ TAINT_FLAG(OVERRIDDEN_ACPI_TABLE, 'A', ' '),
+ TAINT_FLAG(WARN, 'W', ' '),
+ TAINT_FLAG(CRAP, 'C', ' '),
+ TAINT_FLAG(FIRMWARE_WORKAROUND, 'I', ' '),
+ TAINT_FLAG(OOT_MODULE, 'O', ' '),
+ TAINT_FLAG(UNSIGNED_MODULE, 'E', ' '),
+ TAINT_FLAG(SOFTLOCKUP, 'L', ' '),
+ TAINT_FLAG(LIVEPATCH, 'K', ' '),
+ TAINT_FLAG(AUX, 'X', ' '),
+ TAINT_FLAG(RANDSTRUCT, 'T', ' '),
+ TAINT_FLAG(TEST, 'N', ' '),
+ TAINT_FLAG(FWCTL, 'J', ' '),
};
#undef TAINT_FLAG
diff --git a/kernel/resource.c b/kernel/resource.c
index b9fa2a4ce089..e4e9bac12e6e 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -341,6 +341,8 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end,
unsigned long flags, unsigned long desc,
struct resource *res)
{
+ /* Skip children until we find a top level range that matches */
+ bool skip_children = true;
struct resource *p;
if (!res)
@@ -351,7 +353,7 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end,
read_lock(&resource_lock);
- for_each_resource(&iomem_resource, p, false) {
+ for_each_resource(&iomem_resource, p, skip_children) {
/* If we passed the resource we are looking for, stop */
if (p->start > end) {
p = NULL;
@@ -362,6 +364,12 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end,
if (p->end < start)
continue;
+ /*
+ * We found a top level range that matches what we are looking
+ * for. Time to start checking children too.
+ */
+ skip_children = false;
+
/* Found a match, break */
if (is_type_match(p, flags, desc))
break;
diff --git a/kernel/scs.c b/kernel/scs.c
index d7809affe740..772488afd5b9 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -135,7 +135,7 @@ static void scs_check_usage(struct task_struct *tsk)
if (!IS_ENABLED(CONFIG_DEBUG_STACK_USAGE))
return;
- for (p = task_scs(tsk); p < __scs_magic(tsk); ++p) {
+ for (p = task_scs(tsk); p < __scs_magic(task_scs(tsk)); ++p) {
if (!READ_ONCE_NOCHECK(*p))
break;
used += sizeof(*p);
diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c
index e066d31d08f8..fe9bf8db1922 100644
--- a/kernel/vmcore_info.c
+++ b/kernel/vmcore_info.c
@@ -31,6 +31,13 @@ u32 *vmcoreinfo_note;
/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
static unsigned char *vmcoreinfo_data_safecopy;
+struct hwerr_info {
+ atomic_t count;
+ time64_t timestamp;
+};
+
+static struct hwerr_info hwerr_data[HWERR_RECOV_MAX];
+
Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
void *data, size_t data_len)
{
@@ -118,6 +125,16 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void)
}
EXPORT_SYMBOL(paddr_vmcoreinfo_note);
+void hwerr_log_error_type(enum hwerr_error_type src)
+{
+ if (src < 0 || src >= HWERR_RECOV_MAX)
+ return;
+
+ atomic_inc(&hwerr_data[src].count);
+ WRITE_ONCE(hwerr_data[src].timestamp, ktime_get_real_seconds());
+}
+EXPORT_SYMBOL_GPL(hwerr_log_error_type);
+
static int __init crash_save_vmcoreinfo_init(void)
{
vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index a567600cf3ed..0685e3a8aa0a 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -25,6 +25,7 @@
#include <linux/stop_machine.h>
#include <linux/sysctl.h>
#include <linux/tick.h>
+#include <linux/sys_info.h>
#include <linux/sched/clock.h>
#include <linux/sched/debug.h>
@@ -65,6 +66,13 @@ int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
unsigned int __read_mostly hardlockup_panic =
IS_ENABLED(CONFIG_BOOTPARAM_HARDLOCKUP_PANIC);
+/*
+ * bitmasks to control what kinds of system info to be printed when
+ * hard lockup is detected, it could be task, memory, lock etc.
+ * Refer include/linux/sys_info.h for detailed bit definition.
+ */
+static unsigned long hardlockup_si_mask;
+
#ifdef CONFIG_SYSFS
static unsigned int hardlockup_count;
@@ -178,11 +186,15 @@ static void watchdog_hardlockup_kick(void)
void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
{
+ int hardlockup_all_cpu_backtrace;
+
if (per_cpu(watchdog_hardlockup_touched, cpu)) {
per_cpu(watchdog_hardlockup_touched, cpu) = false;
return;
}
+ hardlockup_all_cpu_backtrace = (hardlockup_si_mask & SYS_INFO_ALL_BT) ?
+ 1 : sysctl_hardlockup_all_cpu_backtrace;
/*
* Check for a hardlockup by making sure the CPU's timer
* interrupt is incrementing. The timer interrupt should have
@@ -214,7 +226,7 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
* Prevent multiple hard-lockup reports if one cpu is already
* engaged in dumping all cpu back traces.
*/
- if (sysctl_hardlockup_all_cpu_backtrace) {
+ if (hardlockup_all_cpu_backtrace) {
if (test_and_set_bit_lock(0, &hard_lockup_nmi_warn))
return;
}
@@ -243,12 +255,13 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
trigger_single_cpu_backtrace(cpu);
}
- if (sysctl_hardlockup_all_cpu_backtrace) {
+ if (hardlockup_all_cpu_backtrace) {
trigger_allbutcpu_cpu_backtrace(cpu);
if (!hardlockup_panic)
clear_bit_unlock(0, &hard_lockup_nmi_warn);
}
+ sys_info(hardlockup_si_mask & ~SYS_INFO_ALL_BT);
if (hardlockup_panic)
nmi_panic(regs, "Hard LOCKUP");
@@ -339,6 +352,13 @@ static void lockup_detector_update_enable(void)
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
#endif
+/*
+ * bitmasks to control what kinds of system info to be printed when
+ * soft lockup is detected, it could be task, memory, lock etc.
+ * Refer include/linux/sys_info.h for detailed bit definition.
+ */
+static unsigned long softlockup_si_mask;
+
static struct cpumask watchdog_allowed_mask __read_mostly;
/* Global variables, exported for sysctl */
@@ -755,7 +775,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
unsigned long touch_ts, period_ts, now;
struct pt_regs *regs = get_irq_regs();
int duration;
- int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
+ int softlockup_all_cpu_backtrace;
unsigned long flags;
if (!watchdog_enabled)
@@ -767,6 +787,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
if (panic_in_progress())
return HRTIMER_NORESTART;
+ softlockup_all_cpu_backtrace = (softlockup_si_mask & SYS_INFO_ALL_BT) ?
+ 1 : sysctl_softlockup_all_cpu_backtrace;
+
watchdog_hardlockup_kick();
/* kick the softlockup detector */
@@ -855,6 +878,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
}
add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
+ sys_info(softlockup_si_mask & ~SYS_INFO_ALL_BT);
if (softlockup_panic)
panic("softlockup: hung tasks");
}
@@ -1206,6 +1230,13 @@ static const struct ctl_table watchdog_sysctls[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
+ {
+ .procname = "softlockup_sys_info",
+ .data = &softlockup_si_mask,
+ .maxlen = sizeof(softlockup_si_mask),
+ .mode = 0644,
+ .proc_handler = sysctl_sys_info_handler,
+ },
#ifdef CONFIG_SMP
{
.procname = "softlockup_all_cpu_backtrace",
@@ -1228,6 +1259,13 @@ static const struct ctl_table watchdog_sysctls[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
+ {
+ .procname = "hardlockup_sys_info",
+ .data = &hardlockup_si_mask,
+ .maxlen = sizeof(hardlockup_si_mask),
+ .mode = 0644,
+ .proc_handler = sysctl_sys_info_handler,
+ },
#ifdef CONFIG_SMP
{
.procname = "hardlockup_all_cpu_backtrace",
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c2654075377e..ba36939fda79 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -342,8 +342,7 @@ config DEBUG_INFO_COMPRESSED_ZLIB
depends on $(cc-option,-gz=zlib)
depends on $(ld-option,--compress-debug-sections=zlib)
help
- Compress the debug information using zlib. Requires GCC 5.0+ or Clang
- 5.0+, binutils 2.26+, and zlib.
+ Compress the debug information using zlib.
Users of dpkg-deb via debian/rules may find an increase in
size of their debug .deb packages with this config set, due to the
@@ -493,23 +492,23 @@ config DEBUG_SECTION_MISMATCH
bool "Enable full Section mismatch analysis"
depends on CC_IS_GCC
help
- The section mismatch analysis checks if there are illegal
- references from one section to another section.
- During linktime or runtime, some sections are dropped;
- any use of code/data previously in these sections would
- most likely result in an oops.
- In the code, functions and variables are annotated with
- __init,, etc. (see the full list in include/linux/init.h),
- which results in the code/data being placed in specific sections.
+ The section mismatch analysis checks if there are illegal references
+ from one section to another. During linktime or runtime, some
+ sections are dropped; any use of code/data previously in these
+ sections would most likely result in an oops.
+
+ In the code, functions and variables are annotated with __init,
+ __initdata, and so on (see the full list in include/linux/init.h).
+ This directs the toolchain to place code/data in specific sections.
+
The section mismatch analysis is always performed after a full
- kernel build, and enabling this option causes the following
- additional step to occur:
- - Add the option -fno-inline-functions-called-once to gcc commands.
- When inlining a function annotated with __init in a non-init
- function, we would lose the section information and thus
- the analysis would not catch the illegal reference.
- This option tells gcc to inline less (but it does result in
- a larger kernel).
+ kernel build, and enabling this option causes the option
+ -fno-inline-functions-called-once to be added to gcc commands.
+
+ However, when inlining a function annotated with __init in
+ a non-init function, we would lose the section information and thus
+ the analysis would not catch the illegal reference. This option
+ tells gcc to inline less (but it does result in a larger kernel).
config SECTION_MISMATCH_WARN_ONLY
bool "Make section mismatch errors non-fatal"
@@ -1260,12 +1259,13 @@ config DEFAULT_HUNG_TASK_TIMEOUT
Keeping the default should be fine in most cases.
config BOOTPARAM_HUNG_TASK_PANIC
- bool "Panic (Reboot) On Hung Tasks"
+ int "Number of hung tasks to trigger kernel panic"
depends on DETECT_HUNG_TASK
+ default 0
help
- Say Y here to enable the kernel to panic on "hung tasks",
- which are bugs that cause the kernel to leave a task stuck
- in uninterruptible "D" state.
+ When set to a non-zero value, a kernel panic will be triggered
+ if the number of hung tasks found during a single scan reaches
+ this value.
The panic can be used in combination with panic_timeout,
to cause the system to reboot automatically after a
@@ -2817,8 +2817,25 @@ config CMDLINE_KUNIT_TEST
If unsure, say N.
+config BASE64_KUNIT
+ tristate "KUnit test for base64 decoding and encoding" if !KUNIT_ALL_TESTS
+ depends on KUNIT
+ default KUNIT_ALL_TESTS
+ help
+ This builds the base64 unit tests.
+
+ The tests cover the encoding and decoding logic of Base64 functions
+ in the kernel.
+ In addition to correctness checks, simple performance benchmarks
+ for both encoding and decoding are also included.
+
+ For more information on KUnit and unit tests in general please refer
+ to the KUnit documentation in Documentation/dev-tools/kunit/.
+
+ If unsure, say N.
+
config BITS_TEST
- tristate "KUnit test for bits.h" if !KUNIT_ALL_TESTS
+ tristate "KUnit test for bit functions and macros" if !KUNIT_ALL_TESTS
depends on KUNIT
default KUNIT_ALL_TESTS
help
diff --git a/lib/base64.c b/lib/base64.c
index b736a7a431c5..41961a444028 100644
--- a/lib/base64.c
+++ b/lib/base64.c
@@ -1,12 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * base64.c - RFC4648-compliant base64 encoding
+ * base64.c - Base64 with support for multiple variants
*
* Copyright (c) 2020 Hannes Reinecke, SUSE
*
* Based on the base64url routines from fs/crypto/fname.c
- * (which are using the URL-safe base64 encoding),
- * modified to use the standard coding table from RFC4648 section 4.
+ * (which are using the URL-safe Base64 encoding),
+ * modified to support multiple Base64 variants.
*/
#include <linux/kernel.h>
@@ -15,89 +15,170 @@
#include <linux/string.h>
#include <linux/base64.h>
-static const char base64_table[65] =
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+static const char base64_tables[][65] = {
+ [BASE64_STD] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
+ [BASE64_URLSAFE] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
+ [BASE64_IMAP] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
+};
+/*
+ * Initialize the base64 reverse mapping for a single character
+ * This macro maps a character to its corresponding base64 value,
+ * returning -1 if the character is invalid.
+ * char 'A'-'Z' maps to 0-25, 'a'-'z' maps to 26-51, '0'-'9' maps to 52-61,
+ * ch_62 maps to 62, ch_63 maps to 63, and other characters return -1
+ */
+#define INIT_1(v, ch_62, ch_63) \
+ [v] = (v) >= 'A' && (v) <= 'Z' ? (v) - 'A' \
+ : (v) >= 'a' && (v) <= 'z' ? (v) - 'a' + 26 \
+ : (v) >= '0' && (v) <= '9' ? (v) - '0' + 52 \
+ : (v) == (ch_62) ? 62 : (v) == (ch_63) ? 63 : -1
+
+/*
+ * Recursive macros to generate multiple Base64 reverse mapping table entries.
+ * Each macro generates a sequence of entries in the lookup table:
+ * INIT_2 generates 2 entries, INIT_4 generates 4, INIT_8 generates 8, and so on up to INIT_32.
+ */
+#define INIT_2(v, ...) INIT_1(v, __VA_ARGS__), INIT_1((v) + 1, __VA_ARGS__)
+#define INIT_4(v, ...) INIT_2(v, __VA_ARGS__), INIT_2((v) + 2, __VA_ARGS__)
+#define INIT_8(v, ...) INIT_4(v, __VA_ARGS__), INIT_4((v) + 4, __VA_ARGS__)
+#define INIT_16(v, ...) INIT_8(v, __VA_ARGS__), INIT_8((v) + 8, __VA_ARGS__)
+#define INIT_32(v, ...) INIT_16(v, __VA_ARGS__), INIT_16((v) + 16, __VA_ARGS__)
+
+#define BASE64_REV_INIT(ch_62, ch_63) { \
+ [0 ... 0x1f] = -1, \
+ INIT_32(0x20, ch_62, ch_63), \
+ INIT_32(0x40, ch_62, ch_63), \
+ INIT_32(0x60, ch_62, ch_63), \
+ [0x80 ... 0xff] = -1 }
+
+static const s8 base64_rev_maps[][256] = {
+ [BASE64_STD] = BASE64_REV_INIT('+', '/'),
+ [BASE64_URLSAFE] = BASE64_REV_INIT('-', '_'),
+ [BASE64_IMAP] = BASE64_REV_INIT('+', ',')
+};
+
+#undef BASE64_REV_INIT
+#undef INIT_32
+#undef INIT_16
+#undef INIT_8
+#undef INIT_4
+#undef INIT_2
+#undef INIT_1
/**
- * base64_encode() - base64-encode some binary data
+ * base64_encode() - Base64-encode some binary data
* @src: the binary data to encode
* @srclen: the length of @src in bytes
- * @dst: (output) the base64-encoded string. Not NUL-terminated.
+ * @dst: (output) the Base64-encoded string. Not NUL-terminated.
+ * @padding: whether to append '=' padding characters
+ * @variant: which base64 variant to use
*
- * Encodes data using base64 encoding, i.e. the "Base 64 Encoding" specified
- * by RFC 4648, including the '='-padding.
+ * Encodes data using the selected Base64 variant.
*
- * Return: the length of the resulting base64-encoded string in bytes.
+ * Return: the length of the resulting Base64-encoded string in bytes.
*/
-int base64_encode(const u8 *src, int srclen, char *dst)
+int base64_encode(const u8 *src, int srclen, char *dst, bool padding, enum base64_variant variant)
{
u32 ac = 0;
- int bits = 0;
- int i;
char *cp = dst;
+ const char *base64_table = base64_tables[variant];
- for (i = 0; i < srclen; i++) {
- ac = (ac << 8) | src[i];
- bits += 8;
- do {
- bits -= 6;
- *cp++ = base64_table[(ac >> bits) & 0x3f];
- } while (bits >= 6);
- }
- if (bits) {
- *cp++ = base64_table[(ac << (6 - bits)) & 0x3f];
- bits -= 6;
+ while (srclen >= 3) {
+ ac = src[0] << 16 | src[1] << 8 | src[2];
+ *cp++ = base64_table[ac >> 18];
+ *cp++ = base64_table[(ac >> 12) & 0x3f];
+ *cp++ = base64_table[(ac >> 6) & 0x3f];
+ *cp++ = base64_table[ac & 0x3f];
+
+ src += 3;
+ srclen -= 3;
}
- while (bits < 0) {
- *cp++ = '=';
- bits += 2;
+
+ switch (srclen) {
+ case 2:
+ ac = src[0] << 16 | src[1] << 8;
+ *cp++ = base64_table[ac >> 18];
+ *cp++ = base64_table[(ac >> 12) & 0x3f];
+ *cp++ = base64_table[(ac >> 6) & 0x3f];
+ if (padding)
+ *cp++ = '=';
+ break;
+ case 1:
+ ac = src[0] << 16;
+ *cp++ = base64_table[ac >> 18];
+ *cp++ = base64_table[(ac >> 12) & 0x3f];
+ if (padding) {
+ *cp++ = '=';
+ *cp++ = '=';
+ }
+ break;
}
return cp - dst;
}
EXPORT_SYMBOL_GPL(base64_encode);
/**
- * base64_decode() - base64-decode a string
+ * base64_decode() - Base64-decode a string
* @src: the string to decode. Doesn't need to be NUL-terminated.
* @srclen: the length of @src in bytes
* @dst: (output) the decoded binary data
+ * @padding: whether to append '=' padding characters
+ * @variant: which base64 variant to use
*
- * Decodes a string using base64 encoding, i.e. the "Base 64 Encoding"
- * specified by RFC 4648, including the '='-padding.
- *
- * This implementation hasn't been optimized for performance.
+ * Decodes a string using the selected Base64 variant.
*
* Return: the length of the resulting decoded binary data in bytes,
- * or -1 if the string isn't a valid base64 string.
+ * or -1 if the string isn't a valid Base64 string.
*/
-int base64_decode(const char *src, int srclen, u8 *dst)
+int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum base64_variant variant)
{
- u32 ac = 0;
- int bits = 0;
- int i;
u8 *bp = dst;
+ s8 input[4];
+ s32 val;
+ const u8 *s = (const u8 *)src;
+ const s8 *base64_rev_tables = base64_rev_maps[variant];
- for (i = 0; i < srclen; i++) {
- const char *p = strchr(base64_table, src[i]);
+ while (srclen >= 4) {
+ input[0] = base64_rev_tables[s[0]];
+ input[1] = base64_rev_tables[s[1]];
+ input[2] = base64_rev_tables[s[2]];
+ input[3] = base64_rev_tables[s[3]];
- if (src[i] == '=') {
- ac = (ac << 6);
- bits += 6;
- if (bits >= 8)
- bits -= 8;
- continue;
- }
- if (p == NULL || src[i] == 0)
- return -1;
- ac = (ac << 6) | (p - base64_table);
- bits += 6;
- if (bits >= 8) {
- bits -= 8;
- *bp++ = (u8)(ac >> bits);
+ val = input[0] << 18 | input[1] << 12 | input[2] << 6 | input[3];
+
+ if (unlikely(val < 0)) {
+ if (!padding || srclen != 4 || s[3] != '=')
+ return -1;
+ padding = 0;
+ srclen = s[2] == '=' ? 2 : 3;
+ break;
}
+
+ *bp++ = val >> 16;
+ *bp++ = val >> 8;
+ *bp++ = val;
+
+ s += 4;
+ srclen -= 4;
}
- if (ac & ((1 << bits) - 1))
+
+ if (likely(!srclen))
+ return bp - dst;
+ if (padding || srclen == 1)
return -1;
+
+ val = (base64_rev_tables[s[0]] << 12) | (base64_rev_tables[s[1]] << 6);
+ *bp++ = val >> 10;
+
+ if (srclen == 2) {
+ if (val & 0x800003ff)
+ return -1;
+ } else {
+ val |= base64_rev_tables[s[2]];
+ if (val & 0x80000003)
+ return -1;
+ *bp++ = val >> 2;
+ }
return bp - dst;
}
EXPORT_SYMBOL_GPL(base64_decode);
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 5a007952f7f2..7d7892e57a01 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -95,6 +95,7 @@ static const struct { unsigned flag:8; char opt_char; } opt_array[] = {
{ _DPRINTK_FLAGS_INCL_SOURCENAME, 's' },
{ _DPRINTK_FLAGS_INCL_LINENO, 'l' },
{ _DPRINTK_FLAGS_INCL_TID, 't' },
+ { _DPRINTK_FLAGS_INCL_STACK, 'd' },
{ _DPRINTK_FLAGS_NONE, '_' },
};
diff --git a/lib/math/div64.c b/lib/math/div64.c
index bf77b9843175..d1e92ea24fce 100644
--- a/lib/math/div64.c
+++ b/lib/math/div64.c
@@ -177,94 +177,157 @@ EXPORT_SYMBOL(div64_s64);
* Iterative div/mod for use when dividend is not expected to be much
* bigger than divisor.
*/
+#ifndef iter_div_u64_rem
u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
{
return __iter_div_u64_rem(dividend, divisor, remainder);
}
EXPORT_SYMBOL(iter_div_u64_rem);
+#endif
-#ifndef mul_u64_u64_div_u64
-u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
-{
- if (ilog2(a) + ilog2(b) <= 62)
- return div64_u64(a * b, c);
+#if !defined(mul_u64_add_u64_div_u64) || defined(test_mul_u64_add_u64_div_u64)
-#if defined(__SIZEOF_INT128__)
+#define mul_add(a, b, c) add_u64_u32(mul_u32_u32(a, b), c)
+#if defined(__SIZEOF_INT128__) && !defined(test_mul_u64_add_u64_div_u64)
+static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c)
+{
/* native 64x64=128 bits multiplication */
- u128 prod = (u128)a * b;
- u64 n_lo = prod, n_hi = prod >> 64;
+ u128 prod = (u128)a * b + c;
+ *p_lo = prod;
+ return prod >> 64;
+}
#else
-
- /* perform a 64x64=128 bits multiplication manually */
- u32 a_lo = a, a_hi = a >> 32, b_lo = b, b_hi = b >> 32;
+static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c)
+{
+ /* perform a 64x64=128 bits multiplication in 32bit chunks */
u64 x, y, z;
- x = (u64)a_lo * b_lo;
- y = (u64)a_lo * b_hi + (u32)(x >> 32);
- z = (u64)a_hi * b_hi + (u32)(y >> 32);
- y = (u64)a_hi * b_lo + (u32)y;
- z += (u32)(y >> 32);
- x = (y << 32) + (u32)x;
-
- u64 n_lo = x, n_hi = z;
+ /* Since (x-1)(x-1) + 2(x-1) == x.x - 1 two u32 can be added to a u64 */
+ x = mul_add(a, b, c);
+ y = mul_add(a, b >> 32, c >> 32);
+ y = add_u64_u32(y, x >> 32);
+ z = mul_add(a >> 32, b >> 32, y >> 32);
+ y = mul_add(a >> 32, b, y);
+ *p_lo = (y << 32) + (u32)x;
+ return add_u64_u32(z, y >> 32);
+}
+#endif
+#ifndef BITS_PER_ITER
+#define BITS_PER_ITER (__LONG_WIDTH__ >= 64 ? 32 : 16)
#endif
- /* make sure c is not zero, trigger runtime exception otherwise */
- if (unlikely(c == 0)) {
- unsigned long zero = 0;
+#if BITS_PER_ITER == 32
+#define mul_u64_long_add_u64(p_lo, a, b, c) mul_u64_u64_add_u64(p_lo, a, b, c)
+#define add_u64_long(a, b) ((a) + (b))
+#else
+#undef BITS_PER_ITER
+#define BITS_PER_ITER 16
+static inline u32 mul_u64_long_add_u64(u64 *p_lo, u64 a, u32 b, u64 c)
+{
+ u64 n_lo = mul_add(a, b, c);
+ u64 n_med = mul_add(a >> 32, b, c >> 32);
- OPTIMIZER_HIDE_VAR(zero);
- return ~0UL/zero;
- }
+ n_med = add_u64_u32(n_med, n_lo >> 32);
+ *p_lo = n_med << 32 | (u32)n_lo;
+ return n_med >> 32;
+}
+
+#define add_u64_long(a, b) add_u64_u32(a, b)
+#endif
+
+u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d)
+{
+ unsigned long d_msig, q_digit;
+ unsigned int reps, d_z_hi;
+ u64 quotient, n_lo, n_hi;
+ u32 overflow;
- int shift = __builtin_ctzll(c);
+ n_hi = mul_u64_u64_add_u64(&n_lo, a, b, c);
- /* try reducing the fraction in case the dividend becomes <= 64 bits */
- if ((n_hi >> shift) == 0) {
- u64 n = shift ? (n_lo >> shift) | (n_hi << (64 - shift)) : n_lo;
+ if (!n_hi)
+ return div64_u64(n_lo, d);
- return div64_u64(n, c >> shift);
- /*
- * The remainder value if needed would be:
- * res = div64_u64_rem(n, c >> shift, &rem);
- * rem = (rem << shift) + (n_lo - (n << shift));
- */
- }
+ if (unlikely(n_hi >= d)) {
+ /* trigger runtime exception if divisor is zero */
+ if (d == 0) {
+ unsigned long zero = 0;
- if (n_hi >= c) {
+ OPTIMIZER_HIDE_VAR(zero);
+ return ~0UL/zero;
+ }
/* overflow: result is unrepresentable in a u64 */
- return -1;
+ return ~0ULL;
}
- /* Do the full 128 by 64 bits division */
-
- shift = __builtin_clzll(c);
- c <<= shift;
+ /* Left align the divisor, shifting the dividend to match */
+ d_z_hi = __builtin_clzll(d);
+ if (d_z_hi) {
+ d <<= d_z_hi;
+ n_hi = n_hi << d_z_hi | n_lo >> (64 - d_z_hi);
+ n_lo <<= d_z_hi;
+ }
- int p = 64 + shift;
- u64 res = 0;
- bool carry;
+ reps = 64 / BITS_PER_ITER;
+ /* Optimise loop count for small dividends */
+ if (!(u32)(n_hi >> 32)) {
+ reps -= 32 / BITS_PER_ITER;
+ n_hi = n_hi << 32 | n_lo >> 32;
+ n_lo <<= 32;
+ }
+#if BITS_PER_ITER == 16
+ if (!(u32)(n_hi >> 48)) {
+ reps--;
+ n_hi = add_u64_u32(n_hi << 16, n_lo >> 48);
+ n_lo <<= 16;
+ }
+#endif
- do {
- carry = n_hi >> 63;
- shift = carry ? 1 : __builtin_clzll(n_hi);
- if (p < shift)
- break;
- p -= shift;
- n_hi <<= shift;
- n_hi |= n_lo >> (64 - shift);
- n_lo <<= shift;
- if (carry || (n_hi >= c)) {
- n_hi -= c;
- res |= 1ULL << p;
+ /* Invert the dividend so we can use add instead of subtract. */
+ n_lo = ~n_lo;
+ n_hi = ~n_hi;
+
+ /*
+ * Get the most significant BITS_PER_ITER bits of the divisor.
+ * This is used to get a low 'guestimate' of the quotient digit.
+ */
+ d_msig = (d >> (64 - BITS_PER_ITER)) + 1;
+
+ /*
+ * Now do a 'long division' with BITS_PER_ITER bit 'digits'.
+ * The 'guess' quotient digit can be low and BITS_PER_ITER+1 bits.
+ * The worst case is dividing ~0 by 0x8000 which requires two subtracts.
+ */
+ quotient = 0;
+ while (reps--) {
+ q_digit = (unsigned long)(~n_hi >> (64 - 2 * BITS_PER_ITER)) / d_msig;
+ /* Shift 'n' left to align with the product q_digit * d */
+ overflow = n_hi >> (64 - BITS_PER_ITER);
+ n_hi = add_u64_u32(n_hi << BITS_PER_ITER, n_lo >> (64 - BITS_PER_ITER));
+ n_lo <<= BITS_PER_ITER;
+ /* Add product to negated divisor */
+ overflow += mul_u64_long_add_u64(&n_hi, d, q_digit, n_hi);
+ /* Adjust for the q_digit 'guestimate' being low */
+ while (overflow < 0xffffffff >> (32 - BITS_PER_ITER)) {
+ q_digit++;
+ n_hi += d;
+ overflow += n_hi < d;
}
- } while (n_hi);
- /* The remainder value if needed would be n_hi << p */
+ quotient = add_u64_long(quotient << BITS_PER_ITER, q_digit);
+ }
- return res;
+ /*
+ * The above only ensures the remainder doesn't overflow,
+ * it can still be possible to add (aka subtract) another copy
+ * of the divisor.
+ */
+ if ((n_hi + d) > n_hi)
+ quotient++;
+ return quotient;
}
-EXPORT_SYMBOL(mul_u64_u64_div_u64);
+#if !defined(test_mul_u64_add_u64_div_u64)
+EXPORT_SYMBOL(mul_u64_add_u64_div_u64);
+#endif
#endif
diff --git a/lib/math/test_mul_u64_u64_div_u64.c b/lib/math/test_mul_u64_u64_div_u64.c
index 58d058de4e73..338d014f0c73 100644
--- a/lib/math/test_mul_u64_u64_div_u64.c
+++ b/lib/math/test_mul_u64_u64_div_u64.c
@@ -10,80 +10,141 @@
#include <linux/printk.h>
#include <linux/math64.h>
-typedef struct { u64 a; u64 b; u64 c; u64 result; } test_params;
+typedef struct { u64 a; u64 b; u64 d; u64 result; uint round_up;} test_params;
static test_params test_values[] = {
/* this contains many edge values followed by a couple random values */
-{ 0xb, 0x7, 0x3, 0x19 },
-{ 0xffff0000, 0xffff0000, 0xf, 0x1110eeef00000000 },
-{ 0xffffffff, 0xffffffff, 0x1, 0xfffffffe00000001 },
-{ 0xffffffff, 0xffffffff, 0x2, 0x7fffffff00000000 },
-{ 0x1ffffffff, 0xffffffff, 0x2, 0xfffffffe80000000 },
-{ 0x1ffffffff, 0xffffffff, 0x3, 0xaaaaaaa9aaaaaaab },
-{ 0x1ffffffff, 0x1ffffffff, 0x4, 0xffffffff00000000 },
-{ 0xffff000000000000, 0xffff000000000000, 0xffff000000000001, 0xfffeffffffffffff },
-{ 0x3333333333333333, 0x3333333333333333, 0x5555555555555555, 0x1eb851eb851eb851 },
-{ 0x7fffffffffffffff, 0x2, 0x3, 0x5555555555555554 },
-{ 0xffffffffffffffff, 0x2, 0x8000000000000000, 0x3 },
-{ 0xffffffffffffffff, 0x2, 0xc000000000000000, 0x2 },
-{ 0xffffffffffffffff, 0x4000000000000004, 0x8000000000000000, 0x8000000000000007 },
-{ 0xffffffffffffffff, 0x4000000000000001, 0x8000000000000000, 0x8000000000000001 },
-{ 0xffffffffffffffff, 0x8000000000000001, 0xffffffffffffffff, 0x8000000000000001 },
-{ 0xfffffffffffffffe, 0x8000000000000001, 0xffffffffffffffff, 0x8000000000000000 },
-{ 0xffffffffffffffff, 0x8000000000000001, 0xfffffffffffffffe, 0x8000000000000001 },
-{ 0xffffffffffffffff, 0x8000000000000001, 0xfffffffffffffffd, 0x8000000000000002 },
-{ 0x7fffffffffffffff, 0xffffffffffffffff, 0xc000000000000000, 0xaaaaaaaaaaaaaaa8 },
-{ 0xffffffffffffffff, 0x7fffffffffffffff, 0xa000000000000000, 0xccccccccccccccca },
-{ 0xffffffffffffffff, 0x7fffffffffffffff, 0x9000000000000000, 0xe38e38e38e38e38b },
-{ 0x7fffffffffffffff, 0x7fffffffffffffff, 0x5000000000000000, 0xccccccccccccccc9 },
-{ 0xffffffffffffffff, 0xfffffffffffffffe, 0xffffffffffffffff, 0xfffffffffffffffe },
-{ 0xe6102d256d7ea3ae, 0x70a77d0be4c31201, 0xd63ec35ab3220357, 0x78f8bf8cc86c6e18 },
-{ 0xf53bae05cb86c6e1, 0x3847b32d2f8d32e0, 0xcfd4f55a647f403c, 0x42687f79d8998d35 },
-{ 0x9951c5498f941092, 0x1f8c8bfdf287a251, 0xa3c8dc5f81ea3fe2, 0x1d887cb25900091f },
-{ 0x374fee9daa1bb2bb, 0x0d0bfbff7b8ae3ef, 0xc169337bd42d5179, 0x03bb2dbaffcbb961 },
-{ 0xeac0d03ac10eeaf0, 0x89be05dfa162ed9b, 0x92bb1679a41f0e4b, 0xdc5f5cc9e270d216 },
+{ 0xb, 0x7, 0x3, 0x19, 1 },
+{ 0xffff0000, 0xffff0000, 0xf, 0x1110eeef00000000, 0 },
+{ 0xffffffff, 0xffffffff, 0x1, 0xfffffffe00000001, 0 },
+{ 0xffffffff, 0xffffffff, 0x2, 0x7fffffff00000000, 1 },
+{ 0x1ffffffff, 0xffffffff, 0x2, 0xfffffffe80000000, 1 },
+{ 0x1ffffffff, 0xffffffff, 0x3, 0xaaaaaaa9aaaaaaab, 0 },
+{ 0x1ffffffff, 0x1ffffffff, 0x4, 0xffffffff00000000, 1 },
+{ 0xffff000000000000, 0xffff000000000000, 0xffff000000000001, 0xfffeffffffffffff, 1 },
+{ 0x3333333333333333, 0x3333333333333333, 0x5555555555555555, 0x1eb851eb851eb851, 1 },
+{ 0x7fffffffffffffff, 0x2, 0x3, 0x5555555555555554, 1 },
+{ 0xffffffffffffffff, 0x2, 0x8000000000000000, 0x3, 1 },
+{ 0xffffffffffffffff, 0x2, 0xc000000000000000, 0x2, 1 },
+{ 0xffffffffffffffff, 0x4000000000000004, 0x8000000000000000, 0x8000000000000007, 1 },
+{ 0xffffffffffffffff, 0x4000000000000001, 0x8000000000000000, 0x8000000000000001, 1 },
+{ 0xffffffffffffffff, 0x8000000000000001, 0xffffffffffffffff, 0x8000000000000001, 0 },
+{ 0xfffffffffffffffe, 0x8000000000000001, 0xffffffffffffffff, 0x8000000000000000, 1 },
+{ 0xffffffffffffffff, 0x8000000000000001, 0xfffffffffffffffe, 0x8000000000000001, 1 },
+{ 0xffffffffffffffff, 0x8000000000000001, 0xfffffffffffffffd, 0x8000000000000002, 1 },
+{ 0x7fffffffffffffff, 0xffffffffffffffff, 0xc000000000000000, 0xaaaaaaaaaaaaaaa8, 1 },
+{ 0xffffffffffffffff, 0x7fffffffffffffff, 0xa000000000000000, 0xccccccccccccccca, 1 },
+{ 0xffffffffffffffff, 0x7fffffffffffffff, 0x9000000000000000, 0xe38e38e38e38e38b, 1 },
+{ 0x7fffffffffffffff, 0x7fffffffffffffff, 0x5000000000000000, 0xccccccccccccccc9, 1 },
+{ 0xffffffffffffffff, 0xfffffffffffffffe, 0xffffffffffffffff, 0xfffffffffffffffe, 0 },
+{ 0xe6102d256d7ea3ae, 0x70a77d0be4c31201, 0xd63ec35ab3220357, 0x78f8bf8cc86c6e18, 1 },
+{ 0xf53bae05cb86c6e1, 0x3847b32d2f8d32e0, 0xcfd4f55a647f403c, 0x42687f79d8998d35, 1 },
+{ 0x9951c5498f941092, 0x1f8c8bfdf287a251, 0xa3c8dc5f81ea3fe2, 0x1d887cb25900091f, 1 },
+{ 0x374fee9daa1bb2bb, 0x0d0bfbff7b8ae3ef, 0xc169337bd42d5179, 0x03bb2dbaffcbb961, 1 },
+{ 0xeac0d03ac10eeaf0, 0x89be05dfa162ed9b, 0x92bb1679a41f0e4b, 0xdc5f5cc9e270d216, 1 },
};
/*
* The above table can be verified with the following shell script:
- *
- * #!/bin/sh
- * sed -ne 's/^{ \+\(.*\), \+\(.*\), \+\(.*\), \+\(.*\) },$/\1 \2 \3 \4/p' \
- * lib/math/test_mul_u64_u64_div_u64.c |
- * while read a b c r; do
- * expected=$( printf "obase=16; ibase=16; %X * %X / %X\n" $a $b $c | bc )
- * given=$( printf "%X\n" $r )
- * if [ "$expected" = "$given" ]; then
- * echo "$a * $b / $c = $r OK"
- * else
- * echo "$a * $b / $c = $r is wrong" >&2
- * echo "should be equivalent to 0x$expected" >&2
- * exit 1
- * fi
- * done
+
+#!/bin/sh
+sed -ne 's/^{ \+\(.*\), \+\(.*\), \+\(.*\), \+\(.*\), \+\(.*\) },$/\1 \2 \3 \4 \5/p' \
+ lib/math/test_mul_u64_u64_div_u64.c |
+while read a b d r e; do
+ expected=$( printf "obase=16; ibase=16; %X * %X / %X\n" $a $b $d | bc )
+ given=$( printf "%X\n" $r )
+ if [ "$expected" = "$given" ]; then
+ echo "$a * $b / $d = $r OK"
+ else
+ echo "$a * $b / $d = $r is wrong" >&2
+ echo "should be equivalent to 0x$expected" >&2
+ exit 1
+ fi
+ expected=$( printf "obase=16; ibase=16; (%X * %X + %X) / %X\n" $a $b $((d-1)) $d | bc )
+ given=$( printf "%X\n" $((r + e)) )
+ if [ "$expected" = "$given" ]; then
+ echo "$a * $b +/ $d = $(printf '%#x' $((r + e))) OK"
+ else
+ echo "$a * $b +/ $d = $(printf '%#x' $((r + e))) is wrong" >&2
+ echo "should be equivalent to 0x$expected" >&2
+ exit 1
+ fi
+done
+
*/
-static int __init test_init(void)
+static u64 test_mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d);
+#if __LONG_WIDTH__ >= 64
+#define TEST_32BIT_DIV
+static u64 test_mul_u64_add_u64_div_u64_32bit(u64 a, u64 b, u64 c, u64 d);
+#endif
+
+static int __init test_run(unsigned int fn_no, const char *fn_name)
{
+ u64 start_time;
+ int errors = 0;
+ int tests = 0;
int i;
- pr_info("Starting mul_u64_u64_div_u64() test\n");
+ start_time = ktime_get_ns();
for (i = 0; i < ARRAY_SIZE(test_values); i++) {
u64 a = test_values[i].a;
u64 b = test_values[i].b;
- u64 c = test_values[i].c;
+ u64 d = test_values[i].d;
u64 expected_result = test_values[i].result;
- u64 result = mul_u64_u64_div_u64(a, b, c);
+ u64 result, result_up;
+
+ switch (fn_no) {
+ default:
+ result = mul_u64_u64_div_u64(a, b, d);
+ result_up = mul_u64_u64_div_u64_roundup(a, b, d);
+ break;
+ case 1:
+ result = test_mul_u64_add_u64_div_u64(a, b, 0, d);
+ result_up = test_mul_u64_add_u64_div_u64(a, b, d - 1, d);
+ break;
+#ifdef TEST_32BIT_DIV
+ case 2:
+ result = test_mul_u64_add_u64_div_u64_32bit(a, b, 0, d);
+ result_up = test_mul_u64_add_u64_div_u64_32bit(a, b, d - 1, d);
+ break;
+#endif
+ }
+
+ tests += 2;
if (result != expected_result) {
- pr_err("ERROR: 0x%016llx * 0x%016llx / 0x%016llx\n", a, b, c);
+ pr_err("ERROR: 0x%016llx * 0x%016llx / 0x%016llx\n", a, b, d);
pr_err("ERROR: expected result: %016llx\n", expected_result);
pr_err("ERROR: obtained result: %016llx\n", result);
+ errors++;
+ }
+ expected_result += test_values[i].round_up;
+ if (result_up != expected_result) {
+ pr_err("ERROR: 0x%016llx * 0x%016llx +/ 0x%016llx\n", a, b, d);
+ pr_err("ERROR: expected result: %016llx\n", expected_result);
+ pr_err("ERROR: obtained result: %016llx\n", result_up);
+ errors++;
}
}
- pr_info("Completed mul_u64_u64_div_u64() test\n");
+ pr_info("Completed %s() test, %d tests, %d errors, %llu ns\n",
+ fn_name, tests, errors, ktime_get_ns() - start_time);
+ return errors;
+}
+
+static int __init test_init(void)
+{
+ pr_info("Starting mul_u64_u64_div_u64() test\n");
+ if (test_run(0, "mul_u64_u64_div_u64"))
+ return -EINVAL;
+ if (test_run(1, "test_mul_u64_u64_div_u64"))
+ return -EINVAL;
+#ifdef TEST_32BIT_DIV
+ if (test_run(2, "test_mul_u64_u64_div_u64_32bit"))
+ return -EINVAL;
+#endif
return 0;
}
@@ -91,6 +152,36 @@ static void __exit test_exit(void)
{
}
+/* Compile the generic mul_u64_add_u64_div_u64() code */
+#undef __div64_32
+#define __div64_32 __div64_32
+#define div_s64_rem div_s64_rem
+#define div64_u64_rem div64_u64_rem
+#define div64_u64 div64_u64
+#define div64_s64 div64_s64
+#define iter_div_u64_rem iter_div_u64_rem
+
+#undef mul_u64_add_u64_div_u64
+#define mul_u64_add_u64_div_u64 test_mul_u64_add_u64_div_u64
+#define test_mul_u64_add_u64_div_u64 test_mul_u64_add_u64_div_u64
+
+#include "div64.c"
+
+#ifdef TEST_32BIT_DIV
+/* Recompile the generic code for 32bit long */
+#undef test_mul_u64_add_u64_div_u64
+#define test_mul_u64_add_u64_div_u64 test_mul_u64_add_u64_div_u64_32bit
+#undef BITS_PER_ITER
+#define BITS_PER_ITER 16
+
+#define mul_u64_u64_add_u64 mul_u64_u64_add_u64_32bit
+#undef mul_u64_long_add_u64
+#undef add_u64_long
+#undef mul_add
+
+#include "div64.c"
+#endif
+
module_init(test_init);
module_exit(test_exit);
diff --git a/lib/plist.c b/lib/plist.c
index 330febb4bd7d..ba677c31e8f3 100644
--- a/lib/plist.c
+++ b/lib/plist.c
@@ -47,8 +47,8 @@ static void plist_check_list(struct list_head *top)
plist_check_prev_next(top, prev, next);
while (next != top) {
- WRITE_ONCE(prev, next);
- WRITE_ONCE(next, prev->next);
+ prev = next;
+ next = prev->next;
plist_check_prev_next(top, prev, next);
}
}
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 859c251b23ce..e2d65d3b1c35 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -27,7 +27,7 @@
int ___ratelimit(struct ratelimit_state *rs, const char *func)
{
/* Paired with WRITE_ONCE() in .proc_handler().
- * Changing two values seperately could be inconsistent
+ * Changing two values separately could be inconsistent
* and some message could be lost. (See: net_ratelimit_state).
*/
int interval = READ_ONCE(rs->interval);
diff --git a/lib/rbtree.c b/lib/rbtree.c
index 5114eda6309c..18d42bcf4ec9 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -460,35 +460,6 @@ void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
}
EXPORT_SYMBOL(__rb_insert_augmented);
-/*
- * This function returns the first node (in sort order) of the tree.
- */
-struct rb_node *rb_first(const struct rb_root *root)
-{
- struct rb_node *n;
-
- n = root->rb_node;
- if (!n)
- return NULL;
- while (n->rb_left)
- n = n->rb_left;
- return n;
-}
-EXPORT_SYMBOL(rb_first);
-
-struct rb_node *rb_last(const struct rb_root *root)
-{
- struct rb_node *n;
-
- n = root->rb_node;
- if (!n)
- return NULL;
- while (n->rb_right)
- n = n->rb_right;
- return n;
-}
-EXPORT_SYMBOL(rb_last);
-
struct rb_node *rb_next(const struct rb_node *node)
{
struct rb_node *parent;
diff --git a/lib/sys_info.c b/lib/sys_info.c
index 496f9151c9b6..f32a06ec9ed4 100644
--- a/lib/sys_info.c
+++ b/lib/sys_info.c
@@ -1,31 +1,35 @@
// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/sched/debug.h>
+#include <linux/array_size.h>
+#include <linux/bitops.h>
+#include <linux/cleanup.h>
#include <linux/console.h>
+#include <linux/log2.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>
-#include <linux/sysctl.h>
#include <linux/nmi.h>
+#include <linux/sched/debug.h>
+#include <linux/string.h>
+#include <linux/sysctl.h>
#include <linux/sys_info.h>
-struct sys_info_name {
- unsigned long bit;
- const char *name;
+static const char * const si_names[] = {
+ [ilog2(SYS_INFO_TASKS)] = "tasks",
+ [ilog2(SYS_INFO_MEM)] = "mem",
+ [ilog2(SYS_INFO_TIMERS)] = "timers",
+ [ilog2(SYS_INFO_LOCKS)] = "locks",
+ [ilog2(SYS_INFO_FTRACE)] = "ftrace",
+ [ilog2(SYS_INFO_PANIC_CONSOLE_REPLAY)] = "",
+ [ilog2(SYS_INFO_ALL_BT)] = "all_bt",
+ [ilog2(SYS_INFO_BLOCKED_TASKS)] = "blocked_tasks",
};
/*
- * When 'si_names' gets updated, please make sure the 'sys_info_avail'
- * below is updated accordingly.
+ * Default kernel sys_info mask.
+ * If a kernel module calls sys_info() with "parameter == 0", then
+ * this mask will be used.
*/
-static const struct sys_info_name si_names[] = {
- { SYS_INFO_TASKS, "tasks" },
- { SYS_INFO_MEM, "mem" },
- { SYS_INFO_TIMERS, "timers" },
- { SYS_INFO_LOCKS, "locks" },
- { SYS_INFO_FTRACE, "ftrace" },
- { SYS_INFO_ALL_CPU_BT, "all_bt" },
- { SYS_INFO_BLOCKED_TASKS, "blocked_tasks" },
-};
+static unsigned long kernel_si_mask;
/* Expecting string like "xxx_sys_info=tasks,mem,timers,locks,ftrace,..." */
unsigned long sys_info_parse_param(char *str)
@@ -36,12 +40,9 @@ unsigned long sys_info_parse_param(char *str)
s = str;
while ((name = strsep(&s, ",")) && *name) {
- for (i = 0; i < ARRAY_SIZE(si_names); i++) {
- if (!strcmp(name, si_names[i].name)) {
- si_bits |= si_names[i].bit;
- break;
- }
- }
+ i = match_string(si_names, ARRAY_SIZE(si_names), name);
+ if (i >= 0)
+ __set_bit(i, &si_bits);
}
return si_bits;
@@ -49,56 +50,93 @@ unsigned long sys_info_parse_param(char *str)
#ifdef CONFIG_SYSCTL
-static const char sys_info_avail[] __maybe_unused = "tasks,mem,timers,locks,ftrace,all_bt,blocked_tasks";
+static int sys_info_write_handler(const struct ctl_table *table,
+ void *buffer, size_t *lenp, loff_t *ppos,
+ unsigned long *si_bits_global)
+{
+ unsigned long si_bits;
+ int ret;
+
+ ret = proc_dostring(table, 1, buffer, lenp, ppos);
+ if (ret)
+ return ret;
+
+ si_bits = sys_info_parse_param(table->data);
+
+ /* The access to the global value is not synchronized. */
+ WRITE_ONCE(*si_bits_global, si_bits);
+
+ return 0;
+}
+
+static int sys_info_read_handler(const struct ctl_table *table,
+ void *buffer, size_t *lenp, loff_t *ppos,
+ unsigned long *si_bits_global)
+{
+ unsigned long si_bits;
+ unsigned int len = 0;
+ char *delim = "";
+ unsigned int i;
+
+ /* The access to the global value is not synchronized. */
+ si_bits = READ_ONCE(*si_bits_global);
+
+ for_each_set_bit(i, &si_bits, ARRAY_SIZE(si_names)) {
+ if (*si_names[i]) {
+ len += scnprintf(table->data + len, table->maxlen - len,
+ "%s%s", delim, si_names[i]);
+ delim = ",";
+ }
+ }
+
+ return proc_dostring(table, 0, buffer, lenp, ppos);
+}
int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
{
- char names[sizeof(sys_info_avail)];
struct ctl_table table;
- unsigned long *si_bits_global;
-
- si_bits_global = ro_table->data;
-
- if (write) {
- unsigned long si_bits;
- int ret;
-
- table = *ro_table;
- table.data = names;
- table.maxlen = sizeof(names);
- ret = proc_dostring(&table, write, buffer, lenp, ppos);
- if (ret)
- return ret;
-
- si_bits = sys_info_parse_param(names);
- /* The access to the global value is not synchronized. */
- WRITE_ONCE(*si_bits_global, si_bits);
- return 0;
- } else {
- /* for 'read' operation */
- char *delim = "";
- int i, len = 0;
-
- names[0] = '\0';
- for (i = 0; i < ARRAY_SIZE(si_names); i++) {
- if (*si_bits_global & si_names[i].bit) {
- len += scnprintf(names + len, sizeof(names) - len,
- "%s%s", delim, si_names[i].name);
- delim = ",";
- }
- }
+ unsigned int i;
+ size_t maxlen;
- table = *ro_table;
- table.data = names;
- table.maxlen = sizeof(names);
- return proc_dostring(&table, write, buffer, lenp, ppos);
- }
+ maxlen = 0;
+ for (i = 0; i < ARRAY_SIZE(si_names); i++)
+ maxlen += strlen(si_names[i]) + 1;
+
+ char *names __free(kfree) = kzalloc(maxlen, GFP_KERNEL);
+ if (!names)
+ return -ENOMEM;
+
+ table = *ro_table;
+ table.data = names;
+ table.maxlen = maxlen;
+
+ if (write)
+ return sys_info_write_handler(&table, buffer, lenp, ppos, ro_table->data);
+ else
+ return sys_info_read_handler(&table, buffer, lenp, ppos, ro_table->data);
+}
+
+static const struct ctl_table sys_info_sysctls[] = {
+ {
+ .procname = "kernel_sys_info",
+ .data = &kernel_si_mask,
+ .maxlen = sizeof(kernel_si_mask),
+ .mode = 0644,
+ .proc_handler = sysctl_sys_info_handler,
+ },
+};
+
+static int __init sys_info_sysctl_init(void)
+{
+ register_sysctl_init("kernel", sys_info_sysctls);
+ return 0;
}
+subsys_initcall(sys_info_sysctl_init);
#endif
-void sys_info(unsigned long si_mask)
+static void __sys_info(unsigned long si_mask)
{
if (si_mask & SYS_INFO_TASKS)
show_state();
@@ -115,9 +153,14 @@ void sys_info(unsigned long si_mask)
if (si_mask & SYS_INFO_FTRACE)
ftrace_dump(DUMP_ALL);
- if (si_mask & SYS_INFO_ALL_CPU_BT)
+ if (si_mask & SYS_INFO_ALL_BT)
trigger_all_cpu_backtrace();
if (si_mask & SYS_INFO_BLOCKED_TASKS)
show_state_filter(TASK_UNINTERRUPTIBLE);
}
+
+void sys_info(unsigned long si_mask)
+{
+ __sys_info(si_mask ? : kernel_si_mask);
+}
diff --git a/lib/test_kho.c b/lib/test_kho.c
index fff018e5548d..47de56280795 100644
--- a/lib/test_kho.c
+++ b/lib/test_kho.c
@@ -33,44 +33,28 @@ struct kho_test_state {
unsigned int nr_folios;
struct folio **folios;
phys_addr_t *folios_info;
+ struct kho_vmalloc folios_info_phys;
+ int nr_folios_preserved;
struct folio *fdt;
__wsum csum;
};
static struct kho_test_state kho_test_state;
-static int kho_test_notifier(struct notifier_block *self, unsigned long cmd,
- void *v)
+static void kho_test_unpreserve_data(struct kho_test_state *state)
{
- struct kho_test_state *state = &kho_test_state;
- struct kho_serialization *ser = v;
- int err = 0;
-
- switch (cmd) {
- case KEXEC_KHO_ABORT:
- return NOTIFY_DONE;
- case KEXEC_KHO_FINALIZE:
- /* Handled below */
- break;
- default:
- return NOTIFY_BAD;
- }
-
- err |= kho_preserve_folio(state->fdt);
- err |= kho_add_subtree(ser, KHO_TEST_FDT, folio_address(state->fdt));
+ for (int i = 0; i < state->nr_folios_preserved; i++)
+ kho_unpreserve_folio(state->folios[i]);
- return err ? NOTIFY_BAD : NOTIFY_DONE;
+ kho_unpreserve_vmalloc(&state->folios_info_phys);
+ vfree(state->folios_info);
}
-static struct notifier_block kho_test_nb = {
- .notifier_call = kho_test_notifier,
-};
-
-static int kho_test_save_data(struct kho_test_state *state, void *fdt)
+static int kho_test_preserve_data(struct kho_test_state *state)
{
- phys_addr_t *folios_info __free(kvfree) = NULL;
struct kho_vmalloc folios_info_phys;
- int err = 0;
+ phys_addr_t *folios_info;
+ int err;
folios_info = vmalloc_array(state->nr_folios, sizeof(*folios_info));
if (!folios_info)
@@ -78,62 +62,98 @@ static int kho_test_save_data(struct kho_test_state *state, void *fdt)
err = kho_preserve_vmalloc(folios_info, &folios_info_phys);
if (err)
- return err;
+ goto err_free_info;
+
+ state->folios_info_phys = folios_info_phys;
+ state->folios_info = folios_info;
for (int i = 0; i < state->nr_folios; i++) {
struct folio *folio = state->folios[i];
unsigned int order = folio_order(folio);
folios_info[i] = virt_to_phys(folio_address(folio)) | order;
-
err = kho_preserve_folio(folio);
if (err)
- break;
+ goto err_unpreserve;
+ state->nr_folios_preserved++;
}
+ return 0;
+
+err_unpreserve:
+ /*
+ * kho_test_unpreserve_data frees folio_info, bail out immediately to
+ * avoid double free
+ */
+ kho_test_unpreserve_data(state);
+ return err;
+
+err_free_info:
+ vfree(folios_info);
+ return err;
+}
+
+static int kho_test_prepare_fdt(struct kho_test_state *state, ssize_t fdt_size)
+{
+ const char compatible[] = KHO_TEST_COMPAT;
+ unsigned int magic = KHO_TEST_MAGIC;
+ void *fdt = folio_address(state->fdt);
+ int err;
+
+ err = fdt_create(fdt, fdt_size);
+ err |= fdt_finish_reservemap(fdt);
+ err |= fdt_begin_node(fdt, "");
+ err |= fdt_property(fdt, "compatible", compatible, sizeof(compatible));
+ err |= fdt_property(fdt, "magic", &magic, sizeof(magic));
+
err |= fdt_begin_node(fdt, "data");
err |= fdt_property(fdt, "nr_folios", &state->nr_folios,
sizeof(state->nr_folios));
- err |= fdt_property(fdt, "folios_info", &folios_info_phys,
- sizeof(folios_info_phys));
+ err |= fdt_property(fdt, "folios_info", &state->folios_info_phys,
+ sizeof(state->folios_info_phys));
err |= fdt_property(fdt, "csum", &state->csum, sizeof(state->csum));
err |= fdt_end_node(fdt);
- if (!err)
- state->folios_info = no_free_ptr(folios_info);
+ err |= fdt_end_node(fdt);
+ err |= fdt_finish(fdt);
return err;
}
-static int kho_test_prepare_fdt(struct kho_test_state *state)
+static int kho_test_preserve(struct kho_test_state *state)
{
- const char compatible[] = KHO_TEST_COMPAT;
- unsigned int magic = KHO_TEST_MAGIC;
ssize_t fdt_size;
- int err = 0;
- void *fdt;
+ int err;
fdt_size = state->nr_folios * sizeof(phys_addr_t) + PAGE_SIZE;
state->fdt = folio_alloc(GFP_KERNEL, get_order(fdt_size));
if (!state->fdt)
return -ENOMEM;
- fdt = folio_address(state->fdt);
-
- err |= fdt_create(fdt, fdt_size);
- err |= fdt_finish_reservemap(fdt);
+ err = kho_preserve_folio(state->fdt);
+ if (err)
+ goto err_free_fdt;
- err |= fdt_begin_node(fdt, "");
- err |= fdt_property(fdt, "compatible", compatible, sizeof(compatible));
- err |= fdt_property(fdt, "magic", &magic, sizeof(magic));
- err |= kho_test_save_data(state, fdt);
- err |= fdt_end_node(fdt);
+ err = kho_test_preserve_data(state);
+ if (err)
+ goto err_unpreserve_fdt;
- err |= fdt_finish(fdt);
+ err = kho_test_prepare_fdt(state, fdt_size);
+ if (err)
+ goto err_unpreserve_data;
+ err = kho_add_subtree(KHO_TEST_FDT, folio_address(state->fdt));
if (err)
- folio_put(state->fdt);
+ goto err_unpreserve_data;
+
+ return 0;
+err_unpreserve_data:
+ kho_test_unpreserve_data(state);
+err_unpreserve_fdt:
+ kho_unpreserve_folio(state->fdt);
+err_free_fdt:
+ folio_put(state->fdt);
return err;
}
@@ -199,18 +219,12 @@ static int kho_test_save(void)
if (err)
goto err_free_folios;
- err = kho_test_prepare_fdt(state);
+ err = kho_test_preserve(state);
if (err)
goto err_free_folios;
- err = register_kho_notifier(&kho_test_nb);
- if (err)
- goto err_free_fdt;
-
return 0;
-err_free_fdt:
- folio_put(state->fdt);
err_free_folios:
kvfree(folios);
return err;
@@ -292,7 +306,6 @@ static int kho_test_restore(phys_addr_t fdt_phys)
if (err)
return err;
- pr_info("KHO restore succeeded\n");
return 0;
}
@@ -305,8 +318,15 @@ static int __init kho_test_init(void)
return 0;
err = kho_retrieve_subtree(KHO_TEST_FDT, &fdt_phys);
- if (!err)
- return kho_test_restore(fdt_phys);
+ if (!err) {
+ err = kho_test_restore(fdt_phys);
+ if (err)
+ pr_err("KHO restore failed\n");
+ else
+ pr_info("KHO restore succeeded\n");
+
+ return err;
+ }
if (err != -ENOENT) {
pr_warn("failed to retrieve %s FDT: %d\n", KHO_TEST_FDT, err);
@@ -329,7 +349,7 @@ static void kho_test_cleanup(void)
static void __exit kho_test_exit(void)
{
- unregister_kho_notifier(&kho_test_nb);
+ kho_remove_subtree(folio_address(kho_test_state.fdt));
kho_test_cleanup();
}
module_exit(kho_test_exit);
diff --git a/lib/tests/Makefile b/lib/tests/Makefile
index f7460831cfdd..601dba4b7d96 100644
--- a/lib/tests/Makefile
+++ b/lib/tests/Makefile
@@ -4,6 +4,7 @@
# KUnit tests
CFLAGS_bitfield_kunit.o := $(DISABLE_STRUCTLEAK_PLUGIN)
+obj-$(CONFIG_BASE64_KUNIT) += base64_kunit.o
obj-$(CONFIG_BITFIELD_KUNIT) += bitfield_kunit.o
obj-$(CONFIG_BITS_TEST) += test_bits.o
obj-$(CONFIG_BLACKHOLE_DEV_KUNIT_TEST) += blackhole_dev_kunit.o
diff --git a/lib/tests/base64_kunit.c b/lib/tests/base64_kunit.c
new file mode 100644
index 000000000000..f7252070c359
--- /dev/null
+++ b/lib/tests/base64_kunit.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * base64_kunit_test.c - KUnit tests for base64 encoding and decoding functions
+ *
+ * Copyright (c) 2025, Guan-Chun Wu <409411716@gms.tku.edu.tw>
+ */
+
+#include <kunit/test.h>
+#include <linux/base64.h>
+
+/* ---------- Benchmark helpers ---------- */
+static u64 bench_encode_ns(const u8 *data, int len, char *dst, int reps,
+ enum base64_variant variant)
+{
+ u64 t0, t1;
+
+ t0 = ktime_get_ns();
+ for (int i = 0; i < reps; i++)
+ base64_encode(data, len, dst, true, variant);
+ t1 = ktime_get_ns();
+
+ return div64_u64(t1 - t0, (u64)reps);
+}
+
+static u64 bench_decode_ns(const char *data, int len, u8 *dst, int reps,
+ enum base64_variant variant)
+{
+ u64 t0, t1;
+
+ t0 = ktime_get_ns();
+ for (int i = 0; i < reps; i++)
+ base64_decode(data, len, dst, true, variant);
+ t1 = ktime_get_ns();
+
+ return div64_u64(t1 - t0, (u64)reps);
+}
+
+static void run_perf_and_check(struct kunit *test, const char *label, int size,
+ enum base64_variant variant)
+{
+ const int reps = 1000;
+ size_t outlen = DIV_ROUND_UP(size, 3) * 4;
+ u8 *in = kmalloc(size, GFP_KERNEL);
+ char *enc = kmalloc(outlen, GFP_KERNEL);
+ u8 *decoded = kmalloc(size, GFP_KERNEL);
+
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, in);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, enc);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, decoded);
+
+ get_random_bytes(in, size);
+ int enc_len = base64_encode(in, size, enc, true, variant);
+ int dec_len = base64_decode(enc, enc_len, decoded, true, variant);
+
+ /* correctness sanity check */
+ KUNIT_EXPECT_EQ(test, dec_len, size);
+ KUNIT_EXPECT_MEMEQ(test, decoded, in, size);
+
+ /* benchmark encode */
+
+ u64 t1 = bench_encode_ns(in, size, enc, reps, variant);
+
+ kunit_info(test, "[%s] encode run : %lluns", label, t1);
+
+ u64 t2 = bench_decode_ns(enc, enc_len, decoded, reps, variant);
+
+ kunit_info(test, "[%s] decode run : %lluns", label, t2);
+
+ kfree(in);
+ kfree(enc);
+ kfree(decoded);
+}
+
+static void base64_performance_tests(struct kunit *test)
+{
+ /* run on STD variant only */
+ run_perf_and_check(test, "64B", 64, BASE64_STD);
+ run_perf_and_check(test, "1KB", 1024, BASE64_STD);
+}
+
+/* ---------- Helpers for encode ---------- */
+static void expect_encode_ok(struct kunit *test, const u8 *src, int srclen,
+ const char *expected, bool padding,
+ enum base64_variant variant)
+{
+ char buf[128];
+ int encoded_len = base64_encode(src, srclen, buf, padding, variant);
+
+ buf[encoded_len] = '\0';
+
+ KUNIT_EXPECT_EQ(test, encoded_len, strlen(expected));
+ KUNIT_EXPECT_STREQ(test, buf, expected);
+}
+
+/* ---------- Helpers for decode ---------- */
+static void expect_decode_ok(struct kunit *test, const char *src,
+ const u8 *expected, int expected_len, bool padding,
+ enum base64_variant variant)
+{
+ u8 buf[128];
+ int decoded_len = base64_decode(src, strlen(src), buf, padding, variant);
+
+ KUNIT_EXPECT_EQ(test, decoded_len, expected_len);
+ KUNIT_EXPECT_MEMEQ(test, buf, expected, expected_len);
+}
+
+static void expect_decode_err(struct kunit *test, const char *src,
+ int srclen, bool padding,
+ enum base64_variant variant)
+{
+ u8 buf[64];
+ int decoded_len = base64_decode(src, srclen, buf, padding, variant);
+
+ KUNIT_EXPECT_EQ(test, decoded_len, -1);
+}
+
+/* ---------- Encode Tests ---------- */
+static void base64_std_encode_tests(struct kunit *test)
+{
+ /* With padding */
+ expect_encode_ok(test, (const u8 *)"", 0, "", true, BASE64_STD);
+ expect_encode_ok(test, (const u8 *)"f", 1, "Zg==", true, BASE64_STD);
+ expect_encode_ok(test, (const u8 *)"fo", 2, "Zm8=", true, BASE64_STD);
+ expect_encode_ok(test, (const u8 *)"foo", 3, "Zm9v", true, BASE64_STD);
+ expect_encode_ok(test, (const u8 *)"foob", 4, "Zm9vYg==", true, BASE64_STD);
+ expect_encode_ok(test, (const u8 *)"fooba", 5, "Zm9vYmE=", true, BASE64_STD);
+ expect_encode_ok(test, (const u8 *)"foobar", 6, "Zm9vYmFy", true, BASE64_STD);
+
+ /* Extra cases with padding */
+ expect_encode_ok(test, (const u8 *)"Hello, world!", 13, "SGVsbG8sIHdvcmxkIQ==",
+ true, BASE64_STD);
+ expect_encode_ok(test, (const u8 *)"ABCDEFGHIJKLMNOPQRSTUVWXYZ", 26,
+ "QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVo=", true, BASE64_STD);
+ expect_encode_ok(test, (const u8 *)"abcdefghijklmnopqrstuvwxyz", 26,
+ "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo=", true, BASE64_STD);
+ expect_encode_ok(test, (const u8 *)"0123456789+/", 12, "MDEyMzQ1Njc4OSsv",
+ true, BASE64_STD);
+
+ /* Without padding */
+ expect_encode_ok(test, (const u8 *)"", 0, "", false, BASE64_STD);
+ expect_encode_ok(test, (const u8 *)"f", 1, "Zg", false, BASE64_STD);
+ expect_encode_ok(test, (const u8 *)"fo", 2, "Zm8", false, BASE64_STD);
+ expect_encode_ok(test, (const u8 *)"foo", 3, "Zm9v", false, BASE64_STD);
+ expect_encode_ok(test, (const u8 *)"foob", 4, "Zm9vYg", false, BASE64_STD);
+ expect_encode_ok(test, (const u8 *)"fooba", 5, "Zm9vYmE", false, BASE64_STD);
+ expect_encode_ok(test, (const u8 *)"foobar", 6, "Zm9vYmFy", false, BASE64_STD);
+
+ /* Extra cases without padding */
+ expect_encode_ok(test, (const u8 *)"Hello, world!", 13, "SGVsbG8sIHdvcmxkIQ",
+ false, BASE64_STD);
+ expect_encode_ok(test, (const u8 *)"ABCDEFGHIJKLMNOPQRSTUVWXYZ", 26,
+ "QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVo", false, BASE64_STD);
+ expect_encode_ok(test, (const u8 *)"abcdefghijklmnopqrstuvwxyz", 26,
+ "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo", false, BASE64_STD);
+ expect_encode_ok(test, (const u8 *)"0123456789+/", 12, "MDEyMzQ1Njc4OSsv",
+ false, BASE64_STD);
+}
+
+/* ---------- Decode Tests ---------- */
+static void base64_std_decode_tests(struct kunit *test)
+{
+ /* -------- With padding --------*/
+ expect_decode_ok(test, "", (const u8 *)"", 0, true, BASE64_STD);
+ expect_decode_ok(test, "Zg==", (const u8 *)"f", 1, true, BASE64_STD);
+ expect_decode_ok(test, "Zm8=", (const u8 *)"fo", 2, true, BASE64_STD);
+ expect_decode_ok(test, "Zm9v", (const u8 *)"foo", 3, true, BASE64_STD);
+ expect_decode_ok(test, "Zm9vYg==", (const u8 *)"foob", 4, true, BASE64_STD);
+ expect_decode_ok(test, "Zm9vYmE=", (const u8 *)"fooba", 5, true, BASE64_STD);
+ expect_decode_ok(test, "Zm9vYmFy", (const u8 *)"foobar", 6, true, BASE64_STD);
+ expect_decode_ok(test, "SGVsbG8sIHdvcmxkIQ==", (const u8 *)"Hello, world!", 13,
+ true, BASE64_STD);
+ expect_decode_ok(test, "QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVo=",
+ (const u8 *)"ABCDEFGHIJKLMNOPQRSTUVWXYZ", 26, true, BASE64_STD);
+ expect_decode_ok(test, "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo=",
+ (const u8 *)"abcdefghijklmnopqrstuvwxyz", 26, true, BASE64_STD);
+
+ /* Error cases */
+ expect_decode_err(test, "Zg=!", 4, true, BASE64_STD);
+ expect_decode_err(test, "Zm$=", 4, true, BASE64_STD);
+ expect_decode_err(test, "Z===", 4, true, BASE64_STD);
+ expect_decode_err(test, "Zg", 2, true, BASE64_STD);
+ expect_decode_err(test, "Zm9v====", 8, true, BASE64_STD);
+ expect_decode_err(test, "Zm==A", 5, true, BASE64_STD);
+
+ {
+ char with_nul[4] = { 'Z', 'g', '\0', '=' };
+
+ expect_decode_err(test, with_nul, 4, true, BASE64_STD);
+ }
+
+ /* -------- Without padding --------*/
+ expect_decode_ok(test, "", (const u8 *)"", 0, false, BASE64_STD);
+ expect_decode_ok(test, "Zg", (const u8 *)"f", 1, false, BASE64_STD);
+ expect_decode_ok(test, "Zm8", (const u8 *)"fo", 2, false, BASE64_STD);
+ expect_decode_ok(test, "Zm9v", (const u8 *)"foo", 3, false, BASE64_STD);
+ expect_decode_ok(test, "Zm9vYg", (const u8 *)"foob", 4, false, BASE64_STD);
+ expect_decode_ok(test, "Zm9vYmE", (const u8 *)"fooba", 5, false, BASE64_STD);
+ expect_decode_ok(test, "Zm9vYmFy", (const u8 *)"foobar", 6, false, BASE64_STD);
+ expect_decode_ok(test, "TWFu", (const u8 *)"Man", 3, false, BASE64_STD);
+ expect_decode_ok(test, "SGVsbG8sIHdvcmxkIQ", (const u8 *)"Hello, world!", 13,
+ false, BASE64_STD);
+ expect_decode_ok(test, "QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVo",
+ (const u8 *)"ABCDEFGHIJKLMNOPQRSTUVWXYZ", 26, false, BASE64_STD);
+ expect_decode_ok(test, "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo",
+ (const u8 *)"abcdefghijklmnopqrstuvwxyz", 26, false, BASE64_STD);
+ expect_decode_ok(test, "MDEyMzQ1Njc4OSsv", (const u8 *)"0123456789+/", 12,
+ false, BASE64_STD);
+
+ /* Error cases */
+ expect_decode_err(test, "Zg=!", 4, false, BASE64_STD);
+ expect_decode_err(test, "Zm$=", 4, false, BASE64_STD);
+ expect_decode_err(test, "Z===", 4, false, BASE64_STD);
+ expect_decode_err(test, "Zg=", 3, false, BASE64_STD);
+ expect_decode_err(test, "Zm9v====", 8, false, BASE64_STD);
+ expect_decode_err(test, "Zm==v", 4, false, BASE64_STD);
+
+ {
+ char with_nul[4] = { 'Z', 'g', '\0', '=' };
+
+ expect_decode_err(test, with_nul, 4, false, BASE64_STD);
+ }
+}
+
+/* ---------- Variant tests (URLSAFE / IMAP) ---------- */
+static void base64_variant_tests(struct kunit *test)
+{
+ const u8 sample1[] = { 0x00, 0xfb, 0xff, 0x7f, 0x80 };
+ char std_buf[128], url_buf[128], imap_buf[128];
+ u8 back[128];
+ int n_std, n_url, n_imap, m;
+ int i;
+
+ n_std = base64_encode(sample1, sizeof(sample1), std_buf, false, BASE64_STD);
+ n_url = base64_encode(sample1, sizeof(sample1), url_buf, false, BASE64_URLSAFE);
+ std_buf[n_std] = '\0';
+ url_buf[n_url] = '\0';
+
+ for (i = 0; i < n_std; i++) {
+ if (std_buf[i] == '+')
+ std_buf[i] = '-';
+ else if (std_buf[i] == '/')
+ std_buf[i] = '_';
+ }
+ KUNIT_EXPECT_STREQ(test, std_buf, url_buf);
+
+ m = base64_decode(url_buf, n_url, back, false, BASE64_URLSAFE);
+ KUNIT_EXPECT_EQ(test, m, (int)sizeof(sample1));
+ KUNIT_EXPECT_MEMEQ(test, back, sample1, sizeof(sample1));
+
+ n_std = base64_encode(sample1, sizeof(sample1), std_buf, false, BASE64_STD);
+ n_imap = base64_encode(sample1, sizeof(sample1), imap_buf, false, BASE64_IMAP);
+ std_buf[n_std] = '\0';
+ imap_buf[n_imap] = '\0';
+
+ for (i = 0; i < n_std; i++)
+ if (std_buf[i] == '/')
+ std_buf[i] = ',';
+ KUNIT_EXPECT_STREQ(test, std_buf, imap_buf);
+
+ m = base64_decode(imap_buf, n_imap, back, false, BASE64_IMAP);
+ KUNIT_EXPECT_EQ(test, m, (int)sizeof(sample1));
+ KUNIT_EXPECT_MEMEQ(test, back, sample1, sizeof(sample1));
+
+ {
+ const char *bad = "Zg==";
+ u8 tmp[8];
+
+ m = base64_decode(bad, strlen(bad), tmp, false, BASE64_URLSAFE);
+ KUNIT_EXPECT_EQ(test, m, -1);
+
+ m = base64_decode(bad, strlen(bad), tmp, false, BASE64_IMAP);
+ KUNIT_EXPECT_EQ(test, m, -1);
+ }
+}
+
+/* ---------- Test registration ---------- */
+static struct kunit_case base64_test_cases[] = {
+ KUNIT_CASE(base64_performance_tests),
+ KUNIT_CASE(base64_std_encode_tests),
+ KUNIT_CASE(base64_std_decode_tests),
+ KUNIT_CASE(base64_variant_tests),
+ {}
+};
+
+static struct kunit_suite base64_test_suite = {
+ .name = "base64",
+ .test_cases = base64_test_cases,
+};
+
+kunit_test_suite(base64_test_suite);
+
+MODULE_AUTHOR("Guan-Chun Wu <409411716@gms.tku.edu.tw>");
+MODULE_DESCRIPTION("KUnit tests for Base64 encoding/decoding, including performance checks");
+MODULE_LICENSE("GPL");
diff --git a/lib/usercopy.c b/lib/usercopy.c
index 7b17b83c8042..b00a3a957de6 100644
--- a/lib/usercopy.c
+++ b/lib/usercopy.c
@@ -12,7 +12,7 @@
/* out-of-line parts */
-#if !defined(INLINE_COPY_FROM_USER) || defined(CONFIG_RUST)
+#if !defined(INLINE_COPY_FROM_USER)
unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n)
{
return _inline_copy_from_user(to, from, n);
@@ -20,7 +20,7 @@ unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n
EXPORT_SYMBOL(_copy_from_user);
#endif
-#if !defined(INLINE_COPY_TO_USER) || defined(CONFIG_RUST)
+#if !defined(INLINE_COPY_TO_USER)
unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n)
{
return _inline_copy_to_user(to, from, n);
diff --git a/lib/xxhash.c b/lib/xxhash.c
index cf629766f376..4125b3e3cf7f 100644
--- a/lib/xxhash.c
+++ b/lib/xxhash.c
@@ -73,21 +73,6 @@ static const uint64_t PRIME64_3 = 1609587929392839161ULL;
static const uint64_t PRIME64_4 = 9650029242287828579ULL;
static const uint64_t PRIME64_5 = 2870177450012600261ULL;
-/*-**************************
- * Utils
- ***************************/
-void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src)
-{
- memcpy(dst, src, sizeof(*dst));
-}
-EXPORT_SYMBOL(xxh32_copy_state);
-
-void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src)
-{
- memcpy(dst, src, sizeof(*dst));
-}
-EXPORT_SYMBOL(xxh64_copy_state);
-
/*-***************************
* Simple Hash Functions
****************************/
@@ -239,20 +224,6 @@ EXPORT_SYMBOL(xxh64);
/*-**************************************************
* Advanced Hash Functions
***************************************************/
-void xxh32_reset(struct xxh32_state *statePtr, const uint32_t seed)
-{
- /* use a local state for memcpy() to avoid strict-aliasing warnings */
- struct xxh32_state state;
-
- memset(&state, 0, sizeof(state));
- state.v1 = seed + PRIME32_1 + PRIME32_2;
- state.v2 = seed + PRIME32_2;
- state.v3 = seed + 0;
- state.v4 = seed - PRIME32_1;
- memcpy(statePtr, &state, sizeof(state));
-}
-EXPORT_SYMBOL(xxh32_reset);
-
void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed)
{
/* use a local state for memcpy() to avoid strict-aliasing warnings */
diff --git a/lib/xz/xz_dec_bcj.c b/lib/xz/xz_dec_bcj.c
index 8237db17eee3..610d58d947ab 100644
--- a/lib/xz/xz_dec_bcj.c
+++ b/lib/xz/xz_dec_bcj.c
@@ -20,7 +20,6 @@ struct xz_dec_bcj {
enum {
BCJ_X86 = 4, /* x86 or x86-64 */
BCJ_POWERPC = 5, /* Big endian only */
- BCJ_IA64 = 6, /* Big or little endian */
BCJ_ARM = 7, /* Little endian only */
BCJ_ARMTHUMB = 8, /* Little endian only */
BCJ_SPARC = 9, /* Big or little endian */
@@ -180,92 +179,6 @@ static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
}
#endif
-#ifdef XZ_DEC_IA64
-static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
-{
- static const uint8_t branch_table[32] = {
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 4, 4, 6, 6, 0, 0, 7, 7,
- 4, 4, 0, 0, 4, 4, 0, 0
- };
-
- /*
- * The local variables take a little bit stack space, but it's less
- * than what LZMA2 decoder takes, so it doesn't make sense to reduce
- * stack usage here without doing that for the LZMA2 decoder too.
- */
-
- /* Loop counters */
- size_t i;
- size_t j;
-
- /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
- uint32_t slot;
-
- /* Bitwise offset of the instruction indicated by slot */
- uint32_t bit_pos;
-
- /* bit_pos split into byte and bit parts */
- uint32_t byte_pos;
- uint32_t bit_res;
-
- /* Address part of an instruction */
- uint32_t addr;
-
- /* Mask used to detect which instructions to convert */
- uint32_t mask;
-
- /* 41-bit instruction stored somewhere in the lowest 48 bits */
- uint64_t instr;
-
- /* Instruction normalized with bit_res for easier manipulation */
- uint64_t norm;
-
- size &= ~(size_t)15;
-
- for (i = 0; i < size; i += 16) {
- mask = branch_table[buf[i] & 0x1F];
- for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
- if (((mask >> slot) & 1) == 0)
- continue;
-
- byte_pos = bit_pos >> 3;
- bit_res = bit_pos & 7;
- instr = 0;
- for (j = 0; j < 6; ++j)
- instr |= (uint64_t)(buf[i + j + byte_pos])
- << (8 * j);
-
- norm = instr >> bit_res;
-
- if (((norm >> 37) & 0x0F) == 0x05
- && ((norm >> 9) & 0x07) == 0) {
- addr = (norm >> 13) & 0x0FFFFF;
- addr |= ((uint32_t)(norm >> 36) & 1) << 20;
- addr <<= 4;
- addr -= s->pos + (uint32_t)i;
- addr >>= 4;
-
- norm &= ~((uint64_t)0x8FFFFF << 13);
- norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
- norm |= (uint64_t)(addr & 0x100000)
- << (36 - 20);
-
- instr &= (1 << bit_res) - 1;
- instr |= norm << bit_res;
-
- for (j = 0; j < 6; j++)
- buf[i + j + byte_pos]
- = (uint8_t)(instr >> (8 * j));
- }
- }
- }
-
- return i;
-}
-#endif
-
#ifdef XZ_DEC_ARM
static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
@@ -509,11 +422,6 @@ static void bcj_apply(struct xz_dec_bcj *s,
filtered = bcj_powerpc(s, buf, size);
break;
#endif
-#ifdef XZ_DEC_IA64
- case BCJ_IA64:
- filtered = bcj_ia64(s, buf, size);
- break;
-#endif
#ifdef XZ_DEC_ARM
case BCJ_ARM:
filtered = bcj_arm(s, buf, size);
@@ -699,9 +607,6 @@ enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
#ifdef XZ_DEC_POWERPC
case BCJ_POWERPC:
#endif
-#ifdef XZ_DEC_IA64
- case BCJ_IA64:
-#endif
#ifdef XZ_DEC_ARM
case BCJ_ARM:
#endif
diff --git a/lib/xz/xz_private.h b/lib/xz/xz_private.h
index 8409784b1639..6775078f3cce 100644
--- a/lib/xz/xz_private.h
+++ b/lib/xz/xz_private.h
@@ -24,9 +24,6 @@
# ifdef CONFIG_XZ_DEC_POWERPC
# define XZ_DEC_POWERPC
# endif
-# ifdef CONFIG_XZ_DEC_IA64
-# define XZ_DEC_IA64
-# endif
# ifdef CONFIG_XZ_DEC_ARM
# define XZ_DEC_ARM
# endif
@@ -103,7 +100,6 @@
*/
#ifndef XZ_DEC_BCJ
# if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \
- || defined(XZ_DEC_IA64) \
|| defined(XZ_DEC_ARM) || defined(XZ_DEC_ARMTHUMB) \
|| defined(XZ_DEC_SPARC) || defined(XZ_DEC_ARM64) \
|| defined(XZ_DEC_RISCV)
diff --git a/mm/Makefile b/mm/Makefile
index 00ceb2418b64..2d0570a16e5b 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_NUMA) += memory-tiers.o
obj-$(CONFIG_DEVICE_MIGRATION) += migrate_device.o
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
+obj-$(CONFIG_LIVEUPDATE) += memfd_luo.o
obj-$(CONFIG_MEMCG_V1) += memcontrol-v1.o
obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o
ifdef CONFIG_SWAP
diff --git a/mm/internal.h b/mm/internal.h
index 89790def1bae..e430da900430 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1582,6 +1582,12 @@ void __meminit __init_page_from_nid(unsigned long pfn, int nid);
unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
int priority);
+int shmem_add_to_page_cache(struct folio *folio,
+ struct address_space *mapping,
+ pgoff_t index, void *expected, gfp_t gfp);
+int shmem_inode_acct_blocks(struct inode *inode, long pages);
+bool shmem_recalc_inode(struct inode *inode, long alloced, long swapped);
+
#ifdef CONFIG_SHRINKER_DEBUG
static inline __printf(2, 0) int shrinker_debugfs_name_alloc(
struct shrinker *shrinker, const char *fmt, va_list ap)
diff --git a/mm/memblock.c b/mm/memblock.c
index f0f2dc66e9a2..905d06b16348 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -2445,60 +2445,59 @@ int reserve_mem_release_by_name(const char *name)
#define MEMBLOCK_KHO_FDT "memblock"
#define MEMBLOCK_KHO_NODE_COMPATIBLE "memblock-v1"
#define RESERVE_MEM_KHO_NODE_COMPATIBLE "reserve-mem-v1"
-static struct page *kho_fdt;
-static int reserve_mem_kho_finalize(struct kho_serialization *ser)
+static int __init reserved_mem_preserve(void)
{
- int err = 0, i;
+ unsigned int nr_preserved = 0;
+ int err;
- for (i = 0; i < reserved_mem_count; i++) {
+ for (unsigned int i = 0; i < reserved_mem_count; i++, nr_preserved++) {
struct reserve_mem_table *map = &reserved_mem_table[i];
struct page *page = phys_to_page(map->start);
unsigned int nr_pages = map->size >> PAGE_SHIFT;
- err |= kho_preserve_pages(page, nr_pages);
+ err = kho_preserve_pages(page, nr_pages);
+ if (err)
+ goto err_unpreserve;
}
- err |= kho_preserve_folio(page_folio(kho_fdt));
- err |= kho_add_subtree(ser, MEMBLOCK_KHO_FDT, page_to_virt(kho_fdt));
+ return 0;
- return notifier_from_errno(err);
-}
+err_unpreserve:
+ for (unsigned int i = 0; i < nr_preserved; i++) {
+ struct reserve_mem_table *map = &reserved_mem_table[i];
+ struct page *page = phys_to_page(map->start);
+ unsigned int nr_pages = map->size >> PAGE_SHIFT;
-static int reserve_mem_kho_notifier(struct notifier_block *self,
- unsigned long cmd, void *v)
-{
- switch (cmd) {
- case KEXEC_KHO_FINALIZE:
- return reserve_mem_kho_finalize((struct kho_serialization *)v);
- case KEXEC_KHO_ABORT:
- return NOTIFY_DONE;
- default:
- return NOTIFY_BAD;
+ kho_unpreserve_pages(page, nr_pages);
}
-}
-static struct notifier_block reserve_mem_kho_nb = {
- .notifier_call = reserve_mem_kho_notifier,
-};
+ return err;
+}
static int __init prepare_kho_fdt(void)
{
- int err = 0, i;
+ struct page *fdt_page;
void *fdt;
+ int err;
- kho_fdt = alloc_page(GFP_KERNEL);
- if (!kho_fdt)
- return -ENOMEM;
+ fdt_page = alloc_page(GFP_KERNEL);
+ if (!fdt_page) {
+ err = -ENOMEM;
+ goto err_report;
+ }
- fdt = page_to_virt(kho_fdt);
+ fdt = page_to_virt(fdt_page);
+ err = kho_preserve_pages(fdt_page, 1);
+ if (err)
+ goto err_free_fdt;
err |= fdt_create(fdt, PAGE_SIZE);
err |= fdt_finish_reservemap(fdt);
-
err |= fdt_begin_node(fdt, "");
err |= fdt_property_string(fdt, "compatible", MEMBLOCK_KHO_NODE_COMPATIBLE);
- for (i = 0; i < reserved_mem_count; i++) {
+
+ for (unsigned int i = 0; !err && i < reserved_mem_count; i++) {
struct reserve_mem_table *map = &reserved_mem_table[i];
err |= fdt_begin_node(fdt, map->name);
@@ -2508,14 +2507,29 @@ static int __init prepare_kho_fdt(void)
err |= fdt_end_node(fdt);
}
err |= fdt_end_node(fdt);
-
err |= fdt_finish(fdt);
- if (err) {
- pr_err("failed to prepare memblock FDT for KHO: %d\n", err);
- put_page(kho_fdt);
- kho_fdt = NULL;
- }
+ if (err)
+ goto err_unpreserve_fdt;
+
+ err = kho_add_subtree(MEMBLOCK_KHO_FDT, fdt);
+ if (err)
+ goto err_unpreserve_fdt;
+
+ err = reserved_mem_preserve();
+ if (err)
+ goto err_remove_subtree;
+
+ return 0;
+
+err_remove_subtree:
+ kho_remove_subtree(fdt);
+err_unpreserve_fdt:
+ kho_unpreserve_pages(fdt_page, 1);
+err_free_fdt:
+ put_page(fdt_page);
+err_report:
+ pr_err("failed to prepare memblock FDT for KHO: %d\n", err);
return err;
}
@@ -2530,13 +2544,6 @@ static int __init reserve_mem_init(void)
err = prepare_kho_fdt();
if (err)
return err;
-
- err = register_kho_notifier(&reserve_mem_kho_nb);
- if (err) {
- put_page(kho_fdt);
- kho_fdt = NULL;
- }
-
return err;
}
late_initcall(reserve_mem_init);
diff --git a/mm/memfd_luo.c b/mm/memfd_luo.c
new file mode 100644
index 000000000000..4f6ba63b4310
--- /dev/null
+++ b/mm/memfd_luo.c
@@ -0,0 +1,516 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ *
+ * Copyright (C) 2025 Amazon.com Inc. or its affiliates.
+ * Pratyush Yadav <ptyadav@amazon.de>
+ */
+
+/**
+ * DOC: Memfd Preservation via LUO
+ *
+ * Overview
+ * ========
+ *
+ * Memory file descriptors (memfd) can be preserved over a kexec using the Live
+ * Update Orchestrator (LUO) file preservation. This allows userspace to
+ * transfer its memory contents to the next kernel after a kexec.
+ *
+ * The preservation is not intended to be transparent. Only select properties of
+ * the file are preserved. All others are reset to default. The preserved
+ * properties are described below.
+ *
+ * .. note::
+ * The LUO API is not stabilized yet, so the preserved properties of a memfd
+ * are also not stable and are subject to backwards incompatible changes.
+ *
+ * .. note::
+ * Currently a memfd backed by Hugetlb is not supported. Memfds created
+ * with ``MFD_HUGETLB`` will be rejected.
+ *
+ * Preserved Properties
+ * ====================
+ *
+ * The following properties of the memfd are preserved across kexec:
+ *
+ * File Contents
+ * All data stored in the file is preserved.
+ *
+ * File Size
+ * The size of the file is preserved. Holes in the file are filled by
+ * allocating pages for them during preservation.
+ *
+ * File Position
+ * The current file position is preserved, allowing applications to continue
+ * reading/writing from their last position.
+ *
+ * File Status Flags
+ * memfds are always opened with ``O_RDWR`` and ``O_LARGEFILE``. This property
+ * is maintained.
+ *
+ * Non-Preserved Properties
+ * ========================
+ *
+ * All properties which are not preserved must be assumed to be reset to
+ * default. This section describes some of those properties which may be more of
+ * note.
+ *
+ * ``FD_CLOEXEC`` flag
+ * A memfd can be created with the ``MFD_CLOEXEC`` flag that sets the
+ * ``FD_CLOEXEC`` on the file. This flag is not preserved and must be set
+ * again after restore via ``fcntl()``.
+ *
+ * Seals
+ * File seals are not preserved. The file is unsealed on restore and if
+ * needed, must be sealed again via ``fcntl()``.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/bits.h>
+#include <linux/err.h>
+#include <linux/file.h>
+#include <linux/io.h>
+#include <linux/kexec_handover.h>
+#include <linux/kho/abi/memfd.h>
+#include <linux/liveupdate.h>
+#include <linux/shmem_fs.h>
+#include <linux/vmalloc.h>
+#include "internal.h"
+
+static int memfd_luo_preserve_folios(struct file *file,
+ struct kho_vmalloc *kho_vmalloc,
+ struct memfd_luo_folio_ser **out_folios_ser,
+ u64 *nr_foliosp)
+{
+ struct inode *inode = file_inode(file);
+ struct memfd_luo_folio_ser *folios_ser;
+ unsigned int max_folios;
+ long i, size, nr_pinned;
+ struct folio **folios;
+ int err = -EINVAL;
+ pgoff_t offset;
+ u64 nr_folios;
+
+ size = i_size_read(inode);
+ /*
+ * If the file has zero size, then the folios and nr_folios properties
+ * are not set.
+ */
+ if (!size) {
+ *nr_foliosp = 0;
+ *out_folios_ser = NULL;
+ memset(kho_vmalloc, 0, sizeof(*kho_vmalloc));
+ return 0;
+ }
+
+ /*
+ * Guess the number of folios based on inode size. Real number might end
+ * up being smaller if there are higher order folios.
+ */
+ max_folios = PAGE_ALIGN(size) / PAGE_SIZE;
+ folios = kvmalloc_array(max_folios, sizeof(*folios), GFP_KERNEL);
+ if (!folios)
+ return -ENOMEM;
+
+ /*
+ * Pin the folios so they don't move around behind our back. This also
+ * ensures none of the folios are in CMA -- which ensures they don't
+ * fall in KHO scratch memory. It also moves swapped out folios back to
+ * memory.
+ *
+ * A side effect of doing this is that it allocates a folio for all
+ * indices in the file. This might waste memory on sparse memfds. If
+ * that is really a problem in the future, we can have a
+ * memfd_pin_folios() variant that does not allocate a page on empty
+ * slots.
+ */
+ nr_pinned = memfd_pin_folios(file, 0, size - 1, folios, max_folios,
+ &offset);
+ if (nr_pinned < 0) {
+ err = nr_pinned;
+ pr_err("failed to pin folios: %d\n", err);
+ goto err_free_folios;
+ }
+ nr_folios = nr_pinned;
+
+ folios_ser = vcalloc(nr_folios, sizeof(*folios_ser));
+ if (!folios_ser) {
+ err = -ENOMEM;
+ goto err_unpin;
+ }
+
+ for (i = 0; i < nr_folios; i++) {
+ struct memfd_luo_folio_ser *pfolio = &folios_ser[i];
+ struct folio *folio = folios[i];
+ unsigned int flags = 0;
+
+ err = kho_preserve_folio(folio);
+ if (err)
+ goto err_unpreserve;
+
+ if (folio_test_dirty(folio))
+ flags |= MEMFD_LUO_FOLIO_DIRTY;
+ if (folio_test_uptodate(folio))
+ flags |= MEMFD_LUO_FOLIO_UPTODATE;
+
+ pfolio->pfn = folio_pfn(folio);
+ pfolio->flags = flags;
+ pfolio->index = folio->index;
+ }
+
+ err = kho_preserve_vmalloc(folios_ser, kho_vmalloc);
+ if (err)
+ goto err_unpreserve;
+
+ kvfree(folios);
+ *nr_foliosp = nr_folios;
+ *out_folios_ser = folios_ser;
+
+ /*
+ * Note: folios_ser is purposely not freed here. It is preserved
+ * memory (via KHO). In the 'unpreserve' path, we use the vmap pointer
+ * that is passed via private_data.
+ */
+ return 0;
+
+err_unpreserve:
+ for (i = i - 1; i >= 0; i--)
+ kho_unpreserve_folio(folios[i]);
+ vfree(folios_ser);
+err_unpin:
+ unpin_folios(folios, nr_folios);
+err_free_folios:
+ kvfree(folios);
+
+ return err;
+}
+
+static void memfd_luo_unpreserve_folios(struct kho_vmalloc *kho_vmalloc,
+ struct memfd_luo_folio_ser *folios_ser,
+ u64 nr_folios)
+{
+ long i;
+
+ if (!nr_folios)
+ return;
+
+ kho_unpreserve_vmalloc(kho_vmalloc);
+
+ for (i = 0; i < nr_folios; i++) {
+ const struct memfd_luo_folio_ser *pfolio = &folios_ser[i];
+ struct folio *folio;
+
+ if (!pfolio->pfn)
+ continue;
+
+ folio = pfn_folio(pfolio->pfn);
+
+ kho_unpreserve_folio(folio);
+ unpin_folio(folio);
+ }
+
+ vfree(folios_ser);
+}
+
+static int memfd_luo_preserve(struct liveupdate_file_op_args *args)
+{
+ struct inode *inode = file_inode(args->file);
+ struct memfd_luo_folio_ser *folios_ser;
+ struct memfd_luo_ser *ser;
+ u64 nr_folios;
+ int err = 0;
+
+ inode_lock(inode);
+ shmem_freeze(inode, true);
+
+ /* Allocate the main serialization structure in preserved memory */
+ ser = kho_alloc_preserve(sizeof(*ser));
+ if (IS_ERR(ser)) {
+ err = PTR_ERR(ser);
+ goto err_unlock;
+ }
+
+ ser->pos = args->file->f_pos;
+ ser->size = i_size_read(inode);
+
+ err = memfd_luo_preserve_folios(args->file, &ser->folios,
+ &folios_ser, &nr_folios);
+ if (err)
+ goto err_free_ser;
+
+ ser->nr_folios = nr_folios;
+ inode_unlock(inode);
+
+ args->private_data = folios_ser;
+ args->serialized_data = virt_to_phys(ser);
+
+ return 0;
+
+err_free_ser:
+ kho_unpreserve_free(ser);
+err_unlock:
+ shmem_freeze(inode, false);
+ inode_unlock(inode);
+ return err;
+}
+
+static int memfd_luo_freeze(struct liveupdate_file_op_args *args)
+{
+ struct memfd_luo_ser *ser;
+
+ if (WARN_ON_ONCE(!args->serialized_data))
+ return -EINVAL;
+
+ ser = phys_to_virt(args->serialized_data);
+
+ /*
+ * The pos might have changed since prepare. Everything else stays the
+ * same.
+ */
+ ser->pos = args->file->f_pos;
+
+ return 0;
+}
+
+static void memfd_luo_unpreserve(struct liveupdate_file_op_args *args)
+{
+ struct inode *inode = file_inode(args->file);
+ struct memfd_luo_ser *ser;
+
+ if (WARN_ON_ONCE(!args->serialized_data))
+ return;
+
+ inode_lock(inode);
+ shmem_freeze(inode, false);
+
+ ser = phys_to_virt(args->serialized_data);
+
+ memfd_luo_unpreserve_folios(&ser->folios, args->private_data,
+ ser->nr_folios);
+
+ kho_unpreserve_free(ser);
+ inode_unlock(inode);
+}
+
+static void memfd_luo_discard_folios(const struct memfd_luo_folio_ser *folios_ser,
+ u64 nr_folios)
+{
+ u64 i;
+
+ for (i = 0; i < nr_folios; i++) {
+ const struct memfd_luo_folio_ser *pfolio = &folios_ser[i];
+ struct folio *folio;
+ phys_addr_t phys;
+
+ if (!pfolio->pfn)
+ continue;
+
+ phys = PFN_PHYS(pfolio->pfn);
+ folio = kho_restore_folio(phys);
+ if (!folio) {
+ pr_warn_ratelimited("Unable to restore folio at physical address: %llx\n",
+ phys);
+ continue;
+ }
+
+ folio_put(folio);
+ }
+}
+
+static void memfd_luo_finish(struct liveupdate_file_op_args *args)
+{
+ struct memfd_luo_folio_ser *folios_ser;
+ struct memfd_luo_ser *ser;
+
+ if (args->retrieved)
+ return;
+
+ ser = phys_to_virt(args->serialized_data);
+ if (!ser)
+ return;
+
+ if (ser->nr_folios) {
+ folios_ser = kho_restore_vmalloc(&ser->folios);
+ if (!folios_ser)
+ goto out;
+
+ memfd_luo_discard_folios(folios_ser, ser->nr_folios);
+ vfree(folios_ser);
+ }
+
+out:
+ kho_restore_free(ser);
+}
+
+static int memfd_luo_retrieve_folios(struct file *file,
+ struct memfd_luo_folio_ser *folios_ser,
+ u64 nr_folios)
+{
+ struct inode *inode = file_inode(file);
+ struct address_space *mapping = inode->i_mapping;
+ struct folio *folio;
+ int err = -EIO;
+ long i;
+
+ for (i = 0; i < nr_folios; i++) {
+ const struct memfd_luo_folio_ser *pfolio = &folios_ser[i];
+ phys_addr_t phys;
+ u64 index;
+ int flags;
+
+ if (!pfolio->pfn)
+ continue;
+
+ phys = PFN_PHYS(pfolio->pfn);
+ folio = kho_restore_folio(phys);
+ if (!folio) {
+ pr_err("Unable to restore folio at physical address: %llx\n",
+ phys);
+ goto put_folios;
+ }
+ index = pfolio->index;
+ flags = pfolio->flags;
+
+ /* Set up the folio for insertion. */
+ __folio_set_locked(folio);
+ __folio_set_swapbacked(folio);
+
+ err = mem_cgroup_charge(folio, NULL, mapping_gfp_mask(mapping));
+ if (err) {
+ pr_err("shmem: failed to charge folio index %ld: %d\n",
+ i, err);
+ goto unlock_folio;
+ }
+
+ err = shmem_add_to_page_cache(folio, mapping, index, NULL,
+ mapping_gfp_mask(mapping));
+ if (err) {
+ pr_err("shmem: failed to add to page cache folio index %ld: %d\n",
+ i, err);
+ goto unlock_folio;
+ }
+
+ if (flags & MEMFD_LUO_FOLIO_UPTODATE)
+ folio_mark_uptodate(folio);
+ if (flags & MEMFD_LUO_FOLIO_DIRTY)
+ folio_mark_dirty(folio);
+
+ err = shmem_inode_acct_blocks(inode, 1);
+ if (err) {
+ pr_err("shmem: failed to account folio index %ld: %d\n",
+ i, err);
+ goto unlock_folio;
+ }
+
+ shmem_recalc_inode(inode, 1, 0);
+ folio_add_lru(folio);
+ folio_unlock(folio);
+ folio_put(folio);
+ }
+
+ return 0;
+
+unlock_folio:
+ folio_unlock(folio);
+ folio_put(folio);
+put_folios:
+ /*
+ * Note: don't free the folios already added to the file. They will be
+ * freed when the file is freed. Free the ones not added yet here.
+ */
+ for (long j = i + 1; j < nr_folios; j++) {
+ const struct memfd_luo_folio_ser *pfolio = &folios_ser[j];
+
+ folio = kho_restore_folio(pfolio->pfn);
+ if (folio)
+ folio_put(folio);
+ }
+
+ return err;
+}
+
+static int memfd_luo_retrieve(struct liveupdate_file_op_args *args)
+{
+ struct memfd_luo_folio_ser *folios_ser;
+ struct memfd_luo_ser *ser;
+ struct file *file;
+ int err;
+
+ ser = phys_to_virt(args->serialized_data);
+ if (!ser)
+ return -EINVAL;
+
+ file = shmem_file_setup("", 0, VM_NORESERVE);
+
+ if (IS_ERR(file)) {
+ pr_err("failed to setup file: %pe\n", file);
+ return PTR_ERR(file);
+ }
+
+ vfs_setpos(file, ser->pos, MAX_LFS_FILESIZE);
+ file->f_inode->i_size = ser->size;
+
+ if (ser->nr_folios) {
+ folios_ser = kho_restore_vmalloc(&ser->folios);
+ if (!folios_ser) {
+ err = -EINVAL;
+ goto put_file;
+ }
+
+ err = memfd_luo_retrieve_folios(file, folios_ser, ser->nr_folios);
+ vfree(folios_ser);
+ if (err)
+ goto put_file;
+ }
+
+ args->file = file;
+ kho_restore_free(ser);
+
+ return 0;
+
+put_file:
+ fput(file);
+
+ return err;
+}
+
+static bool memfd_luo_can_preserve(struct liveupdate_file_handler *handler,
+ struct file *file)
+{
+ struct inode *inode = file_inode(file);
+
+ return shmem_file(file) && !inode->i_nlink;
+}
+
+static const struct liveupdate_file_ops memfd_luo_file_ops = {
+ .freeze = memfd_luo_freeze,
+ .finish = memfd_luo_finish,
+ .retrieve = memfd_luo_retrieve,
+ .preserve = memfd_luo_preserve,
+ .unpreserve = memfd_luo_unpreserve,
+ .can_preserve = memfd_luo_can_preserve,
+ .owner = THIS_MODULE,
+};
+
+static struct liveupdate_file_handler memfd_luo_handler = {
+ .ops = &memfd_luo_file_ops,
+ .compatible = MEMFD_LUO_FH_COMPATIBLE,
+};
+
+static int __init memfd_luo_init(void)
+{
+ int err = liveupdate_register_file_handler(&memfd_luo_handler);
+
+ if (err && err != -EOPNOTSUPP) {
+ pr_err("Could not register luo filesystem handler: %pe\n",
+ ERR_PTR(err));
+
+ return err;
+ }
+
+ return 0;
+}
+late_initcall(memfd_luo_init);
diff --git a/mm/shmem.c b/mm/shmem.c
index d578d8e765d7..3f194c9842a8 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -174,20 +174,20 @@ static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
*/
static inline int shmem_acct_size(unsigned long flags, loff_t size)
{
- return (flags & VM_NORESERVE) ?
+ return (flags & SHMEM_F_NORESERVE) ?
0 : security_vm_enough_memory_mm(current->mm, VM_ACCT(size));
}
static inline void shmem_unacct_size(unsigned long flags, loff_t size)
{
- if (!(flags & VM_NORESERVE))
+ if (!(flags & SHMEM_F_NORESERVE))
vm_unacct_memory(VM_ACCT(size));
}
static inline int shmem_reacct_size(unsigned long flags,
loff_t oldsize, loff_t newsize)
{
- if (!(flags & VM_NORESERVE)) {
+ if (!(flags & SHMEM_F_NORESERVE)) {
if (VM_ACCT(newsize) > VM_ACCT(oldsize))
return security_vm_enough_memory_mm(current->mm,
VM_ACCT(newsize) - VM_ACCT(oldsize));
@@ -205,7 +205,7 @@ static inline int shmem_reacct_size(unsigned long flags,
*/
static inline int shmem_acct_blocks(unsigned long flags, long pages)
{
- if (!(flags & VM_NORESERVE))
+ if (!(flags & SHMEM_F_NORESERVE))
return 0;
return security_vm_enough_memory_mm(current->mm,
@@ -214,11 +214,11 @@ static inline int shmem_acct_blocks(unsigned long flags, long pages)
static inline void shmem_unacct_blocks(unsigned long flags, long pages)
{
- if (flags & VM_NORESERVE)
+ if (flags & SHMEM_F_NORESERVE)
vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE));
}
-static int shmem_inode_acct_blocks(struct inode *inode, long pages)
+int shmem_inode_acct_blocks(struct inode *inode, long pages)
{
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
@@ -434,7 +434,7 @@ static void shmem_free_inode(struct super_block *sb, size_t freed_ispace)
*
* Return: true if swapped was incremented from 0, for shmem_writeout().
*/
-static bool shmem_recalc_inode(struct inode *inode, long alloced, long swapped)
+bool shmem_recalc_inode(struct inode *inode, long alloced, long swapped)
{
struct shmem_inode_info *info = SHMEM_I(inode);
bool first_swapped = false;
@@ -878,9 +878,9 @@ static void shmem_update_stats(struct folio *folio, int nr_pages)
/*
* Somewhat like filemap_add_folio, but error if expected item has gone.
*/
-static int shmem_add_to_page_cache(struct folio *folio,
- struct address_space *mapping,
- pgoff_t index, void *expected, gfp_t gfp)
+int shmem_add_to_page_cache(struct folio *folio,
+ struct address_space *mapping,
+ pgoff_t index, void *expected, gfp_t gfp)
{
XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio));
unsigned long nr = folio_nr_pages(folio);
@@ -1314,6 +1314,8 @@ static int shmem_setattr(struct mnt_idmap *idmap,
return -EPERM;
if (newsize != oldsize) {
+ if (info->flags & SHMEM_F_MAPPING_FROZEN)
+ return -EPERM;
error = shmem_reacct_size(SHMEM_I(inode)->flags,
oldsize, newsize);
if (error)
@@ -1568,7 +1570,7 @@ int shmem_writeout(struct folio *folio, struct swap_iocb **plug,
int nr_pages;
bool split = false;
- if ((info->flags & VM_LOCKED) || sbinfo->noswap)
+ if ((info->flags & SHMEM_F_LOCKED) || sbinfo->noswap)
goto redirty;
if (!total_swap_pages)
@@ -2926,15 +2928,15 @@ int shmem_lock(struct file *file, int lock, struct ucounts *ucounts)
* ipc_lock_object() when called from shmctl_do_lock(),
* no serialization needed when called from shm_destroy().
*/
- if (lock && !(info->flags & VM_LOCKED)) {
+ if (lock && !(info->flags & SHMEM_F_LOCKED)) {
if (!user_shm_lock(inode->i_size, ucounts))
goto out_nomem;
- info->flags |= VM_LOCKED;
+ info->flags |= SHMEM_F_LOCKED;
mapping_set_unevictable(file->f_mapping);
}
- if (!lock && (info->flags & VM_LOCKED) && ucounts) {
+ if (!lock && (info->flags & SHMEM_F_LOCKED) && ucounts) {
user_shm_unlock(inode->i_size, ucounts);
- info->flags &= ~VM_LOCKED;
+ info->flags &= ~SHMEM_F_LOCKED;
mapping_clear_unevictable(file->f_mapping);
}
retval = 0;
@@ -3079,7 +3081,7 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
spin_lock_init(&info->lock);
atomic_set(&info->stop_eviction, 0);
info->seals = F_SEAL_SEAL;
- info->flags = flags & VM_NORESERVE;
+ info->flags = (flags & VM_NORESERVE) ? SHMEM_F_NORESERVE : 0;
info->i_crtime = inode_get_mtime(inode);
info->fsflags = (dir == NULL) ? 0 :
SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED;
@@ -3306,6 +3308,10 @@ shmem_write_begin(const struct kiocb *iocb, struct address_space *mapping,
return -EPERM;
}
+ if (unlikely((info->flags & SHMEM_F_MAPPING_FROZEN) &&
+ pos + len > inode->i_size))
+ return -EPERM;
+
ret = shmem_get_folio(inode, index, pos + len, &folio, SGP_WRITE);
if (ret)
return ret;
@@ -3679,6 +3685,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
inode_lock(inode);
+ if (info->flags & SHMEM_F_MAPPING_FROZEN) {
+ error = -EPERM;
+ goto out;
+ }
+
if (mode & FALLOC_FL_PUNCH_HOLE) {
struct address_space *mapping = file->f_mapping;
loff_t unmap_start = round_up(offset, PAGE_SIZE);
@@ -5799,8 +5810,10 @@ static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap,
/* common code */
static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name,
- loff_t size, unsigned long flags, unsigned int i_flags)
+ loff_t size, unsigned long vm_flags,
+ unsigned int i_flags)
{
+ unsigned long flags = (vm_flags & VM_NORESERVE) ? SHMEM_F_NORESERVE : 0;
struct inode *inode;
struct file *res;
@@ -5817,7 +5830,7 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name,
return ERR_PTR(-ENOMEM);
inode = shmem_get_inode(&nop_mnt_idmap, mnt->mnt_sb, NULL,
- S_IFREG | S_IRWXUGO, 0, flags);
+ S_IFREG | S_IRWXUGO, 0, vm_flags);
if (IS_ERR(inode)) {
shmem_unacct_size(flags, size);
return ERR_CAST(inode);
diff --git a/rust/helpers/rbtree.c b/rust/helpers/rbtree.c
index 6d404b84a9b5..2a0eabbb4160 100644
--- a/rust/helpers/rbtree.c
+++ b/rust/helpers/rbtree.c
@@ -7,3 +7,13 @@ void rust_helper_rb_link_node(struct rb_node *node, struct rb_node *parent,
{
rb_link_node(node, parent, rb_link);
}
+
+struct rb_node *rust_helper_rb_first(const struct rb_root *root)
+{
+ return rb_first(root);
+}
+
+struct rb_node *rust_helper_rb_last(const struct rb_root *root)
+{
+ return rb_last(root);
+}
diff --git a/rust/helpers/uaccess.c b/rust/helpers/uaccess.c
index f49076f813cd..4629b2d15529 100644
--- a/rust/helpers/uaccess.c
+++ b/rust/helpers/uaccess.c
@@ -13,3 +13,15 @@ unsigned long rust_helper_copy_to_user(void __user *to, const void *from,
{
return copy_to_user(to, from, n);
}
+
+#ifdef INLINE_COPY_FROM_USER
+unsigned long rust_helper__copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ return _inline_copy_from_user(to, from, n);
+}
+
+unsigned long rust_helper__copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ return _inline_copy_to_user(to, from, n);
+}
+#endif
diff --git a/samples/Kconfig b/samples/Kconfig
index 6e072a5f1ed8..5bc7c9e5a59e 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -23,11 +23,11 @@ config SAMPLE_TRACE_CUSTOM_EVENTS
This builds the custom trace event example module.
config SAMPLE_TRACE_PRINTK
- tristate "Build trace_printk module - tests various trace_printk formats"
+ tristate "Build trace_printk module - tests various trace_printk formats"
depends on EVENT_TRACING && m
help
- This builds a module that calls trace_printk() and can be used to
- test various trace_printk() calls from a module.
+ This builds a module that calls trace_printk() and can be used to
+ test various trace_printk() calls from a module.
config SAMPLE_FTRACE_DIRECT
tristate "Build register_ftrace_direct() example"
@@ -54,11 +54,11 @@ config SAMPLE_FTRACE_OPS
measures the time taken to invoke one function a number of times.
config SAMPLE_TRACE_ARRAY
- tristate "Build sample module for kernel access to Ftrace instances"
+ tristate "Build sample module for kernel access to Ftrace instances"
depends on EVENT_TRACING && m
help
- This builds a module that demonstrates the use of various APIs to
- access Ftrace instances from within the kernel.
+ This builds a module that demonstrates the use of various APIs to
+ access Ftrace instances from within the kernel.
config SAMPLE_KOBJECT
tristate "Build kobject examples"
@@ -290,11 +290,11 @@ config SAMPLE_CORESIGHT_SYSCFG
configurations and easily load them into the system at runtime.
config SAMPLE_KMEMLEAK
- tristate "Simple test for the kernel memory leak detector"
- depends on DEBUG_KMEMLEAK && m
- help
- Build a sample program which have explicitly leaks memory to test
- kmemleak
+ tristate "Simple test for the kernel memory leak detector"
+ depends on DEBUG_KMEMLEAK && m
+ help
+ Build a sample program which have explicitly leaks memory to test
+ kmemleak.
config SAMPLE_CGROUP
bool "Build cgroup sample code"
diff --git a/samples/vfs/Makefile b/samples/vfs/Makefile
index 6554b73a75c8..9256ca5d762b 100644
--- a/samples/vfs/Makefile
+++ b/samples/vfs/Makefile
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
userprogs-always-y += test-fsmount test-statx mountinfo test-list-all-mounts
+userccflags += -I $(srctree)/tools/testing/selftests/
userccflags += -I usr/include
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 92669904eecc..d58ca9655ab7 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -860,6 +860,10 @@ our %deprecated_apis = (
"kunmap" => "kunmap_local",
"kmap_atomic" => "kmap_local_page",
"kunmap_atomic" => "kunmap_local",
+ #These should be enough to drive away new IDR users
+ "DEFINE_IDR" => "DEFINE_XARRAY",
+ "idr_init" => "xa_init",
+ "idr_init_base" => "xa_init_flags"
);
#Create a search pattern for all these strings to speed up a loop below
@@ -3345,6 +3349,13 @@ sub process {
}
}
+# Check for auto-generated unhandled placeholder text (mostly for cover letters)
+ if (($in_commit_log || $in_header_lines) &&
+ $rawline =~ /(?:SUBJECT|BLURB) HERE/) {
+ ERROR("PLACEHOLDER_USE",
+ "Placeholder text detected\n" . $herecurr);
+ }
+
# Check for git id commit length and improperly formed commit descriptions
# A correctly formed commit description is:
# commit <SHA-1 hash length 12+ chars> ("Complete commit subject")
diff --git a/scripts/gdb/linux/bpf.py b/scripts/gdb/linux/bpf.py
new file mode 100644
index 000000000000..1870534ef6f9
--- /dev/null
+++ b/scripts/gdb/linux/bpf.py
@@ -0,0 +1,253 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import json
+import subprocess
+import tempfile
+
+import gdb
+
+from linux import constants, lists, radixtree, utils
+
+
+if constants.LX_CONFIG_BPF and constants.LX_CONFIG_BPF_JIT:
+ bpf_ksym_type = utils.CachedType("struct bpf_ksym")
+if constants.LX_CONFIG_BPF_SYSCALL:
+ bpf_prog_type = utils.CachedType("struct bpf_prog")
+
+
+def get_ksym_name(ksym):
+ name = ksym["name"].bytes
+ end = name.find(b"\x00")
+ if end != -1:
+ name = name[:end]
+ return name.decode()
+
+
+def list_ksyms():
+ if not (constants.LX_CONFIG_BPF and constants.LX_CONFIG_BPF_JIT):
+ return []
+ bpf_kallsyms = gdb.parse_and_eval("&bpf_kallsyms")
+ bpf_ksym_ptr_type = bpf_ksym_type.get_type().pointer()
+ return list(lists.list_for_each_entry(bpf_kallsyms,
+ bpf_ksym_ptr_type,
+ "lnode"))
+
+
+class KsymAddBreakpoint(gdb.Breakpoint):
+ def __init__(self, monitor):
+ super(KsymAddBreakpoint, self).__init__("bpf_ksym_add", internal=True)
+ self.silent = True
+ self.monitor = monitor
+
+ def stop(self):
+ self.monitor.add(gdb.parse_and_eval("ksym"))
+ return False
+
+
+class KsymRemoveBreakpoint(gdb.Breakpoint):
+ def __init__(self, monitor):
+ super(KsymRemoveBreakpoint, self).__init__("bpf_ksym_del",
+ internal=True)
+ self.silent = True
+ self.monitor = monitor
+
+ def stop(self):
+ self.monitor.remove(gdb.parse_and_eval("ksym"))
+ return False
+
+
+class KsymMonitor:
+ def __init__(self, add, remove):
+ self.add = add
+ self.remove = remove
+
+ self.add_bp = KsymAddBreakpoint(self)
+ self.remove_bp = KsymRemoveBreakpoint(self)
+
+ self.notify_initial()
+
+ def notify_initial(self):
+ for ksym in list_ksyms():
+ self.add(ksym)
+
+ def delete(self):
+ self.add_bp.delete()
+ self.remove_bp.delete()
+
+
+def list_progs():
+ if not constants.LX_CONFIG_BPF_SYSCALL:
+ return []
+ idr_rt = gdb.parse_and_eval("&prog_idr.idr_rt")
+ bpf_prog_ptr_type = bpf_prog_type.get_type().pointer()
+ progs = []
+ for _, slot in radixtree.for_each_slot(idr_rt):
+ prog = slot.dereference().cast(bpf_prog_ptr_type)
+ progs.append(prog)
+ # Subprogs are not registered in prog_idr, fetch them manually.
+ # func[0] is the current prog.
+ aux = prog["aux"]
+ func = aux["func"]
+ real_func_cnt = int(aux["real_func_cnt"])
+ for i in range(1, real_func_cnt):
+ progs.append(func[i])
+ return progs
+
+
+class ProgAddBreakpoint(gdb.Breakpoint):
+ def __init__(self, monitor):
+ super(ProgAddBreakpoint, self).__init__("bpf_prog_kallsyms_add",
+ internal=True)
+ self.silent = True
+ self.monitor = monitor
+
+ def stop(self):
+ self.monitor.add(gdb.parse_and_eval("fp"))
+ return False
+
+
+class ProgRemoveBreakpoint(gdb.Breakpoint):
+ def __init__(self, monitor):
+ super(ProgRemoveBreakpoint, self).__init__("bpf_prog_free_id",
+ internal=True)
+ self.silent = True
+ self.monitor = monitor
+
+ def stop(self):
+ self.monitor.remove(gdb.parse_and_eval("prog"))
+ return False
+
+
+class ProgMonitor:
+ def __init__(self, add, remove):
+ self.add = add
+ self.remove = remove
+
+ self.add_bp = ProgAddBreakpoint(self)
+ self.remove_bp = ProgRemoveBreakpoint(self)
+
+ self.notify_initial()
+
+ def notify_initial(self):
+ for prog in list_progs():
+ self.add(prog)
+
+ def delete(self):
+ self.add_bp.delete()
+ self.remove_bp.delete()
+
+
+def btf_str_by_offset(btf, offset):
+ while offset < btf["start_str_off"]:
+ btf = btf["base_btf"]
+
+ offset -= btf["start_str_off"]
+ if offset < btf["hdr"]["str_len"]:
+ return (btf["strings"] + offset).string()
+
+ return None
+
+
+def bpf_line_info_line_num(line_col):
+ return line_col >> 10
+
+
+def bpf_line_info_line_col(line_col):
+ return line_col & 0x3ff
+
+
+class LInfoIter:
+ def __init__(self, prog):
+ # See bpf_prog_get_file_line() for details.
+ self.pos = 0
+ self.nr_linfo = 0
+
+ if prog is None:
+ return
+
+ self.bpf_func = int(prog["bpf_func"])
+ aux = prog["aux"]
+ self.btf = aux["btf"]
+ linfo_idx = aux["linfo_idx"]
+ self.nr_linfo = int(aux["nr_linfo"]) - linfo_idx
+ if self.nr_linfo == 0:
+ return
+
+ linfo_ptr = aux["linfo"]
+ tpe = linfo_ptr.type.target().array(self.nr_linfo).pointer()
+ self.linfo = (linfo_ptr + linfo_idx).cast(tpe).dereference()
+ jited_linfo_ptr = aux["jited_linfo"]
+ tpe = jited_linfo_ptr.type.target().array(self.nr_linfo).pointer()
+ self.jited_linfo = (jited_linfo_ptr + linfo_idx).cast(tpe).dereference()
+
+ self.filenos = {}
+
+ def get_code_off(self):
+ if self.pos >= self.nr_linfo:
+ return -1
+ return self.jited_linfo[self.pos] - self.bpf_func
+
+ def advance(self):
+ self.pos += 1
+
+ def get_fileno(self):
+ file_name_off = int(self.linfo[self.pos]["file_name_off"])
+ fileno = self.filenos.get(file_name_off)
+ if fileno is not None:
+ return fileno, None
+ file_name = btf_str_by_offset(self.btf, file_name_off)
+ fileno = len(self.filenos) + 1
+ self.filenos[file_name_off] = fileno
+ return fileno, file_name
+
+ def get_line_col(self):
+ line_col = int(self.linfo[self.pos]["line_col"])
+ return bpf_line_info_line_num(line_col), \
+ bpf_line_info_line_col(line_col)
+
+
+def generate_debug_obj(ksym, prog):
+ name = get_ksym_name(ksym)
+ # Avoid read_memory(); it throws bogus gdb.MemoryError in some contexts.
+ start = ksym["start"]
+ code = start.cast(gdb.lookup_type("unsigned char")
+ .array(int(ksym["end"]) - int(start))
+ .pointer()).dereference().bytes
+ linfo_iter = LInfoIter(prog)
+
+ result = tempfile.NamedTemporaryFile(suffix=".o", mode="wb")
+ try:
+ with tempfile.NamedTemporaryFile(suffix=".s", mode="w") as src:
+ # ".loc" does not apply to ".byte"s, only to ".insn"s, but since
+ # this needs to work for all architectures, the latter are not an
+ # option. Ask the assembler to apply ".loc"s to labels as well,
+ # and generate dummy labels after each ".loc".
+ src.write(".loc_mark_labels 1\n")
+
+ src.write(".globl {}\n".format(name))
+ src.write(".type {},@function\n".format(name))
+ src.write("{}:\n".format(name))
+ for code_off, code_byte in enumerate(code):
+ if linfo_iter.get_code_off() == code_off:
+ fileno, file_name = linfo_iter.get_fileno()
+ if file_name is not None:
+ src.write(".file {} {}\n".format(
+ fileno, json.dumps(file_name)))
+ line, col = linfo_iter.get_line_col()
+ src.write(".loc {} {} {}\n".format(fileno, line, col))
+ src.write("0:\n")
+ linfo_iter.advance()
+ src.write(".byte {}\n".format(code_byte))
+ src.write(".size {},{}\n".format(name, len(code)))
+ src.flush()
+
+ try:
+ subprocess.check_call(["as", "-c", src.name, "-o", result.name])
+ except FileNotFoundError:
+ # "as" is not installed.
+ result.close()
+ return None
+ return result
+ except:
+ result.close()
+ raise
diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in
index c3886739a028..6d475540c6ba 100644
--- a/scripts/gdb/linux/constants.py.in
+++ b/scripts/gdb/linux/constants.py.in
@@ -170,3 +170,6 @@ LX_CONFIG(CONFIG_PAGE_OWNER)
LX_CONFIG(CONFIG_SLUB_DEBUG)
LX_CONFIG(CONFIG_SLAB_FREELIST_HARDENED)
LX_CONFIG(CONFIG_MMU)
+LX_CONFIG(CONFIG_BPF)
+LX_CONFIG(CONFIG_BPF_JIT)
+LX_CONFIG(CONFIG_BPF_SYSCALL)
diff --git a/scripts/gdb/linux/radixtree.py b/scripts/gdb/linux/radixtree.py
index 074543ac763d..bc2954e45c32 100644
--- a/scripts/gdb/linux/radixtree.py
+++ b/scripts/gdb/linux/radixtree.py
@@ -30,13 +30,16 @@ def entry_to_node(node):
def node_maxindex(node):
return (constants.LX_RADIX_TREE_MAP_SIZE << node['shift']) - 1
-def lookup(root, index):
+def resolve_root(root):
+ if root.type == radix_tree_root_type.get_type():
+ return root
if root.type == radix_tree_root_type.get_type().pointer():
- node = root.dereference()
- elif root.type != radix_tree_root_type.get_type():
- raise gdb.GdbError("must be {} not {}"
- .format(radix_tree_root_type.get_type(), root.type))
+ return root.dereference()
+ raise gdb.GdbError("must be {} not {}"
+ .format(radix_tree_root_type.get_type(), root.type))
+def lookup(root, index):
+ root = resolve_root(root)
node = root['xa_head']
if node == 0:
return None
@@ -71,14 +74,120 @@ def lookup(root, index):
return node
-class LxRadixTree(gdb.Function):
+def descend(parent, index):
+ offset = (index >> int(parent["shift"])) & constants.LX_RADIX_TREE_MAP_MASK
+ return offset, parent["slots"][offset]
+
+def load_root(root):
+ node = root["xa_head"]
+ nodep = node
+
+ if is_internal_node(node):
+ node = entry_to_node(node)
+ maxindex = node_maxindex(node)
+ return int(node["shift"]) + constants.LX_RADIX_TREE_MAP_SHIFT, \
+ nodep, maxindex
+
+ return 0, nodep, 0
+
+class RadixTreeIter:
+ def __init__(self, start):
+ self.index = 0
+ self.next_index = start
+ self.node = None
+
+def xa_mk_internal(v):
+ return (v << 2) | 2
+
+LX_XA_RETRY_ENTRY = xa_mk_internal(256)
+LX_RADIX_TREE_RETRY = LX_XA_RETRY_ENTRY
+
+def next_chunk(root, iter):
+ mask = (1 << (utils.get_ulong_type().sizeof * 8)) - 1
+
+ index = iter.next_index
+ if index == 0 and iter.index != 0:
+ return None
+
+ restart = True
+ while restart:
+ restart = False
+
+ _, child, maxindex = load_root(root)
+ if index > maxindex:
+ return None
+ if not child:
+ return None
+
+ if not is_internal_node(child):
+ iter.index = index
+ iter.next_index = (maxindex + 1) & mask
+ iter.node = None
+ return root["xa_head"].address
+
+ while True:
+ node = entry_to_node(child)
+ offset, child = descend(node, index)
+
+ if not child:
+ while True:
+ offset += 1
+ if offset >= constants.LX_RADIX_TREE_MAP_SIZE:
+ break
+ slot = node["slots"][offset]
+ if slot:
+ break
+ index &= ~node_maxindex(node)
+ index = (index + (offset << int(node["shift"]))) & mask
+ if index == 0:
+ return None
+ if offset == constants.LX_RADIX_TREE_MAP_SIZE:
+ restart = True
+ break
+ child = node["slots"][offset]
+
+ if not child:
+ restart = True
+ break
+ if child == LX_XA_RETRY_ENTRY:
+ break
+ if not node["shift"] or not is_internal_node(child):
+ break
+
+ iter.index = (index & ~node_maxindex(node)) | offset
+ iter.next_index = ((index | node_maxindex(node)) + 1) & mask
+ iter.node = node
+
+ return node["slots"][offset].address
+
+def next_slot(slot, iter):
+ mask = (1 << (utils.get_ulong_type().sizeof * 8)) - 1
+ for _ in range(iter.next_index - iter.index - 1):
+ slot += 1
+ iter.index = (iter.index + 1) & mask
+ if slot.dereference():
+ return slot
+ return None
+
+def for_each_slot(root, start=0):
+ iter = RadixTreeIter(start)
+ slot = None
+ while True:
+ if not slot:
+ slot = next_chunk(root, iter)
+ if not slot:
+ break
+ yield iter.index, slot
+ slot = next_slot(slot, iter)
+
+class LxRadixTreeLookup(gdb.Function):
""" Lookup and return a node from a RadixTree.
$lx_radix_tree_lookup(root_node [, index]): Return the node at the given index.
If index is omitted, the root node is dereference and returned."""
def __init__(self):
- super(LxRadixTree, self).__init__("lx_radix_tree_lookup")
+ super(LxRadixTreeLookup, self).__init__("lx_radix_tree_lookup")
def invoke(self, root, index=0):
result = lookup(root, index)
@@ -87,4 +196,20 @@ If index is omitted, the root node is dereference and returned."""
return result
+class LxRadixTree(gdb.Command):
+ """Show all values stored in a RadixTree."""
+
+ def __init__(self):
+ super(LxRadixTree, self).__init__("lx-radix-tree", gdb.COMMAND_DATA,
+ gdb.COMPLETE_NONE)
+
+ def invoke(self, argument, from_tty):
+ args = gdb.string_to_argv(argument)
+ if len(args) != 1:
+ raise gdb.GdbError("Usage: lx-radix-tree ROOT")
+ root = gdb.parse_and_eval(args[0])
+ for index, slot in for_each_slot(root):
+ gdb.write("[{}] = {}\n".format(index, slot.dereference()))
+
LxRadixTree()
+LxRadixTreeLookup()
diff --git a/scripts/gdb/linux/symbols.py b/scripts/gdb/linux/symbols.py
index 6edb99221675..d4308b726183 100644
--- a/scripts/gdb/linux/symbols.py
+++ b/scripts/gdb/linux/symbols.py
@@ -11,13 +11,14 @@
# This work is licensed under the terms of the GNU GPL version 2.
#
+import atexit
import gdb
import os
import re
import struct
from itertools import count
-from linux import modules, utils, constants
+from linux import bpf, constants, modules, utils
if hasattr(gdb, 'Breakpoint'):
@@ -114,17 +115,27 @@ class LxSymbols(gdb.Command):
The kernel (vmlinux) is taken from the current working directly. Modules (.ko)
are scanned recursively, starting in the same directory. Optionally, the module
search path can be extended by a space separated list of paths passed to the
-lx-symbols command."""
+lx-symbols command.
+
+When the -bpf flag is specified, symbols from the currently loaded BPF programs
+are loaded as well."""
module_paths = []
module_files = []
module_files_updated = False
loaded_modules = []
breakpoint = None
+ bpf_prog_monitor = None
+ bpf_ksym_monitor = None
+ bpf_progs = {}
+ # The remove-symbol-file command, even when invoked with -a, requires the
+ # respective object file to exist, so keep them around.
+ bpf_debug_objs = {}
def __init__(self):
super(LxSymbols, self).__init__("lx-symbols", gdb.COMMAND_FILES,
gdb.COMPLETE_FILENAME)
+ atexit.register(self.cleanup_bpf)
def _update_module_files(self):
self.module_files = []
@@ -197,6 +208,51 @@ lx-symbols command."""
else:
gdb.write("no module object found for '{0}'\n".format(module_name))
+ def add_bpf_prog(self, prog):
+ if prog["jited"]:
+ self.bpf_progs[int(prog["bpf_func"])] = prog
+
+ def remove_bpf_prog(self, prog):
+ self.bpf_progs.pop(int(prog["bpf_func"]), None)
+
+ def add_bpf_ksym(self, ksym):
+ addr = int(ksym["start"])
+ name = bpf.get_ksym_name(ksym)
+ with utils.pagination_off():
+ gdb.write("loading @{addr}: {name}\n".format(
+ addr=hex(addr), name=name))
+ debug_obj = bpf.generate_debug_obj(ksym, self.bpf_progs.get(addr))
+ if debug_obj is None:
+ return
+ try:
+ cmdline = "add-symbol-file {obj} {addr}".format(
+ obj=debug_obj.name, addr=hex(addr))
+ gdb.execute(cmdline, to_string=True)
+ except:
+ debug_obj.close()
+ raise
+ self.bpf_debug_objs[addr] = debug_obj
+
+ def remove_bpf_ksym(self, ksym):
+ addr = int(ksym["start"])
+ debug_obj = self.bpf_debug_objs.pop(addr, None)
+ if debug_obj is None:
+ return
+ try:
+ name = bpf.get_ksym_name(ksym)
+ gdb.write("unloading @{addr}: {name}\n".format(
+ addr=hex(addr), name=name))
+ cmdline = "remove-symbol-file {path}".format(path=debug_obj.name)
+ gdb.execute(cmdline, to_string=True)
+ finally:
+ debug_obj.close()
+
+ def cleanup_bpf(self):
+ self.bpf_progs = {}
+ while len(self.bpf_debug_objs) > 0:
+ self.bpf_debug_objs.popitem()[1].close()
+
+
def load_all_symbols(self):
gdb.write("loading vmlinux\n")
@@ -224,34 +280,59 @@ lx-symbols command."""
else:
[self.load_module_symbols(module) for module in module_list]
+ self.cleanup_bpf()
+ if self.bpf_prog_monitor is not None:
+ self.bpf_prog_monitor.notify_initial()
+ if self.bpf_ksym_monitor is not None:
+ self.bpf_ksym_monitor.notify_initial()
+
for saved_state in saved_states:
saved_state['breakpoint'].enabled = saved_state['enabled']
def invoke(self, arg, from_tty):
skip_decompressor()
- self.module_paths = [os.path.abspath(os.path.expanduser(p))
- for p in arg.split()]
+ monitor_bpf = False
+ self.module_paths = []
+ for p in arg.split():
+ if p == "-bpf":
+ monitor_bpf = True
+ else:
+ p.append(os.path.abspath(os.path.expanduser(p)))
self.module_paths.append(os.getcwd())
+ if self.breakpoint is not None:
+ self.breakpoint.delete()
+ self.breakpoint = None
+ if self.bpf_prog_monitor is not None:
+ self.bpf_prog_monitor.delete()
+ self.bpf_prog_monitor = None
+ if self.bpf_ksym_monitor is not None:
+ self.bpf_ksym_monitor.delete()
+ self.bpf_ksym_monitor = None
+
# enforce update
self.module_files = []
self.module_files_updated = False
self.load_all_symbols()
- if not modules.has_modules():
+ if not hasattr(gdb, 'Breakpoint'):
+ gdb.write("Note: symbol update on module and BPF loading not "
+ "supported with this gdb version\n")
return
- if hasattr(gdb, 'Breakpoint'):
- if self.breakpoint is not None:
- self.breakpoint.delete()
- self.breakpoint = None
+ if modules.has_modules():
self.breakpoint = LoadModuleBreakpoint(
"kernel/module/main.c:do_init_module", self)
- else:
- gdb.write("Note: symbol update on module loading not supported "
- "with this gdb version\n")
+
+ if monitor_bpf:
+ if constants.LX_CONFIG_BPF_SYSCALL:
+ self.bpf_prog_monitor = bpf.ProgMonitor(self.add_bpf_prog,
+ self.remove_bpf_prog)
+ if constants.LX_CONFIG_BPF and constants.LX_CONFIG_BPF_JIT:
+ self.bpf_ksym_monitor = bpf.KsymMonitor(self.add_bpf_ksym,
+ self.remove_bpf_ksym)
LxSymbols()
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index c46ebdb9b8ef..56e44a98d6a5 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -54,6 +54,7 @@ TARGETS += kvm
TARGETS += landlock
TARGETS += lib
TARGETS += livepatch
+TARGETS += liveupdate
TARGETS += lkdtm
TARGETS += lsm
TARGETS += membarrier
diff --git a/tools/testing/selftests/acct/acct_syscall.c b/tools/testing/selftests/acct/acct_syscall.c
index 87c044fb9293..421adbdc299d 100644
--- a/tools/testing/selftests/acct/acct_syscall.c
+++ b/tools/testing/selftests/acct/acct_syscall.c
@@ -9,7 +9,7 @@
#include <string.h>
#include <sys/wait.h>
-#include "../kselftest.h"
+#include "kselftest.h"
int main(void)
{
diff --git a/tools/testing/selftests/alsa/conf.c b/tools/testing/selftests/alsa/conf.c
index dc7f40e68dee..317212078e36 100644
--- a/tools/testing/selftests/alsa/conf.c
+++ b/tools/testing/selftests/alsa/conf.c
@@ -14,7 +14,7 @@
#include <regex.h>
#include <sys/stat.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "alsa-local.h"
#define SYSFS_ROOT "/sys"
diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c
index e113dafa5c24..d4f845c32804 100644
--- a/tools/testing/selftests/alsa/mixer-test.c
+++ b/tools/testing/selftests/alsa/mixer-test.c
@@ -25,7 +25,7 @@
#include <poll.h>
#include <stdint.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "alsa-local.h"
#define TESTS_PER_CONTROL 7
diff --git a/tools/testing/selftests/alsa/pcm-test.c b/tools/testing/selftests/alsa/pcm-test.c
index ce92548670c8..ee04ccef7d7c 100644
--- a/tools/testing/selftests/alsa/pcm-test.c
+++ b/tools/testing/selftests/alsa/pcm-test.c
@@ -17,7 +17,7 @@
#include <assert.h>
#include <pthread.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "alsa-local.h"
typedef struct timespec timestamp_t;
diff --git a/tools/testing/selftests/alsa/test-pcmtest-driver.c b/tools/testing/selftests/alsa/test-pcmtest-driver.c
index ca81afa4ee90..95065ef3b441 100644
--- a/tools/testing/selftests/alsa/test-pcmtest-driver.c
+++ b/tools/testing/selftests/alsa/test-pcmtest-driver.c
@@ -7,7 +7,7 @@
*/
#include <string.h>
#include <alsa/asoundlib.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#define CH_NUM 4
diff --git a/tools/testing/selftests/alsa/utimer-test.c b/tools/testing/selftests/alsa/utimer-test.c
index 37964f311a33..c45cb226bd8f 100644
--- a/tools/testing/selftests/alsa/utimer-test.c
+++ b/tools/testing/selftests/alsa/utimer-test.c
@@ -6,7 +6,7 @@
*
* Author: Ivan Orlov <ivan.orlov0322@gmail.com>
*/
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include <sound/asound.h>
#include <unistd.h>
#include <fcntl.h>
diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index 3b96d090c5eb..c41640f18e4e 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -19,7 +19,7 @@
#include <linux/auxvec.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#define TESTS_PER_HWCAP 3
diff --git a/tools/testing/selftests/arm64/abi/ptrace.c b/tools/testing/selftests/arm64/abi/ptrace.c
index b51d21f78cf9..0e46ac21c81d 100644
--- a/tools/testing/selftests/arm64/abi/ptrace.c
+++ b/tools/testing/selftests/arm64/abi/ptrace.c
@@ -18,7 +18,7 @@
#include <asm/sigcontext.h>
#include <asm/ptrace.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#define EXPECTED_TESTS 11
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c
index 5ec9a18ec802..b67e3e26fa6d 100644
--- a/tools/testing/selftests/arm64/abi/syscall-abi.c
+++ b/tools/testing/selftests/arm64/abi/syscall-abi.c
@@ -16,7 +16,7 @@
#include <asm/sigcontext.h>
#include <asm/unistd.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#include "syscall-abi.h"
diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c
index 0114108ab25f..22c584b78be5 100644
--- a/tools/testing/selftests/arm64/fp/fp-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c
@@ -27,7 +27,7 @@
#include <asm/sve_context.h>
#include <asm/ptrace.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#include "fp-ptrace.h"
diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c
index 9349aa630c84..65e01aba96ff 100644
--- a/tools/testing/selftests/arm64/fp/fp-stress.c
+++ b/tools/testing/selftests/arm64/fp/fp-stress.c
@@ -24,7 +24,7 @@
#include <sys/wait.h>
#include <asm/hwcap.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#define MAX_VLS 16
diff --git a/tools/testing/selftests/arm64/fp/sve-probe-vls.c b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
index a24eca7a4ecb..df0c1b6eb114 100644
--- a/tools/testing/selftests/arm64/fp/sve-probe-vls.c
+++ b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
@@ -12,7 +12,7 @@
#include <sys/prctl.h>
#include <asm/sigcontext.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#include "rdvl.h"
int main(int argc, char **argv)
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index f44d44618575..28f6b996c5e2 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -19,7 +19,7 @@
#include <asm/sigcontext.h>
#include <asm/ptrace.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
#ifndef NT_ARM_SVE
diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c
index 2d75d342eeb9..8dd932fdcdc4 100644
--- a/tools/testing/selftests/arm64/fp/vec-syscfg.c
+++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c
@@ -19,7 +19,7 @@
#include <asm/sigcontext.h>
#include <asm/hwcap.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#include "rdvl.h"
#define ARCH_MIN_VL SVE_VL_MIN
diff --git a/tools/testing/selftests/arm64/fp/za-ptrace.c b/tools/testing/selftests/arm64/fp/za-ptrace.c
index 08c777f87ea2..787eed22d059 100644
--- a/tools/testing/selftests/arm64/fp/za-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/za-ptrace.c
@@ -18,7 +18,7 @@
#include <asm/sigcontext.h>
#include <asm/ptrace.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
#ifndef NT_ARM_ZA
diff --git a/tools/testing/selftests/arm64/fp/zt-ptrace.c b/tools/testing/selftests/arm64/fp/zt-ptrace.c
index a7f34040fbf1..f3fa49fd0fbd 100644
--- a/tools/testing/selftests/arm64/fp/zt-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/zt-ptrace.c
@@ -18,7 +18,7 @@
#include <asm/sigcontext.h>
#include <asm/ptrace.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
#ifndef NT_ARM_ZA
diff --git a/tools/testing/selftests/arm64/gcs/gcs-stress.c b/tools/testing/selftests/arm64/gcs/gcs-stress.c
index cf316d78ea97..86d8cd42aee7 100644
--- a/tools/testing/selftests/arm64/gcs/gcs-stress.c
+++ b/tools/testing/selftests/arm64/gcs/gcs-stress.c
@@ -24,7 +24,7 @@
#include <sys/wait.h>
#include <asm/hwcap.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
struct child_data {
char *name, *output;
diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c
index 6d21b2fc758d..67d138057707 100644
--- a/tools/testing/selftests/arm64/pauth/pac.c
+++ b/tools/testing/selftests/arm64/pauth/pac.c
@@ -10,7 +10,7 @@
#include <setjmp.h>
#include <sched.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "helper.h"
#define PAC_COLLISION_ATTEMPTS 1000
diff --git a/tools/testing/selftests/arm64/tags/tags_test.c b/tools/testing/selftests/arm64/tags/tags_test.c
index 8ae26e496c89..375ab47f0edb 100644
--- a/tools/testing/selftests/arm64/tags/tags_test.c
+++ b/tools/testing/selftests/arm64/tags/tags_test.c
@@ -6,7 +6,7 @@
#include <stdint.h>
#include <sys/prctl.h>
#include <sys/utsname.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#define SHIFT_TAG(tag) ((uint64_t)(tag) << 56)
#define SET_TAG(ptr, tag) (((uint64_t)(ptr) & ~SHIFT_TAG(0xff)) | \
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 9234a58b0a97..05b3cebc5ca9 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -96,7 +96,7 @@
#include "xskxceiver.h"
#include <bpf/bpf.h>
#include <linux/filter.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "xsk_xdp_common.h"
#include <network_helpers.h>
diff --git a/tools/testing/selftests/breakpoints/breakpoint_test.c b/tools/testing/selftests/breakpoints/breakpoint_test.c
index d46962a24724..1159d81890c2 100644
--- a/tools/testing/selftests/breakpoints/breakpoint_test.c
+++ b/tools/testing/selftests/breakpoints/breakpoint_test.c
@@ -18,7 +18,7 @@
#include <errno.h>
#include <string.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define COUNT_ISN_BPS 4
#define COUNT_WPS 4
diff --git a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
index e7041816085a..5fc0f37f3fd4 100644
--- a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
+++ b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
@@ -26,7 +26,7 @@
#include <errno.h>
#include <signal.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static volatile uint8_t var[96] __attribute__((__aligned__(32)));
diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
index 8d233ac95696..ca2aaab9e4ca 100644
--- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c
+++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
@@ -19,7 +19,7 @@
#include <sys/types.h>
#include <sys/wait.h>
-#include "../kselftest.h"
+#include "kselftest.h"
void child(int cpu)
{
diff --git a/tools/testing/selftests/cachestat/test_cachestat.c b/tools/testing/selftests/cachestat/test_cachestat.c
index ab838bcb9ec5..542cd09cb443 100644
--- a/tools/testing/selftests/cachestat/test_cachestat.c
+++ b/tools/testing/selftests/cachestat/test_cachestat.c
@@ -16,7 +16,7 @@
#include <fcntl.h>
#include <errno.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define NR_TESTS 9
diff --git a/tools/testing/selftests/capabilities/test_execve.c b/tools/testing/selftests/capabilities/test_execve.c
index 47bad7ddc5bc..46fc8d46b6e6 100644
--- a/tools/testing/selftests/capabilities/test_execve.c
+++ b/tools/testing/selftests/capabilities/test_execve.c
@@ -18,7 +18,7 @@
#include <sys/prctl.h>
#include <sys/stat.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static int nerrs;
static pid_t mpid; /* main() pid is used to avoid duplicate test counts */
diff --git a/tools/testing/selftests/capabilities/validate_cap.c b/tools/testing/selftests/capabilities/validate_cap.c
index 65f2a1c89239..cef1d9937b9f 100644
--- a/tools/testing/selftests/capabilities/validate_cap.c
+++ b/tools/testing/selftests/capabilities/validate_cap.c
@@ -7,7 +7,7 @@
#include <sys/prctl.h>
#include <sys/auxv.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 19)
# define HAVE_GETAUXVAL
diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c
index 1d778c8b7764..102262555a59 100644
--- a/tools/testing/selftests/cgroup/test_core.c
+++ b/tools/testing/selftests/cgroup/test_core.c
@@ -17,7 +17,7 @@
#include <string.h>
#include <pthread.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "cgroup_util.h"
static bool nsdelegate;
diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c
index b1b30e82dd7c..c83f05438d7c 100644
--- a/tools/testing/selftests/cgroup/test_cpu.c
+++ b/tools/testing/selftests/cgroup/test_cpu.c
@@ -11,7 +11,7 @@
#include <time.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "cgroup_util.h"
enum hog_clock_type {
diff --git a/tools/testing/selftests/cgroup/test_cpuset.c b/tools/testing/selftests/cgroup/test_cpuset.c
index 8086d2ea394f..c5cf8b56ceb8 100644
--- a/tools/testing/selftests/cgroup/test_cpuset.c
+++ b/tools/testing/selftests/cgroup/test_cpuset.c
@@ -3,7 +3,7 @@
#include <linux/limits.h>
#include <signal.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "cgroup_util.h"
static int idle_process_fn(const char *cgroup, void *arg)
diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c
index 465cdad2bfca..97fae92c8387 100644
--- a/tools/testing/selftests/cgroup/test_freezer.c
+++ b/tools/testing/selftests/cgroup/test_freezer.c
@@ -11,7 +11,7 @@
#include <string.h>
#include <sys/wait.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "cgroup_util.h"
#define DEBUG
diff --git a/tools/testing/selftests/cgroup/test_hugetlb_memcg.c b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
index 856f9508ea56..f451aa449be6 100644
--- a/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
+++ b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
@@ -7,7 +7,7 @@
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "cgroup_util.h"
#define ADDR ((void *)(0x0UL))
diff --git a/tools/testing/selftests/cgroup/test_kill.c b/tools/testing/selftests/cgroup/test_kill.c
index ed590b150a17..c8c9d306925b 100644
--- a/tools/testing/selftests/cgroup/test_kill.c
+++ b/tools/testing/selftests/cgroup/test_kill.c
@@ -9,7 +9,7 @@
#include <sys/types.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "../pidfd/pidfd.h"
#include "cgroup_util.h"
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
index d4c4a514ee43..ca38525484e3 100644
--- a/tools/testing/selftests/cgroup/test_kmem.c
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -14,7 +14,7 @@
#include <sys/sysinfo.h>
#include <pthread.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "cgroup_util.h"
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index b117325c0439..4e1647568c5b 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -18,7 +18,7 @@
#include <errno.h>
#include <sys/mman.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "cgroup_util.h"
static bool has_localevents;
diff --git a/tools/testing/selftests/cgroup/test_pids.c b/tools/testing/selftests/cgroup/test_pids.c
index d8a1d1cd5007..9a387c815d2c 100644
--- a/tools/testing/selftests/cgroup/test_pids.c
+++ b/tools/testing/selftests/cgroup/test_pids.c
@@ -9,7 +9,7 @@
#include <sys/types.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "cgroup_util.h"
static int run_success(const char *cgroup, void *arg)
diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c
index 86a8930b47e3..64ebc3f3f203 100644
--- a/tools/testing/selftests/cgroup/test_zswap.c
+++ b/tools/testing/selftests/cgroup/test_zswap.c
@@ -10,7 +10,7 @@
#include <sys/wait.h>
#include <sys/mman.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "cgroup_util.h"
static int read_int(const char *path, size_t *value)
diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c
index e61f07973ce5..289e0c7c1f09 100644
--- a/tools/testing/selftests/clone3/clone3.c
+++ b/tools/testing/selftests/clone3/clone3.c
@@ -18,7 +18,7 @@
#include <unistd.h>
#include <sched.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "clone3_selftests.h"
enum test_mode {
diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
index 3c196fa86c99..e82281efa273 100644
--- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
+++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
@@ -24,7 +24,7 @@
#include <unistd.h>
#include <sched.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "clone3_selftests.h"
static void child_exit(int ret)
diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c
index ce0426786828..de0c9d62015d 100644
--- a/tools/testing/selftests/clone3/clone3_clear_sighand.c
+++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c
@@ -13,7 +13,7 @@
#include <sys/syscall.h>
#include <sys/wait.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "clone3_selftests.h"
static void nop_handler(int signo)
diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h
index eeca8005723f..a0593e8950f0 100644
--- a/tools/testing/selftests/clone3/clone3_selftests.h
+++ b/tools/testing/selftests/clone3/clone3_selftests.h
@@ -11,7 +11,7 @@
#include <syscall.h>
#include <sys/wait.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c
index bfb0da2b4fdd..5c944aee6b41 100644
--- a/tools/testing/selftests/clone3/clone3_set_tid.c
+++ b/tools/testing/selftests/clone3/clone3_set_tid.c
@@ -20,7 +20,7 @@
#include <unistd.h>
#include <sched.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "clone3_selftests.h"
#define MAX_PID_NS_LEVEL 32
diff --git a/tools/testing/selftests/connector/proc_filter.c b/tools/testing/selftests/connector/proc_filter.c
index 4a825b997666..36c11467a8f1 100644
--- a/tools/testing/selftests/connector/proc_filter.c
+++ b/tools/testing/selftests/connector/proc_filter.c
@@ -16,7 +16,7 @@
#include <signal.h>
#include <string.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define NL_MESSAGE_SIZE (sizeof(struct nlmsghdr) + sizeof(struct cn_msg) + \
sizeof(struct proc_input))
diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
index e0d9851fe1c9..f14eca63f20c 100644
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -14,7 +14,7 @@
#include <sys/resource.h>
#include <linux/close_range.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "../clone3/clone3_selftests.h"
diff --git a/tools/testing/selftests/core/unshare_test.c b/tools/testing/selftests/core/unshare_test.c
index 7fec9dfb1b0e..ffce75a6c228 100644
--- a/tools/testing/selftests/core/unshare_test.c
+++ b/tools/testing/selftests/core/unshare_test.c
@@ -14,7 +14,7 @@
#include <sys/resource.h>
#include <linux/close_range.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "../clone3/clone3_selftests.h"
TEST(unshare_EMFILE)
diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c
index c2e895bcc160..1ec88937a1c2 100644
--- a/tools/testing/selftests/coredump/stackdump_test.c
+++ b/tools/testing/selftests/coredump/stackdump_test.c
@@ -19,7 +19,7 @@
#include <sys/un.h>
#include <unistd.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "../filesystems/wrappers.h"
#include "../pidfd/pidfd.h"
diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
index 5d0a809dc2df..fc9694fc4e89 100644
--- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
+++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
@@ -15,7 +15,7 @@
#include <linux/dma-buf.h>
#include <linux/dma-heap.h>
#include <drm/drm.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define DEVPATH "/dev/dma_heap"
diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
index 77aa2897e79f..d78aec662586 100644
--- a/tools/testing/selftests/drivers/dma-buf/udmabuf.c
+++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
@@ -16,7 +16,7 @@
#include <sys/mman.h>
#include <linux/memfd.h>
#include <linux/udmabuf.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#define TEST_PREFIX "drivers/dma-buf/udmabuf"
#define NUM_PAGES 4
diff --git a/tools/testing/selftests/drivers/net/gro.c b/tools/testing/selftests/drivers/net/gro.c
index 995b492f5bcb..e894037d2e3e 100644
--- a/tools/testing/selftests/drivers/net/gro.c
+++ b/tools/testing/selftests/drivers/net/gro.c
@@ -57,7 +57,7 @@
#include <string.h>
#include <unistd.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#include "../../net/lib/ksft.h"
#define DPORT 8000
diff --git a/tools/testing/selftests/drivers/net/hw/toeplitz.c b/tools/testing/selftests/drivers/net/hw/toeplitz.c
index a4d04438c313..d23b3b0c20a3 100644
--- a/tools/testing/selftests/drivers/net/hw/toeplitz.c
+++ b/tools/testing/selftests/drivers/net/hw/toeplitz.c
@@ -55,7 +55,7 @@
#include <ynl.h>
#include "ethtool-user.h"
-#include "../../../kselftest.h"
+#include "kselftest.h"
#include "../../../net/lib/ksft.h"
#define TOEPLITZ_KEY_MIN_LEN 40
diff --git a/tools/testing/selftests/drivers/ntsync/ntsync.c b/tools/testing/selftests/drivers/ntsync/ntsync.c
index 3aad311574c4..e6a37214aa46 100644
--- a/tools/testing/selftests/drivers/ntsync/ntsync.c
+++ b/tools/testing/selftests/drivers/ntsync/ntsync.c
@@ -12,7 +12,7 @@
#include <time.h>
#include <pthread.h>
#include <linux/ntsync.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
static int read_sem_state(int sem, __u32 *count, __u32 *max)
{
diff --git a/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c b/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c
index 7ee7492138c6..14df9aa07308 100644
--- a/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c
+++ b/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c
@@ -14,7 +14,7 @@
#include <asm/uvdevice.h>
-#include "../../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define UV_PATH "/dev/uv"
#define BUFFER_SIZE 0x200
diff --git a/tools/testing/selftests/exec/check-exec.c b/tools/testing/selftests/exec/check-exec.c
index 55bce47e56b7..f2397e75aa7c 100644
--- a/tools/testing/selftests/exec/check-exec.c
+++ b/tools/testing/selftests/exec/check-exec.c
@@ -30,7 +30,7 @@
#define _ASM_GENERIC_FCNTL_H
#include <linux/fcntl.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
static int sys_execveat(int dirfd, const char *pathname, char *const argv[],
char *const envp[], int flags)
diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c
index 8fb7395fd35b..d37c068ed5fe 100644
--- a/tools/testing/selftests/exec/execveat.c
+++ b/tools/testing/selftests/exec/execveat.c
@@ -21,7 +21,7 @@
#include <string.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define TESTS_EXPECTED 54
#define TEST_NAME_LEN (PATH_MAX * 4)
diff --git a/tools/testing/selftests/exec/load_address.c b/tools/testing/selftests/exec/load_address.c
index 8257fddba8c8..55fd3732f029 100644
--- a/tools/testing/selftests/exec/load_address.c
+++ b/tools/testing/selftests/exec/load_address.c
@@ -6,7 +6,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
-#include "../kselftest.h"
+#include "kselftest.h"
struct Statistics {
unsigned long long load_address;
diff --git a/tools/testing/selftests/exec/non-regular.c b/tools/testing/selftests/exec/non-regular.c
index cd3a34aca93e..14ac36487df5 100644
--- a/tools/testing/selftests/exec/non-regular.c
+++ b/tools/testing/selftests/exec/non-regular.c
@@ -9,7 +9,7 @@
#include <sys/sysmacros.h>
#include <sys/types.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
/* Remove a file, ignoring the result if it didn't exist. */
void rm(struct __test_metadata *_metadata, const char *pathname,
diff --git a/tools/testing/selftests/exec/null-argv.c b/tools/testing/selftests/exec/null-argv.c
index c19726e710d1..4940aee5bb38 100644
--- a/tools/testing/selftests/exec/null-argv.c
+++ b/tools/testing/selftests/exec/null-argv.c
@@ -5,7 +5,7 @@
#include <sys/types.h>
#include <sys/wait.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define FORK(exec) \
do { \
diff --git a/tools/testing/selftests/exec/recursion-depth.c b/tools/testing/selftests/exec/recursion-depth.c
index 438c8ff2fd26..7b5c4f6d1928 100644
--- a/tools/testing/selftests/exec/recursion-depth.c
+++ b/tools/testing/selftests/exec/recursion-depth.c
@@ -23,7 +23,7 @@
#include <fcntl.h>
#include <sys/mount.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
int main(void)
{
diff --git a/tools/testing/selftests/fchmodat2/fchmodat2_test.c b/tools/testing/selftests/fchmodat2/fchmodat2_test.c
index e0319417124d..e397339495f6 100644
--- a/tools/testing/selftests/fchmodat2/fchmodat2_test.c
+++ b/tools/testing/selftests/fchmodat2/fchmodat2_test.c
@@ -7,7 +7,7 @@
#include <syscall.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
int sys_fchmodat2(int dfd, const char *filename, mode_t mode, int flags)
{
diff --git a/tools/testing/selftests/filelock/ofdlocks.c b/tools/testing/selftests/filelock/ofdlocks.c
index a55b79810ab2..ff8d47fc373a 100644
--- a/tools/testing/selftests/filelock/ofdlocks.c
+++ b/tools/testing/selftests/filelock/ofdlocks.c
@@ -6,7 +6,7 @@
#include <stdio.h>
#include <unistd.h>
#include <string.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static int lock_set(int fd, struct flock *fl)
{
diff --git a/tools/testing/selftests/filesystems/anon_inode_test.c b/tools/testing/selftests/filesystems/anon_inode_test.c
index 73e0a4d4fb2f..94c6c81c2301 100644
--- a/tools/testing/selftests/filesystems/anon_inode_test.c
+++ b/tools/testing/selftests/filesystems/anon_inode_test.c
@@ -6,7 +6,7 @@
#include <stdio.h>
#include <sys/stat.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "wrappers.h"
TEST(anon_inode_no_chown)
diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
index 39a68078a79b..a1a79a6fef17 100644
--- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
+++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
@@ -21,7 +21,7 @@
#include <linux/android/binder.h>
#include <linux/android/binderfs.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define DEFAULT_THREADS 4
diff --git a/tools/testing/selftests/filesystems/devpts_pts.c b/tools/testing/selftests/filesystems/devpts_pts.c
index b1fc9b916ace..54fea349204e 100644
--- a/tools/testing/selftests/filesystems/devpts_pts.c
+++ b/tools/testing/selftests/filesystems/devpts_pts.c
@@ -11,7 +11,7 @@
#include <asm/ioctls.h>
#include <sys/mount.h>
#include <sys/wait.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static bool terminal_dup2(int duplicate, int original)
{
diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
index 65ede506305c..8bc57a2ef966 100644
--- a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
+++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
@@ -11,7 +11,7 @@
#include <sys/epoll.h>
#include <sys/socket.h>
#include <sys/eventfd.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
struct epoll_mtcontext
{
diff --git a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c
index 72d51ad0ee0e..1b48f267157d 100644
--- a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c
+++ b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c
@@ -11,7 +11,7 @@
#include <pthread.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define EVENTFD_TEST_ITERATIONS 100000UL
diff --git a/tools/testing/selftests/filesystems/fclog.c b/tools/testing/selftests/filesystems/fclog.c
index 912a8b755c3b..551c4a0f395a 100644
--- a/tools/testing/selftests/filesystems/fclog.c
+++ b/tools/testing/selftests/filesystems/fclog.c
@@ -13,7 +13,7 @@
#include <unistd.h>
#include <sys/mount.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#define ASSERT_ERRNO(expected, _t, seen) \
__EXPECT(expected, #expected, \
diff --git a/tools/testing/selftests/filesystems/file_stressor.c b/tools/testing/selftests/filesystems/file_stressor.c
index 01dd89f8e52f..141badd671a9 100644
--- a/tools/testing/selftests/filesystems/file_stressor.c
+++ b/tools/testing/selftests/filesystems/file_stressor.c
@@ -12,7 +12,7 @@
#include <sys/mount.h>
#include <unistd.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include <linux/types.h>
#include <linux/mount.h>
diff --git a/tools/testing/selftests/filesystems/fuse/fusectl_test.c b/tools/testing/selftests/filesystems/fuse/fusectl_test.c
index 8d124d1cacb2..0d1d012c35ed 100644
--- a/tools/testing/selftests/filesystems/fuse/fusectl_test.c
+++ b/tools/testing/selftests/filesystems/fuse/fusectl_test.c
@@ -17,7 +17,7 @@
#include <sched.h>
#include <linux/limits.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define FUSECTL_MOUNTPOINT "/sys/fs/fuse/connections"
#define FUSE_MOUNTPOINT "/tmp/fuse_mnt_XXXXXX"
diff --git a/tools/testing/selftests/filesystems/kernfs_test.c b/tools/testing/selftests/filesystems/kernfs_test.c
index 16538b3b318e..84c2b910a60d 100644
--- a/tools/testing/selftests/filesystems/kernfs_test.c
+++ b/tools/testing/selftests/filesystems/kernfs_test.c
@@ -7,7 +7,7 @@
#include <sys/stat.h>
#include <sys/xattr.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "wrappers.h"
TEST(kernfs_listxattr)
diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
index e4b7c2b457ee..6381af6a40e3 100644
--- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
+++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
@@ -19,7 +19,7 @@ typedef struct {
#include <sys/syscall.h>
#include <sys/fanotify.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "../statmount/statmount.h"
#include "../utils.h"
diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c
index 9f57ca46e3af..320ee25dc8a5 100644
--- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c
+++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c
@@ -19,7 +19,7 @@ typedef struct {
#include <sys/syscall.h>
#include <sys/fanotify.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "../statmount/statmount.h"
#include "../utils.h"
diff --git a/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
index a3d8015897e9..61e55dfbf121 100644
--- a/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
+++ b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
@@ -12,7 +12,7 @@
#include <sys/mount.h>
#include <unistd.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define MNT_NS_COUNT 11
#define MNT_NS_LAST_INDEX 10
diff --git a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
index 31db54b00e64..8924cea6aa4b 100644
--- a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
+++ b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
@@ -15,7 +15,7 @@
#include <sched.h>
#include <fcntl.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#include "log.h"
#include "../wrappers.h"
diff --git a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
index dc0449fa628f..3c0b93183348 100644
--- a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
+++ b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
@@ -12,7 +12,7 @@
#include <sys/mount.h>
#include <unistd.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "../../pidfd/pidfd.h"
#include "log.h"
#include "../utils.h"
diff --git a/tools/testing/selftests/filesystems/statmount/listmount_test.c b/tools/testing/selftests/filesystems/statmount/listmount_test.c
index 15f0834f7557..8bc82f38c42f 100644
--- a/tools/testing/selftests/filesystems/statmount/listmount_test.c
+++ b/tools/testing/selftests/filesystems/statmount/listmount_test.c
@@ -11,7 +11,7 @@
#include <unistd.h>
#include "statmount.h"
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#ifndef LISTMOUNT_REVERSE
#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */
diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c
index f048042e53e9..6e53430423d2 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount_test.c
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c
@@ -13,7 +13,7 @@
#include <linux/stat.h>
#include "statmount.h"
-#include "../../kselftest.h"
+#include "kselftest.h"
static const char *const known_fs[] = {
"9p", "adfs", "affs", "afs", "aio", "anon_inodefs", "apparmorfs",
diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
index 605a3fa16bf7..d56d4103182f 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
@@ -15,7 +15,7 @@
#include "statmount.h"
#include "../utils.h"
-#include "../../kselftest.h"
+#include "kselftest.h"
#define NSID_PASS 0
#define NSID_FAIL 1
diff --git a/tools/testing/selftests/filesystems/utils.c b/tools/testing/selftests/filesystems/utils.c
index a0c64f415a7f..c9dd5412b37b 100644
--- a/tools/testing/selftests/filesystems/utils.c
+++ b/tools/testing/selftests/filesystems/utils.c
@@ -20,7 +20,7 @@
#include <sys/xattr.h>
#include <sys/mount.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "wrappers.h"
#include "utils.h"
diff --git a/tools/testing/selftests/futex/functional/futex_numa_mpol.c b/tools/testing/selftests/futex/functional/futex_numa_mpol.c
index d037a3f10ee8..ab8555752137 100644
--- a/tools/testing/selftests/futex/functional/futex_numa_mpol.c
+++ b/tools/testing/selftests/futex/functional/futex_numa_mpol.c
@@ -18,7 +18,7 @@
#include "futextest.h"
#include "futex2test.h"
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define MAX_THREADS 64
diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c
index 3b7b5851f290..e8079d7c65e8 100644
--- a/tools/testing/selftests/futex/functional/futex_priv_hash.c
+++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c
@@ -14,7 +14,7 @@
#include <linux/prctl.h>
#include <sys/prctl.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define MAX_THREADS 64
diff --git a/tools/testing/selftests/futex/functional/futex_requeue.c b/tools/testing/selftests/futex/functional/futex_requeue.c
index 69e2555b6039..35d4be23db5d 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue.c
@@ -9,7 +9,7 @@
#include <limits.h>
#include "futextest.h"
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define timeout_ns 30000000
#define WAKE_WAIT_US 10000
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
index f299d75848cd..46d2858e15a8 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
@@ -29,7 +29,7 @@
#include "atomic.h"
#include "futextest.h"
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define MAX_WAKE_ITERS 1000
#define THREAD_MAX 10
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
index 77135a22a583..f686e605359c 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
@@ -25,7 +25,7 @@
#include <time.h>
#include "futextest.h"
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
futex_t f1 = FUTEX_INITIALIZER;
futex_t f2 = FUTEX_INITIALIZER;
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
index e34ee0f9ebcc..a18ccae73eb1 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
@@ -27,7 +27,7 @@
#include "atomic.h"
#include "futextest.h"
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define DELAY_US 100
diff --git a/tools/testing/selftests/futex/functional/futex_wait.c b/tools/testing/selftests/futex/functional/futex_wait.c
index 152ca4612886..0e69c53524c1 100644
--- a/tools/testing/selftests/futex/functional/futex_wait.c
+++ b/tools/testing/selftests/futex/functional/futex_wait.c
@@ -11,7 +11,7 @@
#include <fcntl.h>
#include "futextest.h"
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define timeout_ns 30000000
#define WAKE_WAIT_US 10000
diff --git a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
index 8952ebda14ab..2a749f9b14eb 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
@@ -28,7 +28,7 @@
#include <signal.h>
#include "futextest.h"
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define PAGE_SZ 4096
diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
index 0c8766aced2e..674dd13af421 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
@@ -19,7 +19,7 @@
#include "futextest.h"
#include "futex2test.h"
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
static long timeout_ns = 100000; /* 100us default timeout */
static futex_t futex_pi;
diff --git a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
index ce2301500d83..b07d68a67f31 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
@@ -30,7 +30,7 @@
#include <libgen.h>
#include "futextest.h"
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define WAIT_US 5000000
diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
index 36b7a54a4085..9ff936ecf164 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
@@ -24,7 +24,7 @@
#include "futextest.h"
#include "futex2test.h"
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define timeout_ns 100000
diff --git a/tools/testing/selftests/futex/functional/futex_waitv.c b/tools/testing/selftests/futex/functional/futex_waitv.c
index c684b10eb76e..d60876164d4b 100644
--- a/tools/testing/selftests/futex/functional/futex_waitv.c
+++ b/tools/testing/selftests/futex/functional/futex_waitv.c
@@ -18,7 +18,7 @@
#include "futextest.h"
#include "futex2test.h"
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define WAKE_WAIT_US 10000
#define NR_FUTEXES 30
diff --git a/tools/testing/selftests/hid/hid_common.h b/tools/testing/selftests/hid/hid_common.h
index 8085519c47cb..e3b267446fa0 100644
--- a/tools/testing/selftests/hid/hid_common.h
+++ b/tools/testing/selftests/hid/hid_common.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2022-2024 Red Hat */
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include <fcntl.h>
#include <fnmatch.h>
diff --git a/tools/testing/selftests/intel_pstate/aperf.c b/tools/testing/selftests/intel_pstate/aperf.c
index a8acf3996973..953b63e5aa6a 100644
--- a/tools/testing/selftests/intel_pstate/aperf.c
+++ b/tools/testing/selftests/intel_pstate/aperf.c
@@ -11,7 +11,7 @@
#include <errno.h>
#include <string.h>
#include <time.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define MSEC_PER_SEC 1000L
#define NSEC_PER_MSEC 1000000L
diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h
index 0a0ff6f7926d..5502751d500c 100644
--- a/tools/testing/selftests/iommu/iommufd_utils.h
+++ b/tools/testing/selftests/iommu/iommufd_utils.h
@@ -11,7 +11,7 @@
#include <assert.h>
#include <poll.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "../../../../drivers/iommu/iommufd/iommufd_test.h"
/* Hack to make assertions more readable */
diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c
index 5e36aeeb9901..e107379d185c 100644
--- a/tools/testing/selftests/ipc/msgque.c
+++ b/tools/testing/selftests/ipc/msgque.c
@@ -7,7 +7,7 @@
#include <sys/msg.h>
#include <fcntl.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define MAX_MSG_SIZE 32
diff --git a/tools/testing/selftests/ir/ir_loopback.c b/tools/testing/selftests/ir/ir_loopback.c
index f4a15cbdd5ea..adfcf50b1264 100644
--- a/tools/testing/selftests/ir/ir_loopback.c
+++ b/tools/testing/selftests/ir/ir_loopback.c
@@ -23,7 +23,7 @@
#include <dirent.h>
#include <sys/stat.h>
#include <fcntl.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define TEST_SCANCODES 10
#define SYSFS_PATH_MAX 256
diff --git a/tools/testing/selftests/kcmp/kcmp_test.c b/tools/testing/selftests/kcmp/kcmp_test.c
index d7a8e321bb16..79aa438b7479 100644
--- a/tools/testing/selftests/kcmp/kcmp_test.c
+++ b/tools/testing/selftests/kcmp/kcmp_test.c
@@ -18,7 +18,7 @@
#include <sys/wait.h>
#include <sys/epoll.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static long sys_kcmp(int pid1, int pid2, int type, unsigned long fd1, unsigned long fd2)
{
diff --git a/tools/testing/selftests/kho/vmtest.sh b/tools/testing/selftests/kho/vmtest.sh
index 3f6c17166846..49fdac8e8b15 100755
--- a/tools/testing/selftests/kho/vmtest.sh
+++ b/tools/testing/selftests/kho/vmtest.sh
@@ -59,6 +59,7 @@ function build_kernel() {
tee "$kconfig" > "$kho_config" <<EOF
CONFIG_BLK_DEV_INITRD=y
CONFIG_KEXEC_HANDOVER=y
+CONFIG_KEXEC_HANDOVER_DEBUGFS=y
CONFIG_TEST_KEXEC_HANDOVER=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_VM=y
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 3f66e862e83e..baae6b7ded41 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -14,7 +14,7 @@
*
* .. code-block:: c
*
- * #include "../kselftest_harness.h"
+ * #include "kselftest_harness.h"
*
* TEST(standalone_test) {
* do_some_stuff;
diff --git a/tools/testing/selftests/kselftest_harness/harness-selftest.c b/tools/testing/selftests/kselftest_harness/harness-selftest.c
index b555493bdb4d..7820bb5d0e6d 100644
--- a/tools/testing/selftests/kselftest_harness/harness-selftest.c
+++ b/tools/testing/selftests/kselftest_harness/harness-selftest.c
@@ -8,7 +8,7 @@
/* Avoid any inconsistencies */
#define TH_LOG_STREAM stdout
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
static void test_helper(struct __test_metadata *_metadata)
{
diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h
index 02fd1393947a..44eb433e9666 100644
--- a/tools/testing/selftests/landlock/audit.h
+++ b/tools/testing/selftests/landlock/audit.h
@@ -20,7 +20,7 @@
#include <sys/time.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h
index 9acecae36f51..230b75f6015b 100644
--- a/tools/testing/selftests/landlock/common.h
+++ b/tools/testing/selftests/landlock/common.h
@@ -17,7 +17,7 @@
#include <sys/wait.h>
#include <unistd.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "wrappers.h"
#define TMP_DIR "tmp"
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index a448fae57831..f02cc8a2e4ae 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -199,6 +199,9 @@ clean: $(if $(TEST_GEN_MODS_DIR),clean_mods_dir)
# Build with _GNU_SOURCE by default
CFLAGS += -D_GNU_SOURCE=
+# Additional include paths needed by kselftest.h and local headers
+CFLAGS += -I${top_srcdir}/tools/testing/selftests
+
# Enables to extend CFLAGS and LDFLAGS from command line, e.g.
# make USERCFLAGS=-Werror USERLDFLAGS=-static
CFLAGS += $(USERCFLAGS)
diff --git a/tools/testing/selftests/liveupdate/.gitignore b/tools/testing/selftests/liveupdate/.gitignore
new file mode 100644
index 000000000000..661827083ab6
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/.gitignore
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
+*
+!/**/
+!*.c
+!*.h
+!*.sh
+!.gitignore
+!config
+!Makefile
diff --git a/tools/testing/selftests/liveupdate/Makefile b/tools/testing/selftests/liveupdate/Makefile
new file mode 100644
index 000000000000..080754787ede
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/Makefile
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+LIB_C += luo_test_utils.c
+
+TEST_GEN_PROGS += liveupdate
+
+TEST_GEN_PROGS_EXTENDED += luo_kexec_simple
+TEST_GEN_PROGS_EXTENDED += luo_multi_session
+
+TEST_FILES += do_kexec.sh
+
+include ../lib.mk
+
+CFLAGS += $(KHDR_INCLUDES)
+CFLAGS += -Wall -O2 -Wno-unused-function
+CFLAGS += -MD
+
+LIB_O := $(patsubst %.c, $(OUTPUT)/%.o, $(LIB_C))
+TEST_O := $(patsubst %, %.o, $(TEST_GEN_PROGS))
+TEST_O += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED))
+
+TEST_DEP_FILES := $(patsubst %.o, %.d, $(LIB_O))
+TEST_DEP_FILES += $(patsubst %.o, %.d, $(TEST_O))
+-include $(TEST_DEP_FILES)
+
+$(LIB_O): $(OUTPUT)/%.o: %.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/%: %.o $(LIB_O)
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LIB_O) $(LDLIBS) -o $@
+
+EXTRA_CLEAN += $(LIB_O)
+EXTRA_CLEAN += $(TEST_O)
+EXTRA_CLEAN += $(TEST_DEP_FILES)
diff --git a/tools/testing/selftests/liveupdate/config b/tools/testing/selftests/liveupdate/config
new file mode 100644
index 000000000000..91d03f9a6a39
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/config
@@ -0,0 +1,11 @@
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_KEXEC_FILE=y
+CONFIG_KEXEC_HANDOVER=y
+CONFIG_KEXEC_HANDOVER_ENABLE_DEFAULT=y
+CONFIG_KEXEC_HANDOVER_DEBUGFS=y
+CONFIG_KEXEC_HANDOVER_DEBUG=y
+CONFIG_LIVEUPDATE=y
+CONFIG_LIVEUPDATE_TEST=y
+CONFIG_MEMFD_CREATE=y
+CONFIG_TMPFS=y
+CONFIG_SHMEM=y
diff --git a/tools/testing/selftests/liveupdate/do_kexec.sh b/tools/testing/selftests/liveupdate/do_kexec.sh
new file mode 100755
index 000000000000..3c7c6cafbef8
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/do_kexec.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+set -e
+
+# Use $KERNEL and $INITRAMFS to pass custom Kernel and optional initramfs
+
+KERNEL="${KERNEL:-/boot/bzImage}"
+set -- -l -s --reuse-cmdline "$KERNEL"
+
+INITRAMFS="${INITRAMFS:-/boot/initramfs}"
+if [ -f "$INITRAMFS" ]; then
+ set -- "$@" --initrd="$INITRAMFS"
+fi
+
+kexec "$@"
+kexec -e
diff --git a/tools/testing/selftests/liveupdate/liveupdate.c b/tools/testing/selftests/liveupdate/liveupdate.c
new file mode 100644
index 000000000000..c2878e3d5ef9
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/liveupdate.c
@@ -0,0 +1,348 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+
+/*
+ * Selftests for the Live Update Orchestrator.
+ * This test suite verifies the functionality and behavior of the
+ * /dev/liveupdate character device and its session management capabilities.
+ *
+ * Tests include:
+ * - Device access: basic open/close, and enforcement of exclusive access.
+ * - Session management: creation of unique sessions, and duplicate name detection.
+ * - Resource preservation: successfully preserving individual and multiple memfds,
+ * verifying contents remain accessible.
+ * - Complex multi-session scenarios involving mixed empty and populated files.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include <linux/liveupdate.h>
+
+#include "../kselftest.h"
+#include "../kselftest_harness.h"
+
+#define LIVEUPDATE_DEV "/dev/liveupdate"
+
+FIXTURE(liveupdate_device) {
+ int fd1;
+ int fd2;
+};
+
+FIXTURE_SETUP(liveupdate_device)
+{
+ self->fd1 = -1;
+ self->fd2 = -1;
+}
+
+FIXTURE_TEARDOWN(liveupdate_device)
+{
+ if (self->fd1 >= 0)
+ close(self->fd1);
+ if (self->fd2 >= 0)
+ close(self->fd2);
+}
+
+/*
+ * Test Case: Basic Open and Close
+ *
+ * Verifies that the /dev/liveupdate device can be opened and subsequently
+ * closed without errors. Skips if the device does not exist.
+ */
+TEST_F(liveupdate_device, basic_open_close)
+{
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist.", LIVEUPDATE_DEV);
+
+ ASSERT_GE(self->fd1, 0);
+ ASSERT_EQ(close(self->fd1), 0);
+ self->fd1 = -1;
+}
+
+/*
+ * Test Case: Exclusive Open Enforcement
+ *
+ * Verifies that the /dev/liveupdate device can only be opened by one process
+ * at a time. It checks that a second attempt to open the device fails with
+ * the EBUSY error code.
+ */
+TEST_F(liveupdate_device, exclusive_open)
+{
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist.", LIVEUPDATE_DEV);
+
+ ASSERT_GE(self->fd1, 0);
+ self->fd2 = open(LIVEUPDATE_DEV, O_RDWR);
+ EXPECT_LT(self->fd2, 0);
+ EXPECT_EQ(errno, EBUSY);
+}
+
+/* Helper function to create a LUO session via ioctl. */
+static int create_session(int lu_fd, const char *name)
+{
+ struct liveupdate_ioctl_create_session args = {};
+
+ args.size = sizeof(args);
+ strncpy((char *)args.name, name, sizeof(args.name) - 1);
+
+ if (ioctl(lu_fd, LIVEUPDATE_IOCTL_CREATE_SESSION, &args))
+ return -errno;
+
+ return args.fd;
+}
+
+/*
+ * Test Case: Create Duplicate Session
+ *
+ * Verifies that attempting to create two sessions with the same name fails
+ * on the second attempt with EEXIST.
+ */
+TEST_F(liveupdate_device, create_duplicate_session)
+{
+ int session_fd1, session_fd2;
+
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist", LIVEUPDATE_DEV);
+
+ ASSERT_GE(self->fd1, 0);
+
+ session_fd1 = create_session(self->fd1, "duplicate-session-test");
+ ASSERT_GE(session_fd1, 0);
+
+ session_fd2 = create_session(self->fd1, "duplicate-session-test");
+ EXPECT_LT(session_fd2, 0);
+ EXPECT_EQ(-session_fd2, EEXIST);
+
+ ASSERT_EQ(close(session_fd1), 0);
+}
+
+/*
+ * Test Case: Create Distinct Sessions
+ *
+ * Verifies that creating two sessions with different names succeeds.
+ */
+TEST_F(liveupdate_device, create_distinct_sessions)
+{
+ int session_fd1, session_fd2;
+
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist", LIVEUPDATE_DEV);
+
+ ASSERT_GE(self->fd1, 0);
+
+ session_fd1 = create_session(self->fd1, "distinct-session-1");
+ ASSERT_GE(session_fd1, 0);
+
+ session_fd2 = create_session(self->fd1, "distinct-session-2");
+ ASSERT_GE(session_fd2, 0);
+
+ ASSERT_EQ(close(session_fd1), 0);
+ ASSERT_EQ(close(session_fd2), 0);
+}
+
+static int preserve_fd(int session_fd, int fd_to_preserve, __u64 token)
+{
+ struct liveupdate_session_preserve_fd args = {};
+
+ args.size = sizeof(args);
+ args.fd = fd_to_preserve;
+ args.token = token;
+
+ if (ioctl(session_fd, LIVEUPDATE_SESSION_PRESERVE_FD, &args))
+ return -errno;
+
+ return 0;
+}
+
+/*
+ * Test Case: Preserve MemFD
+ *
+ * Verifies that a valid memfd can be successfully preserved in a session and
+ * that its contents remain intact after the preservation call.
+ */
+TEST_F(liveupdate_device, preserve_memfd)
+{
+ const char *test_str = "hello liveupdate";
+ char read_buf[64] = {};
+ int session_fd, mem_fd;
+
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist", LIVEUPDATE_DEV);
+ ASSERT_GE(self->fd1, 0);
+
+ session_fd = create_session(self->fd1, "preserve-memfd-test");
+ ASSERT_GE(session_fd, 0);
+
+ mem_fd = memfd_create("test-memfd", 0);
+ ASSERT_GE(mem_fd, 0);
+
+ ASSERT_EQ(write(mem_fd, test_str, strlen(test_str)), strlen(test_str));
+ ASSERT_EQ(preserve_fd(session_fd, mem_fd, 0x1234), 0);
+ ASSERT_EQ(close(session_fd), 0);
+
+ ASSERT_EQ(lseek(mem_fd, 0, SEEK_SET), 0);
+ ASSERT_EQ(read(mem_fd, read_buf, sizeof(read_buf)), strlen(test_str));
+ ASSERT_STREQ(read_buf, test_str);
+ ASSERT_EQ(close(mem_fd), 0);
+}
+
+/*
+ * Test Case: Preserve Multiple MemFDs
+ *
+ * Verifies that multiple memfds can be preserved in a single session,
+ * each with a unique token, and that their contents remain distinct and
+ * correct after preservation.
+ */
+TEST_F(liveupdate_device, preserve_multiple_memfds)
+{
+ const char *test_str1 = "data for memfd one";
+ const char *test_str2 = "data for memfd two";
+ char read_buf[64] = {};
+ int session_fd, mem_fd1, mem_fd2;
+
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist", LIVEUPDATE_DEV);
+ ASSERT_GE(self->fd1, 0);
+
+ session_fd = create_session(self->fd1, "preserve-multi-memfd-test");
+ ASSERT_GE(session_fd, 0);
+
+ mem_fd1 = memfd_create("test-memfd-1", 0);
+ ASSERT_GE(mem_fd1, 0);
+ mem_fd2 = memfd_create("test-memfd-2", 0);
+ ASSERT_GE(mem_fd2, 0);
+
+ ASSERT_EQ(write(mem_fd1, test_str1, strlen(test_str1)), strlen(test_str1));
+ ASSERT_EQ(write(mem_fd2, test_str2, strlen(test_str2)), strlen(test_str2));
+
+ ASSERT_EQ(preserve_fd(session_fd, mem_fd1, 0xAAAA), 0);
+ ASSERT_EQ(preserve_fd(session_fd, mem_fd2, 0xBBBB), 0);
+
+ memset(read_buf, 0, sizeof(read_buf));
+ ASSERT_EQ(lseek(mem_fd1, 0, SEEK_SET), 0);
+ ASSERT_EQ(read(mem_fd1, read_buf, sizeof(read_buf)), strlen(test_str1));
+ ASSERT_STREQ(read_buf, test_str1);
+
+ memset(read_buf, 0, sizeof(read_buf));
+ ASSERT_EQ(lseek(mem_fd2, 0, SEEK_SET), 0);
+ ASSERT_EQ(read(mem_fd2, read_buf, sizeof(read_buf)), strlen(test_str2));
+ ASSERT_STREQ(read_buf, test_str2);
+
+ ASSERT_EQ(close(mem_fd1), 0);
+ ASSERT_EQ(close(mem_fd2), 0);
+ ASSERT_EQ(close(session_fd), 0);
+}
+
+/*
+ * Test Case: Preserve Complex Scenario
+ *
+ * Verifies a more complex scenario with multiple sessions and a mix of empty
+ * and non-empty memfds distributed across them.
+ */
+TEST_F(liveupdate_device, preserve_complex_scenario)
+{
+ const char *data1 = "data for session 1";
+ const char *data2 = "data for session 2";
+ char read_buf[64] = {};
+ int session_fd1, session_fd2;
+ int mem_fd_data1, mem_fd_empty1, mem_fd_data2, mem_fd_empty2;
+
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist", LIVEUPDATE_DEV);
+ ASSERT_GE(self->fd1, 0);
+
+ session_fd1 = create_session(self->fd1, "complex-session-1");
+ ASSERT_GE(session_fd1, 0);
+ session_fd2 = create_session(self->fd1, "complex-session-2");
+ ASSERT_GE(session_fd2, 0);
+
+ mem_fd_data1 = memfd_create("data1", 0);
+ ASSERT_GE(mem_fd_data1, 0);
+ ASSERT_EQ(write(mem_fd_data1, data1, strlen(data1)), strlen(data1));
+
+ mem_fd_empty1 = memfd_create("empty1", 0);
+ ASSERT_GE(mem_fd_empty1, 0);
+
+ mem_fd_data2 = memfd_create("data2", 0);
+ ASSERT_GE(mem_fd_data2, 0);
+ ASSERT_EQ(write(mem_fd_data2, data2, strlen(data2)), strlen(data2));
+
+ mem_fd_empty2 = memfd_create("empty2", 0);
+ ASSERT_GE(mem_fd_empty2, 0);
+
+ ASSERT_EQ(preserve_fd(session_fd1, mem_fd_data1, 0x1111), 0);
+ ASSERT_EQ(preserve_fd(session_fd1, mem_fd_empty1, 0x2222), 0);
+ ASSERT_EQ(preserve_fd(session_fd2, mem_fd_data2, 0x3333), 0);
+ ASSERT_EQ(preserve_fd(session_fd2, mem_fd_empty2, 0x4444), 0);
+
+ ASSERT_EQ(lseek(mem_fd_data1, 0, SEEK_SET), 0);
+ ASSERT_EQ(read(mem_fd_data1, read_buf, sizeof(read_buf)), strlen(data1));
+ ASSERT_STREQ(read_buf, data1);
+
+ memset(read_buf, 0, sizeof(read_buf));
+ ASSERT_EQ(lseek(mem_fd_data2, 0, SEEK_SET), 0);
+ ASSERT_EQ(read(mem_fd_data2, read_buf, sizeof(read_buf)), strlen(data2));
+ ASSERT_STREQ(read_buf, data2);
+
+ ASSERT_EQ(lseek(mem_fd_empty1, 0, SEEK_SET), 0);
+ ASSERT_EQ(read(mem_fd_empty1, read_buf, sizeof(read_buf)), 0);
+
+ ASSERT_EQ(lseek(mem_fd_empty2, 0, SEEK_SET), 0);
+ ASSERT_EQ(read(mem_fd_empty2, read_buf, sizeof(read_buf)), 0);
+
+ ASSERT_EQ(close(mem_fd_data1), 0);
+ ASSERT_EQ(close(mem_fd_empty1), 0);
+ ASSERT_EQ(close(mem_fd_data2), 0);
+ ASSERT_EQ(close(mem_fd_empty2), 0);
+ ASSERT_EQ(close(session_fd1), 0);
+ ASSERT_EQ(close(session_fd2), 0);
+}
+
+/*
+ * Test Case: Preserve Unsupported File Descriptor
+ *
+ * Verifies that attempting to preserve a file descriptor that does not have
+ * a registered Live Update handler fails gracefully.
+ * Uses /dev/null as a representative of a file type (character device)
+ * that is not supported by the orchestrator.
+ */
+TEST_F(liveupdate_device, preserve_unsupported_fd)
+{
+ int session_fd, unsupported_fd;
+ int ret;
+
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist", LIVEUPDATE_DEV);
+ ASSERT_GE(self->fd1, 0);
+
+ session_fd = create_session(self->fd1, "unsupported-fd-test");
+ ASSERT_GE(session_fd, 0);
+
+ unsupported_fd = open("/dev/null", O_RDWR);
+ ASSERT_GE(unsupported_fd, 0);
+
+ ret = preserve_fd(session_fd, unsupported_fd, 0xDEAD);
+ EXPECT_EQ(ret, -ENOENT);
+
+ ASSERT_EQ(close(unsupported_fd), 0);
+ ASSERT_EQ(close(session_fd), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/liveupdate/luo_kexec_simple.c b/tools/testing/selftests/liveupdate/luo_kexec_simple.c
new file mode 100644
index 000000000000..d7ac1f3dc4cb
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/luo_kexec_simple.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ *
+ * A simple selftest to validate the end-to-end lifecycle of a LUO session
+ * across a single kexec reboot.
+ */
+
+#include "luo_test_utils.h"
+
+#define TEST_SESSION_NAME "test-session"
+#define TEST_MEMFD_TOKEN 0x1A
+#define TEST_MEMFD_DATA "hello kexec world"
+
+/* Constants for the state-tracking mechanism, specific to this test file. */
+#define STATE_SESSION_NAME "kexec_simple_state"
+#define STATE_MEMFD_TOKEN 999
+
+/* Stage 1: Executed before the kexec reboot. */
+static void run_stage_1(int luo_fd)
+{
+ int session_fd;
+
+ ksft_print_msg("[STAGE 1] Starting pre-kexec setup...\n");
+
+ ksft_print_msg("[STAGE 1] Creating state file for next stage (2)...\n");
+ create_state_file(luo_fd, STATE_SESSION_NAME, STATE_MEMFD_TOKEN, 2);
+
+ ksft_print_msg("[STAGE 1] Creating session '%s' and preserving memfd...\n",
+ TEST_SESSION_NAME);
+ session_fd = luo_create_session(luo_fd, TEST_SESSION_NAME);
+ if (session_fd < 0)
+ fail_exit("luo_create_session for '%s'", TEST_SESSION_NAME);
+
+ if (create_and_preserve_memfd(session_fd, TEST_MEMFD_TOKEN,
+ TEST_MEMFD_DATA) < 0) {
+ fail_exit("create_and_preserve_memfd for token %#x",
+ TEST_MEMFD_TOKEN);
+ }
+
+ close(luo_fd);
+ daemonize_and_wait();
+}
+
+/* Stage 2: Executed after the kexec reboot. */
+static void run_stage_2(int luo_fd, int state_session_fd)
+{
+ int session_fd, mfd, stage;
+
+ ksft_print_msg("[STAGE 2] Starting post-kexec verification...\n");
+
+ restore_and_read_stage(state_session_fd, STATE_MEMFD_TOKEN, &stage);
+ if (stage != 2)
+ fail_exit("Expected stage 2, but state file contains %d", stage);
+
+ ksft_print_msg("[STAGE 2] Retrieving session '%s'...\n", TEST_SESSION_NAME);
+ session_fd = luo_retrieve_session(luo_fd, TEST_SESSION_NAME);
+ if (session_fd < 0)
+ fail_exit("luo_retrieve_session for '%s'", TEST_SESSION_NAME);
+
+ ksft_print_msg("[STAGE 2] Restoring and verifying memfd (token %#x)...\n",
+ TEST_MEMFD_TOKEN);
+ mfd = restore_and_verify_memfd(session_fd, TEST_MEMFD_TOKEN,
+ TEST_MEMFD_DATA);
+ if (mfd < 0)
+ fail_exit("restore_and_verify_memfd for token %#x", TEST_MEMFD_TOKEN);
+ close(mfd);
+
+ ksft_print_msg("[STAGE 2] Test data verified successfully.\n");
+ ksft_print_msg("[STAGE 2] Finalizing test session...\n");
+ if (luo_session_finish(session_fd) < 0)
+ fail_exit("luo_session_finish for test session");
+ close(session_fd);
+
+ ksft_print_msg("[STAGE 2] Finalizing state session...\n");
+ if (luo_session_finish(state_session_fd) < 0)
+ fail_exit("luo_session_finish for state session");
+ close(state_session_fd);
+
+ ksft_print_msg("\n--- SIMPLE KEXEC TEST PASSED ---\n");
+}
+
+int main(int argc, char *argv[])
+{
+ return luo_test(argc, argv, STATE_SESSION_NAME,
+ run_stage_1, run_stage_2);
+}
diff --git a/tools/testing/selftests/liveupdate/luo_multi_session.c b/tools/testing/selftests/liveupdate/luo_multi_session.c
new file mode 100644
index 000000000000..0ee2d795beef
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/luo_multi_session.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ *
+ * A selftest to validate the end-to-end lifecycle of multiple LUO sessions
+ * across a kexec reboot, including empty sessions and sessions with multiple
+ * files.
+ */
+
+#include "luo_test_utils.h"
+
+#define SESSION_EMPTY_1 "multi-test-empty-1"
+#define SESSION_EMPTY_2 "multi-test-empty-2"
+#define SESSION_FILES_1 "multi-test-files-1"
+#define SESSION_FILES_2 "multi-test-files-2"
+
+#define MFD1_TOKEN 0x1001
+#define MFD2_TOKEN 0x2002
+#define MFD3_TOKEN 0x3003
+
+#define MFD1_DATA "Data for session files 1"
+#define MFD2_DATA "First file for session files 2"
+#define MFD3_DATA "Second file for session files 2"
+
+#define STATE_SESSION_NAME "kexec_multi_state"
+#define STATE_MEMFD_TOKEN 998
+
+/* Stage 1: Executed before the kexec reboot. */
+static void run_stage_1(int luo_fd)
+{
+ int s_empty1_fd, s_empty2_fd, s_files1_fd, s_files2_fd;
+
+ ksft_print_msg("[STAGE 1] Starting pre-kexec setup for multi-session test...\n");
+
+ ksft_print_msg("[STAGE 1] Creating state file for next stage (2)...\n");
+ create_state_file(luo_fd, STATE_SESSION_NAME, STATE_MEMFD_TOKEN, 2);
+
+ ksft_print_msg("[STAGE 1] Creating empty sessions '%s' and '%s'...\n",
+ SESSION_EMPTY_1, SESSION_EMPTY_2);
+ s_empty1_fd = luo_create_session(luo_fd, SESSION_EMPTY_1);
+ if (s_empty1_fd < 0)
+ fail_exit("luo_create_session for '%s'", SESSION_EMPTY_1);
+
+ s_empty2_fd = luo_create_session(luo_fd, SESSION_EMPTY_2);
+ if (s_empty2_fd < 0)
+ fail_exit("luo_create_session for '%s'", SESSION_EMPTY_2);
+
+ ksft_print_msg("[STAGE 1] Creating session '%s' with one memfd...\n",
+ SESSION_FILES_1);
+
+ s_files1_fd = luo_create_session(luo_fd, SESSION_FILES_1);
+ if (s_files1_fd < 0)
+ fail_exit("luo_create_session for '%s'", SESSION_FILES_1);
+ if (create_and_preserve_memfd(s_files1_fd, MFD1_TOKEN, MFD1_DATA) < 0) {
+ fail_exit("create_and_preserve_memfd for token %#x",
+ MFD1_TOKEN);
+ }
+
+ ksft_print_msg("[STAGE 1] Creating session '%s' with two memfds...\n",
+ SESSION_FILES_2);
+
+ s_files2_fd = luo_create_session(luo_fd, SESSION_FILES_2);
+ if (s_files2_fd < 0)
+ fail_exit("luo_create_session for '%s'", SESSION_FILES_2);
+ if (create_and_preserve_memfd(s_files2_fd, MFD2_TOKEN, MFD2_DATA) < 0) {
+ fail_exit("create_and_preserve_memfd for token %#x",
+ MFD2_TOKEN);
+ }
+ if (create_and_preserve_memfd(s_files2_fd, MFD3_TOKEN, MFD3_DATA) < 0) {
+ fail_exit("create_and_preserve_memfd for token %#x",
+ MFD3_TOKEN);
+ }
+
+ close(luo_fd);
+ daemonize_and_wait();
+}
+
+/* Stage 2: Executed after the kexec reboot. */
+static void run_stage_2(int luo_fd, int state_session_fd)
+{
+ int s_empty1_fd, s_empty2_fd, s_files1_fd, s_files2_fd;
+ int mfd1, mfd2, mfd3, stage;
+
+ ksft_print_msg("[STAGE 2] Starting post-kexec verification...\n");
+
+ restore_and_read_stage(state_session_fd, STATE_MEMFD_TOKEN, &stage);
+ if (stage != 2) {
+ fail_exit("Expected stage 2, but state file contains %d",
+ stage);
+ }
+
+ ksft_print_msg("[STAGE 2] Retrieving all sessions...\n");
+ s_empty1_fd = luo_retrieve_session(luo_fd, SESSION_EMPTY_1);
+ if (s_empty1_fd < 0)
+ fail_exit("luo_retrieve_session for '%s'", SESSION_EMPTY_1);
+
+ s_empty2_fd = luo_retrieve_session(luo_fd, SESSION_EMPTY_2);
+ if (s_empty2_fd < 0)
+ fail_exit("luo_retrieve_session for '%s'", SESSION_EMPTY_2);
+
+ s_files1_fd = luo_retrieve_session(luo_fd, SESSION_FILES_1);
+ if (s_files1_fd < 0)
+ fail_exit("luo_retrieve_session for '%s'", SESSION_FILES_1);
+
+ s_files2_fd = luo_retrieve_session(luo_fd, SESSION_FILES_2);
+ if (s_files2_fd < 0)
+ fail_exit("luo_retrieve_session for '%s'", SESSION_FILES_2);
+
+ ksft_print_msg("[STAGE 2] Verifying contents of session '%s'...\n",
+ SESSION_FILES_1);
+ mfd1 = restore_and_verify_memfd(s_files1_fd, MFD1_TOKEN, MFD1_DATA);
+ if (mfd1 < 0)
+ fail_exit("restore_and_verify_memfd for token %#x", MFD1_TOKEN);
+ close(mfd1);
+
+ ksft_print_msg("[STAGE 2] Verifying contents of session '%s'...\n",
+ SESSION_FILES_2);
+
+ mfd2 = restore_and_verify_memfd(s_files2_fd, MFD2_TOKEN, MFD2_DATA);
+ if (mfd2 < 0)
+ fail_exit("restore_and_verify_memfd for token %#x", MFD2_TOKEN);
+ close(mfd2);
+
+ mfd3 = restore_and_verify_memfd(s_files2_fd, MFD3_TOKEN, MFD3_DATA);
+ if (mfd3 < 0)
+ fail_exit("restore_and_verify_memfd for token %#x", MFD3_TOKEN);
+ close(mfd3);
+
+ ksft_print_msg("[STAGE 2] Test data verified successfully.\n");
+
+ ksft_print_msg("[STAGE 2] Finalizing all test sessions...\n");
+ if (luo_session_finish(s_empty1_fd) < 0)
+ fail_exit("luo_session_finish for '%s'", SESSION_EMPTY_1);
+ close(s_empty1_fd);
+
+ if (luo_session_finish(s_empty2_fd) < 0)
+ fail_exit("luo_session_finish for '%s'", SESSION_EMPTY_2);
+ close(s_empty2_fd);
+
+ if (luo_session_finish(s_files1_fd) < 0)
+ fail_exit("luo_session_finish for '%s'", SESSION_FILES_1);
+ close(s_files1_fd);
+
+ if (luo_session_finish(s_files2_fd) < 0)
+ fail_exit("luo_session_finish for '%s'", SESSION_FILES_2);
+ close(s_files2_fd);
+
+ ksft_print_msg("[STAGE 2] Finalizing state session...\n");
+ if (luo_session_finish(state_session_fd) < 0)
+ fail_exit("luo_session_finish for state session");
+ close(state_session_fd);
+
+ ksft_print_msg("\n--- MULTI-SESSION KEXEC TEST PASSED ---\n");
+}
+
+int main(int argc, char *argv[])
+{
+ return luo_test(argc, argv, STATE_SESSION_NAME,
+ run_stage_1, run_stage_2);
+}
diff --git a/tools/testing/selftests/liveupdate/luo_test_utils.c b/tools/testing/selftests/liveupdate/luo_test_utils.c
new file mode 100644
index 000000000000..3c8721c505df
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/luo_test_utils.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <stdarg.h>
+
+#include "luo_test_utils.h"
+
+int luo_open_device(void)
+{
+ return open(LUO_DEVICE, O_RDWR);
+}
+
+int luo_create_session(int luo_fd, const char *name)
+{
+ struct liveupdate_ioctl_create_session arg = { .size = sizeof(arg) };
+
+ snprintf((char *)arg.name, LIVEUPDATE_SESSION_NAME_LENGTH, "%.*s",
+ LIVEUPDATE_SESSION_NAME_LENGTH - 1, name);
+
+ if (ioctl(luo_fd, LIVEUPDATE_IOCTL_CREATE_SESSION, &arg) < 0)
+ return -errno;
+
+ return arg.fd;
+}
+
+int luo_retrieve_session(int luo_fd, const char *name)
+{
+ struct liveupdate_ioctl_retrieve_session arg = { .size = sizeof(arg) };
+
+ snprintf((char *)arg.name, LIVEUPDATE_SESSION_NAME_LENGTH, "%.*s",
+ LIVEUPDATE_SESSION_NAME_LENGTH - 1, name);
+
+ if (ioctl(luo_fd, LIVEUPDATE_IOCTL_RETRIEVE_SESSION, &arg) < 0)
+ return -errno;
+
+ return arg.fd;
+}
+
+int create_and_preserve_memfd(int session_fd, int token, const char *data)
+{
+ struct liveupdate_session_preserve_fd arg = { .size = sizeof(arg) };
+ long page_size = sysconf(_SC_PAGE_SIZE);
+ void *map = MAP_FAILED;
+ int mfd = -1, ret = -1;
+
+ mfd = memfd_create("test_mfd", 0);
+ if (mfd < 0)
+ return -errno;
+
+ if (ftruncate(mfd, page_size) != 0)
+ goto out;
+
+ map = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, mfd, 0);
+ if (map == MAP_FAILED)
+ goto out;
+
+ snprintf(map, page_size, "%s", data);
+ munmap(map, page_size);
+
+ arg.fd = mfd;
+ arg.token = token;
+ if (ioctl(session_fd, LIVEUPDATE_SESSION_PRESERVE_FD, &arg) < 0)
+ goto out;
+
+ ret = 0;
+out:
+ if (ret != 0 && errno != 0)
+ ret = -errno;
+ if (mfd >= 0)
+ close(mfd);
+ return ret;
+}
+
+int restore_and_verify_memfd(int session_fd, int token,
+ const char *expected_data)
+{
+ struct liveupdate_session_retrieve_fd arg = { .size = sizeof(arg) };
+ long page_size = sysconf(_SC_PAGE_SIZE);
+ void *map = MAP_FAILED;
+ int mfd = -1, ret = -1;
+
+ arg.token = token;
+ if (ioctl(session_fd, LIVEUPDATE_SESSION_RETRIEVE_FD, &arg) < 0)
+ return -errno;
+ mfd = arg.fd;
+
+ map = mmap(NULL, page_size, PROT_READ, MAP_SHARED, mfd, 0);
+ if (map == MAP_FAILED)
+ goto out;
+
+ if (expected_data && strcmp(expected_data, map) != 0) {
+ ksft_print_msg("Data mismatch! Expected '%s', Got '%s'\n",
+ expected_data, (char *)map);
+ ret = -EINVAL;
+ goto out_munmap;
+ }
+
+ ret = mfd;
+out_munmap:
+ munmap(map, page_size);
+out:
+ if (ret < 0 && errno != 0)
+ ret = -errno;
+ if (ret < 0 && mfd >= 0)
+ close(mfd);
+ return ret;
+}
+
+int luo_session_finish(int session_fd)
+{
+ struct liveupdate_session_finish arg = { .size = sizeof(arg) };
+
+ if (ioctl(session_fd, LIVEUPDATE_SESSION_FINISH, &arg) < 0)
+ return -errno;
+
+ return 0;
+}
+
+void create_state_file(int luo_fd, const char *session_name, int token,
+ int next_stage)
+{
+ char buf[32];
+ int state_session_fd;
+
+ state_session_fd = luo_create_session(luo_fd, session_name);
+ if (state_session_fd < 0)
+ fail_exit("luo_create_session for state tracking");
+
+ snprintf(buf, sizeof(buf), "%d", next_stage);
+ if (create_and_preserve_memfd(state_session_fd, token, buf) < 0)
+ fail_exit("create_and_preserve_memfd for state tracking");
+
+ /*
+ * DO NOT close session FD, otherwise it is going to be unpreserved
+ */
+}
+
+void restore_and_read_stage(int state_session_fd, int token, int *stage)
+{
+ char buf[32] = {0};
+ int mfd;
+
+ mfd = restore_and_verify_memfd(state_session_fd, token, NULL);
+ if (mfd < 0)
+ fail_exit("failed to restore state memfd");
+
+ if (read(mfd, buf, sizeof(buf) - 1) < 0)
+ fail_exit("failed to read state mfd");
+
+ *stage = atoi(buf);
+
+ close(mfd);
+}
+
+void daemonize_and_wait(void)
+{
+ pid_t pid;
+
+ ksft_print_msg("[STAGE 1] Forking persistent child to hold sessions...\n");
+
+ pid = fork();
+ if (pid < 0)
+ fail_exit("fork failed");
+
+ if (pid > 0) {
+ ksft_print_msg("[STAGE 1] Child PID: %d. Resources are pinned.\n", pid);
+ ksft_print_msg("[STAGE 1] You may now perform kexec reboot.\n");
+ exit(EXIT_SUCCESS);
+ }
+
+ /* Detach from terminal so closing the window doesn't kill us */
+ if (setsid() < 0)
+ fail_exit("setsid failed");
+
+ close(STDIN_FILENO);
+ close(STDOUT_FILENO);
+ close(STDERR_FILENO);
+
+ /* Change dir to root to avoid locking filesystems */
+ if (chdir("/") < 0)
+ exit(EXIT_FAILURE);
+
+ while (1)
+ sleep(60);
+}
+
+static int parse_stage_args(int argc, char *argv[])
+{
+ static struct option long_options[] = {
+ {"stage", required_argument, 0, 's'},
+ {0, 0, 0, 0}
+ };
+ int option_index = 0;
+ int stage = 1;
+ int opt;
+
+ optind = 1;
+ while ((opt = getopt_long(argc, argv, "s:", long_options, &option_index)) != -1) {
+ switch (opt) {
+ case 's':
+ stage = atoi(optarg);
+ if (stage != 1 && stage != 2)
+ fail_exit("Invalid stage argument");
+ break;
+ default:
+ fail_exit("Unknown argument");
+ }
+ }
+ return stage;
+}
+
+int luo_test(int argc, char *argv[],
+ const char *state_session_name,
+ luo_test_stage1_fn stage1,
+ luo_test_stage2_fn stage2)
+{
+ int target_stage = parse_stage_args(argc, argv);
+ int luo_fd = luo_open_device();
+ int state_session_fd;
+ int detected_stage;
+
+ if (luo_fd < 0) {
+ ksft_exit_skip("Failed to open %s. Is the luo module loaded?\n",
+ LUO_DEVICE);
+ }
+
+ state_session_fd = luo_retrieve_session(luo_fd, state_session_name);
+ if (state_session_fd == -ENOENT)
+ detected_stage = 1;
+ else if (state_session_fd >= 0)
+ detected_stage = 2;
+ else
+ fail_exit("Failed to check for state session");
+
+ if (target_stage != detected_stage) {
+ ksft_exit_fail_msg("Stage mismatch Requested --stage %d, but system is in stage %d.\n"
+ "(State session %s: %s)\n",
+ target_stage, detected_stage, state_session_name,
+ (detected_stage == 2) ? "EXISTS" : "MISSING");
+ }
+
+ if (target_stage == 1)
+ stage1(luo_fd);
+ else
+ stage2(luo_fd, state_session_fd);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/liveupdate/luo_test_utils.h b/tools/testing/selftests/liveupdate/luo_test_utils.h
new file mode 100644
index 000000000000..90099bf49577
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/luo_test_utils.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ *
+ * Utility functions for LUO kselftests.
+ */
+
+#ifndef LUO_TEST_UTILS_H
+#define LUO_TEST_UTILS_H
+
+#include <errno.h>
+#include <string.h>
+#include <linux/liveupdate.h>
+#include "../kselftest.h"
+
+#define LUO_DEVICE "/dev/liveupdate"
+
+#define fail_exit(fmt, ...) \
+ ksft_exit_fail_msg("[%s:%d] " fmt " (errno: %s)\n", \
+ __func__, __LINE__, ##__VA_ARGS__, strerror(errno))
+
+int luo_open_device(void);
+int luo_create_session(int luo_fd, const char *name);
+int luo_retrieve_session(int luo_fd, const char *name);
+int luo_session_finish(int session_fd);
+
+int create_and_preserve_memfd(int session_fd, int token, const char *data);
+int restore_and_verify_memfd(int session_fd, int token, const char *expected_data);
+
+void create_state_file(int luo_fd, const char *session_name, int token,
+ int next_stage);
+void restore_and_read_stage(int state_session_fd, int token, int *stage);
+
+void daemonize_and_wait(void);
+
+typedef void (*luo_test_stage1_fn)(int luo_fd);
+typedef void (*luo_test_stage2_fn)(int luo_fd, int state_session_fd);
+
+int luo_test(int argc, char *argv[], const char *state_session_name,
+ luo_test_stage1_fn stage1, luo_test_stage2_fn stage2);
+
+#endif /* LUO_TEST_UTILS_H */
diff --git a/tools/testing/selftests/lsm/lsm_get_self_attr_test.c b/tools/testing/selftests/lsm/lsm_get_self_attr_test.c
index df215e4aa63f..60caf8528f81 100644
--- a/tools/testing/selftests/lsm/lsm_get_self_attr_test.c
+++ b/tools/testing/selftests/lsm/lsm_get_self_attr_test.c
@@ -13,7 +13,7 @@
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "common.h"
static struct lsm_ctx *next_ctx(struct lsm_ctx *ctxp)
diff --git a/tools/testing/selftests/lsm/lsm_list_modules_test.c b/tools/testing/selftests/lsm/lsm_list_modules_test.c
index 1cc8a977c711..54d59044ace1 100644
--- a/tools/testing/selftests/lsm/lsm_list_modules_test.c
+++ b/tools/testing/selftests/lsm/lsm_list_modules_test.c
@@ -12,7 +12,7 @@
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "common.h"
TEST(size_null_lsm_list_modules)
diff --git a/tools/testing/selftests/lsm/lsm_set_self_attr_test.c b/tools/testing/selftests/lsm/lsm_set_self_attr_test.c
index 732e89fe99c0..dcb6f8aa772e 100644
--- a/tools/testing/selftests/lsm/lsm_set_self_attr_test.c
+++ b/tools/testing/selftests/lsm/lsm_set_self_attr_test.c
@@ -12,7 +12,7 @@
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "common.h"
TEST(ctx_null_lsm_set_self_attr)
diff --git a/tools/testing/selftests/media_tests/media_device_open.c b/tools/testing/selftests/media_tests/media_device_open.c
index 93183a37b133..4396bf2273a4 100644
--- a/tools/testing/selftests/media_tests/media_device_open.c
+++ b/tools/testing/selftests/media_tests/media_device_open.c
@@ -34,7 +34,7 @@
#include <sys/stat.h>
#include <linux/media.h>
-#include "../kselftest.h"
+#include "kselftest.h"
int main(int argc, char **argv)
{
diff --git a/tools/testing/selftests/media_tests/media_device_test.c b/tools/testing/selftests/media_tests/media_device_test.c
index 4b9953359e40..6e4a8090a0eb 100644
--- a/tools/testing/selftests/media_tests/media_device_test.c
+++ b/tools/testing/selftests/media_tests/media_device_test.c
@@ -39,7 +39,7 @@
#include <time.h>
#include <linux/media.h>
-#include "../kselftest.h"
+#include "kselftest.h"
int main(int argc, char **argv)
{
diff --git a/tools/testing/selftests/membarrier/membarrier_test_impl.h b/tools/testing/selftests/membarrier/membarrier_test_impl.h
index af89855adb7b..f6d7c44b2288 100644
--- a/tools/testing/selftests/membarrier/membarrier_test_impl.h
+++ b/tools/testing/selftests/membarrier/membarrier_test_impl.h
@@ -7,7 +7,7 @@
#include <string.h>
#include <pthread.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static int registrations;
diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c
index 17ed3e9917ca..cdd022c1c497 100644
--- a/tools/testing/selftests/mincore/mincore_selftest.c
+++ b/tools/testing/selftests/mincore/mincore_selftest.c
@@ -15,8 +15,8 @@
#include <string.h>
#include <fcntl.h>
-#include "../kselftest.h"
-#include "../kselftest_harness.h"
+#include "kselftest.h"
+#include "kselftest_harness.h"
/* Default test file size: 4MB */
#define MB (1UL << 20)
diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c
index 9bc4591c7b16..30209c40b697 100644
--- a/tools/testing/selftests/mm/compaction_test.c
+++ b/tools/testing/selftests/mm/compaction_test.c
@@ -16,7 +16,7 @@
#include <unistd.h>
#include <string.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define MAP_SIZE_MB 100
#define MAP_SIZE (MAP_SIZE_MB * 1024 * 1024)
diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c
index 6560c26f47d1..accfd198dbda 100644
--- a/tools/testing/selftests/mm/cow.c
+++ b/tools/testing/selftests/mm/cow.c
@@ -27,7 +27,7 @@
#endif /* LOCAL_CONFIG_HAVE_LIBURING */
#include "../../../../mm/gup_test.h"
-#include "../kselftest.h"
+#include "kselftest.h"
#include "vm_util.h"
#include "thp_settings.h"
diff --git a/tools/testing/selftests/mm/droppable.c b/tools/testing/selftests/mm/droppable.c
index f3d9ecf96890..44940f75c461 100644
--- a/tools/testing/selftests/mm/droppable.c
+++ b/tools/testing/selftests/mm/droppable.c
@@ -13,7 +13,7 @@
#include <sys/mman.h>
#include <linux/mman.h>
-#include "../kselftest.h"
+#include "kselftest.h"
int main(int argc, char *argv[])
{
diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c
index 795bf3f39f44..dbd21d66d383 100644
--- a/tools/testing/selftests/mm/guard-regions.c
+++ b/tools/testing/selftests/mm/guard-regions.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#define _GNU_SOURCE
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include <asm-generic/mman.h> /* Force the import of the tools version. */
#include <assert.h>
#include <errno.h>
diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c
index 268dadb8ce43..6279893a0adc 100644
--- a/tools/testing/selftests/mm/gup_longterm.c
+++ b/tools/testing/selftests/mm/gup_longterm.c
@@ -27,7 +27,7 @@
#endif /* LOCAL_CONFIG_HAVE_LIBURING */
#include "../../../../mm/gup_test.h"
-#include "../kselftest.h"
+#include "kselftest.h"
#include "vm_util.h"
static size_t pagesize;
diff --git a/tools/testing/selftests/mm/gup_test.c b/tools/testing/selftests/mm/gup_test.c
index 40c1538a17b4..fb8f9ae49efa 100644
--- a/tools/testing/selftests/mm/gup_test.c
+++ b/tools/testing/selftests/mm/gup_test.c
@@ -12,7 +12,7 @@
#include <pthread.h>
#include <assert.h>
#include <mm/gup_test.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "vm_util.h"
#define MB (1UL << 20)
diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c
index 5a1525f72daa..e8328c89d855 100644
--- a/tools/testing/selftests/mm/hmm-tests.c
+++ b/tools/testing/selftests/mm/hmm-tests.c
@@ -10,7 +10,7 @@
* bugs.
*/
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include <errno.h>
#include <fcntl.h>
diff --git a/tools/testing/selftests/mm/hugepage-mmap.c b/tools/testing/selftests/mm/hugepage-mmap.c
index 3b1b532f1cbb..d543419de040 100644
--- a/tools/testing/selftests/mm/hugepage-mmap.c
+++ b/tools/testing/selftests/mm/hugepage-mmap.c
@@ -15,7 +15,7 @@
#include <unistd.h>
#include <sys/mman.h>
#include <fcntl.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define LENGTH (256UL*1024*1024)
#define PROTECTION (PROT_READ | PROT_WRITE)
diff --git a/tools/testing/selftests/mm/hugepage-mremap.c b/tools/testing/selftests/mm/hugepage-mremap.c
index 2bd1dac75c3f..b8f7d92e5a35 100644
--- a/tools/testing/selftests/mm/hugepage-mremap.c
+++ b/tools/testing/selftests/mm/hugepage-mremap.c
@@ -24,7 +24,7 @@
#include <sys/ioctl.h>
#include <string.h>
#include <stdbool.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "vm_util.h"
#define DEFAULT_LENGTH_MB 10UL
diff --git a/tools/testing/selftests/mm/hugetlb-madvise.c b/tools/testing/selftests/mm/hugetlb-madvise.c
index c5940c0595be..05d9d2805ae4 100644
--- a/tools/testing/selftests/mm/hugetlb-madvise.c
+++ b/tools/testing/selftests/mm/hugetlb-madvise.c
@@ -19,7 +19,7 @@
#include <sys/mman.h>
#include <fcntl.h>
#include "vm_util.h"
-#include "../kselftest.h"
+#include "kselftest.h"
#define MIN_FREE_PAGES 20
#define NR_HUGE_PAGES 10 /* common number of pages to map/allocate */
diff --git a/tools/testing/selftests/mm/hugetlb-read-hwpoison.c b/tools/testing/selftests/mm/hugetlb-read-hwpoison.c
index ba6cc6f9cabc..46230462ad48 100644
--- a/tools/testing/selftests/mm/hugetlb-read-hwpoison.c
+++ b/tools/testing/selftests/mm/hugetlb-read-hwpoison.c
@@ -11,7 +11,7 @@
#include <errno.h>
#include <stdbool.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define PREFIX " ... "
#define ERROR_PREFIX " !!! "
diff --git a/tools/testing/selftests/mm/hugetlb-soft-offline.c b/tools/testing/selftests/mm/hugetlb-soft-offline.c
index f086f0e04756..a8bc02688085 100644
--- a/tools/testing/selftests/mm/hugetlb-soft-offline.c
+++ b/tools/testing/selftests/mm/hugetlb-soft-offline.c
@@ -24,7 +24,7 @@
#include <sys/statfs.h>
#include <sys/types.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#ifndef MADV_SOFT_OFFLINE
#define MADV_SOFT_OFFLINE 101
diff --git a/tools/testing/selftests/mm/hugetlb_dio.c b/tools/testing/selftests/mm/hugetlb_dio.c
index db63abe5ee5e..9ac62eb4c97d 100644
--- a/tools/testing/selftests/mm/hugetlb_dio.c
+++ b/tools/testing/selftests/mm/hugetlb_dio.c
@@ -18,7 +18,7 @@
#include <string.h>
#include <sys/mman.h>
#include "vm_util.h"
-#include "../kselftest.h"
+#include "kselftest.h"
void run_dio_using_hugetlb(unsigned int start_off, unsigned int end_off)
{
diff --git a/tools/testing/selftests/mm/hugetlb_fault_after_madv.c b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c
index e2640529dbb2..b4b257775b74 100644
--- a/tools/testing/selftests/mm/hugetlb_fault_after_madv.c
+++ b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c
@@ -9,7 +9,7 @@
#include <signal.h>
#include "vm_util.h"
-#include "../kselftest.h"
+#include "kselftest.h"
#define INLOOP_ITER 100
diff --git a/tools/testing/selftests/mm/hugetlb_madv_vs_map.c b/tools/testing/selftests/mm/hugetlb_madv_vs_map.c
index 8f122a0f0828..efd774b41389 100644
--- a/tools/testing/selftests/mm/hugetlb_madv_vs_map.c
+++ b/tools/testing/selftests/mm/hugetlb_madv_vs_map.c
@@ -25,7 +25,7 @@
#include <unistd.h>
#include "vm_util.h"
-#include "../kselftest.h"
+#include "kselftest.h"
#define INLOOP_ITER 100
diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c
index 95afa5cfc062..8d874c4754f3 100644
--- a/tools/testing/selftests/mm/ksm_functional_tests.c
+++ b/tools/testing/selftests/mm/ksm_functional_tests.c
@@ -21,7 +21,7 @@
#include <sys/wait.h>
#include <linux/userfaultfd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "vm_util.h"
#define KiB 1024u
diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c
index b77462b5c240..a0b48b839d54 100644
--- a/tools/testing/selftests/mm/ksm_tests.c
+++ b/tools/testing/selftests/mm/ksm_tests.c
@@ -12,7 +12,7 @@
#include <stdint.h>
#include <err.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include <include/vdso/time64.h>
#include "vm_util.h"
#include "thp_settings.h"
diff --git a/tools/testing/selftests/mm/madv_populate.c b/tools/testing/selftests/mm/madv_populate.c
index d8d11bc67ddc..88050e0f829a 100644
--- a/tools/testing/selftests/mm/madv_populate.c
+++ b/tools/testing/selftests/mm/madv_populate.c
@@ -17,7 +17,7 @@
#include <linux/mman.h>
#include <sys/mman.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "vm_util.h"
/*
diff --git a/tools/testing/selftests/mm/map_fixed_noreplace.c b/tools/testing/selftests/mm/map_fixed_noreplace.c
index 1e9980b8993c..11241edde7fe 100644
--- a/tools/testing/selftests/mm/map_fixed_noreplace.c
+++ b/tools/testing/selftests/mm/map_fixed_noreplace.c
@@ -12,7 +12,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static void dump_maps(void)
{
diff --git a/tools/testing/selftests/mm/map_hugetlb.c b/tools/testing/selftests/mm/map_hugetlb.c
index b47399feab53..aa409107611b 100644
--- a/tools/testing/selftests/mm/map_hugetlb.c
+++ b/tools/testing/selftests/mm/map_hugetlb.c
@@ -11,7 +11,7 @@
#include <sys/mman.h>
#include <fcntl.h>
#include "vm_util.h"
-#include "../kselftest.h"
+#include "kselftest.h"
#define LENGTH (256UL*1024*1024)
#define PROTECTION (PROT_READ | PROT_WRITE)
diff --git a/tools/testing/selftests/mm/map_populate.c b/tools/testing/selftests/mm/map_populate.c
index 9df2636c829b..712327f4e932 100644
--- a/tools/testing/selftests/mm/map_populate.c
+++ b/tools/testing/selftests/mm/map_populate.c
@@ -16,7 +16,7 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "vm_util.h"
diff --git a/tools/testing/selftests/mm/mdwe_test.c b/tools/testing/selftests/mm/mdwe_test.c
index 200bedcdc32e..647779653da0 100644
--- a/tools/testing/selftests/mm/mdwe_test.c
+++ b/tools/testing/selftests/mm/mdwe_test.c
@@ -14,7 +14,7 @@
#include <sys/wait.h>
#include <unistd.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#ifndef __aarch64__
# define PROT_BTI 0
diff --git a/tools/testing/selftests/mm/memfd_secret.c b/tools/testing/selftests/mm/memfd_secret.c
index 9a0597310a76..aac4f795c327 100644
--- a/tools/testing/selftests/mm/memfd_secret.c
+++ b/tools/testing/selftests/mm/memfd_secret.c
@@ -22,7 +22,7 @@
#include <stdio.h>
#include <fcntl.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define fail(fmt, ...) ksft_test_result_fail(fmt, ##__VA_ARGS__)
#define pass(fmt, ...) ksft_test_result_pass(fmt, ##__VA_ARGS__)
diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c
index cc4253f47f10..363c1033cc7d 100644
--- a/tools/testing/selftests/mm/merge.c
+++ b/tools/testing/selftests/mm/merge.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#define _GNU_SOURCE
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include <linux/prctl.h>
#include <fcntl.h>
#include <stdio.h>
diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c
index ea945eebec2f..ee24b88c2b24 100644
--- a/tools/testing/selftests/mm/migration.c
+++ b/tools/testing/selftests/mm/migration.c
@@ -4,7 +4,7 @@
* paths in the kernel.
*/
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "thp_settings.h"
#include <strings.h>
diff --git a/tools/testing/selftests/mm/mkdirty.c b/tools/testing/selftests/mm/mkdirty.c
index 09feeb453646..68dd447a5454 100644
--- a/tools/testing/selftests/mm/mkdirty.c
+++ b/tools/testing/selftests/mm/mkdirty.c
@@ -22,7 +22,7 @@
#include <linux/userfaultfd.h>
#include <linux/mempolicy.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "vm_util.h"
static size_t pagesize;
diff --git a/tools/testing/selftests/mm/mlock-random-test.c b/tools/testing/selftests/mm/mlock-random-test.c
index b8d7e966f44c..9d349c151360 100644
--- a/tools/testing/selftests/mm/mlock-random-test.c
+++ b/tools/testing/selftests/mm/mlock-random-test.c
@@ -13,7 +13,7 @@
#include <sys/ipc.h>
#include <sys/shm.h>
#include <time.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "mlock2.h"
#define CHUNK_UNIT (128 * 1024)
diff --git a/tools/testing/selftests/mm/mlock2-tests.c b/tools/testing/selftests/mm/mlock2-tests.c
index 3e90ff37e336..b474f2b20def 100644
--- a/tools/testing/selftests/mm/mlock2-tests.c
+++ b/tools/testing/selftests/mm/mlock2-tests.c
@@ -7,7 +7,7 @@
#include <sys/time.h>
#include <sys/resource.h>
#include <stdbool.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "mlock2.h"
struct vm_boundaries {
diff --git a/tools/testing/selftests/mm/mrelease_test.c b/tools/testing/selftests/mm/mrelease_test.c
index 100370a7111d..64e8d00ae944 100644
--- a/tools/testing/selftests/mm/mrelease_test.c
+++ b/tools/testing/selftests/mm/mrelease_test.c
@@ -12,7 +12,7 @@
#include <unistd.h>
#include <asm-generic/unistd.h>
#include "vm_util.h"
-#include "../kselftest.h"
+#include "kselftest.h"
#define MB(x) (x << 20)
#define MAX_SIZE_MB 1024
diff --git a/tools/testing/selftests/mm/mremap_dontunmap.c b/tools/testing/selftests/mm/mremap_dontunmap.c
index 1d75084b9ca5..a4f75d836733 100644
--- a/tools/testing/selftests/mm/mremap_dontunmap.c
+++ b/tools/testing/selftests/mm/mremap_dontunmap.c
@@ -14,7 +14,7 @@
#include <string.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
unsigned long page_size;
char *page_buffer;
diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c
index 5f073504e0b1..308576437228 100644
--- a/tools/testing/selftests/mm/mremap_test.c
+++ b/tools/testing/selftests/mm/mremap_test.c
@@ -16,7 +16,7 @@
#include <time.h>
#include <stdbool.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define EXPECT_SUCCESS 0
#define EXPECT_FAILURE 1
diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c
index 005f29c86484..faad4833366a 100644
--- a/tools/testing/selftests/mm/mseal_test.c
+++ b/tools/testing/selftests/mm/mseal_test.c
@@ -8,7 +8,7 @@
#include <sys/time.h>
#include <sys/resource.h>
#include <stdbool.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include <syscall.h>
#include <errno.h>
#include <stdio.h>
diff --git a/tools/testing/selftests/mm/on-fault-limit.c b/tools/testing/selftests/mm/on-fault-limit.c
index 431c1277d83a..fc4117453c84 100644
--- a/tools/testing/selftests/mm/on-fault-limit.c
+++ b/tools/testing/selftests/mm/on-fault-limit.c
@@ -5,7 +5,7 @@
#include <string.h>
#include <sys/time.h>
#include <sys/resource.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static void test_limit(void)
{
diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c
index 4fc8e578ec7c..2cb5441f29c7 100644
--- a/tools/testing/selftests/mm/pagemap_ioctl.c
+++ b/tools/testing/selftests/mm/pagemap_ioctl.c
@@ -8,7 +8,7 @@
#include <errno.h>
#include <malloc.h>
#include "vm_util.h"
-#include "../kselftest.h"
+#include "kselftest.h"
#include <linux/types.h>
#include <linux/memfd.h>
#include <linux/userfaultfd.h>
diff --git a/tools/testing/selftests/mm/pfnmap.c b/tools/testing/selftests/mm/pfnmap.c
index 88659f0a90ea..f546dfb10cae 100644
--- a/tools/testing/selftests/mm/pfnmap.c
+++ b/tools/testing/selftests/mm/pfnmap.c
@@ -22,7 +22,7 @@
#include <sys/mman.h>
#include <sys/wait.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "vm_util.h"
static sigjmp_buf sigjmp_buf_env;
diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h
index fa15f006fa68..7c29f075e40b 100644
--- a/tools/testing/selftests/mm/pkey-helpers.h
+++ b/tools/testing/selftests/mm/pkey-helpers.h
@@ -16,7 +16,7 @@
#include <linux/mman.h>
#include <linux/types.h>
-#include "../kselftest.h"
+#include "kselftest.h"
/* Define some kernel-like types */
typedef __u8 u8;
diff --git a/tools/testing/selftests/mm/prctl_thp_disable.c b/tools/testing/selftests/mm/prctl_thp_disable.c
index 84b4a4b345af..ca27200596a4 100644
--- a/tools/testing/selftests/mm/prctl_thp_disable.c
+++ b/tools/testing/selftests/mm/prctl_thp_disable.c
@@ -13,7 +13,7 @@
#include <sys/prctl.h>
#include <sys/wait.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "thp_settings.h"
#include "vm_util.h"
diff --git a/tools/testing/selftests/mm/process_madv.c b/tools/testing/selftests/mm/process_madv.c
index 471cae8427f1..cd4610baf5d7 100644
--- a/tools/testing/selftests/mm/process_madv.c
+++ b/tools/testing/selftests/mm/process_madv.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#define _GNU_SOURCE
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include <errno.h>
#include <setjmp.h>
#include <signal.h>
diff --git a/tools/testing/selftests/mm/rmap.c b/tools/testing/selftests/mm/rmap.c
index 13f7bccfd0a9..53f2058b0ef2 100644
--- a/tools/testing/selftests/mm/rmap.c
+++ b/tools/testing/selftests/mm/rmap.c
@@ -5,7 +5,7 @@
* Author(s): Wei Yang <richard.weiyang@gmail.com>
*/
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include <strings.h>
#include <pthread.h>
#include <numa.h>
diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c
index c3a9585de98c..59c0dbe99a9b 100644
--- a/tools/testing/selftests/mm/soft-dirty.c
+++ b/tools/testing/selftests/mm/soft-dirty.c
@@ -7,7 +7,7 @@
#include <malloc.h>
#include <sys/mman.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "vm_util.h"
#include "thp_settings.h"
diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
index 743af3c05190..40799f3f0213 100644
--- a/tools/testing/selftests/mm/split_huge_page_test.c
+++ b/tools/testing/selftests/mm/split_huge_page_test.c
@@ -20,7 +20,7 @@
#include <stdbool.h>
#include <time.h>
#include "vm_util.h"
-#include "../kselftest.h"
+#include "kselftest.h"
uint64_t pagesize;
unsigned int pageshift;
diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c
index 4f5e290ff1a6..77813d34dcc2 100644
--- a/tools/testing/selftests/mm/thuge-gen.c
+++ b/tools/testing/selftests/mm/thuge-gen.c
@@ -27,7 +27,7 @@
#include <stdarg.h>
#include <string.h>
#include "vm_util.h"
-#include "../kselftest.h"
+#include "kselftest.h"
#if !defined(MAP_HUGETLB)
#define MAP_HUGETLB 0x40000
diff --git a/tools/testing/selftests/mm/transhuge-stress.c b/tools/testing/selftests/mm/transhuge-stress.c
index 68201192e37c..bcad47c09518 100644
--- a/tools/testing/selftests/mm/transhuge-stress.c
+++ b/tools/testing/selftests/mm/transhuge-stress.c
@@ -16,7 +16,7 @@
#include <string.h>
#include <sys/mman.h>
#include "vm_util.h"
-#include "../kselftest.h"
+#include "kselftest.h"
int backing_fd = -1;
int mmap_flags = MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE;
diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h
index 37d3ca55905f..844a85ab31eb 100644
--- a/tools/testing/selftests/mm/uffd-common.h
+++ b/tools/testing/selftests/mm/uffd-common.h
@@ -35,7 +35,7 @@
#include <sys/random.h>
#include <stdatomic.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "vm_util.h"
#define UFFD_FLAGS (O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY)
diff --git a/tools/testing/selftests/mm/uffd-wp-mremap.c b/tools/testing/selftests/mm/uffd-wp-mremap.c
index 4e4a591cf527..17186d4a4147 100644
--- a/tools/testing/selftests/mm/uffd-wp-mremap.c
+++ b/tools/testing/selftests/mm/uffd-wp-mremap.c
@@ -7,7 +7,7 @@
#include <assert.h>
#include <linux/mman.h>
#include <sys/mman.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "thp_settings.h"
#include "uffd-common.h"
diff --git a/tools/testing/selftests/mm/va_high_addr_switch.c b/tools/testing/selftests/mm/va_high_addr_switch.c
index 306eba825107..02f290a69132 100644
--- a/tools/testing/selftests/mm/va_high_addr_switch.c
+++ b/tools/testing/selftests/mm/va_high_addr_switch.c
@@ -10,7 +10,7 @@
#include <string.h>
#include "vm_util.h"
-#include "../kselftest.h"
+#include "kselftest.h"
/*
* The hint addr value is used to allocate addresses
diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c
index 81b33d8f78f4..4f0923825ed7 100644
--- a/tools/testing/selftests/mm/virtual_address_range.c
+++ b/tools/testing/selftests/mm/virtual_address_range.c
@@ -16,7 +16,7 @@
#include <fcntl.h>
#include "vm_util.h"
-#include "../kselftest.h"
+#include "kselftest.h"
/*
* Maximum address range mapped with a single mmap()
diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
index 605cb58ea5c3..d954bf91afd5 100644
--- a/tools/testing/selftests/mm/vm_util.c
+++ b/tools/testing/selftests/mm/vm_util.c
@@ -9,7 +9,7 @@
#include <linux/fs.h>
#include <sys/syscall.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "vm_util.h"
#define PMD_SIZE_FILE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
index a8abdf414d46..6ad32b1830f1 100644
--- a/tools/testing/selftests/mm/vm_util.h
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -6,7 +6,7 @@
#include <stdarg.h>
#include <strings.h> /* ffsl() */
#include <unistd.h> /* _SC_PAGESIZE */
-#include "../kselftest.h"
+#include "kselftest.h"
#include <linux/fs.h>
#define BIT_ULL(nr) (1ULL << (nr))
diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
index a688871a98eb..7aec3ae82a44 100644
--- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -21,7 +21,7 @@
#include <linux/mount.h>
#include "../filesystems/wrappers.h"
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#ifndef CLONE_NEWNS
#define CLONE_NEWNS 0x00020000
diff --git a/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c b/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c
index bcf51d785a37..12434415ec36 100644
--- a/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c
+++ b/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c
@@ -15,7 +15,7 @@
#include <stdarg.h>
#include <sys/syscall.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#ifndef CLONE_NEWNS
#define CLONE_NEWNS 0x00020000
diff --git a/tools/testing/selftests/mqueue/mq_open_tests.c b/tools/testing/selftests/mqueue/mq_open_tests.c
index 9403ac01ba11..b16029c40c0f 100644
--- a/tools/testing/selftests/mqueue/mq_open_tests.c
+++ b/tools/testing/selftests/mqueue/mq_open_tests.c
@@ -33,7 +33,7 @@
#include <mqueue.h>
#include <error.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static char *usage =
"Usage:\n"
diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c
index fb898850867c..303c46eebd94 100644
--- a/tools/testing/selftests/mqueue/mq_perf_tests.c
+++ b/tools/testing/selftests/mqueue/mq_perf_tests.c
@@ -40,7 +40,7 @@
#include <popt.h>
#include <error.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static char *usage =
"Usage:\n"
diff --git a/tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c b/tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c
index 0d2af30c3bf5..cb0ca6ed7ebe 100644
--- a/tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c
+++ b/tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c
@@ -11,8 +11,8 @@
#include <string.h>
#include <stdbool.h>
-#include "../kselftest.h"
-#include "../kselftest_harness.h"
+#include "kselftest.h"
+#include "kselftest_harness.h"
#define VMFLAGS "VmFlags:"
#define MSEAL_FLAGS "sl"
diff --git a/tools/testing/selftests/namespaces/file_handle_test.c b/tools/testing/selftests/namespaces/file_handle_test.c
index f1bc5773f552..064b41ad96b2 100644
--- a/tools/testing/selftests/namespaces/file_handle_test.c
+++ b/tools/testing/selftests/namespaces/file_handle_test.c
@@ -14,7 +14,7 @@
#include <sys/wait.h>
#include <unistd.h>
#include <linux/unistd.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#ifndef FD_NSFS_ROOT
#define FD_NSFS_ROOT -10003 /* Root of the nsfs filesystem */
diff --git a/tools/testing/selftests/namespaces/init_ino_test.c b/tools/testing/selftests/namespaces/init_ino_test.c
index 5b6993c3740b..e4394a2fa0a9 100644
--- a/tools/testing/selftests/namespaces/init_ino_test.c
+++ b/tools/testing/selftests/namespaces/init_ino_test.c
@@ -11,7 +11,7 @@
#include <string.h>
#include <linux/nsfs.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
struct ns_info {
const char *name;
diff --git a/tools/testing/selftests/namespaces/nsid_test.c b/tools/testing/selftests/namespaces/nsid_test.c
index 527ade0a8673..b4a14c6693a5 100644
--- a/tools/testing/selftests/namespaces/nsid_test.c
+++ b/tools/testing/selftests/namespaces/nsid_test.c
@@ -20,7 +20,7 @@
#include <linux/fs.h>
#include <linux/limits.h>
#include <linux/nsfs.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
/* Fixture for tests that create child processes */
FIXTURE(nsid) {
diff --git a/tools/testing/selftests/nci/nci_dev.c b/tools/testing/selftests/nci/nci_dev.c
index 6dec59d64083..312f84ee0444 100644
--- a/tools/testing/selftests/nci/nci_dev.c
+++ b/tools/testing/selftests/nci/nci_dev.c
@@ -16,7 +16,7 @@
#include <sys/socket.h>
#include <linux/nfc.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN))
#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
diff --git a/tools/testing/selftests/net/af_unix/diag_uid.c b/tools/testing/selftests/net/af_unix/diag_uid.c
index 79a3dd75590e..da7d50cedee6 100644
--- a/tools/testing/selftests/net/af_unix/diag_uid.c
+++ b/tools/testing/selftests/net/af_unix/diag_uid.c
@@ -14,7 +14,7 @@
#include <sys/types.h>
#include <sys/un.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
FIXTURE(diag_uid)
{
diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c
index b5f474969917..1b499d56656c 100644
--- a/tools/testing/selftests/net/af_unix/msg_oob.c
+++ b/tools/testing/selftests/net/af_unix/msg_oob.c
@@ -11,7 +11,7 @@
#include <sys/signalfd.h>
#include <sys/socket.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define BUF_SZ 32
diff --git a/tools/testing/selftests/net/af_unix/scm_inq.c b/tools/testing/selftests/net/af_unix/scm_inq.c
index fc467714387e..3a86be9bda17 100644
--- a/tools/testing/selftests/net/af_unix/scm_inq.c
+++ b/tools/testing/selftests/net/af_unix/scm_inq.c
@@ -6,7 +6,7 @@
#include <sys/socket.h>
#include <sys/types.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define NR_CHUNKS 100
#define MSG_LEN 256
diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c
index ef2921988e5f..2c18b92a2603 100644
--- a/tools/testing/selftests/net/af_unix/scm_pidfd.c
+++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c
@@ -16,7 +16,7 @@
#include <sys/wait.h>
#include "../../pidfd/pidfd.h"
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define clean_errno() (errno == 0 ? "None" : strerror(errno))
#define log_err(MSG, ...) \
diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c
index 914f99d153ce..d82a79c21c17 100644
--- a/tools/testing/selftests/net/af_unix/scm_rights.c
+++ b/tools/testing/selftests/net/af_unix/scm_rights.c
@@ -10,7 +10,7 @@
#include <sys/socket.h>
#include <sys/un.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
FIXTURE(scm_rights)
{
diff --git a/tools/testing/selftests/net/af_unix/unix_connect.c b/tools/testing/selftests/net/af_unix/unix_connect.c
index d799fd8f5c7c..870ca96fa8ea 100644
--- a/tools/testing/selftests/net/af_unix/unix_connect.c
+++ b/tools/testing/selftests/net/af_unix/unix_connect.c
@@ -10,7 +10,7 @@
#include <sys/socket.h>
#include <sys/un.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
FIXTURE(unix_connect)
{
diff --git a/tools/testing/selftests/net/bind_timewait.c b/tools/testing/selftests/net/bind_timewait.c
index cb9fdf51ea59..40126f9b901e 100644
--- a/tools/testing/selftests/net/bind_timewait.c
+++ b/tools/testing/selftests/net/bind_timewait.c
@@ -4,7 +4,7 @@
#include <sys/socket.h>
#include <netinet/in.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
FIXTURE(bind_timewait)
{
diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c
index b7b54d646b93..7d11548b2c61 100644
--- a/tools/testing/selftests/net/bind_wildcard.c
+++ b/tools/testing/selftests/net/bind_wildcard.c
@@ -4,7 +4,7 @@
#include <sys/socket.h>
#include <netinet/in.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
static const __u32 in4addr_any = INADDR_ANY;
static const __u32 in4addr_loopback = INADDR_LOOPBACK;
diff --git a/tools/testing/selftests/net/can/test_raw_filter.c b/tools/testing/selftests/net/can/test_raw_filter.c
index 4101c36390fd..bb8ae8854273 100644
--- a/tools/testing/selftests/net/can/test_raw_filter.c
+++ b/tools/testing/selftests/net/can/test_raw_filter.c
@@ -19,7 +19,7 @@
#include <linux/can.h>
#include <linux/can/raw.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define ID 0x123
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index ded9b925865e..67a72b1a2f3d 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -16,7 +16,7 @@
#include <linux/udp.h>
#include <sys/socket.h>
-#include "../kselftest.h"
+#include "kselftest.h"
enum {
ERN_SUCCESS = 0,
diff --git a/tools/testing/selftests/net/epoll_busy_poll.c b/tools/testing/selftests/net/epoll_busy_poll.c
index 16e457c2f877..adf8dd0b5e0b 100644
--- a/tools/testing/selftests/net/epoll_busy_poll.c
+++ b/tools/testing/selftests/net/epoll_busy_poll.c
@@ -23,7 +23,7 @@
#include <sys/ioctl.h>
#include <sys/socket.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
/* if the headers haven't been updated, we need to define some things */
#if !defined(EPOLL_IOC_TYPE)
diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c
index 29451d2244b7..e6834a6cfc8f 100644
--- a/tools/testing/selftests/net/ip_local_port_range.c
+++ b/tools/testing/selftests/net/ip_local_port_range.c
@@ -10,7 +10,7 @@
#include <fcntl.h>
#include <netinet/ip.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#ifndef IP_LOCAL_PORT_RANGE
#define IP_LOCAL_PORT_RANGE 51
diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c
index 9b44a091802c..0ccf484b1d9d 100644
--- a/tools/testing/selftests/net/ipsec.c
+++ b/tools/testing/selftests/net/ipsec.c
@@ -34,7 +34,7 @@
#include <time.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define printk(fmt, ...) \
ksft_print_msg("%d[%u] " fmt "\n", getpid(), __LINE__, ##__VA_ARGS__)
diff --git a/tools/testing/selftests/net/ipv6_fragmentation.c b/tools/testing/selftests/net/ipv6_fragmentation.c
index 267ef62b5c72..672c9fe086a7 100644
--- a/tools/testing/selftests/net/ipv6_fragmentation.c
+++ b/tools/testing/selftests/net/ipv6_fragmentation.c
@@ -34,7 +34,7 @@
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define MTU 1500
#define LARGER_THAN_MTU 8192
diff --git a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c
index 5f827e10717d..5cecb8a1bc94 100644
--- a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c
+++ b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c
@@ -10,7 +10,7 @@
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
#include <linux/netfilter/nf_conntrack_tcp.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define TEST_ZONE_ID 123
#define NF_CT_DEFAULT_ZONE_ID 0
diff --git a/tools/testing/selftests/net/netlink-dumps.c b/tools/testing/selftests/net/netlink-dumps.c
index 679b6c77ace7..51129c564d0a 100644
--- a/tools/testing/selftests/net/netlink-dumps.c
+++ b/tools/testing/selftests/net/netlink-dumps.c
@@ -18,7 +18,7 @@
#include <linux/mqueue.h>
#include <linux/rtnetlink.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include <ynl.h>
diff --git a/tools/testing/selftests/net/ovpn/ovpn-cli.c b/tools/testing/selftests/net/ovpn/ovpn-cli.c
index 0a5226196a2e..0f3babf19fd0 100644
--- a/tools/testing/selftests/net/ovpn/ovpn-cli.c
+++ b/tools/testing/selftests/net/ovpn/ovpn-cli.c
@@ -32,7 +32,7 @@
#include <sys/socket.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
/* defines to make checkpatch happy */
#define strscpy strncpy
diff --git a/tools/testing/selftests/net/proc_net_pktgen.c b/tools/testing/selftests/net/proc_net_pktgen.c
index 69444fb29577..fab3b5c2e25d 100644
--- a/tools/testing/selftests/net/proc_net_pktgen.c
+++ b/tools/testing/selftests/net/proc_net_pktgen.c
@@ -10,7 +10,7 @@
#include <stdlib.h>
#include <unistd.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
static const char ctrl_cmd_stop[] = "stop";
static const char ctrl_cmd_start[] = "start";
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index 84c524357075..ab8d8b7e6cb0 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -54,7 +54,7 @@
#include <unistd.h>
#include "psock_lib.h"
-#include "../kselftest.h"
+#include "kselftest.h"
#define RING_NUM_FRAMES 20
diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c
index 2938045c5cf9..7caf3135448d 100644
--- a/tools/testing/selftests/net/psock_tpacket.c
+++ b/tools/testing/selftests/net/psock_tpacket.c
@@ -46,7 +46,7 @@
#include "psock_lib.h"
-#include "../kselftest.h"
+#include "kselftest.h"
#ifndef bug_on
# define bug_on(cond) assert(!(cond))
diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
index 7b9bf8a7bbe1..5aad27a0d13a 100644
--- a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
+++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
@@ -22,7 +22,7 @@
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
struct reuse_opts {
int reuseaddr[2];
diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c
index 65aea27d761c..b6634d6da3d6 100644
--- a/tools/testing/selftests/net/reuseport_bpf.c
+++ b/tools/testing/selftests/net/reuseport_bpf.c
@@ -24,7 +24,7 @@
#include <sys/resource.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
struct test_params {
int recv_family;
diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c
index c9ba36aa688e..2ffd957ffb15 100644
--- a/tools/testing/selftests/net/reuseport_bpf_numa.c
+++ b/tools/testing/selftests/net/reuseport_bpf_numa.c
@@ -23,7 +23,7 @@
#include <unistd.h>
#include <numa.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static const int PORT = 8888;
diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c
index 16ac4df55fdb..b81ed0352d6c 100644
--- a/tools/testing/selftests/net/rxtimestamp.c
+++ b/tools/testing/selftests/net/rxtimestamp.c
@@ -18,7 +18,7 @@
#include <linux/net_tstamp.h>
#include <linux/errqueue.h>
-#include "../kselftest.h"
+#include "kselftest.h"
struct options {
int so_timestamp;
diff --git a/tools/testing/selftests/net/sk_so_peek_off.c b/tools/testing/selftests/net/sk_so_peek_off.c
index d87dd8d8d491..2a3f5c604f52 100644
--- a/tools/testing/selftests/net/sk_so_peek_off.c
+++ b/tools/testing/selftests/net/sk_so_peek_off.c
@@ -8,7 +8,7 @@
#include <sys/types.h>
#include <netinet/in.h>
#include <arpa/inet.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static char *afstr(int af, int proto)
{
diff --git a/tools/testing/selftests/net/so_incoming_cpu.c b/tools/testing/selftests/net/so_incoming_cpu.c
index e9fa14e10732..4740701f1a9a 100644
--- a/tools/testing/selftests/net/so_incoming_cpu.c
+++ b/tools/testing/selftests/net/so_incoming_cpu.c
@@ -9,7 +9,7 @@
#include <sys/socket.h>
#include <sys/sysinfo.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
FIXTURE(so_incoming_cpu)
{
diff --git a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c
index be1080003c61..9e270548dad8 100644
--- a/tools/testing/selftests/net/socket.c
+++ b/tools/testing/selftests/net/socket.c
@@ -7,7 +7,7 @@
#include <sys/socket.h>
#include <netinet/in.h>
-#include "../kselftest.h"
+#include "kselftest.h"
struct socket_testcase {
int domain;
diff --git a/tools/testing/selftests/net/tap.c b/tools/testing/selftests/net/tap.c
index 247c3b3ac1c9..9ec1c9b50e77 100644
--- a/tools/testing/selftests/net/tap.c
+++ b/tools/testing/selftests/net/tap.c
@@ -17,7 +17,7 @@
#include <linux/virtio_net.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
static const char param_dev_tap_name[] = "xmacvtap0";
static const char param_dev_dummy_name[] = "xdummy0";
diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c
index a27cc03c9fbd..49aec2922a31 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/setup.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c
@@ -9,7 +9,7 @@
* Can't be included in the header: it defines static variables which
* will be unique to every object. Let's include it only once here.
*/
-#include "../../../kselftest.h"
+#include "kselftest.h"
/* Prevent overriding of one thread's output by another */
static pthread_mutex_t ksft_print_lock = PTHREAD_MUTEX_INITIALIZER;
diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.c b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
index c1cb0c75156a..4b3f9b5e50fe 100644
--- a/tools/testing/selftests/net/tcp_fastopen_backup_key.c
+++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
@@ -26,7 +26,7 @@
#include <fcntl.h>
#include <time.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#ifndef TCP_FASTOPEN_KEY
#define TCP_FASTOPEN_KEY 33
diff --git a/tools/testing/selftests/net/tcp_port_share.c b/tools/testing/selftests/net/tcp_port_share.c
index 4c39d599dfce..6146b62610df 100644
--- a/tools/testing/selftests/net/tcp_port_share.c
+++ b/tools/testing/selftests/net/tcp_port_share.c
@@ -10,7 +10,7 @@
#include <sched.h>
#include <stdlib.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#define DST_PORT 30000
#define SRC_PORT 40000
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index da1b50b30719..a3ef4b57eb5f 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -21,7 +21,7 @@
#include <sys/socket.h>
#include <sys/stat.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#define TLS_PAYLOAD_MAX_LEN 16384
#define SOL_TLS 282
diff --git a/tools/testing/selftests/net/tun.c b/tools/testing/selftests/net/tun.c
index fa83918b62d1..0efc67b0357a 100644
--- a/tools/testing/selftests/net/tun.c
+++ b/tools/testing/selftests/net/tun.c
@@ -15,7 +15,7 @@
#include <sys/ioctl.h>
#include <sys/socket.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
static int tun_attach(int fd, char *dev)
{
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
index 477392715a9a..86d80cce55b4 100644
--- a/tools/testing/selftests/net/udpgso_bench_tx.c
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -25,7 +25,7 @@
#include <sys/types.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#ifndef ETH_MAX_MTU
#define ETH_MAX_MTU 0xFFFFU
diff --git a/tools/testing/selftests/openat2/helpers.h b/tools/testing/selftests/openat2/helpers.h
index 7056340b9339..510e60602511 100644
--- a/tools/testing/selftests/openat2/helpers.h
+++ b/tools/testing/selftests/openat2/helpers.h
@@ -12,7 +12,7 @@
#include <stdbool.h>
#include <errno.h>
#include <linux/types.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define ARRAY_LEN(X) (sizeof (X) / sizeof (*(X)))
#define BUILD_BUG_ON(e) ((void)(sizeof(struct { int:(-!!(e)); })))
diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c
index 5790ab446527..0e161ef9e9e4 100644
--- a/tools/testing/selftests/openat2/openat2_test.c
+++ b/tools/testing/selftests/openat2/openat2_test.c
@@ -15,7 +15,7 @@
#include <stdbool.h>
#include <string.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "helpers.h"
/*
diff --git a/tools/testing/selftests/openat2/rename_attack_test.c b/tools/testing/selftests/openat2/rename_attack_test.c
index 0a770728b436..aa5699e45729 100644
--- a/tools/testing/selftests/openat2/rename_attack_test.c
+++ b/tools/testing/selftests/openat2/rename_attack_test.c
@@ -22,7 +22,7 @@
#include <limits.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "helpers.h"
/* Construct a test directory with the following structure:
diff --git a/tools/testing/selftests/openat2/resolve_test.c b/tools/testing/selftests/openat2/resolve_test.c
index bbafad440893..a76ef15ceb90 100644
--- a/tools/testing/selftests/openat2/resolve_test.c
+++ b/tools/testing/selftests/openat2/resolve_test.c
@@ -14,7 +14,7 @@
#include <stdbool.h>
#include <string.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "helpers.h"
/*
diff --git a/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c b/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c
index cd9075444c32..23aac6f97061 100644
--- a/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c
+++ b/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c
@@ -20,7 +20,7 @@
#include "../../../../include/uapi/linux/pcitest.h"
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#define pci_ep_ioctl(cmd, arg) \
({ \
diff --git a/tools/testing/selftests/perf_events/mmap.c b/tools/testing/selftests/perf_events/mmap.c
index ea0427aac1f9..d1fa8ec58987 100644
--- a/tools/testing/selftests/perf_events/mmap.c
+++ b/tools/testing/selftests/perf_events/mmap.c
@@ -14,7 +14,7 @@
#include <linux/perf_event.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#define RB_SIZE 0x3000
#define AUX_SIZE 0x10000
diff --git a/tools/testing/selftests/perf_events/remove_on_exec.c b/tools/testing/selftests/perf_events/remove_on_exec.c
index 5814611a1dc7..89e7b06835df 100644
--- a/tools/testing/selftests/perf_events/remove_on_exec.c
+++ b/tools/testing/selftests/perf_events/remove_on_exec.c
@@ -30,7 +30,7 @@
#include <sys/syscall.h>
#include <unistd.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
static volatile int signal_count;
diff --git a/tools/testing/selftests/perf_events/sigtrap_threads.c b/tools/testing/selftests/perf_events/sigtrap_threads.c
index d1d8483ac628..b5cf8355345d 100644
--- a/tools/testing/selftests/perf_events/sigtrap_threads.c
+++ b/tools/testing/selftests/perf_events/sigtrap_threads.c
@@ -31,7 +31,7 @@
#include <sys/syscall.h>
#include <unistd.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#define NUM_THREADS 5
diff --git a/tools/testing/selftests/perf_events/watermark_signal.c b/tools/testing/selftests/perf_events/watermark_signal.c
index b3a72f0ac522..0f64b9b17081 100644
--- a/tools/testing/selftests/perf_events/watermark_signal.c
+++ b/tools/testing/selftests/perf_events/watermark_signal.c
@@ -15,7 +15,7 @@
#include <sys/wait.h>
#include <unistd.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
static int sigio_count;
diff --git a/tools/testing/selftests/pid_namespace/pid_max.c b/tools/testing/selftests/pid_namespace/pid_max.c
index 96f274f0582b..c9519e7385b6 100644
--- a/tools/testing/selftests/pid_namespace/pid_max.c
+++ b/tools/testing/selftests/pid_namespace/pid_max.c
@@ -13,7 +13,7 @@
#include <sys/mount.h>
#include <sys/wait.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "../pidfd/pidfd.h"
#define __STACK_SIZE (8 * 1024 * 1024)
diff --git a/tools/testing/selftests/pid_namespace/regression_enomem.c b/tools/testing/selftests/pid_namespace/regression_enomem.c
index 7d84097ad45c..059e7ec5b4fd 100644
--- a/tools/testing/selftests/pid_namespace/regression_enomem.c
+++ b/tools/testing/selftests/pid_namespace/regression_enomem.c
@@ -11,7 +11,7 @@
#include <syscall.h>
#include <sys/wait.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "../pidfd/pidfd.h"
/*
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
index d60f10a873bb..9085c1a3c005 100644
--- a/tools/testing/selftests/pidfd/pidfd.h
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -25,7 +25,7 @@
#undef SCHED_FLAG_KEEP_ALL
#undef SCHED_FLAG_UTIL_CLAMP
-#include "../kselftest.h"
+#include "kselftest.h"
#include "../clone3/clone3_selftests.h"
#ifndef FD_PIDFS_ROOT
diff --git a/tools/testing/selftests/pidfd/pidfd_bind_mount.c b/tools/testing/selftests/pidfd/pidfd_bind_mount.c
index c094aeb1c620..1fdf49939524 100644
--- a/tools/testing/selftests/pidfd/pidfd_bind_mount.c
+++ b/tools/testing/selftests/pidfd/pidfd_bind_mount.c
@@ -14,7 +14,7 @@
#include <unistd.h>
#include "pidfd.h"
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "../filesystems/wrappers.h"
FIXTURE(pidfd_bind_mount) {
diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
index f718aac75068..9935e9471c77 100644
--- a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
@@ -16,7 +16,7 @@
#include <sys/mount.h>
#include "pidfd.h"
-#include "../kselftest.h"
+#include "kselftest.h"
struct error {
int code;
diff --git a/tools/testing/selftests/pidfd/pidfd_file_handle_test.c b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c
index 6bd2e9c9565b..68918734dcf3 100644
--- a/tools/testing/selftests/pidfd/pidfd_file_handle_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c
@@ -20,7 +20,7 @@
#include <sys/stat.h>
#include "pidfd.h"
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
FIXTURE(file_handle)
{
diff --git a/tools/testing/selftests/pidfd/pidfd_getfd_test.c b/tools/testing/selftests/pidfd/pidfd_getfd_test.c
index cd51d547b751..ea45b37001b0 100644
--- a/tools/testing/selftests/pidfd/pidfd_getfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_getfd_test.c
@@ -19,7 +19,7 @@
#include <linux/kcmp.h>
#include "pidfd.h"
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
/*
* UNKNOWN_FD is an fd number that should never exist in the child, as it is
diff --git a/tools/testing/selftests/pidfd/pidfd_info_test.c b/tools/testing/selftests/pidfd/pidfd_info_test.c
index cb5430a2fd75..6571e04acd88 100644
--- a/tools/testing/selftests/pidfd/pidfd_info_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_info_test.c
@@ -21,7 +21,7 @@
#include <sys/stat.h>
#include "pidfd.h"
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
FIXTURE(pidfd_info)
{
diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c
index cd3de40e4977..318e6f09c8e0 100644
--- a/tools/testing/selftests/pidfd/pidfd_open_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_open_test.c
@@ -20,7 +20,7 @@
#include <unistd.h>
#include "pidfd.h"
-#include "../kselftest.h"
+#include "kselftest.h"
static int safe_int(const char *numstr, int *converted)
{
diff --git a/tools/testing/selftests/pidfd/pidfd_poll_test.c b/tools/testing/selftests/pidfd/pidfd_poll_test.c
index 55d74a50358f..232304f818c7 100644
--- a/tools/testing/selftests/pidfd/pidfd_poll_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_poll_test.c
@@ -14,7 +14,7 @@
#include <unistd.h>
#include "pidfd.h"
-#include "../kselftest.h"
+#include "kselftest.h"
static bool timeout;
diff --git a/tools/testing/selftests/pidfd/pidfd_setattr_test.c b/tools/testing/selftests/pidfd/pidfd_setattr_test.c
index d7de05edc4b3..e8562a2992f3 100644
--- a/tools/testing/selftests/pidfd/pidfd_setattr_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_setattr_test.c
@@ -22,7 +22,7 @@
#include <sys/xattr.h>
#include "pidfd.h"
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
FIXTURE(pidfs_setattr)
{
diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c
index e6a079b3d5e2..107edecff224 100644
--- a/tools/testing/selftests/pidfd/pidfd_setns_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c
@@ -18,7 +18,7 @@
#include <sys/stat.h>
#include "pidfd.h"
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
enum {
PIDFD_NS_USER,
diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c
index fcd85cad9f18..932cbd8caa77 100644
--- a/tools/testing/selftests/pidfd/pidfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_test.c
@@ -20,7 +20,7 @@
#include <unistd.h>
#include "pidfd.h"
-#include "../kselftest.h"
+#include "kselftest.h"
#define str(s) _str(s)
#define _str(s) #s
diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c
index 1e2d49751cde..4bf702d62c1c 100644
--- a/tools/testing/selftests/pidfd/pidfd_wait.c
+++ b/tools/testing/selftests/pidfd/pidfd_wait.c
@@ -17,7 +17,7 @@
#include <unistd.h>
#include "pidfd.h"
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
diff --git a/tools/testing/selftests/pidfd/pidfd_xattr_test.c b/tools/testing/selftests/pidfd/pidfd_xattr_test.c
index 5cf7bb0e4bf2..fd57511af7e4 100644
--- a/tools/testing/selftests/pidfd/pidfd_xattr_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_xattr_test.c
@@ -22,7 +22,7 @@
#include <sys/xattr.h>
#include "pidfd.h"
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
FIXTURE(pidfs_xattr)
{
diff --git a/tools/testing/selftests/prctl/set-anon-vma-name-test.c b/tools/testing/selftests/prctl/set-anon-vma-name-test.c
index 4275cb256dce..ac6721b184a6 100644
--- a/tools/testing/selftests/prctl/set-anon-vma-name-test.c
+++ b/tools/testing/selftests/prctl/set-anon-vma-name-test.c
@@ -10,7 +10,7 @@
#include <sys/mman.h>
#include <string.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#define AREA_SIZE 1024
diff --git a/tools/testing/selftests/prctl/set-process-name.c b/tools/testing/selftests/prctl/set-process-name.c
index 562f707ba771..3f7b146d36df 100644
--- a/tools/testing/selftests/prctl/set-process-name.c
+++ b/tools/testing/selftests/prctl/set-process-name.c
@@ -7,7 +7,7 @@
#include <sys/prctl.h>
#include <string.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#define CHANGE_NAME "changename"
#define EMPTY_NAME ""
diff --git a/tools/testing/selftests/proc/proc-maps-race.c b/tools/testing/selftests/proc/proc-maps-race.c
index a546475db550..a734553718da 100644
--- a/tools/testing/selftests/proc/proc-maps-race.c
+++ b/tools/testing/selftests/proc/proc-maps-race.c
@@ -23,7 +23,7 @@
*
*/
#define _GNU_SOURCE
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c
index 978cbcb3eb11..4e6a3e53f975 100644
--- a/tools/testing/selftests/proc/proc-pid-vm.c
+++ b/tools/testing/selftests/proc/proc-pid-vm.c
@@ -51,7 +51,7 @@
#define __maybe_unused __attribute__((__unused__))
#endif
-#include "../kselftest.h"
+#include "kselftest.h"
static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags)
{
diff --git a/tools/testing/selftests/proc/proc-pidns.c b/tools/testing/selftests/proc/proc-pidns.c
index 52500597f951..25b9a2933c45 100644
--- a/tools/testing/selftests/proc/proc-pidns.c
+++ b/tools/testing/selftests/proc/proc-pidns.c
@@ -16,7 +16,7 @@
#include <sys/stat.h>
#include <sys/prctl.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#define ASSERT_ERRNO(expected, _t, seen) \
__EXPECT(expected, #expected, \
diff --git a/tools/testing/selftests/ptrace/get_set_sud.c b/tools/testing/selftests/ptrace/get_set_sud.c
index 5297b10d25c3..2e619c7599bb 100644
--- a/tools/testing/selftests/ptrace/get_set_sud.c
+++ b/tools/testing/selftests/ptrace/get_set_sud.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include <stdio.h>
#include <string.h>
#include <errno.h>
diff --git a/tools/testing/selftests/ptrace/get_syscall_info.c b/tools/testing/selftests/ptrace/get_syscall_info.c
index 5bcd1c7b5be6..3f5c3a9fdaba 100644
--- a/tools/testing/selftests/ptrace/get_syscall_info.c
+++ b/tools/testing/selftests/ptrace/get_syscall_info.c
@@ -7,7 +7,7 @@
* matches userspace expectations.
*/
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include <err.h>
#include <signal.h>
#include <asm/unistd.h>
diff --git a/tools/testing/selftests/ptrace/set_syscall_info.c b/tools/testing/selftests/ptrace/set_syscall_info.c
index 4198248ef874..1cc411a41cd6 100644
--- a/tools/testing/selftests/ptrace/set_syscall_info.c
+++ b/tools/testing/selftests/ptrace/set_syscall_info.c
@@ -7,7 +7,7 @@
* matches userspace expectations.
*/
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include <err.h>
#include <fcntl.h>
#include <signal.h>
diff --git a/tools/testing/selftests/ptrace/vmaccess.c b/tools/testing/selftests/ptrace/vmaccess.c
index 4db327b44586..3801b5831527 100644
--- a/tools/testing/selftests/ptrace/vmaccess.c
+++ b/tools/testing/selftests/ptrace/vmaccess.c
@@ -7,7 +7,7 @@
* when de_thread is blocked with ->cred_guard_mutex held.
*/
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include <stdio.h>
#include <fcntl.h>
#include <pthread.h>
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index cd3adfc14969..3c51bdac2dfa 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -23,7 +23,7 @@
#include <asm/unistd.h>
#include <linux/perf_event.h>
#include <linux/compiler.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define MB (1024 * 1024)
#define RESCTRL_PATH "/sys/fs/resctrl"
diff --git a/tools/testing/selftests/ring-buffer/map_test.c b/tools/testing/selftests/ring-buffer/map_test.c
index a58f520f2f41..f24677737066 100644
--- a/tools/testing/selftests/ring-buffer/map_test.c
+++ b/tools/testing/selftests/ring-buffer/map_test.c
@@ -17,7 +17,7 @@
#include <sys/ioctl.h>
#include "../user_events/user_events_selftests.h" /* share tracefs setup */
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#define TRACEFS_ROOT "/sys/kernel/tracing"
diff --git a/tools/testing/selftests/riscv/abi/pointer_masking.c b/tools/testing/selftests/riscv/abi/pointer_masking.c
index 059d2e87eb1f..2d540af7b558 100644
--- a/tools/testing/selftests/riscv/abi/pointer_masking.c
+++ b/tools/testing/selftests/riscv/abi/pointer_masking.c
@@ -9,7 +9,7 @@
#include <sys/wait.h>
#include <unistd.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#ifndef PR_PMLEN_SHIFT
#define PR_PMLEN_SHIFT 24
diff --git a/tools/testing/selftests/riscv/hwprobe/cbo.c b/tools/testing/selftests/riscv/hwprobe/cbo.c
index 6d99726aceac..f254b2edd6ce 100644
--- a/tools/testing/selftests/riscv/hwprobe/cbo.c
+++ b/tools/testing/selftests/riscv/hwprobe/cbo.c
@@ -18,7 +18,7 @@
#include <getopt.h>
#include "hwprobe.h"
-#include "../../kselftest.h"
+#include "kselftest.h"
#define MK_CBO(fn) le32_bswap((uint32_t)(fn) << 20 | 10 << 15 | 2 << 12 | 0 << 7 | 15)
#define MK_PREFETCH(fn) \
diff --git a/tools/testing/selftests/riscv/hwprobe/hwprobe.c b/tools/testing/selftests/riscv/hwprobe/hwprobe.c
index fd73c87804f3..54c435af9923 100644
--- a/tools/testing/selftests/riscv/hwprobe/hwprobe.c
+++ b/tools/testing/selftests/riscv/hwprobe/hwprobe.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
#include "hwprobe.h"
-#include "../../kselftest.h"
+#include "kselftest.h"
int main(int argc, char **argv)
{
diff --git a/tools/testing/selftests/riscv/hwprobe/which-cpus.c b/tools/testing/selftests/riscv/hwprobe/which-cpus.c
index 82c121412dfc..3ab53067e8dd 100644
--- a/tools/testing/selftests/riscv/hwprobe/which-cpus.c
+++ b/tools/testing/selftests/riscv/hwprobe/which-cpus.c
@@ -14,7 +14,7 @@
#include <assert.h>
#include "hwprobe.h"
-#include "../../kselftest.h"
+#include "kselftest.h"
static void help(void)
{
diff --git a/tools/testing/selftests/riscv/mm/mmap_bottomup.c b/tools/testing/selftests/riscv/mm/mmap_bottomup.c
index f9ccae50349b..461a65c9be00 100644
--- a/tools/testing/selftests/riscv/mm/mmap_bottomup.c
+++ b/tools/testing/selftests/riscv/mm/mmap_bottomup.c
@@ -2,7 +2,7 @@
#include <sys/mman.h>
#include <mmap_test.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
TEST(infinite_rlimit)
{
diff --git a/tools/testing/selftests/riscv/mm/mmap_default.c b/tools/testing/selftests/riscv/mm/mmap_default.c
index 3f53b6ecc326..58db7d172af2 100644
--- a/tools/testing/selftests/riscv/mm/mmap_default.c
+++ b/tools/testing/selftests/riscv/mm/mmap_default.c
@@ -2,7 +2,7 @@
#include <sys/mman.h>
#include <mmap_test.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
TEST(default_rlimit)
{
diff --git a/tools/testing/selftests/riscv/mm/mmap_test.h b/tools/testing/selftests/riscv/mm/mmap_test.h
index 75918d15919f..266a6becdeba 100644
--- a/tools/testing/selftests/riscv/mm/mmap_test.h
+++ b/tools/testing/selftests/riscv/mm/mmap_test.h
@@ -5,7 +5,7 @@
#include <sys/resource.h>
#include <stddef.h>
#include <strings.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define TOP_DOWN 0
#define BOTTOM_UP 1
diff --git a/tools/testing/selftests/riscv/sigreturn/sigreturn.c b/tools/testing/selftests/riscv/sigreturn/sigreturn.c
index ed351a1cb917..e10873d95fed 100644
--- a/tools/testing/selftests/riscv/sigreturn/sigreturn.c
+++ b/tools/testing/selftests/riscv/sigreturn/sigreturn.c
@@ -4,7 +4,7 @@
#include <stdlib.h>
#include <ucontext.h>
#include <linux/ptrace.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define RISCV_V_MAGIC 0x53465457
#define DEFAULT_VALUE 2
diff --git a/tools/testing/selftests/riscv/vector/v_initval.c b/tools/testing/selftests/riscv/vector/v_initval.c
index be9e1d18ad29..5fd2382e15a2 100644
--- a/tools/testing/selftests/riscv/vector/v_initval.c
+++ b/tools/testing/selftests/riscv/vector/v_initval.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "v_helpers.h"
#define NEXT_PROGRAM "./v_exec_initval_nolibc"
diff --git a/tools/testing/selftests/riscv/vector/vstate_prctl.c b/tools/testing/selftests/riscv/vector/vstate_prctl.c
index 62fbb17a0556..d607af3900c1 100644
--- a/tools/testing/selftests/riscv/vector/vstate_prctl.c
+++ b/tools/testing/selftests/riscv/vector/vstate_prctl.c
@@ -6,7 +6,7 @@
#include <sys/types.h>
#include <stdlib.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "v_helpers.h"
#define NEXT_PROGRAM "./vstate_exec_nolibc"
diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
index 2348d2c20d0a..1193612bf327 100644
--- a/tools/testing/selftests/rseq/basic_percpu_ops_test.c
+++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
@@ -9,7 +9,7 @@
#include <string.h>
#include <stddef.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "rseq.h"
#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index dcac5cbe7933..a736727b83c1 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -33,7 +33,7 @@
#include <linux/compiler.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "rseq.h"
/*
diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c
index be175c0e6ae3..8047d9879039 100644
--- a/tools/testing/selftests/rtc/rtctest.c
+++ b/tools/testing/selftests/rtc/rtctest.c
@@ -16,7 +16,7 @@
#include <time.h>
#include <unistd.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#define NUM_UIE 3
#define ALARM_DELTA 3
diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
index 5822e25e0217..ea4068cdefd6 100644
--- a/tools/testing/selftests/seccomp/seccomp_benchmark.c
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -20,7 +20,7 @@
#include <sys/syscall.h>
#include <sys/types.h>
-#include "../kselftest.h"
+#include "kselftest.h"
unsigned long long timing(clockid_t clk_id, unsigned long long samples)
{
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 874f17763536..32e2d4df397b 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -54,7 +54,7 @@
#include <sys/syscall.h>
#include <poll.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "../clone3/clone3_selftests.h"
/* Attempt to de-conflict with the selftests tree. */
diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c
index 9820b3809c69..13b84e54ce38 100644
--- a/tools/testing/selftests/sgx/main.c
+++ b/tools/testing/selftests/sgx/main.c
@@ -18,7 +18,7 @@
#include <sys/types.h>
#include <sys/auxv.h>
#include "defines.h"
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "main.h"
static const uint64_t MAGIC = 0x1122334455667788ULL;
diff --git a/tools/testing/selftests/signal/mangle_uc_sigmask.c b/tools/testing/selftests/signal/mangle_uc_sigmask.c
index b79ab92178a8..11dbc14bbc8e 100644
--- a/tools/testing/selftests/signal/mangle_uc_sigmask.c
+++ b/tools/testing/selftests/signal/mangle_uc_sigmask.c
@@ -39,7 +39,7 @@
#include <signal.h>
#include <ucontext.h>
-#include "../kselftest.h"
+#include "kselftest.h"
void handler_verify_ucontext(int signo, siginfo_t *info, void *uc)
{
diff --git a/tools/testing/selftests/signal/sas.c b/tools/testing/selftests/signal/sas.c
index 07227fab1cc9..306b996ab365 100644
--- a/tools/testing/selftests/signal/sas.c
+++ b/tools/testing/selftests/signal/sas.c
@@ -19,7 +19,7 @@
#include <errno.h>
#include <sys/auxv.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "current_stack_pointer.h"
#ifndef SS_AUTODISARM
diff --git a/tools/testing/selftests/sparc64/drivers/adi-test.c b/tools/testing/selftests/sparc64/drivers/adi-test.c
index 84e5d9fd20b0..b986714e7a52 100644
--- a/tools/testing/selftests/sparc64/drivers/adi-test.c
+++ b/tools/testing/selftests/sparc64/drivers/adi-test.c
@@ -16,7 +16,7 @@
#include <sys/stat.h>
#include <unistd.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#define DEBUG_LEVEL_1_BIT (0x0001)
#define DEBUG_LEVEL_2_BIT (0x0002)
diff --git a/tools/testing/selftests/sync/sync_test.c b/tools/testing/selftests/sync/sync_test.c
index 93db5aa246a3..2b44e5d88b63 100644
--- a/tools/testing/selftests/sync/sync_test.c
+++ b/tools/testing/selftests/sync/sync_test.c
@@ -34,7 +34,7 @@
#include <errno.h>
#include <string.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "synctest.h"
static int run_test(int (*test)(void), char *name)
diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_test.c b/tools/testing/selftests/syscall_user_dispatch/sud_test.c
index 2eb2c06303f2..b855c6000287 100644
--- a/tools/testing/selftests/syscall_user_dispatch/sud_test.c
+++ b/tools/testing/selftests/syscall_user_dispatch/sud_test.c
@@ -14,7 +14,7 @@
#include <stdlib.h>
#include <asm/unistd.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#ifndef PR_SET_SYSCALL_USER_DISPATCH
# define PR_SET_SYSCALL_USER_DISPATCH 59
diff --git a/tools/testing/selftests/tdx/tdx_guest_test.c b/tools/testing/selftests/tdx/tdx_guest_test.c
index 81d8cb88ea1a..dfaefa685519 100644
--- a/tools/testing/selftests/tdx/tdx_guest_test.c
+++ b/tools/testing/selftests/tdx/tdx_guest_test.c
@@ -13,7 +13,7 @@
#include <fcntl.h>
#include <linux/tdx-guest.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#define TDX_GUEST_DEVNAME "/dev/tdx_guest"
#define HEX_DUMP_SIZE 8
diff --git a/tools/testing/selftests/timens/timens.h b/tools/testing/selftests/timens/timens.h
index d4fc52d47146..7ca4b46ca61d 100644
--- a/tools/testing/selftests/timens/timens.h
+++ b/tools/testing/selftests/timens/timens.h
@@ -7,7 +7,7 @@
#include <stdlib.h>
#include <stdbool.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#ifndef CLONE_NEWTIME
# define CLONE_NEWTIME 0x00000080
diff --git a/tools/testing/selftests/timers/adjtick.c b/tools/testing/selftests/timers/adjtick.c
index 777d9494b683..5b3ef708d6e9 100644
--- a/tools/testing/selftests/timers/adjtick.c
+++ b/tools/testing/selftests/timers/adjtick.c
@@ -24,7 +24,7 @@
#include <time.h>
#include <include/vdso/time64.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define MILLION 1000000
diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c
index a9ef76ea6051..aa66c805f6a4 100644
--- a/tools/testing/selftests/timers/alarmtimer-suspend.c
+++ b/tools/testing/selftests/timers/alarmtimer-suspend.c
@@ -30,7 +30,7 @@
#include <pthread.h>
#include <include/vdso/time64.h>
#include <errno.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define UNREASONABLE_LAT (NSEC_PER_SEC * 5) /* hopefully we resume in 5 secs */
diff --git a/tools/testing/selftests/timers/change_skew.c b/tools/testing/selftests/timers/change_skew.c
index 18e794a46c23..387fda10fcd8 100644
--- a/tools/testing/selftests/timers/change_skew.c
+++ b/tools/testing/selftests/timers/change_skew.c
@@ -28,7 +28,7 @@
#include <sys/time.h>
#include <sys/timex.h>
#include <time.h>
-#include "../kselftest.h"
+#include "kselftest.h"
int change_skew_test(int ppm)
{
diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c
index 83faa4e354e3..db62a764c29e 100644
--- a/tools/testing/selftests/timers/clocksource-switch.c
+++ b/tools/testing/selftests/timers/clocksource-switch.c
@@ -34,7 +34,7 @@
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
int get_clocksources(char list[][30])
diff --git a/tools/testing/selftests/timers/freq-step.c b/tools/testing/selftests/timers/freq-step.c
index 73b636f89fdc..cfa46dafe3e8 100644
--- a/tools/testing/selftests/timers/freq-step.c
+++ b/tools/testing/selftests/timers/freq-step.c
@@ -15,7 +15,7 @@
#include <time.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define SAMPLES 100
#define SAMPLE_READINGS 10
diff --git a/tools/testing/selftests/timers/inconsistency-check.c b/tools/testing/selftests/timers/inconsistency-check.c
index 9d1573769d55..e53e63e18683 100644
--- a/tools/testing/selftests/timers/inconsistency-check.c
+++ b/tools/testing/selftests/timers/inconsistency-check.c
@@ -29,7 +29,7 @@
#include <string.h>
#include <signal.h>
#include <include/vdso/time64.h>
-#include "../kselftest.h"
+#include "kselftest.h"
/* CLOCK_HWSPECIFIC == CLOCK_SGI_CYCLE (Deprecated) */
#define CLOCK_HWSPECIFIC 10
diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c
index 04004a7c0934..3568cfb3e815 100644
--- a/tools/testing/selftests/timers/leap-a-day.c
+++ b/tools/testing/selftests/timers/leap-a-day.c
@@ -49,7 +49,7 @@
#include <signal.h>
#include <unistd.h>
#include <include/vdso/time64.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define CLOCK_TAI 11
diff --git a/tools/testing/selftests/timers/leapcrash.c b/tools/testing/selftests/timers/leapcrash.c
index 8fd065eec904..c2d3bccb52f2 100644
--- a/tools/testing/selftests/timers/leapcrash.c
+++ b/tools/testing/selftests/timers/leapcrash.c
@@ -22,7 +22,7 @@
#include <sys/timex.h>
#include <string.h>
#include <signal.h>
-#include "../kselftest.h"
+#include "kselftest.h"
/* clear NTP time_status & time_state */
int clear_time_state(void)
diff --git a/tools/testing/selftests/timers/mqueue-lat.c b/tools/testing/selftests/timers/mqueue-lat.c
index 63de2334a291..c0d9368e4fca 100644
--- a/tools/testing/selftests/timers/mqueue-lat.c
+++ b/tools/testing/selftests/timers/mqueue-lat.c
@@ -30,7 +30,7 @@
#include <errno.h>
#include <mqueue.h>
#include <include/vdso/time64.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define TARGET_TIMEOUT 100000000 /* 100ms in nanoseconds */
diff --git a/tools/testing/selftests/timers/nanosleep.c b/tools/testing/selftests/timers/nanosleep.c
index 10badae13ebe..a054680b3372 100644
--- a/tools/testing/selftests/timers/nanosleep.c
+++ b/tools/testing/selftests/timers/nanosleep.c
@@ -28,7 +28,7 @@
#include <string.h>
#include <signal.h>
#include <include/vdso/time64.h>
-#include "../kselftest.h"
+#include "kselftest.h"
/* CLOCK_HWSPECIFIC == CLOCK_SGI_CYCLE (Deprecated) */
#define CLOCK_HWSPECIFIC 10
diff --git a/tools/testing/selftests/timers/nsleep-lat.c b/tools/testing/selftests/timers/nsleep-lat.c
index de23dc0c9f97..a7ba1eb1e21b 100644
--- a/tools/testing/selftests/timers/nsleep-lat.c
+++ b/tools/testing/selftests/timers/nsleep-lat.c
@@ -25,7 +25,7 @@
#include <string.h>
#include <signal.h>
#include <include/vdso/time64.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */
diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
index a563c438ac79..38512623622a 100644
--- a/tools/testing/selftests/timers/posix_timers.c
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -20,7 +20,7 @@
#include <pthread.h>
#include <stdbool.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define DELAY 2
diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c
index 957f7cd29cb1..a7bae7d80916 100644
--- a/tools/testing/selftests/timers/raw_skew.c
+++ b/tools/testing/selftests/timers/raw_skew.c
@@ -26,7 +26,7 @@
#include <sys/timex.h>
#include <time.h>
#include <include/vdso/time64.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define shift_right(x, s) ({ \
__typeof__(x) __x = (x); \
diff --git a/tools/testing/selftests/timers/rtcpie.c b/tools/testing/selftests/timers/rtcpie.c
index 7c07edd0d450..4ba42d198b7e 100644
--- a/tools/testing/selftests/timers/rtcpie.c
+++ b/tools/testing/selftests/timers/rtcpie.c
@@ -18,7 +18,7 @@
#include <stdlib.h>
#include <errno.h>
-#include "../kselftest.h"
+#include "kselftest.h"
/*
* This expects the new RTC class driver framework, working with
diff --git a/tools/testing/selftests/timers/set-2038.c b/tools/testing/selftests/timers/set-2038.c
index ed244315e11c..ecc171de4728 100644
--- a/tools/testing/selftests/timers/set-2038.c
+++ b/tools/testing/selftests/timers/set-2038.c
@@ -28,7 +28,7 @@
#include <time.h>
#include <sys/time.h>
#include <include/vdso/time64.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define KTIME_MAX ((long long)~((unsigned long long)1 << 63))
#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC)
diff --git a/tools/testing/selftests/timers/set-tai.c b/tools/testing/selftests/timers/set-tai.c
index 5b67462efcd6..e283c04284af 100644
--- a/tools/testing/selftests/timers/set-tai.c
+++ b/tools/testing/selftests/timers/set-tai.c
@@ -23,7 +23,7 @@
#include <string.h>
#include <signal.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
int set_tai(int offset)
{
diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c
index 9d8437c13929..44d2e3614fa5 100644
--- a/tools/testing/selftests/timers/set-timer-lat.c
+++ b/tools/testing/selftests/timers/set-timer-lat.c
@@ -29,7 +29,7 @@
#include <stdlib.h>
#include <pthread.h>
#include <include/vdso/time64.h>
-#include "../kselftest.h"
+#include "kselftest.h"
/* CLOCK_HWSPECIFIC == CLOCK_SGI_CYCLE (Deprecated) */
#define CLOCK_HWSPECIFIC 10
diff --git a/tools/testing/selftests/timers/set-tz.c b/tools/testing/selftests/timers/set-tz.c
index 20daaf1782b7..334f36c0eda6 100644
--- a/tools/testing/selftests/timers/set-tz.c
+++ b/tools/testing/selftests/timers/set-tz.c
@@ -23,7 +23,7 @@
#include <string.h>
#include <signal.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
int set_tz(int min, int dst)
{
diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c
index 46c391d7f45d..53ee5d710ff4 100644
--- a/tools/testing/selftests/timers/skew_consistency.c
+++ b/tools/testing/selftests/timers/skew_consistency.c
@@ -34,7 +34,7 @@
#include <fcntl.h>
#include <string.h>
#include <sys/wait.h>
-#include "../kselftest.h"
+#include "kselftest.h"
int main(int argc, char **argv)
{
diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c
index d5564bbf0e50..60b8b21bf782 100644
--- a/tools/testing/selftests/timers/threadtest.c
+++ b/tools/testing/selftests/timers/threadtest.c
@@ -21,7 +21,7 @@
#include <stdlib.h>
#include <sys/time.h>
#include <pthread.h>
-#include "../kselftest.h"
+#include "kselftest.h"
/* serializes shared list access */
pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c
index 6b7801055ad1..e1e56d3097d6 100644
--- a/tools/testing/selftests/timers/valid-adjtimex.c
+++ b/tools/testing/selftests/timers/valid-adjtimex.c
@@ -30,7 +30,7 @@
#include <signal.h>
#include <unistd.h>
#include <include/vdso/time64.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define ADJ_SETOFFSET 0x0100
diff --git a/tools/testing/selftests/tmpfs/bug-link-o-tmpfile.c b/tools/testing/selftests/tmpfs/bug-link-o-tmpfile.c
index 02ecfe687dc2..5cb4e404a2bd 100644
--- a/tools/testing/selftests/tmpfs/bug-link-o-tmpfile.c
+++ b/tools/testing/selftests/tmpfs/bug-link-o-tmpfile.c
@@ -23,7 +23,7 @@
#include <sys/mount.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
int main(void)
{
diff --git a/tools/testing/selftests/tty/tty_tstamp_update.c b/tools/testing/selftests/tty/tty_tstamp_update.c
index 9e1a40f5db17..bc3291dcd18b 100644
--- a/tools/testing/selftests/tty/tty_tstamp_update.c
+++ b/tools/testing/selftests/tty/tty_tstamp_update.c
@@ -9,7 +9,7 @@
#include <unistd.h>
#include <linux/limits.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define MIN_TTY_PATH_LEN 8
diff --git a/tools/testing/selftests/uevent/uevent_filtering.c b/tools/testing/selftests/uevent/uevent_filtering.c
index dbe55f3a66f4..974b076f9235 100644
--- a/tools/testing/selftests/uevent/uevent_filtering.c
+++ b/tools/testing/selftests/uevent/uevent_filtering.c
@@ -19,7 +19,7 @@
#include <sys/wait.h>
#include <unistd.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#define __DEV_FULL "/sys/devices/virtual/mem/full/uevent"
#define __UEVENT_BUFFER_SIZE (2048 * 2)
diff --git a/tools/testing/selftests/user_events/abi_test.c b/tools/testing/selftests/user_events/abi_test.c
index 7288a05136ba..85892b3b719c 100644
--- a/tools/testing/selftests/user_events/abi_test.c
+++ b/tools/testing/selftests/user_events/abi_test.c
@@ -20,7 +20,7 @@
#include <string.h>
#include <asm/unistd.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "user_events_selftests.h"
const char *data_file = "/sys/kernel/tracing/user_events_data";
diff --git a/tools/testing/selftests/user_events/dyn_test.c b/tools/testing/selftests/user_events/dyn_test.c
index 54c9412f8dee..78e3c33f4015 100644
--- a/tools/testing/selftests/user_events/dyn_test.c
+++ b/tools/testing/selftests/user_events/dyn_test.c
@@ -14,7 +14,7 @@
#include <sys/stat.h>
#include <unistd.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "user_events_selftests.h"
const char *dyn_file = "/sys/kernel/tracing/dynamic_events";
diff --git a/tools/testing/selftests/user_events/ftrace_test.c b/tools/testing/selftests/user_events/ftrace_test.c
index 0bb46793dcd4..decce06b9ba8 100644
--- a/tools/testing/selftests/user_events/ftrace_test.c
+++ b/tools/testing/selftests/user_events/ftrace_test.c
@@ -15,7 +15,7 @@
#include <sys/uio.h>
#include <unistd.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "user_events_selftests.h"
const char *data_file = "/sys/kernel/tracing/user_events_data";
diff --git a/tools/testing/selftests/user_events/perf_test.c b/tools/testing/selftests/user_events/perf_test.c
index 68625362add2..cafec0e52eb3 100644
--- a/tools/testing/selftests/user_events/perf_test.c
+++ b/tools/testing/selftests/user_events/perf_test.c
@@ -16,7 +16,7 @@
#include <unistd.h>
#include <asm/unistd.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "user_events_selftests.h"
const char *data_file = "/sys/kernel/tracing/user_events_data";
diff --git a/tools/testing/selftests/user_events/user_events_selftests.h b/tools/testing/selftests/user_events/user_events_selftests.h
index e1c3c063c031..3b5d37e46f8a 100644
--- a/tools/testing/selftests/user_events/user_events_selftests.h
+++ b/tools/testing/selftests/user_events/user_events_selftests.h
@@ -9,7 +9,7 @@
#include <unistd.h>
#include <errno.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static inline void tracefs_unmount(void)
{
diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c
index 238d609a457a..c620317eaeea 100644
--- a/tools/testing/selftests/vDSO/vdso_test_abi.c
+++ b/tools/testing/selftests/vDSO/vdso_test_abi.c
@@ -18,7 +18,7 @@
#include <unistd.h>
#include <sys/syscall.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "vdso_config.h"
#include "vdso_call.h"
#include "parse_vdso.h"
diff --git a/tools/testing/selftests/vDSO/vdso_test_chacha.c b/tools/testing/selftests/vDSO/vdso_test_chacha.c
index 0aad682b12c8..9a5c9c05e09c 100644
--- a/tools/testing/selftests/vDSO/vdso_test_chacha.c
+++ b/tools/testing/selftests/vDSO/vdso_test_chacha.c
@@ -10,7 +10,7 @@
#include <string.h>
#include <stdint.h>
#include <stdbool.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#if defined(__aarch64__)
static bool cpu_has_capabilities(void)
diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c
index da651cf53c6c..055af95aa552 100644
--- a/tools/testing/selftests/vDSO/vdso_test_correctness.c
+++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c
@@ -21,7 +21,7 @@
#include "vdso_config.h"
#include "vdso_call.h"
-#include "../kselftest.h"
+#include "kselftest.h"
static const char **name;
diff --git a/tools/testing/selftests/vDSO/vdso_test_getcpu.c b/tools/testing/selftests/vDSO/vdso_test_getcpu.c
index cdeaed45fb26..bea8ad54da11 100644
--- a/tools/testing/selftests/vDSO/vdso_test_getcpu.c
+++ b/tools/testing/selftests/vDSO/vdso_test_getcpu.c
@@ -11,7 +11,7 @@
#include <sys/auxv.h>
#include <sys/time.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "parse_vdso.h"
#include "vdso_config.h"
#include "vdso_call.h"
diff --git a/tools/testing/selftests/vDSO/vdso_test_getrandom.c b/tools/testing/selftests/vDSO/vdso_test_getrandom.c
index dd1132508a0d..ef402001e898 100644
--- a/tools/testing/selftests/vDSO/vdso_test_getrandom.c
+++ b/tools/testing/selftests/vDSO/vdso_test_getrandom.c
@@ -23,7 +23,7 @@
#include <linux/random.h>
#include <linux/ptrace.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "parse_vdso.h"
#include "vdso_config.h"
#include "vdso_call.h"
diff --git a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c
index 9ce795b806f0..912edadad92c 100644
--- a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c
+++ b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c
@@ -16,7 +16,7 @@
#include <sys/time.h>
#endif
-#include "../kselftest.h"
+#include "kselftest.h"
#include "parse_vdso.h"
#include "vdso_config.h"
#include "vdso_call.h"
diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
index 13fdb4b0b10f..8e34b9bfc96b 100644
--- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c
+++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
@@ -19,7 +19,7 @@
#include <linux/types.h>
#include <linux/vfio.h>
-#include "../../../kselftest.h"
+#include "kselftest.h"
#include <libvfio.h>
#define PCI_SYSFS_PATH "/sys/bus/pci/devices"
diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_driver.c b/tools/testing/selftests/vfio/lib/vfio_pci_driver.c
index ca0e25efbfa1..6827f4a6febe 100644
--- a/tools/testing/selftests/vfio/lib/vfio_pci_driver.c
+++ b/tools/testing/selftests/vfio/lib/vfio_pci_driver.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-#include "../../../kselftest.h"
+#include "kselftest.h"
#include <libvfio.h>
#ifdef __x86_64__
diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
index 5397822c3dd4..16eba2ecca47 100644
--- a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
+++ b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
@@ -12,7 +12,7 @@
#include <libvfio.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
static const char *device_bdf;
diff --git a/tools/testing/selftests/vfio/vfio_iommufd_setup_test.c b/tools/testing/selftests/vfio/vfio_iommufd_setup_test.c
index caf1c6291f3d..17017ed3beac 100644
--- a/tools/testing/selftests/vfio/vfio_iommufd_setup_test.c
+++ b/tools/testing/selftests/vfio/vfio_iommufd_setup_test.c
@@ -11,7 +11,7 @@
#include <unistd.h>
#include <libvfio.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
static const char iommu_dev_path[] = "/dev/iommu";
static const char *cdev_path;
diff --git a/tools/testing/selftests/vfio/vfio_pci_device_test.c b/tools/testing/selftests/vfio/vfio_pci_device_test.c
index ecbb669b3765..7c0fe8ce3a61 100644
--- a/tools/testing/selftests/vfio/vfio_pci_device_test.c
+++ b/tools/testing/selftests/vfio/vfio_pci_device_test.c
@@ -12,7 +12,7 @@
#include <libvfio.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
static const char *device_bdf;
diff --git a/tools/testing/selftests/vfio/vfio_pci_driver_test.c b/tools/testing/selftests/vfio/vfio_pci_driver_test.c
index f0ca8310d6a8..afa0480ddd9b 100644
--- a/tools/testing/selftests/vfio/vfio_pci_driver_test.c
+++ b/tools/testing/selftests/vfio/vfio_pci_driver_test.c
@@ -7,7 +7,7 @@
#include <libvfio.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
static const char *device_bdf;
diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
index 936b18be07cf..0504c11c2de6 100644
--- a/tools/testing/selftests/wireguard/qemu/kernel.config
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
@@ -81,7 +81,7 @@ CONFIG_WQ_WATCHDOG=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
-CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC=1
CONFIG_PANIC_TIMEOUT=-1
CONFIG_STACKTRACE=y
CONFIG_EARLY_PRINTK=y
diff --git a/tools/testing/selftests/x86/corrupt_xstate_header.c b/tools/testing/selftests/x86/corrupt_xstate_header.c
index 93a89a5997ca..f4d67b050275 100644
--- a/tools/testing/selftests/x86/corrupt_xstate_header.c
+++ b/tools/testing/selftests/x86/corrupt_xstate_header.c
@@ -17,7 +17,7 @@
#include <stdint.h>
#include <sys/wait.h>
-#include "../kselftest.h" /* For __cpuid_count() */
+#include "kselftest.h" /* For __cpuid_count() */
#include "helpers.h"
static inline int xsave_enabled(void)
diff --git a/tools/testing/selftests/x86/helpers.h b/tools/testing/selftests/x86/helpers.h
index 6deaad035161..4c747a1278d9 100644
--- a/tools/testing/selftests/x86/helpers.h
+++ b/tools/testing/selftests/x86/helpers.h
@@ -7,7 +7,7 @@
#include <asm/processor-flags.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static inline unsigned long get_eflags(void)
{
diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c
index 0873b0e5f48b..1919fa6daec0 100644
--- a/tools/testing/selftests/x86/lam.c
+++ b/tools/testing/selftests/x86/lam.c
@@ -18,7 +18,7 @@
#include <sys/uio.h>
#include <linux/io_uring.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#ifndef __x86_64__
# error This test is 64-bit only
diff --git a/tools/testing/selftests/x86/syscall_numbering.c b/tools/testing/selftests/x86/syscall_numbering.c
index 41c42b7b54a6..ca0eca7b9dce 100644
--- a/tools/testing/selftests/x86/syscall_numbering.c
+++ b/tools/testing/selftests/x86/syscall_numbering.c
@@ -25,7 +25,7 @@
#include <sys/mman.h>
#include <linux/ptrace.h>
-#include "../kselftest.h"
+#include "kselftest.h"
/* Common system call numbers */
#define SYS_READ 0
diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c
index 94bee6e0c813..a5edf6c5f17e 100644
--- a/tools/testing/selftests/x86/test_mremap_vdso.c
+++ b/tools/testing/selftests/x86/test_mremap_vdso.c
@@ -20,7 +20,7 @@
#include <sys/auxv.h>
#include <sys/syscall.h>
#include <sys/wait.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define PAGE_SIZE 4096
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index 918eaec8bfbe..f1c3df642352 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -21,7 +21,7 @@
#include <sys/uio.h>
#include "helpers.h"
-#include "../kselftest.h"
+#include "kselftest.h"
#ifdef __x86_64__
#define TOTAL_TESTS 13
diff --git a/tools/testing/selftests/x86/xstate.h b/tools/testing/selftests/x86/xstate.h
index e91e3092b5d2..6ee816e7625a 100644
--- a/tools/testing/selftests/x86/xstate.h
+++ b/tools/testing/selftests/x86/xstate.h
@@ -4,7 +4,7 @@
#include <stdint.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define XSAVE_HDR_OFFSET 512
#define XSAVE_HDR_SIZE 64