-rw-r--r--  Documentation/admin-guide/device-mapper/dm-raid.rst | 80
-rw-r--r--  Documentation/admin-guide/device-mapper/verity.rst | 6
-rw-r--r--  MAINTAINERS | 1
-rw-r--r--  arch/alpha/include/asm/console.h | 4
-rw-r--r--  arch/alpha/include/asm/page.h | 4
-rw-r--r--  arch/alpha/include/asm/pal.h | 4
-rw-r--r--  arch/alpha/include/asm/thread_info.h | 8
-rw-r--r--  arch/alpha/include/uapi/asm/ioctls.h | 8
-rw-r--r--  arch/arm/Kconfig | 4
-rw-r--r--  arch/arm/include/asm/word-at-a-time.h | 10
-rw-r--r--  arch/arm/mm/alignment.c | 6
-rw-r--r--  arch/arm/mm/fault.c | 100
-rw-r--r--  arch/s390/Kconfig | 2
-rw-r--r--  arch/s390/boot/vmem.c | 16
-rw-r--r--  arch/s390/include/asm/bug.h | 5
-rw-r--r--  arch/s390/include/asm/page.h | 2
-rw-r--r--  arch/s390/include/asm/pci.h | 5
-rw-r--r--  arch/s390/mm/gmap_helpers.c | 9
-rw-r--r--  arch/s390/mm/pageattr.c | 2
-rw-r--r--  arch/s390/mm/vmem.c | 14
-rw-r--r--  arch/s390/pci/pci.c | 6
-rw-r--r--  arch/s390/pci/pci_bus.c | 18
-rw-r--r--  arch/s390/pci/pci_irq.c | 332
-rw-r--r--  drivers/md/Kconfig | 2
-rw-r--r--  drivers/md/dm-bufio.c | 10
-rw-r--r--  drivers/md/dm-core.h | 1
-rw-r--r--  drivers/md/dm-crypt.c | 117
-rw-r--r--  drivers/md/dm-ebs-target.c | 2
-rw-r--r--  drivers/md/dm-exception-store.h | 2
-rw-r--r--  drivers/md/dm-log-writes.c | 1
-rw-r--r--  drivers/md/dm-mpath.c | 63
-rw-r--r--  drivers/md/dm-pcache/cache.c | 13
-rw-r--r--  drivers/md/dm-pcache/cache_segment.c | 13
-rw-r--r--  drivers/md/dm-raid.c | 2
-rw-r--r--  drivers/md/dm-snap.c | 73
-rw-r--r--  drivers/md/dm-sysfs.c | 8
-rw-r--r--  drivers/md/dm-table.c | 4
-rw-r--r--  drivers/md/dm-thin.c | 19
-rw-r--r--  drivers/md/dm-vdo/action-manager.c | 2
-rw-r--r--  drivers/md/dm-vdo/admin-state.c | 75
-rw-r--r--  drivers/md/dm-vdo/block-map.c | 51
-rw-r--r--  drivers/md/dm-vdo/completion.c | 5
-rw-r--r--  drivers/md/dm-vdo/data-vio.c | 34
-rw-r--r--  drivers/md/dm-vdo/dedupe.c | 42
-rw-r--r--  drivers/md/dm-vdo/dm-vdo-target.c | 5
-rw-r--r--  drivers/md/dm-vdo/encodings.c | 26
-rw-r--r--  drivers/md/dm-vdo/flush.c | 6
-rw-r--r--  drivers/md/dm-vdo/funnel-workqueue.c | 7
-rw-r--r--  drivers/md/dm-vdo/io-submitter.c | 26
-rw-r--r--  drivers/md/dm-vdo/logical-zone.c | 20
-rw-r--r--  drivers/md/dm-vdo/packer.c | 15
-rw-r--r--  drivers/md/dm-vdo/physical-zone.c | 5
-rw-r--r--  drivers/md/dm-vdo/recovery-journal.c | 30
-rw-r--r--  drivers/md/dm-vdo/slab-depot.c | 96
-rw-r--r--  drivers/md/dm-vdo/vdo.c | 9
-rw-r--r--  drivers/md/dm-vdo/vdo.h | 4
-rw-r--r--  drivers/md/dm-vdo/vio.c | 3
-rw-r--r--  drivers/md/dm-vdo/vio.h | 6
-rw-r--r--  drivers/md/dm-verity-fec.c | 41
-rw-r--r--  drivers/md/dm-verity-fec.h | 10
-rw-r--r--  drivers/md/dm-verity-target.c | 209
-rw-r--r--  drivers/md/dm-verity.h | 52
-rw-r--r--  drivers/md/dm-zone.c | 3
-rw-r--r--  drivers/md/dm.c | 46
-rw-r--r--  drivers/regulator/core.c | 13
-rw-r--r--  drivers/regulator/fixed.c | 11
-rw-r--r--  drivers/regulator/spacemit-p1.c | 4
-rw-r--r--  drivers/s390/char/sclp_mem.c | 16
-rw-r--r--  drivers/s390/char/vmur.c | 8
-rw-r--r--  drivers/spi/spi-microchip-core-spi.c | 1
-rw-r--r--  include/linux/dma-mapping.h | 2
-rw-r--r--  include/linux/irqdesc.h | 6
-rw-r--r--  include/linux/restart_block.h | 2
-rw-r--r--  include/linux/slab.h | 7
-rw-r--r--  kernel/dma/pool.c | 2
-rw-r--r--  kernel/futex/waitwake.c | 9
-rw-r--r--  kernel/irq/irqdesc.c | 6
-rw-r--r--  mm/slab.h | 1
-rw-r--r--  mm/slab_common.c | 52
-rw-r--r--  mm/slub.c | 55
-rw-r--r--  tools/testing/selftests/futex/functional/.gitignore | 1
-rw-r--r--  tools/testing/selftests/futex/functional/Makefile | 3
-rw-r--r--  tools/testing/selftests/futex/functional/futex_numa_mpol.c | 5
-rw-r--r--  tools/testing/selftests/futex/functional/futex_wait.c | 8
-rw-r--r--  tools/testing/selftests/futex/functional/futex_waitv.c | 2
-rw-r--r--  tools/testing/selftests/futex/functional/robust_list.c | 552
86 files changed, 1826 insertions, 744 deletions
diff --git a/Documentation/admin-guide/device-mapper/dm-raid.rst b/Documentation/admin-guide/device-mapper/dm-raid.rst
index bb17e26e3c1b..e11f10764770 100644
--- a/Documentation/admin-guide/device-mapper/dm-raid.rst
+++ b/Documentation/admin-guide/device-mapper/dm-raid.rst
@@ -20,10 +20,10 @@ The target is named "raid" and it accepts the following parameters::
raid0 RAID0 striping (no resilience)
raid1 RAID1 mirroring
raid4 RAID4 with dedicated last parity disk
- raid5_n RAID5 with dedicated last parity disk supporting takeover
+ raid5_n RAID5 with dedicated last parity disk supporting takeover from/to raid1
Same as raid4
- - Transitory layout
+ - Transitory layout for takeover from/to raid1
raid5_la RAID5 left asymmetric
- rotating parity 0 with data continuation
@@ -48,8 +48,8 @@ The target is named "raid" and it accepts the following parameters::
raid6_n_6 RAID6 with dedicate parity disks
- parity and Q-syndrome on the last 2 disks;
- layout for takeover from/to raid4/raid5_n
- raid6_la_6 Same as "raid_la" plus dedicated last Q-syndrome disk
+ layout for takeover from/to raid0/raid4/raid5_n
+ raid6_la_6 Same as "raid_la" plus dedicated last Q-syndrome disk supporting takeover from/to raid5
- layout for takeover from raid5_la from/to raid6
raid6_ra_6 Same as "raid5_ra" dedicated last Q-syndrome disk
@@ -173,9 +173,9 @@ The target is named "raid" and it accepts the following parameters::
The delta_disks option value (-251 < N < +251) triggers
device removal (negative value) or device addition (positive
value) to any reshape supporting raid levels 4/5/6 and 10.
- RAID levels 4/5/6 allow for addition of devices (metadata
- and data device tuple), raid10_near and raid10_offset only
- allow for device addition. raid10_far does not support any
+ RAID levels 4/5/6 allow for addition and removal of devices
+ (metadata and data device tuple), raid10_near and raid10_offset
+ only allow for device addition. raid10_far does not support any
reshaping at all.
A minimum of devices have to be kept to enforce resilience,
which is 3 devices for raid4/5 and 4 devices for raid6.
@@ -372,6 +372,72 @@ to safely enable discard support for RAID 4/5/6:
'devices_handle_discards_safely'
+Takeover/Reshape Support
+------------------------
+The target natively supports these two types of MDRAID conversions:
+
+o Takeover: Converts an array from one RAID level to another
+
+o Reshape: Changes the internal layout while maintaining the current RAID level
+
+Each operation is only valid under specific constraints imposed by the existing array's layout and configuration.
+
+
+Takeover:
+linear -> raid1 with N >= 2 mirrors
+raid0 -> raid4 (add dedicated parity device)
+raid0 -> raid5 (add dedicated parity device)
+raid0 -> raid10 with near layout and N >= 2 mirror groups (raid0 stripes have to become first member within mirror groups)
+raid1 -> linear
+raid1 -> raid5 with 2 mirrors
+raid4 -> raid5 w/ rotating parity
+raid5 with dedicated parity device -> raid4
+raid5 -> raid6 (with dedicated Q-syndrome)
+raid6 (with dedicated Q-syndrome) -> raid5
+raid10 with near layout and even number of disks -> raid0 (select any in-sync device from each mirror group)
+
+Reshape:
+linear: not possible
+raid0: not possible
+raid1: change number of mirrors
+raid4: add and remove stripes (minimum 3), change stripesize
+raid5: add and remove stripes (minimum 3, special case 2 for raid1 takeover), change rotating parity algorithms, change stripesize
+raid6: add and remove stripes (minimum 4), change rotating syndrome algorithms, change stripesize
+raid10 near: add stripes (minimum 4), change stripesize, no stripe removal possible, change to offset layout
+raid10 offset: add stripes, change stripesize, no stripe removal possible, change to near layout
+raid10 far: not possible
+
+Table line examples:
+
+### raid1 -> raid5
+#
+# Takeover is limited to raid1 arrays with 2 devices.
+# The raid5 personality is able to map just 2 devices like raid1.
+# Reshape after the takeover to change to the full raid5 layout.
+
+ 0 1960886272 raid raid1 3 0 region_size 2048 2 /dev/dm-0 /dev/dm-1 /dev/dm-2 /dev/dm-3
+
+# dm-0 and dm-2 are e.g. 4 MiB metadata devices; dm-1 and dm-3 have to be at least 1960886272 sectors in size.
+#
+# Table line to takeover to raid5
+
+ 0 1960886272 raid raid5 3 0 region_size 2048 2 /dev/dm-0 /dev/dm-1 /dev/dm-2 /dev/dm-3
+
+# Add required out-of-place reshape space to the beginning of the given 2 data devices,
+# allocate another metadata/data device tuple with the same sizes for the parity space
+# and zero the first 4K of the metadata device.
+#
+# Example table of the out-of-place reshape space addition for one data device, e.g. dm-1
+
+ 0 8192 linear 8:0 0 1960903888 # <- must be free space segment
+ 8192 1960886272 linear 8:0 0 2048 # previous data segment
+
+# Mapping table for e.g. a raid5_rs reshape causing the size of the raid device to double once the reshape finishes.
+# Check the status output (e.g. "dmsetup status $RaidDev") for progress.
+
+ 0 $((2 * 1960886272)) raid raid5 7 0 region_size 2048 data_offset 8192 delta_disk 1 2 /dev/dm-0 /dev/dm-1 /dev/dm-2 /dev/dm-3
+
+
Version History
---------------
diff --git a/Documentation/admin-guide/device-mapper/verity.rst b/Documentation/admin-guide/device-mapper/verity.rst
index 8c3f1f967a3c..3ecab1cff9c6 100644
--- a/Documentation/admin-guide/device-mapper/verity.rst
+++ b/Documentation/admin-guide/device-mapper/verity.rst
@@ -236,8 +236,10 @@ is available at the cryptsetup project's wiki page
Status
======
-V (for Valid) is returned if every check performed so far was valid.
-If any check failed, C (for Corruption) is returned.
+1. V (for Valid) is returned if every check performed so far was valid.
+ If any check failed, C (for Corruption) is returned.
+2. Number of corrected blocks by Forward Error Correction.
+ '-' if Forward Error Correction is not enabled.
Example
=======
diff --git a/MAINTAINERS b/MAINTAINERS
index aff3e162180d..c9e416ba74c6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7225,6 +7225,7 @@ DEVICE-MAPPER (LVM)
M: Alasdair Kergon <agk@redhat.com>
M: Mike Snitzer <snitzer@kernel.org>
M: Mikulas Patocka <mpatocka@redhat.com>
+M: Benjamin Marzinski <bmarzins@redhat.com>
L: dm-devel@lists.linux.dev
S: Maintained
Q: http://patchwork.kernel.org/project/dm-devel/list/
diff --git a/arch/alpha/include/asm/console.h b/arch/alpha/include/asm/console.h
index 088b7b9eb15a..1cabdb6064bb 100644
--- a/arch/alpha/include/asm/console.h
+++ b/arch/alpha/include/asm/console.h
@@ -4,7 +4,7 @@
#include <uapi/asm/console.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern long callback_puts(long unit, const char *s, long length);
extern long callback_getc(long unit);
extern long callback_open_console(void);
@@ -26,5 +26,5 @@ struct crb_struct;
struct hwrpb_struct;
extern int callback_init_done;
extern void * callback_init(void *);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __AXP_CONSOLE_H */
diff --git a/arch/alpha/include/asm/page.h b/arch/alpha/include/asm/page.h
index 5ec4c77e432e..d2c6667d73e9 100644
--- a/arch/alpha/include/asm/page.h
+++ b/arch/alpha/include/asm/page.h
@@ -6,7 +6,7 @@
#include <asm/pal.h>
#include <vdso/page.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define STRICT_MM_TYPECHECKS
@@ -74,7 +74,7 @@ typedef struct page *pgtable_t;
#define PAGE_OFFSET 0xfffffc0000000000
#endif
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#define __pa(x) ((unsigned long) (x) - PAGE_OFFSET)
#define __va(x) ((void *)((unsigned long) (x) + PAGE_OFFSET))
diff --git a/arch/alpha/include/asm/pal.h b/arch/alpha/include/asm/pal.h
index db2b3b18b34c..799a64c05198 100644
--- a/arch/alpha/include/asm/pal.h
+++ b/arch/alpha/include/asm/pal.h
@@ -4,7 +4,7 @@
#include <uapi/asm/pal.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern void halt(void) __attribute__((noreturn));
#define __halt() __asm__ __volatile__ ("call_pal %0 #halt" : : "i" (PAL_halt))
@@ -183,5 +183,5 @@ qemu_get_vmtime(void)
return v0;
}
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ALPHA_PAL_H */
diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h
index 4a4d00b37986..98ccbca64984 100644
--- a/arch/alpha/include/asm/thread_info.h
+++ b/arch/alpha/include/asm/thread_info.h
@@ -4,14 +4,14 @@
#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/processor.h>
#include <asm/types.h>
#include <asm/hwrpb.h>
#include <asm/sysinfo.h>
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct thread_info {
struct pcb_struct pcb; /* palcode state */
@@ -44,7 +44,7 @@ register struct thread_info *__current_thread_info __asm__("$8");
register unsigned long *current_stack_pointer __asm__ ("$30");
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/* Thread information allocation. */
#define THREAD_SIZE_ORDER 1
@@ -110,7 +110,7 @@ register unsigned long *current_stack_pointer __asm__ ("$30");
put_user(res, (int __user *)(value)); \
})
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern void __save_fpu(void);
static inline void save_fpu(void)
diff --git a/arch/alpha/include/uapi/asm/ioctls.h b/arch/alpha/include/uapi/asm/ioctls.h
index 971311605288..a09d04b49cc6 100644
--- a/arch/alpha/include/uapi/asm/ioctls.h
+++ b/arch/alpha/include/uapi/asm/ioctls.h
@@ -23,10 +23,10 @@
#define TCSETSW _IOW('t', 21, struct termios)
#define TCSETSF _IOW('t', 22, struct termios)
-#define TCGETA _IOR('t', 23, struct termio)
-#define TCSETA _IOW('t', 24, struct termio)
-#define TCSETAW _IOW('t', 25, struct termio)
-#define TCSETAF _IOW('t', 28, struct termio)
+#define TCGETA 0x40127417
+#define TCSETA 0x80127418
+#define TCSETAW 0x80127419
+#define TCSETAF 0x8012741c
#define TCSBRK _IO('t', 29)
#define TCXONC _IO('t', 30)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index ff61891abe53..fa83c040ee2d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -82,7 +82,7 @@ config ARM
select HAS_IOPORT
select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT
select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
- select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
+ select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && (!PREEMPT_RT || !SMP)
select HAVE_ARCH_KFENCE if MMU && !XIP_KERNEL
select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL
@@ -1213,7 +1213,7 @@ config HIGHMEM
config HIGHPTE
bool "Allocate 2nd-level pagetables from highmem" if EXPERT
- depends on HIGHMEM
+ depends on HIGHMEM && !PREEMPT_RT
default y
help
The VM uses one page of physical memory for each page table.
diff --git a/arch/arm/include/asm/word-at-a-time.h b/arch/arm/include/asm/word-at-a-time.h
index f9a3897b06e7..5023f98d8293 100644
--- a/arch/arm/include/asm/word-at-a-time.h
+++ b/arch/arm/include/asm/word-at-a-time.h
@@ -67,7 +67,7 @@ static inline unsigned long find_zero(unsigned long mask)
*/
static inline unsigned long load_unaligned_zeropad(const void *addr)
{
- unsigned long ret, offset;
+ unsigned long ret, tmp;
/* Load word from unaligned pointer addr */
asm(
@@ -75,9 +75,9 @@ static inline unsigned long load_unaligned_zeropad(const void *addr)
"2:\n"
" .pushsection .text.fixup,\"ax\"\n"
" .align 2\n"
- "3: and %1, %2, #0x3\n"
- " bic %2, %2, #0x3\n"
- " ldr %0, [%2]\n"
+ "3: bic %1, %2, #0x3\n"
+ " ldr %0, [%1]\n"
+ " and %1, %2, #0x3\n"
" lsl %1, %1, #0x3\n"
#ifndef __ARMEB__
" lsr %0, %0, %1\n"
@@ -90,7 +90,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr)
" .align 3\n"
" .long 1b, 3b\n"
" .popsection"
- : "=&r" (ret), "=&r" (offset)
+ : "=&r" (ret), "=&r" (tmp)
: "r" (addr), "Qo" (*(unsigned long *)addr));
return ret;
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 3c6ddb1afdc4..812380f30ae3 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -19,10 +19,11 @@
#include <linux/init.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>
+#include <linux/unaligned.h>
#include <asm/cp15.h>
#include <asm/system_info.h>
-#include <linux/unaligned.h>
+#include <asm/system_misc.h>
#include <asm/opcodes.h>
#include "fault.h"
@@ -809,6 +810,9 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
int thumb2_32b = 0;
int fault;
+ if (addr >= TASK_SIZE && user_mode(regs))
+ harden_branch_predictor();
+
if (interrupts_enabled(regs))
local_irq_enable();
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 2bc828a1940c..ed4330cc3f4e 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -128,6 +128,19 @@ static inline bool is_translation_fault(unsigned int fsr)
return false;
}
+static inline bool is_permission_fault(unsigned int fsr)
+{
+ int fs = fsr_fs(fsr);
+#ifdef CONFIG_ARM_LPAE
+ if ((fs & FS_MMU_NOLL_MASK) == FS_PERM_NOLL)
+ return true;
+#else
+ if (fs == FS_L1_PERM || fs == FS_L2_PERM)
+ return true;
+#endif
+ return false;
+}
+
static void die_kernel_fault(const char *msg, struct mm_struct *mm,
unsigned long addr, unsigned int fsr,
struct pt_regs *regs)
@@ -162,6 +175,8 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
*/
if (addr < PAGE_SIZE) {
msg = "NULL pointer dereference";
+ } else if (is_permission_fault(fsr) && fsr & FSR_LNX_PF) {
+ msg = "execution of memory";
} else {
if (is_translation_fault(fsr) &&
kfence_handle_page_fault(addr, is_write_fault(fsr), regs))
@@ -183,9 +198,6 @@ __do_user_fault(unsigned long addr, unsigned int fsr, unsigned int sig,
{
struct task_struct *tsk = current;
- if (addr > TASK_SIZE)
- harden_branch_predictor();
-
#ifdef CONFIG_DEBUG_USER
if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) ||
((user_debug & UDBG_BUS) && (sig == SIGBUS))) {
@@ -225,19 +237,6 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
}
#ifdef CONFIG_MMU
-static inline bool is_permission_fault(unsigned int fsr)
-{
- int fs = fsr_fs(fsr);
-#ifdef CONFIG_ARM_LPAE
- if ((fs & FS_MMU_NOLL_MASK) == FS_PERM_NOLL)
- return true;
-#else
- if (fs == FS_L1_PERM || fs == FS_L2_PERM)
- return true;
-#endif
- return false;
-}
-
#ifdef CONFIG_CPU_TTBR0_PAN
static inline bool ttbr0_usermode_access_allowed(struct pt_regs *regs)
{
@@ -260,6 +259,37 @@ static inline bool ttbr0_usermode_access_allowed(struct pt_regs *regs)
#endif
static int __kprobes
+do_kernel_address_page_fault(struct mm_struct *mm, unsigned long addr,
+ unsigned int fsr, struct pt_regs *regs)
+{
+ if (user_mode(regs)) {
+ /*
+ * Fault from user mode for a kernel space address. User mode
+ * should not be faulting in kernel space, which includes the
+ * vector/khelper page. Handle the branch predictor hardening
+ * while interrupts are still disabled, then send a SIGSEGV.
+ */
+ harden_branch_predictor();
+ __do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
+ } else {
+ /*
+ * Fault from kernel mode. Enable interrupts if they were
+ * enabled in the parent context. Section (upper page table)
+ * translation faults are handled via do_translation_fault(),
+ * so we will only get here for a non-present kernel space
+ * PTE or PTE permission fault. This may happen in exceptional
+ * circumstances and need the fixup tables to be walked.
+ */
+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
+ __do_kernel_fault(mm, addr, fsr, regs);
+ }
+
+ return 0;
+}
+
+static int __kprobes
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
struct mm_struct *mm = current->mm;
@@ -272,6 +302,12 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
if (kprobe_page_fault(regs, fsr))
return 0;
+ /*
+ * Handle kernel addresses faults separately, which avoids touching
+ * the mmap lock from contexts that are not able to sleep.
+ */
+ if (addr >= TASK_SIZE)
+ return do_kernel_address_page_fault(mm, addr, fsr, regs);
/* Enable interrupts if they were enabled in the parent context. */
if (interrupts_enabled(regs))
@@ -448,16 +484,20 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
* We enter here because the first level page table doesn't contain
* a valid entry for the address.
*
- * If the address is in kernel space (>= TASK_SIZE), then we are
- * probably faulting in the vmalloc() area.
+ * If this is a user address (addr < TASK_SIZE), we handle this as a
+ * normal page fault. This leaves the remainder of the function to handle
+ * kernel address translation faults.
+ *
+ * Since user mode is not permitted to access kernel addresses, pass these
+ * directly to do_kernel_address_page_fault() to handle.
*
- * If the init_task's first level page tables contains the relevant
- * entry, we copy the it to this task. If not, we send the process
- * a signal, fixup the exception, or oops the kernel.
+ * Otherwise, we're probably faulting in the vmalloc() area, so try to fix
+ * that up. Note that we must not take any locks or enable interrupts in
+ * this case.
*
- * NOTE! We MUST NOT take any locks for this case. We may be in an
- * interrupt or a critical region, and should only copy the information
- * from the master page table, nothing more.
+ * If vmalloc() fixup fails, that means the non-leaf page tables did not
+ * contain an entry for this address, so handle this via
+ * do_kernel_address_page_fault().
*/
#ifdef CONFIG_MMU
static int __kprobes
@@ -523,7 +563,8 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
return 0;
bad_area:
- do_bad_area(addr, fsr, regs);
+ do_kernel_address_page_fault(current->mm, addr, fsr, regs);
+
return 0;
}
#else /* CONFIG_MMU */
@@ -543,7 +584,16 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
static int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
+ /*
+ * If this is a kernel address, but from user mode, then userspace
+ * is trying bad stuff. Invoke the branch predictor handling.
+ * Interrupts are disabled here.
+ */
+ if (addr >= TASK_SIZE && user_mode(regs))
+ harden_branch_predictor();
+
do_bad_area(addr, fsr, regs);
+
return 0;
}
#endif /* CONFIG_ARM_LPAE */
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 938e5df75b2d..0e5fad5f06ca 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -238,6 +238,7 @@ config S390
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_POSIX_CPU_TIMERS_TASK_WORK
select HAVE_PREEMPT_DYNAMIC_KEY
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE
@@ -254,6 +255,7 @@ config S390
select HOTPLUG_SMT
select IOMMU_HELPER if PCI
select IOMMU_SUPPORT if PCI
+ select IRQ_MSI_LIB if PCI
select KASAN_VMALLOC if KASAN
select LOCK_MM_AND_FIND_VMA
select MMU_GATHER_MERGE_VMAS
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index fbe64ffdfb96..7d6cc4c85af0 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -244,22 +244,10 @@ static void *boot_crst_alloc(unsigned long val)
static pte_t *boot_pte_alloc(void)
{
- static void *pte_leftover;
pte_t *pte;
- /*
- * handling pte_leftovers this way helps to avoid memory fragmentation
- * during POPULATE_KASAN_MAP_SHADOW when EDAT is off
- */
- if (!pte_leftover) {
- pte_leftover = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
- pte = pte_leftover + _PAGE_TABLE_SIZE;
- __arch_set_page_dat(pte, 1);
- } else {
- pte = pte_leftover;
- pte_leftover = NULL;
- }
-
+ pte = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
+ __arch_set_page_dat(pte, 1);
memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
return pte;
}
diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h
index acb4b13d98c5..ee9221bb5d18 100644
--- a/arch/s390/include/asm/bug.h
+++ b/arch/s390/include/asm/bug.h
@@ -4,11 +4,14 @@
#include <linux/stringify.h>
+#ifdef CONFIG_BUG
+
#ifndef CONFIG_DEBUG_BUGVERBOSE
#define _BUGVERBOSE_LOCATION(file, line)
#else
#define __BUGVERBOSE_LOCATION(file, line) \
.pushsection .rodata.str, "aMS", @progbits, 1; \
+ .align 2; \
10002: .ascii file "\0"; \
.popsection; \
\
@@ -52,6 +55,8 @@ do { \
#define HAVE_ARCH_BUG
+#endif /* CONFIG_BUG */
+
#include <asm-generic/bug.h>
#endif /* _ASM_S390_BUG_H */
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 9240a363c893..c1d63b613bf9 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -166,6 +166,8 @@ static inline int page_reset_referenced(unsigned long addr)
return CC_TRANSFORM(cc);
}
+int split_pud_page(pud_t *pudp, unsigned long addr);
+
/* Bits int the storage key */
#define _PAGE_CHANGED 0x02 /* HW changed bit */
#define _PAGE_REFERENCED 0x04 /* HW referenced bit */
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index a32f465ecf73..c0ff19dab580 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -5,6 +5,7 @@
#include <linux/pci.h>
#include <linux/mutex.h>
#include <linux/iommu.h>
+#include <linux/irqdomain.h>
#include <linux/pci_hotplug.h>
#include <asm/pci_clp.h>
#include <asm/pci_debug.h>
@@ -109,6 +110,7 @@ struct zpci_bus {
struct list_head resources;
struct list_head bus_next;
struct resource bus_resource;
+ struct irq_domain *msi_parent_domain;
int topo; /* TID if topo_is_tid, PCHID otherwise */
int domain_nr;
u8 multifunction : 1;
@@ -310,6 +312,9 @@ int zpci_dma_exit_device(struct zpci_dev *zdev);
/* IRQ */
int __init zpci_irq_init(void);
void __init zpci_irq_exit(void);
+int zpci_set_irq(struct zpci_dev *zdev);
+int zpci_create_parent_msi_domain(struct zpci_bus *zbus);
+void zpci_remove_parent_msi_domain(struct zpci_bus *zbus);
/* FMB */
int zpci_fmb_enable_device(struct zpci_dev *);
diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c
index 549f14ad08af..d41b19925a5a 100644
--- a/arch/s390/mm/gmap_helpers.c
+++ b/arch/s390/mm/gmap_helpers.c
@@ -47,6 +47,7 @@ static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry)
void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
{
struct vm_area_struct *vma;
+ unsigned long pgstev;
spinlock_t *ptl;
pgste_t pgste;
pte_t *ptep;
@@ -65,9 +66,13 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
if (pte_swap(*ptep)) {
preempt_disable();
pgste = pgste_get_lock(ptep);
+ pgstev = pgste_val(pgste);
- ptep_zap_softleaf_entry(mm, softleaf_from_pte(*ptep));
- pte_clear(mm, vmaddr, ptep);
+ if ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
+ (pgstev & _PGSTE_GPS_ZERO)) {
+ ptep_zap_softleaf_entry(mm, softleaf_from_pte(*ptep));
+ pte_clear(mm, vmaddr, ptep);
+ }
pgste_set_unlock(ptep, pgste);
preempt_enable();
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 3042647c9dbf..d3ce04a4b248 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -204,7 +204,7 @@ static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end,
return rc;
}
-static int split_pud_page(pud_t *pudp, unsigned long addr)
+int split_pud_page(pud_t *pudp, unsigned long addr)
{
unsigned long pmd_addr, prot;
pmd_t *pm_dir, *pmdp;
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index d96587b84e81..eeadff45e0e1 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -330,10 +330,14 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
if (pud_leaf(*pud)) {
if (IS_ALIGNED(addr, PUD_SIZE) &&
IS_ALIGNED(next, PUD_SIZE)) {
+ if (!direct)
+ vmem_free_pages(pud_deref(*pud), get_order(PUD_SIZE), altmap);
pud_clear(pud);
pages++;
+ continue;
+ } else {
+ split_pud_page(pud, addr & PUD_MASK);
}
- continue;
}
} else if (pud_none(*pud)) {
if (IS_ALIGNED(addr, PUD_SIZE) &&
@@ -433,9 +437,15 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add,
if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
return -EINVAL;
- /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+ /* Don't mess with any tables not fully in 1:1 mapping, vmemmap & kasan area */
+#ifdef CONFIG_KASAN
+ if (WARN_ON_ONCE(!(start >= KASAN_SHADOW_START && end <= KASAN_SHADOW_END) &&
+ end > __abs_lowcore))
+ return -EINVAL;
+#else
if (WARN_ON_ONCE(end > __abs_lowcore))
return -EINVAL;
+#endif
for (addr = start; addr < end; addr = next) {
next = pgd_addr_end(addr, end);
pgd = pgd_offset_k(addr);
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 93d2c9c780fc..5a6ace9d875a 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -708,6 +708,12 @@ int zpci_reenable_device(struct zpci_dev *zdev)
if (rc)
return rc;
+ if (zdev->msi_nr_irqs > 0) {
+ rc = zpci_set_irq(zdev);
+ if (rc)
+ return rc;
+ }
+
rc = zpci_iommu_register_ioat(zdev, &status);
if (rc)
zpci_disable_device(zdev);
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index 72adc8f6e94f..66c4bd888b29 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -14,6 +14,7 @@
#include <linux/err.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
+#include <linux/irqdomain.h>
#include <linux/jump_label.h>
#include <linux/pci.h>
#include <linux/printk.h>
@@ -198,19 +199,27 @@ static int zpci_bus_create_pci_bus(struct zpci_bus *zbus, struct zpci_dev *fr, s
zbus->multifunction = zpci_bus_is_multifunction_root(fr);
zbus->max_bus_speed = fr->max_bus_speed;
+ if (zpci_create_parent_msi_domain(zbus))
+ goto out_free_domain;
+
/*
* Note that the zbus->resources are taken over and zbus->resources
* is empty after a successful call
*/
bus = pci_create_root_bus(NULL, ZPCI_BUS_NR, ops, zbus, &zbus->resources);
- if (!bus) {
- zpci_free_domain(zbus->domain_nr);
- return -EFAULT;
- }
+ if (!bus)
+ goto out_remove_msi_domain;
zbus->bus = bus;
+ dev_set_msi_domain(&zbus->bus->dev, zbus->msi_parent_domain);
return 0;
+
+out_remove_msi_domain:
+ zpci_remove_parent_msi_domain(zbus);
+out_free_domain:
+ zpci_free_domain(zbus->domain_nr);
+ return -ENOMEM;
}
static void zpci_bus_release(struct kref *kref)
@@ -231,6 +240,7 @@ static void zpci_bus_release(struct kref *kref)
mutex_lock(&zbus_list_lock);
list_del(&zbus->bus_next);
mutex_unlock(&zbus_list_lock);
+ zpci_remove_parent_msi_domain(zbus);
kfree(zbus);
}
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index 2a06df8c2498..e9dd45f3c09d 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -6,6 +6,7 @@
#include <linux/kernel_stat.h>
#include <linux/pci.h>
#include <linux/msi.h>
+#include <linux/irqchip/irq-msi-lib.h>
#include <linux/smp.h>
#include <asm/isc.h>
@@ -97,7 +98,7 @@ static int zpci_clear_directed_irq(struct zpci_dev *zdev)
}
/* Register adapter interruptions */
-static int zpci_set_irq(struct zpci_dev *zdev)
+int zpci_set_irq(struct zpci_dev *zdev)
{
int rc;
@@ -125,27 +126,53 @@ static int zpci_clear_irq(struct zpci_dev *zdev)
static int zpci_set_irq_affinity(struct irq_data *data, const struct cpumask *dest,
bool force)
{
- struct msi_desc *entry = irq_data_get_msi_desc(data);
- struct msi_msg msg = entry->msg;
- int cpu_addr = smp_cpu_get_cpu_address(cpumask_first(dest));
+ irq_data_update_affinity(data, dest);
+ return IRQ_SET_MASK_OK;
+}
- msg.address_lo &= 0xff0000ff;
- msg.address_lo |= (cpu_addr << 8);
- pci_write_msi_msg(data->irq, &msg);
+/*
+ * Encode the hwirq number for the parent domain. The encoding must be unique
+ * for each IRQ of each device in the parent domain, so it uses the devfn to
+ * identify the device and the msi_index to identify the IRQ within that device.
+ */
+static inline u32 zpci_encode_hwirq(u8 devfn, u16 msi_index)
+{
+ return (devfn << 16) | msi_index;
+}
- return IRQ_SET_MASK_OK;
+static inline u16 zpci_decode_hwirq_msi_index(irq_hw_number_t hwirq)
+{
+ return hwirq & 0xffff;
+}
+
+static void zpci_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+ struct msi_desc *desc = irq_data_get_msi_desc(data);
+ struct zpci_dev *zdev = to_zpci_dev(desc->dev);
+
+ if (irq_delivery == DIRECTED) {
+ int cpu = cpumask_first(irq_data_get_affinity_mask(data));
+
+ msg->address_lo = zdev->msi_addr & 0xff0000ff;
+ msg->address_lo |= (smp_cpu_get_cpu_address(cpu) << 8);
+ } else {
+ msg->address_lo = zdev->msi_addr & 0xffffffff;
+ }
+ msg->address_hi = zdev->msi_addr >> 32;
+ msg->data = zpci_decode_hwirq_msi_index(data->hwirq);
}
static struct irq_chip zpci_irq_chip = {
.name = "PCI-MSI",
- .irq_unmask = pci_msi_unmask_irq,
- .irq_mask = pci_msi_mask_irq,
+ .irq_compose_msi_msg = zpci_compose_msi_msg,
};
static void zpci_handle_cpu_local_irq(bool rescan)
{
struct airq_iv *dibv = zpci_ibv[smp_processor_id()];
union zpci_sic_iib iib = {{0}};
+ struct irq_domain *msi_domain;
+ irq_hw_number_t hwirq;
unsigned long bit;
int irqs_on = 0;
@@ -163,7 +190,9 @@ static void zpci_handle_cpu_local_irq(bool rescan)
continue;
}
inc_irq_stat(IRQIO_MSI);
- generic_handle_irq(airq_iv_get_data(dibv, bit));
+ hwirq = airq_iv_get_data(dibv, bit);
+ msi_domain = (struct irq_domain *)airq_iv_get_ptr(dibv, bit);
+ generic_handle_domain_irq(msi_domain, hwirq);
}
}
@@ -228,6 +257,8 @@ static void zpci_floating_irq_handler(struct airq_struct *airq,
struct tpi_info *tpi_info)
{
union zpci_sic_iib iib = {{0}};
+ struct irq_domain *msi_domain;
+ irq_hw_number_t hwirq;
unsigned long si, ai;
struct airq_iv *aibv;
int irqs_on = 0;
@@ -255,7 +286,9 @@ static void zpci_floating_irq_handler(struct airq_struct *airq,
break;
inc_irq_stat(IRQIO_MSI);
airq_iv_lock(aibv, ai);
- generic_handle_irq(airq_iv_get_data(aibv, ai));
+ hwirq = airq_iv_get_data(aibv, ai);
+ msi_domain = (struct irq_domain *)airq_iv_get_ptr(aibv, ai);
+ generic_handle_domain_irq(msi_domain, hwirq);
airq_iv_unlock(aibv, ai);
}
}
@@ -277,7 +310,9 @@ static int __alloc_airq(struct zpci_dev *zdev, int msi_vecs,
zdev->aisb = *bit;
/* Create adapter interrupt vector */
- zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK, NULL);
+ zdev->aibv = airq_iv_create(msi_vecs,
+ AIRQ_IV_PTR | AIRQ_IV_DATA | AIRQ_IV_BITLOCK,
+ NULL);
if (!zdev->aibv)
return -ENOMEM;
@@ -289,146 +324,220 @@ static int __alloc_airq(struct zpci_dev *zdev, int msi_vecs,
return 0;
}
-int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+bool arch_restore_msi_irqs(struct pci_dev *pdev)
{
- unsigned int hwirq, msi_vecs, irqs_per_msi, i, cpu;
struct zpci_dev *zdev = to_zpci(pdev);
- struct msi_desc *msi;
- struct msi_msg msg;
- unsigned long bit;
- int cpu_addr;
- int rc, irq;
+ zpci_set_irq(zdev);
+ return true;
+}
+
+static struct airq_struct zpci_airq = {
+ .handler = zpci_floating_irq_handler,
+ .isc = PCI_ISC,
+};
+
+static void zpci_msi_teardown_directed(struct zpci_dev *zdev)
+{
+ airq_iv_free(zpci_ibv[0], zdev->msi_first_bit, zdev->max_msi);
+ zdev->msi_first_bit = -1U;
+ zdev->msi_nr_irqs = 0;
+}
+
+static void zpci_msi_teardown_floating(struct zpci_dev *zdev)
+{
+ airq_iv_release(zdev->aibv);
+ zdev->aibv = NULL;
+ airq_iv_free_bit(zpci_sbv, zdev->aisb);
zdev->aisb = -1UL;
zdev->msi_first_bit = -1U;
+ zdev->msi_nr_irqs = 0;
+}
+
+static void zpci_msi_teardown(struct irq_domain *domain, msi_alloc_info_t *arg)
+{
+ struct zpci_dev *zdev = to_zpci_dev(domain->dev);
+
+ zpci_clear_irq(zdev);
+ if (irq_delivery == DIRECTED)
+ zpci_msi_teardown_directed(zdev);
+ else
+ zpci_msi_teardown_floating(zdev);
+}
+
+static int zpci_msi_prepare(struct irq_domain *domain,
+ struct device *dev, int nvec,
+ msi_alloc_info_t *info)
+{
+ struct zpci_dev *zdev = to_zpci_dev(dev);
+ struct pci_dev *pdev = to_pci_dev(dev);
+ unsigned long bit;
+ int msi_vecs, rc;
msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
if (msi_vecs < nvec) {
- pr_info("%s requested %d irqs, allocate system limit of %d",
+ pr_info("%s requested %d IRQs, allocate system limit of %d\n",
pci_name(pdev), nvec, zdev->max_msi);
}
rc = __alloc_airq(zdev, msi_vecs, &bit);
- if (rc < 0)
+ if (rc) {
+ pr_err("Allocating adapter IRQs for %s failed\n", pci_name(pdev));
return rc;
+ }
- /*
- * Request MSI interrupts:
- * When using MSI, nvec_used interrupt sources and their irq
- * descriptors are controlled through one msi descriptor.
- * Thus the outer loop over msi descriptors shall run only once,
- * while two inner loops iterate over the interrupt vectors.
- * When using MSI-X, each interrupt vector/irq descriptor
- * is bound to exactly one msi descriptor (nvec_used is one).
- * So the inner loops are executed once, while the outer iterates
- * over the MSI-X descriptors.
- */
- hwirq = bit;
- msi_for_each_desc(msi, &pdev->dev, MSI_DESC_NOTASSOCIATED) {
- if (hwirq - bit >= msi_vecs)
- break;
- irqs_per_msi = min_t(unsigned int, msi_vecs, msi->nvec_used);
- irq = __irq_alloc_descs(-1, 0, irqs_per_msi, 0, THIS_MODULE,
- (irq_delivery == DIRECTED) ?
- msi->affinity : NULL);
- if (irq < 0)
- return -ENOMEM;
+ zdev->msi_first_bit = bit;
+ zdev->msi_nr_irqs = msi_vecs;
+ rc = zpci_set_irq(zdev);
+ if (rc) {
+ pr_err("Registering adapter IRQs for %s failed\n",
+ pci_name(pdev));
+
+ if (irq_delivery == DIRECTED)
+ zpci_msi_teardown_directed(zdev);
+ else
+ zpci_msi_teardown_floating(zdev);
+ return rc;
+ }
+ return 0;
+}
- for (i = 0; i < irqs_per_msi; i++) {
- rc = irq_set_msi_desc_off(irq, i, msi);
- if (rc)
- return rc;
- irq_set_chip_and_handler(irq + i, &zpci_irq_chip,
- handle_percpu_irq);
- }
+static int zpci_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *args)
+{
+ struct msi_desc *desc = ((msi_alloc_info_t *)args)->desc;
+ struct zpci_dev *zdev = to_zpci_dev(desc->dev);
+ struct zpci_bus *zbus = zdev->zbus;
+ unsigned int cpu, hwirq;
+ unsigned long bit;
+ int i;
- msg.data = hwirq - bit;
- if (irq_delivery == DIRECTED) {
- if (msi->affinity)
- cpu = cpumask_first(&msi->affinity->mask);
- else
- cpu = 0;
- cpu_addr = smp_cpu_get_cpu_address(cpu);
+ bit = zdev->msi_first_bit + desc->msi_index;
+ hwirq = zpci_encode_hwirq(zdev->devfn, desc->msi_index);
- msg.address_lo = zdev->msi_addr & 0xff0000ff;
- msg.address_lo |= (cpu_addr << 8);
+ if (desc->msi_index + nr_irqs > zdev->max_msi)
+ return -EINVAL;
+ for (i = 0; i < nr_irqs; i++) {
+ irq_domain_set_info(domain, virq + i, hwirq + i,
+ &zpci_irq_chip, zdev,
+ handle_percpu_irq, NULL, NULL);
+
+ if (irq_delivery == DIRECTED) {
for_each_possible_cpu(cpu) {
- for (i = 0; i < irqs_per_msi; i++)
- airq_iv_set_data(zpci_ibv[cpu],
- hwirq + i, irq + i);
+ airq_iv_set_ptr(zpci_ibv[cpu], bit + i,
+ (unsigned long)zbus->msi_parent_domain);
+ airq_iv_set_data(zpci_ibv[cpu], bit + i, hwirq + i);
}
} else {
- msg.address_lo = zdev->msi_addr & 0xffffffff;
- for (i = 0; i < irqs_per_msi; i++)
- airq_iv_set_data(zdev->aibv, hwirq + i, irq + i);
+ airq_iv_set_ptr(zdev->aibv, bit + i,
+ (unsigned long)zbus->msi_parent_domain);
+ airq_iv_set_data(zdev->aibv, bit + i, hwirq + i);
}
- msg.address_hi = zdev->msi_addr >> 32;
- pci_write_msi_msg(irq, &msg);
- hwirq += irqs_per_msi;
}
- zdev->msi_first_bit = bit;
- zdev->msi_nr_irqs = hwirq - bit;
-
- rc = zpci_set_irq(zdev);
- if (rc)
- return rc;
-
- return (zdev->msi_nr_irqs == nvec) ? 0 : zdev->msi_nr_irqs;
+ return 0;
}
-void arch_teardown_msi_irqs(struct pci_dev *pdev)
+static void zpci_msi_clear_airq(struct irq_data *d, int i)
{
- struct zpci_dev *zdev = to_zpci(pdev);
- struct msi_desc *msi;
- unsigned int i;
- int rc;
+ struct msi_desc *desc = irq_data_get_msi_desc(d);
+ struct zpci_dev *zdev = to_zpci_dev(desc->dev);
+ unsigned long bit;
+ unsigned int cpu;
+ u16 msi_index;
- /* Disable interrupts */
- rc = zpci_clear_irq(zdev);
- if (rc)
- return;
+ msi_index = zpci_decode_hwirq_msi_index(d->hwirq);
+ bit = zdev->msi_first_bit + msi_index;
- /* Release MSI interrupts */
- msi_for_each_desc(msi, &pdev->dev, MSI_DESC_ASSOCIATED) {
- for (i = 0; i < msi->nvec_used; i++) {
- irq_set_msi_desc(msi->irq + i, NULL);
- irq_free_desc(msi->irq + i);
+ if (irq_delivery == DIRECTED) {
+ for_each_possible_cpu(cpu) {
+ airq_iv_set_ptr(zpci_ibv[cpu], bit + i, 0);
+ airq_iv_set_data(zpci_ibv[cpu], bit + i, 0);
}
- msi->msg.address_lo = 0;
- msi->msg.address_hi = 0;
- msi->msg.data = 0;
- msi->irq = 0;
+ } else {
+ airq_iv_set_ptr(zdev->aibv, bit + i, 0);
+ airq_iv_set_data(zdev->aibv, bit + i, 0);
}
+}
- if (zdev->aisb != -1UL) {
- zpci_ibv[zdev->aisb] = NULL;
- airq_iv_free_bit(zpci_sbv, zdev->aisb);
- zdev->aisb = -1UL;
- }
- if (zdev->aibv) {
- airq_iv_release(zdev->aibv);
- zdev->aibv = NULL;
- }
+static void zpci_msi_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
+{
+ struct irq_data *d;
+ int i;
- if ((irq_delivery == DIRECTED) && zdev->msi_first_bit != -1U)
- airq_iv_free(zpci_ibv[0], zdev->msi_first_bit, zdev->msi_nr_irqs);
+ for (i = 0; i < nr_irqs; i++) {
+ d = irq_domain_get_irq_data(domain, virq + i);
+ zpci_msi_clear_airq(d, i);
+ irq_domain_reset_irq_data(d);
+ }
}
-bool arch_restore_msi_irqs(struct pci_dev *pdev)
+static const struct irq_domain_ops zpci_msi_domain_ops = {
+ .alloc = zpci_msi_domain_alloc,
+ .free = zpci_msi_domain_free,
+};
+
+static bool zpci_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
+ struct irq_domain *real_parent,
+ struct msi_domain_info *info)
{
- struct zpci_dev *zdev = to_zpci(pdev);
+ if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info))
+ return false;
+
+ info->ops->msi_prepare = zpci_msi_prepare;
+ info->ops->msi_teardown = zpci_msi_teardown;
- zpci_set_irq(zdev);
return true;
}
-static struct airq_struct zpci_airq = {
- .handler = zpci_floating_irq_handler,
- .isc = PCI_ISC,
+static struct msi_parent_ops zpci_msi_parent_ops = {
+ .supported_flags = MSI_GENERIC_FLAGS_MASK |
+ MSI_FLAG_PCI_MSIX |
+ MSI_FLAG_MULTI_PCI_MSI,
+ .required_flags = MSI_FLAG_USE_DEF_DOM_OPS |
+ MSI_FLAG_USE_DEF_CHIP_OPS,
+ .init_dev_msi_info = zpci_init_dev_msi_info,
};
+int zpci_create_parent_msi_domain(struct zpci_bus *zbus)
+{
+ char fwnode_name[18];
+
+ snprintf(fwnode_name, sizeof(fwnode_name), "ZPCI_MSI_DOM_%04x", zbus->domain_nr);
+ struct irq_domain_info info = {
+ .fwnode = irq_domain_alloc_named_fwnode(fwnode_name),
+ .ops = &zpci_msi_domain_ops,
+ };
+
+ if (!info.fwnode) {
+ pr_err("Failed to allocate fwnode for MSI IRQ domain\n");
+ return -ENOMEM;
+ }
+
+ if (irq_delivery == FLOATING)
+ zpci_msi_parent_ops.required_flags |= MSI_FLAG_NO_AFFINITY;
+
+ zbus->msi_parent_domain = msi_create_parent_irq_domain(&info, &zpci_msi_parent_ops);
+ if (!zbus->msi_parent_domain) {
+ irq_domain_free_fwnode(info.fwnode);
+ pr_err("Failed to create MSI IRQ domain\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void zpci_remove_parent_msi_domain(struct zpci_bus *zbus)
+{
+ struct fwnode_handle *fn;
+
+ fn = zbus->msi_parent_domain->fwnode;
+ irq_domain_remove(zbus->msi_parent_domain);
+ irq_domain_free_fwnode(fn);
+}
+
static void __init cpu_enable_directed_irq(void *unused)
{
union zpci_sic_iib iib = {{0}};
@@ -465,6 +574,7 @@ static int __init zpci_directed_irq_init(void)
* is only done on the first vector.
*/
zpci_ibv[cpu] = airq_iv_create(cache_line_size() * BITS_PER_BYTE,
+ AIRQ_IV_PTR |
AIRQ_IV_DATA |
AIRQ_IV_CACHELINE |
(!cpu ? AIRQ_IV_ALLOC : 0), NULL);
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 104aa5355090..239c1744a926 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -299,6 +299,7 @@ config DM_CRYPT
select CRYPTO
select CRYPTO_CBC
select CRYPTO_ESSIV
+ select CRYPTO_LIB_MD5 # needed by lmk IV mode
help
This device-mapper target allows you to create a device that
transparently encrypts the data on it. You'll need to activate
@@ -546,6 +547,7 @@ config DM_VERITY
depends on BLK_DEV_DM
select CRYPTO
select CRYPTO_HASH
+ select CRYPTO_LIB_SHA256
select DM_BUFIO
help
This device-mapper target creates a read-only device that
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index e6d28be11c5c..5235f3e4924b 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1374,7 +1374,7 @@ static void submit_io(struct dm_buffer *b, enum req_op op, unsigned short ioprio
{
unsigned int n_sectors;
sector_t sector;
- unsigned int offset, end;
+ unsigned int offset, end, align;
b->end_io = end_io;
@@ -1388,9 +1388,11 @@ static void submit_io(struct dm_buffer *b, enum req_op op, unsigned short ioprio
b->c->write_callback(b);
offset = b->write_start;
end = b->write_end;
- offset &= -DM_BUFIO_WRITE_ALIGN;
- end += DM_BUFIO_WRITE_ALIGN - 1;
- end &= -DM_BUFIO_WRITE_ALIGN;
+ align = max(DM_BUFIO_WRITE_ALIGN,
+ bdev_physical_block_size(b->c->bdev));
+ offset &= -align;
+ end += align - 1;
+ end &= -align;
if (unlikely(end > b->c->block_size))
end = b->c->block_size;
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index a3c9f74fe2dc..1cda8618d74d 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -139,7 +139,6 @@ struct mapped_device {
struct srcu_struct io_barrier;
#ifdef CONFIG_BLK_DEV_ZONED
- unsigned int nr_zones;
void *zone_revalidate_map;
struct task_struct *revalidate_map_task;
#endif
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 5ef43231fe77..79704fbc523b 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -21,6 +21,7 @@
#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/crypto.h>
+#include <linux/fips.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/backing-dev.h>
@@ -120,7 +121,6 @@ struct iv_benbi_private {
#define LMK_SEED_SIZE 64 /* hash + 0 */
struct iv_lmk_private {
- struct crypto_shash *hash_tfm;
u8 *seed;
};
@@ -254,22 +254,15 @@ static unsigned int max_write_size = 0;
module_param(max_write_size, uint, 0644);
MODULE_PARM_DESC(max_write_size, "Maximum size of a write request");
-static unsigned get_max_request_sectors(struct dm_target *ti, struct bio *bio)
+static unsigned get_max_request_sectors(struct dm_target *ti, struct bio *bio, bool no_split)
{
struct crypt_config *cc = ti->private;
unsigned val, sector_align;
bool wrt = op_is_write(bio_op(bio));
- if (wrt) {
- /*
- * For zoned devices, splitting write operations creates the
- * risk of deadlocking queue freeze operations with zone write
- * plugging BIO work when the reminder of a split BIO is
- * issued. So always allow the entire BIO to proceed.
- */
- if (ti->emulate_zone_append)
- return bio_sectors(bio);
-
+ if (no_split) {
+ val = -1;
+ } else if (wrt) {
val = min_not_zero(READ_ONCE(max_write_size),
DM_CRYPT_DEFAULT_MAX_WRITE_SIZE);
} else {
@@ -465,10 +458,6 @@ static void crypt_iv_lmk_dtr(struct crypt_config *cc)
{
struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk;
- if (lmk->hash_tfm && !IS_ERR(lmk->hash_tfm))
- crypto_free_shash(lmk->hash_tfm);
- lmk->hash_tfm = NULL;
-
kfree_sensitive(lmk->seed);
lmk->seed = NULL;
}
@@ -483,11 +472,10 @@ static int crypt_iv_lmk_ctr(struct crypt_config *cc, struct dm_target *ti,
return -EINVAL;
}
- lmk->hash_tfm = crypto_alloc_shash("md5", 0,
- CRYPTO_ALG_ALLOCATES_MEMORY);
- if (IS_ERR(lmk->hash_tfm)) {
- ti->error = "Error initializing LMK hash";
- return PTR_ERR(lmk->hash_tfm);
+ if (fips_enabled) {
+ ti->error = "LMK support is disabled due to FIPS";
+ /* ... because it uses MD5. */
+ return -EINVAL;
}
/* No seed in LMK version 2 */
@@ -498,7 +486,6 @@ static int crypt_iv_lmk_ctr(struct crypt_config *cc, struct dm_target *ti,
lmk->seed = kzalloc(LMK_SEED_SIZE, GFP_KERNEL);
if (!lmk->seed) {
- crypt_iv_lmk_dtr(cc);
ti->error = "Error kmallocing seed storage in LMK";
return -ENOMEM;
}
@@ -514,7 +501,7 @@ static int crypt_iv_lmk_init(struct crypt_config *cc)
/* LMK seed is on the position of LMK_KEYS + 1 key */
if (lmk->seed)
memcpy(lmk->seed, cc->key + (cc->tfms_count * subkey_size),
- crypto_shash_digestsize(lmk->hash_tfm));
+ MD5_DIGEST_SIZE);
return 0;
}
@@ -529,55 +516,31 @@ static int crypt_iv_lmk_wipe(struct crypt_config *cc)
return 0;
}
-static int crypt_iv_lmk_one(struct crypt_config *cc, u8 *iv,
- struct dm_crypt_request *dmreq,
- u8 *data)
+static void crypt_iv_lmk_one(struct crypt_config *cc, u8 *iv,
+ struct dm_crypt_request *dmreq, u8 *data)
{
struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk;
- SHASH_DESC_ON_STACK(desc, lmk->hash_tfm);
- union {
- struct md5_state md5state;
- u8 state[CRYPTO_MD5_STATESIZE];
- } u;
+ struct md5_ctx ctx;
__le32 buf[4];
- int i, r;
- desc->tfm = lmk->hash_tfm;
+ md5_init(&ctx);
- r = crypto_shash_init(desc);
- if (r)
- return r;
-
- if (lmk->seed) {
- r = crypto_shash_update(desc, lmk->seed, LMK_SEED_SIZE);
- if (r)
- return r;
- }
+ if (lmk->seed)
+ md5_update(&ctx, lmk->seed, LMK_SEED_SIZE);
/* Sector is always 512B, block size 16, add data of blocks 1-31 */
- r = crypto_shash_update(desc, data + 16, 16 * 31);
- if (r)
- return r;
+ md5_update(&ctx, data + 16, 16 * 31);
/* Sector is cropped to 56 bits here */
buf[0] = cpu_to_le32(dmreq->iv_sector & 0xFFFFFFFF);
buf[1] = cpu_to_le32((((u64)dmreq->iv_sector >> 32) & 0x00FFFFFF) | 0x80000000);
buf[2] = cpu_to_le32(4024);
buf[3] = 0;
- r = crypto_shash_update(desc, (u8 *)buf, sizeof(buf));
- if (r)
- return r;
+ md5_update(&ctx, (u8 *)buf, sizeof(buf));
/* No MD5 padding here */
- r = crypto_shash_export(desc, &u.md5state);
- if (r)
- return r;
-
- for (i = 0; i < MD5_HASH_WORDS; i++)
- __cpu_to_le32s(&u.md5state.hash[i]);
- memcpy(iv, &u.md5state.hash, cc->iv_size);
-
- return 0;
+ cpu_to_le32_array(ctx.state.h, ARRAY_SIZE(ctx.state.h));
+ memcpy(iv, ctx.state.h, cc->iv_size);
}
static int crypt_iv_lmk_gen(struct crypt_config *cc, u8 *iv,
@@ -585,17 +548,15 @@ static int crypt_iv_lmk_gen(struct crypt_config *cc, u8 *iv,
{
struct scatterlist *sg;
u8 *src;
- int r = 0;
if (bio_data_dir(dmreq->ctx->bio_in) == WRITE) {
sg = crypt_get_sg_data(cc, dmreq->sg_in);
src = kmap_local_page(sg_page(sg));
- r = crypt_iv_lmk_one(cc, iv, dmreq, src + sg->offset);
+ crypt_iv_lmk_one(cc, iv, dmreq, src + sg->offset);
kunmap_local(src);
} else
memset(iv, 0, cc->iv_size);
-
- return r;
+ return 0;
}
static int crypt_iv_lmk_post(struct crypt_config *cc, u8 *iv,
@@ -603,21 +564,19 @@ static int crypt_iv_lmk_post(struct crypt_config *cc, u8 *iv,
{
struct scatterlist *sg;
u8 *dst;
- int r;
if (bio_data_dir(dmreq->ctx->bio_in) == WRITE)
return 0;
sg = crypt_get_sg_data(cc, dmreq->sg_out);
dst = kmap_local_page(sg_page(sg));
- r = crypt_iv_lmk_one(cc, iv, dmreq, dst + sg->offset);
+ crypt_iv_lmk_one(cc, iv, dmreq, dst + sg->offset);
/* Tweak the first block of plaintext sector */
- if (!r)
- crypto_xor(dst + sg->offset, iv, cc->iv_size);
+ crypto_xor(dst + sg->offset, iv, cc->iv_size);
kunmap_local(dst);
- return r;
+ return 0;
}
static void crypt_iv_tcw_dtr(struct crypt_config *cc)
@@ -1781,7 +1740,7 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone)
bio_for_each_folio_all(fi, clone) {
if (folio_test_large(fi.folio)) {
percpu_counter_sub(&cc->n_allocated_pages,
- 1 << folio_order(fi.folio));
+ folio_nr_pages(fi.folio));
folio_put(fi.folio);
} else {
mempool_free(&fi.folio->page, &cc->page_pool);
@@ -3496,6 +3455,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
struct dm_crypt_io *io;
struct crypt_config *cc = ti->private;
unsigned max_sectors;
+ bool no_split;
/*
* If bio is REQ_PREFLUSH or REQ_OP_DISCARD, just bypass crypt queues.
@@ -3513,10 +3473,20 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
/*
* Check if bio is too large, split as needed.
+ *
+ * For zoned devices, splitting write operations creates the
+ * risk of deadlocking queue freeze operations with zone write
+	 * plugging BIO work when the remainder of a split BIO is
+ * issued. So always allow the entire BIO to proceed.
*/
- max_sectors = get_max_request_sectors(ti, bio);
- if (unlikely(bio_sectors(bio) > max_sectors))
+ no_split = (ti->emulate_zone_append && op_is_write(bio_op(bio))) ||
+ (bio->bi_opf & REQ_ATOMIC);
+ max_sectors = get_max_request_sectors(ti, bio, no_split);
+ if (unlikely(bio_sectors(bio) > max_sectors)) {
+ if (unlikely(no_split))
+ return DM_MAPIO_KILL;
dm_accept_partial_bio(bio, max_sectors);
+ }
/*
* Ensure that bio is a multiple of internal sector encryption size
@@ -3762,15 +3732,20 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
if (ti->emulate_zone_append)
limits->max_hw_sectors = min(limits->max_hw_sectors,
BIO_MAX_VECS << PAGE_SECTORS_SHIFT);
+
+ limits->atomic_write_hw_unit_max = min(limits->atomic_write_hw_unit_max,
+ BIO_MAX_VECS << PAGE_SHIFT);
+ limits->atomic_write_hw_max = min(limits->atomic_write_hw_max,
+ BIO_MAX_VECS << PAGE_SHIFT);
}
static struct target_type crypt_target = {
.name = "crypt",
- .version = {1, 28, 0},
+ .version = {1, 29, 0},
.module = THIS_MODULE,
.ctr = crypt_ctr,
.dtr = crypt_dtr,
- .features = DM_TARGET_ZONED_HM,
+ .features = DM_TARGET_ZONED_HM | DM_TARGET_ATOMIC_WRITES,
.report_zones = crypt_report_zones,
.map = crypt_map,
.status = crypt_status,
diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c
index 6abb31ca9662..b354e74a670e 100644
--- a/drivers/md/dm-ebs-target.c
+++ b/drivers/md/dm-ebs-target.c
@@ -103,7 +103,7 @@ static int __ebs_rw_bvec(struct ebs_c *ec, enum req_op op, struct bio_vec *bv,
} else {
flush_dcache_page(bv->bv_page);
memcpy(ba, pa, cur_len);
- dm_bufio_mark_partial_buffer_dirty(b, buf_off, buf_off + cur_len);
+ dm_bufio_mark_buffer_dirty(b);
}
dm_bufio_release(b);
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index b67976637538..061b4d310813 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -29,7 +29,7 @@ typedef sector_t chunk_t;
* chunk within the device.
*/
struct dm_exception {
- struct hlist_bl_node hash_list;
+ struct hlist_node hash_list;
chunk_t old_chunk;
chunk_t new_chunk;
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 7bb7174f8f4f..f0c84e7a5daa 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -432,6 +432,7 @@ static int log_writes_kthread(void *arg)
struct log_writes_c *lc = arg;
sector_t sector = 0;
+ set_freezable();
while (!kthread_should_stop()) {
bool super = false;
bool logging_enabled;
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index aaf4a0a4b0eb..c18358271618 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -131,7 +131,7 @@ static void queue_if_no_path_timeout_work(struct timer_list *t);
#define MPATHF_QUEUE_IO 0 /* Must we queue all I/O? */
#define MPATHF_QUEUE_IF_NO_PATH 1 /* Queue I/O if last path fails? */
#define MPATHF_SAVED_QUEUE_IF_NO_PATH 2 /* Saved state during suspension */
-#define MPATHF_RETAIN_ATTACHED_HW_HANDLER 3 /* If there's already a hw_handler present, don't change it. */
+/* MPATHF_RETAIN_ATTACHED_HW_HANDLER no longer has any effect */
#define MPATHF_PG_INIT_DISABLED 4 /* pg_init is not currently allowed */
#define MPATHF_PG_INIT_REQUIRED 5 /* pg_init needs calling? */
#define MPATHF_PG_INIT_DELAY_RETRY 6 /* Delay pg_init retry? */
@@ -237,16 +237,10 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
{
- if (m->queue_mode == DM_TYPE_NONE) {
+ if (m->queue_mode == DM_TYPE_NONE)
m->queue_mode = DM_TYPE_REQUEST_BASED;
- } else if (m->queue_mode == DM_TYPE_BIO_BASED) {
+ else if (m->queue_mode == DM_TYPE_BIO_BASED)
INIT_WORK(&m->process_queued_bios, process_queued_bios);
- /*
- * bio-based doesn't support any direct scsi_dh management;
- * it just discovers if a scsi_dh is attached.
- */
- set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
- }
dm_table_set_type(ti->table, m->queue_mode);
@@ -887,36 +881,30 @@ static int setup_scsi_dh(struct block_device *bdev, struct multipath *m,
struct request_queue *q = bdev_get_queue(bdev);
int r;
- if (mpath_double_check_test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, m)) {
-retain:
- if (*attached_handler_name) {
- /*
- * Clear any hw_handler_params associated with a
- * handler that isn't already attached.
- */
- if (m->hw_handler_name && strcmp(*attached_handler_name, m->hw_handler_name)) {
- kfree(m->hw_handler_params);
- m->hw_handler_params = NULL;
- }
-
- /*
- * Reset hw_handler_name to match the attached handler
- *
- * NB. This modifies the table line to show the actual
- * handler instead of the original table passed in.
- */
- kfree(m->hw_handler_name);
- m->hw_handler_name = *attached_handler_name;
- *attached_handler_name = NULL;
+ if (*attached_handler_name) {
+ /*
+ * Clear any hw_handler_params associated with a
+ * handler that isn't already attached.
+ */
+ if (m->hw_handler_name && strcmp(*attached_handler_name,
+ m->hw_handler_name)) {
+ kfree(m->hw_handler_params);
+ m->hw_handler_params = NULL;
}
+
+ /*
+ * Reset hw_handler_name to match the attached handler
+ *
+ * NB. This modifies the table line to show the actual
+ * handler instead of the original table passed in.
+ */
+ kfree(m->hw_handler_name);
+ m->hw_handler_name = *attached_handler_name;
+ *attached_handler_name = NULL;
}
if (m->hw_handler_name) {
r = scsi_dh_attach(q, m->hw_handler_name);
- if (r == -EBUSY) {
- DMINFO("retaining handler on device %pg", bdev);
- goto retain;
- }
if (r < 0) {
*error = "error attaching hardware handler";
return r;
@@ -1138,7 +1126,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
}
if (!strcasecmp(arg_name, "retain_attached_hw_handler")) {
- set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
+ /* no longer has any effect */
continue;
}
@@ -1823,7 +1811,6 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
DMEMIT("%u ", test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) +
(m->pg_init_retries > 0) * 2 +
(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 +
- test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags) +
(m->queue_mode != DM_TYPE_REQUEST_BASED) * 2);
if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
@@ -1832,8 +1819,6 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
DMEMIT("pg_init_retries %u ", m->pg_init_retries);
if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT)
DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs);
- if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags))
- DMEMIT("retain_attached_hw_handler ");
if (m->queue_mode != DM_TYPE_REQUEST_BASED) {
switch (m->queue_mode) {
case DM_TYPE_BIO_BASED:
@@ -2307,7 +2292,7 @@ static struct target_type multipath_target = {
.name = "multipath",
.version = {1, 15, 0},
.features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE |
- DM_TARGET_PASSES_INTEGRITY,
+ DM_TARGET_PASSES_INTEGRITY | DM_TARGET_ATOMIC_WRITES,
.module = THIS_MODULE,
.ctr = multipath_ctr,
.dtr = multipath_dtr,
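A minimal userspace sketch of the feature-word accounting visible in the multipath_status() hunk above: the first number emitted is how many feature words follow, so each optional feature contributes exactly the number of words it prints (which is why dropping retain_attached_hw_handler also drops one from the count). The variable names and values below are invented for illustration and are not part of the driver.

/* Illustrative only: models the DMEMIT feature-count bookkeeping. */
#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	bool queue_if_no_path = true;
	unsigned int pg_init_retries = 3;   /* 0 means "not configured" */
	bool non_default_delay = false;
	bool bio_based = true;              /* queue_mode != request-based */

	unsigned int words = (queue_if_no_path ? 1 : 0) +
			     (pg_init_retries > 0 ? 2 : 0) +
			     (non_default_delay ? 2 : 0) +
			     (bio_based ? 2 : 0);

	printf("%u ", words);
	if (queue_if_no_path)
		printf("queue_if_no_path ");
	if (pg_init_retries > 0)
		printf("pg_init_retries %u ", pg_init_retries);
	if (non_default_delay)
		printf("pg_init_delay_msecs 500 ");
	if (bio_based)
		printf("queue_mode bio ");
	printf("\n");
	return 0;
}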
diff --git a/drivers/md/dm-pcache/cache.c b/drivers/md/dm-pcache/cache.c
index 698697a7a73c..534bf07b794f 100644
--- a/drivers/md/dm-pcache/cache.c
+++ b/drivers/md/dm-pcache/cache.c
@@ -10,7 +10,8 @@ struct kmem_cache *key_cache;
static inline struct pcache_cache_info *get_cache_info_addr(struct pcache_cache *cache)
{
- return cache->cache_info_addr + cache->info_index;
+ return (struct pcache_cache_info *)((char *)cache->cache_info_addr +
+ (size_t)cache->info_index * PCACHE_CACHE_INFO_SIZE);
}
static void cache_info_write(struct pcache_cache *cache)
@@ -21,10 +22,10 @@ static void cache_info_write(struct pcache_cache *cache)
cache_info->header.crc = pcache_meta_crc(&cache_info->header,
sizeof(struct pcache_cache_info));
+ cache->info_index = (cache->info_index + 1) % PCACHE_META_INDEX_MAX;
memcpy_flushcache(get_cache_info_addr(cache), cache_info,
sizeof(struct pcache_cache_info));
-
- cache->info_index = (cache->info_index + 1) % PCACHE_META_INDEX_MAX;
+ pmem_wmb();
}
static void cache_info_init_default(struct pcache_cache *cache);
@@ -49,6 +50,8 @@ static int cache_info_init(struct pcache_cache *cache, struct pcache_cache_optio
return -EINVAL;
}
+ cache->info_index = ((char *)cache_info_addr - (char *)cache->cache_info_addr) / PCACHE_CACHE_INFO_SIZE;
+
return 0;
}
@@ -93,10 +96,10 @@ void cache_pos_encode(struct pcache_cache *cache,
pos_onmedia.header.seq = seq;
pos_onmedia.header.crc = cache_pos_onmedia_crc(&pos_onmedia);
+ *index = (*index + 1) % PCACHE_META_INDEX_MAX;
+
memcpy_flushcache(pos_onmedia_addr, &pos_onmedia, sizeof(struct pcache_cache_pos_onmedia));
pmem_wmb();
-
- *index = (*index + 1) % PCACHE_META_INDEX_MAX;
}
int cache_pos_decode(struct pcache_cache *cache,
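A minimal userspace sketch of the rotating-slot metadata update pattern these cache.c hunks rework: advance to the next of a small fixed set of on-media slots, then copy the new record into that slot, so the previously written copy stays intact until the new one is durable. In the driver the copy is memcpy_flushcache() followed by pmem_wmb(); here plain memcpy() stands in, and all names below (meta_slot, meta_write, SLOT_COUNT) are invented for the sketch.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define SLOT_COUNT 2          /* stand-in for PCACHE_META_INDEX_MAX */

struct meta_slot {
	uint64_t seq;
	uint32_t crc;
	uint32_t payload;
};

struct meta_state {
	struct meta_slot slots[SLOT_COUNT]; /* models the on-media copies */
	unsigned int index;                 /* slot most recently written */
	struct meta_slot shadow;            /* in-memory working copy */
};

static uint32_t fake_crc(const struct meta_slot *s)
{
	return (uint32_t)(s->seq * 2654435761u) ^ s->payload;
}

static void meta_write(struct meta_state *st, uint32_t payload)
{
	st->shadow.payload = payload;
	st->shadow.seq++;
	st->shadow.crc = fake_crc(&st->shadow);

	/* Advance first, then persist into the now-current slot. */
	st->index = (st->index + 1) % SLOT_COUNT;
	memcpy(&st->slots[st->index], &st->shadow, sizeof(st->shadow));
	/* The driver follows the copy with pmem_wmb() to order the flush. */
}

int main(void)
{
	struct meta_state st = { 0 };

	meta_write(&st, 42);
	meta_write(&st, 43);
	printf("slot %u holds seq %llu payload %u\n", st.index,
	       (unsigned long long)st.slots[st.index].seq,
	       st.slots[st.index].payload);
	return 0;
}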
diff --git a/drivers/md/dm-pcache/cache_segment.c b/drivers/md/dm-pcache/cache_segment.c
index f0b58980806e..9d92e2b067ed 100644
--- a/drivers/md/dm-pcache/cache_segment.c
+++ b/drivers/md/dm-pcache/cache_segment.c
@@ -26,11 +26,11 @@ static void cache_seg_info_write(struct pcache_cache_segment *cache_seg)
seg_info->header.seq++;
seg_info->header.crc = pcache_meta_crc(&seg_info->header, sizeof(struct pcache_segment_info));
+ cache_seg->info_index = (cache_seg->info_index + 1) % PCACHE_META_INDEX_MAX;
+
seg_info_addr = get_seg_info_addr(cache_seg);
memcpy_flushcache(seg_info_addr, seg_info, sizeof(struct pcache_segment_info));
pmem_wmb();
-
- cache_seg->info_index = (cache_seg->info_index + 1) % PCACHE_META_INDEX_MAX;
mutex_unlock(&cache_seg->info_lock);
}
@@ -56,7 +56,10 @@ static int cache_seg_info_load(struct pcache_cache_segment *cache_seg)
ret = -EIO;
goto out;
}
- cache_seg->info_index = cache_seg_info_addr - cache_seg_info_addr_base;
+
+ cache_seg->info_index =
+ ((char *)cache_seg_info_addr - (char *)cache_seg_info_addr_base) /
+ PCACHE_SEG_INFO_SIZE;
out:
mutex_unlock(&cache_seg->info_lock);
@@ -129,10 +132,10 @@ static void cache_seg_ctrl_write(struct pcache_cache_segment *cache_seg)
cache_seg_gen.header.crc = pcache_meta_crc(&cache_seg_gen.header,
sizeof(struct pcache_cache_seg_gen));
+ cache_seg->gen_index = (cache_seg->gen_index + 1) % PCACHE_META_INDEX_MAX;
+
memcpy_flushcache(get_cache_seg_gen_addr(cache_seg), &cache_seg_gen, sizeof(struct pcache_cache_seg_gen));
pmem_wmb();
-
- cache_seg->gen_index = (cache_seg->gen_index + 1) % PCACHE_META_INDEX_MAX;
}
static void cache_seg_ctrl_init(struct pcache_cache_segment *cache_seg)
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index c6f7129e43d3..4bacdc499984 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -2287,6 +2287,8 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
mddev->reshape_position = le64_to_cpu(sb->reshape_position);
rs->raid_type = get_raid_type_by_ll(mddev->level, mddev->layout);
+ if (!rs->raid_type)
+ return -EINVAL;
}
} else {
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index f40c18da4000..dbd148967de4 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -40,10 +40,15 @@ static const char dm_snapshot_merge_target_name[] = "snapshot-merge";
#define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \
(DM_TRACKED_CHUNK_HASH_SIZE - 1))
+struct dm_hlist_head {
+ struct hlist_head head;
+ spinlock_t lock;
+};
+
struct dm_exception_table {
uint32_t hash_mask;
unsigned int hash_shift;
- struct hlist_bl_head *table;
+ struct dm_hlist_head *table;
};
struct dm_snapshot {
@@ -628,8 +633,8 @@ static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk);
/* Lock to protect access to the completed and pending exception hash tables. */
struct dm_exception_table_lock {
- struct hlist_bl_head *complete_slot;
- struct hlist_bl_head *pending_slot;
+ spinlock_t *complete_slot;
+ spinlock_t *pending_slot;
};
static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk,
@@ -638,20 +643,20 @@ static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk,
struct dm_exception_table *complete = &s->complete;
struct dm_exception_table *pending = &s->pending;
- lock->complete_slot = &complete->table[exception_hash(complete, chunk)];
- lock->pending_slot = &pending->table[exception_hash(pending, chunk)];
+ lock->complete_slot = &complete->table[exception_hash(complete, chunk)].lock;
+ lock->pending_slot = &pending->table[exception_hash(pending, chunk)].lock;
}
static void dm_exception_table_lock(struct dm_exception_table_lock *lock)
{
- hlist_bl_lock(lock->complete_slot);
- hlist_bl_lock(lock->pending_slot);
+ spin_lock_nested(lock->complete_slot, 1);
+ spin_lock_nested(lock->pending_slot, 2);
}
static void dm_exception_table_unlock(struct dm_exception_table_lock *lock)
{
- hlist_bl_unlock(lock->pending_slot);
- hlist_bl_unlock(lock->complete_slot);
+ spin_unlock(lock->pending_slot);
+ spin_unlock(lock->complete_slot);
}
static int dm_exception_table_init(struct dm_exception_table *et,
@@ -661,13 +666,15 @@ static int dm_exception_table_init(struct dm_exception_table *et,
et->hash_shift = hash_shift;
et->hash_mask = size - 1;
- et->table = kvmalloc_array(size, sizeof(struct hlist_bl_head),
+ et->table = kvmalloc_array(size, sizeof(struct dm_hlist_head),
GFP_KERNEL);
if (!et->table)
return -ENOMEM;
- for (i = 0; i < size; i++)
- INIT_HLIST_BL_HEAD(et->table + i);
+ for (i = 0; i < size; i++) {
+ INIT_HLIST_HEAD(&et->table[i].head);
+ spin_lock_init(&et->table[i].lock);
+ }
return 0;
}
@@ -675,16 +682,17 @@ static int dm_exception_table_init(struct dm_exception_table *et,
static void dm_exception_table_exit(struct dm_exception_table *et,
struct kmem_cache *mem)
{
- struct hlist_bl_head *slot;
+ struct dm_hlist_head *slot;
struct dm_exception *ex;
- struct hlist_bl_node *pos, *n;
+ struct hlist_node *pos;
int i, size;
size = et->hash_mask + 1;
for (i = 0; i < size; i++) {
slot = et->table + i;
- hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list) {
+ hlist_for_each_entry_safe(ex, pos, &slot->head, hash_list) {
+ hlist_del(&ex->hash_list);
kmem_cache_free(mem, ex);
cond_resched();
}
@@ -700,7 +708,7 @@ static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk)
static void dm_remove_exception(struct dm_exception *e)
{
- hlist_bl_del(&e->hash_list);
+ hlist_del(&e->hash_list);
}
/*
@@ -710,12 +718,11 @@ static void dm_remove_exception(struct dm_exception *e)
static struct dm_exception *dm_lookup_exception(struct dm_exception_table *et,
chunk_t chunk)
{
- struct hlist_bl_head *slot;
- struct hlist_bl_node *pos;
+ struct hlist_head *slot;
struct dm_exception *e;
- slot = &et->table[exception_hash(et, chunk)];
- hlist_bl_for_each_entry(e, pos, slot, hash_list)
+ slot = &et->table[exception_hash(et, chunk)].head;
+ hlist_for_each_entry(e, slot, hash_list)
if (chunk >= e->old_chunk &&
chunk <= e->old_chunk + dm_consecutive_chunk_count(e))
return e;
@@ -762,18 +769,17 @@ static void free_pending_exception(struct dm_snap_pending_exception *pe)
static void dm_insert_exception(struct dm_exception_table *eh,
struct dm_exception *new_e)
{
- struct hlist_bl_head *l;
- struct hlist_bl_node *pos;
+ struct hlist_head *l;
struct dm_exception *e = NULL;
- l = &eh->table[exception_hash(eh, new_e->old_chunk)];
+ l = &eh->table[exception_hash(eh, new_e->old_chunk)].head;
/* Add immediately if this table doesn't support consecutive chunks */
if (!eh->hash_shift)
goto out;
/* List is ordered by old_chunk */
- hlist_bl_for_each_entry(e, pos, l, hash_list) {
+ hlist_for_each_entry(e, l, hash_list) {
/* Insert after an existing chunk? */
if (new_e->old_chunk == (e->old_chunk +
dm_consecutive_chunk_count(e) + 1) &&
@@ -804,13 +810,13 @@ out:
* Either the table doesn't support consecutive chunks or slot
* l is empty.
*/
- hlist_bl_add_head(&new_e->hash_list, l);
+ hlist_add_head(&new_e->hash_list, l);
} else if (new_e->old_chunk < e->old_chunk) {
/* Add before an existing exception */
- hlist_bl_add_before(&new_e->hash_list, &e->hash_list);
+ hlist_add_before(&new_e->hash_list, &e->hash_list);
} else {
/* Add to l's tail: e is the last exception in this slot */
- hlist_bl_add_behind(&new_e->hash_list, &e->hash_list);
+ hlist_add_behind(&new_e->hash_list, &e->hash_list);
}
}
@@ -820,7 +826,6 @@ out:
*/
static int dm_add_exception(void *context, chunk_t old, chunk_t new)
{
- struct dm_exception_table_lock lock;
struct dm_snapshot *s = context;
struct dm_exception *e;
@@ -833,17 +838,7 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new)
/* Consecutive_count is implicitly initialised to zero */
e->new_chunk = new;
- /*
- * Although there is no need to lock access to the exception tables
- * here, if we don't then hlist_bl_add_head(), called by
- * dm_insert_exception(), will complain about accessing the
- * corresponding list without locking it first.
- */
- dm_exception_table_lock_init(s, old, &lock);
-
- dm_exception_table_lock(&lock);
dm_insert_exception(&s->complete, e);
- dm_exception_table_unlock(&lock);
return 0;
}
@@ -873,7 +868,7 @@ static int calc_max_buckets(void)
/* use a fixed size of 2MB */
unsigned long mem = 2 * 1024 * 1024;
- mem /= sizeof(struct hlist_bl_head);
+ mem /= sizeof(struct dm_hlist_head);
return mem;
}
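A minimal userspace sketch of a hash table whose buckets each embed their own lock, the shape introduced by struct dm_hlist_head above. A singly linked list and a pthread mutex stand in for the kernel's hlist_head and spinlock_t; all identifiers below are illustrative.

#include <pthread.h>
#include <stdio.h>

#define NR_BUCKETS 8

struct entry {
	unsigned long key;
	struct entry *next;
};

struct bucket {
	struct entry *head;
	pthread_mutex_t lock;   /* protects this bucket only */
};

static struct bucket table[NR_BUCKETS];

static struct bucket *bucket_for(unsigned long key)
{
	return &table[key % NR_BUCKETS];
}

static void table_init(void)
{
	for (int i = 0; i < NR_BUCKETS; i++) {
		table[i].head = NULL;
		pthread_mutex_init(&table[i].lock, NULL);
	}
}

static void table_insert(struct entry *e)
{
	struct bucket *b = bucket_for(e->key);

	pthread_mutex_lock(&b->lock);
	e->next = b->head;
	b->head = e;
	pthread_mutex_unlock(&b->lock);
}

static struct entry *table_lookup(unsigned long key)
{
	struct bucket *b = bucket_for(key);
	struct entry *e;

	pthread_mutex_lock(&b->lock);
	for (e = b->head; e; e = e->next)
		if (e->key == key)
			break;
	pthread_mutex_unlock(&b->lock);
	return e;
}

int main(void)
{
	struct entry a = { .key = 17 };

	table_init();
	table_insert(&a);
	printf("found key 17: %s\n", table_lookup(17) ? "yes" : "no");
	return 0;
}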
diff --git a/drivers/md/dm-sysfs.c b/drivers/md/dm-sysfs.c
index bfaef27ca79f..22bc70923a83 100644
--- a/drivers/md/dm-sysfs.c
+++ b/drivers/md/dm-sysfs.c
@@ -86,17 +86,13 @@ static ssize_t dm_attr_uuid_show(struct mapped_device *md, char *buf)
static ssize_t dm_attr_suspended_show(struct mapped_device *md, char *buf)
{
- sprintf(buf, "%d\n", dm_suspended_md(md));
-
- return strlen(buf);
+ return sysfs_emit(buf, "%d\n", dm_suspended_md(md));
}
static ssize_t dm_attr_use_blk_mq_show(struct mapped_device *md, char *buf)
{
/* Purely for userspace compatibility */
- sprintf(buf, "%d\n", true);
-
- return strlen(buf);
+ return sysfs_emit(buf, "%d\n", true);
}
static DM_ATTR_RO(name);
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index ad0a60a07b93..0522cd700e0e 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -2043,6 +2043,10 @@ bool dm_table_supports_size_change(struct dm_table *t, sector_t old_size,
return true;
}
+/*
+ * This function will be skipped by noflush reloads of immutable
+ * request-based devices (dm-mpath).
+ */
int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *limits)
{
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index c84149ba4e38..52ffb495f5a8 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -395,13 +395,13 @@ static void begin_discard(struct discard_op *op, struct thin_c *tc, struct bio *
op->bio = NULL;
}
-static int issue_discard(struct discard_op *op, dm_block_t data_b, dm_block_t data_e)
+static void issue_discard(struct discard_op *op, dm_block_t data_b, dm_block_t data_e)
{
struct thin_c *tc = op->tc;
sector_t s = block_to_sectors(tc->pool, data_b);
sector_t len = block_to_sectors(tc->pool, data_e - data_b);
- return __blkdev_issue_discard(tc->pool_dev->bdev, s, len, GFP_NOIO, &op->bio);
+ __blkdev_issue_discard(tc->pool_dev->bdev, s, len, GFP_NOIO, &op->bio);
}
static void end_discard(struct discard_op *op, int r)
@@ -1113,9 +1113,7 @@ static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m
break;
}
- r = issue_discard(&op, b, e);
- if (r)
- goto out;
+ issue_discard(&op, b, e);
b = e;
}
@@ -1188,8 +1186,8 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
struct discard_op op;
begin_discard(&op, tc, discard_parent);
- r = issue_discard(&op, m->data_block, data_end);
- end_discard(&op, r);
+ issue_discard(&op, m->data_block, data_end);
+ end_discard(&op, 0);
}
}
@@ -4383,11 +4381,8 @@ static void thin_postsuspend(struct dm_target *ti)
{
struct thin_c *tc = ti->private;
- /*
- * The dm_noflush_suspending flag has been cleared by now, so
- * unfortunately we must always run this.
- */
- noflush_work(tc, do_noflush_stop);
+ if (dm_noflush_suspending(ti))
+ noflush_work(tc, do_noflush_stop);
}
static int thin_preresume(struct dm_target *ti)
diff --git a/drivers/md/dm-vdo/action-manager.c b/drivers/md/dm-vdo/action-manager.c
index a0e5e7077d13..e3bba0b28aad 100644
--- a/drivers/md/dm-vdo/action-manager.c
+++ b/drivers/md/dm-vdo/action-manager.c
@@ -43,7 +43,7 @@ struct action {
* @actions: The two action slots.
* @current_action: The current action slot.
* @zones: The number of zones in which an action is to be applied.
- * @Scheduler: A function to schedule a default next action.
+ * @scheduler: A function to schedule a default next action.
* @get_zone_thread_id: A function to get the id of the thread on which to apply an action to a
* zone.
* @initiator_thread_id: The ID of the thread on which actions may be initiated.
diff --git a/drivers/md/dm-vdo/admin-state.c b/drivers/md/dm-vdo/admin-state.c
index 3f9dba525154..da153fef085e 100644
--- a/drivers/md/dm-vdo/admin-state.c
+++ b/drivers/md/dm-vdo/admin-state.c
@@ -149,7 +149,8 @@ const struct admin_state_code *VDO_ADMIN_STATE_RESUMING = &VDO_CODE_RESUMING;
/**
* get_next_state() - Determine the state which should be set after a given operation completes
* based on the operation and the current state.
- * @operation The operation to be started.
+ * @state: The current admin state.
+ * @operation: The operation to be started.
*
* Return: The state to set when the operation completes or NULL if the operation can not be
* started in the current state.
@@ -187,6 +188,8 @@ static const struct admin_state_code *get_next_state(const struct admin_state *s
/**
* vdo_finish_operation() - Finish the current operation.
+ * @state: The current admin state.
+ * @result: The result of the operation.
*
* Will notify the operation waiter if there is one. This method should be used for operations
* started with vdo_start_operation(). For operations which were started with vdo_start_draining(),
@@ -214,8 +217,10 @@ bool vdo_finish_operation(struct admin_state *state, int result)
/**
* begin_operation() - Begin an operation if it may be started given the current state.
- * @waiter A completion to notify when the operation is complete; may be NULL.
- * @initiator The vdo_admin_initiator_fn to call if the operation may begin; may be NULL.
+ * @state: The current admin state.
+ * @operation: The operation to be started.
+ * @waiter: A completion to notify when the operation is complete; may be NULL.
+ * @initiator: The vdo_admin_initiator_fn to call if the operation may begin; may be NULL.
*
* Return: VDO_SUCCESS or an error.
*/
@@ -259,8 +264,10 @@ static int __must_check begin_operation(struct admin_state *state,
/**
* start_operation() - Start an operation if it may be started given the current state.
- * @waiter A completion to notify when the operation is complete.
- * @initiator The vdo_admin_initiator_fn to call if the operation may begin; may be NULL.
+ * @state: The current admin state.
+ * @operation: The operation to be started.
+ * @waiter: A completion to notify when the operation is complete; may be NULL.
+ * @initiator: The vdo_admin_initiator_fn to call if the operation may begin; may be NULL.
*
* Return: true if the operation was started.
*/
@@ -274,10 +281,10 @@ static inline bool __must_check start_operation(struct admin_state *state,
/**
* check_code() - Check the result of a state validation.
- * @valid true if the code is of an appropriate type.
- * @code The code which failed to be of the correct type.
- * @what What the code failed to be, for logging.
- * @waiter The completion to notify of the error; may be NULL.
+ * @valid: True if the code is of an appropriate type.
+ * @code: The code which failed to be of the correct type.
+ * @what: What the code failed to be, for logging.
+ * @waiter: The completion to notify of the error; may be NULL.
*
* If the result failed, log an invalid state error and, if there is a waiter, notify it.
*
@@ -301,7 +308,8 @@ static bool check_code(bool valid, const struct admin_state_code *code, const ch
/**
* assert_vdo_drain_operation() - Check that an operation is a drain.
- * @waiter The completion to finish with an error if the operation is not a drain.
+ * @operation: The operation to check.
+ * @waiter: The completion to finish with an error if the operation is not a drain.
*
* Return: true if the specified operation is a drain.
*/
@@ -313,9 +321,10 @@ static bool __must_check assert_vdo_drain_operation(const struct admin_state_cod
/**
* vdo_start_draining() - Initiate a drain operation if the current state permits it.
- * @operation The type of drain to initiate.
- * @waiter The completion to notify when the drain is complete.
- * @initiator The vdo_admin_initiator_fn to call if the operation may begin; may be NULL.
+ * @state: The current admin state.
+ * @operation: The type of drain to initiate.
+ * @waiter: The completion to notify when the drain is complete.
+ * @initiator: The vdo_admin_initiator_fn to call if the operation may begin; may be NULL.
*
* Return: true if the drain was initiated, if not the waiter will be notified.
*/
@@ -345,6 +354,7 @@ bool vdo_start_draining(struct admin_state *state,
/**
* vdo_finish_draining() - Finish a drain operation if one was in progress.
+ * @state: The current admin state.
*
* Return: true if the state was draining; will notify the waiter if so.
*/
@@ -355,6 +365,8 @@ bool vdo_finish_draining(struct admin_state *state)
/**
* vdo_finish_draining_with_result() - Finish a drain operation with a status code.
+ * @state: The current admin state.
+ * @result: The result of the drain operation.
*
* Return: true if the state was draining; will notify the waiter if so.
*/
@@ -365,7 +377,8 @@ bool vdo_finish_draining_with_result(struct admin_state *state, int result)
/**
* vdo_assert_load_operation() - Check that an operation is a load.
- * @waiter The completion to finish with an error if the operation is not a load.
+ * @operation: The operation to check.
+ * @waiter: The completion to finish with an error if the operation is not a load.
*
* Return: true if the specified operation is a load.
*/
@@ -377,9 +390,10 @@ bool vdo_assert_load_operation(const struct admin_state_code *operation,
/**
* vdo_start_loading() - Initiate a load operation if the current state permits it.
- * @operation The type of load to initiate.
- * @waiter The completion to notify when the load is complete (may be NULL).
- * @initiator The vdo_admin_initiator_fn to call if the operation may begin; may be NULL.
+ * @state: The current admin state.
+ * @operation: The type of load to initiate.
+ * @waiter: The completion to notify when the load is complete; may be NULL.
+ * @initiator: The vdo_admin_initiator_fn to call if the operation may begin; may be NULL.
*
* Return: true if the load was initiated, if not the waiter will be notified.
*/
@@ -393,6 +407,7 @@ bool vdo_start_loading(struct admin_state *state,
/**
* vdo_finish_loading() - Finish a load operation if one was in progress.
+ * @state: The current admin state.
*
* Return: true if the state was loading; will notify the waiter if so.
*/
@@ -403,7 +418,8 @@ bool vdo_finish_loading(struct admin_state *state)
/**
* vdo_finish_loading_with_result() - Finish a load operation with a status code.
- * @result The result of the load operation.
+ * @state: The current admin state.
+ * @result: The result of the load operation.
*
* Return: true if the state was loading; will notify the waiter if so.
*/
@@ -414,7 +430,8 @@ bool vdo_finish_loading_with_result(struct admin_state *state, int result)
/**
* assert_vdo_resume_operation() - Check whether an admin_state_code is a resume operation.
- * @waiter The completion to notify if the operation is not a resume operation; may be NULL.
+ * @operation: The operation to check.
+ * @waiter: The completion to notify if the operation is not a resume operation; may be NULL.
*
* Return: true if the code is a resume operation.
*/
@@ -427,9 +444,10 @@ static bool __must_check assert_vdo_resume_operation(const struct admin_state_co
/**
* vdo_start_resuming() - Initiate a resume operation if the current state permits it.
- * @operation The type of resume to start.
- * @waiter The completion to notify when the resume is complete (may be NULL).
- * @initiator The vdo_admin_initiator_fn to call if the operation may begin; may be NULL.
+ * @state: The current admin state.
+ * @operation: The type of resume to start.
+ * @waiter: The completion to notify when the resume is complete; may be NULL.
+ * @initiator: The vdo_admin_initiator_fn to call if the operation may begin; may be NULL.
*
* Return: true if the resume was initiated, if not the waiter will be notified.
*/
@@ -443,6 +461,7 @@ bool vdo_start_resuming(struct admin_state *state,
/**
* vdo_finish_resuming() - Finish a resume operation if one was in progress.
+ * @state: The current admin state.
*
* Return: true if the state was resuming; will notify the waiter if so.
*/
@@ -453,7 +472,8 @@ bool vdo_finish_resuming(struct admin_state *state)
/**
* vdo_finish_resuming_with_result() - Finish a resume operation with a status code.
- * @result The result of the resume operation.
+ * @state: The current admin state.
+ * @result: The result of the resume operation.
*
* Return: true if the state was resuming; will notify the waiter if so.
*/
@@ -465,6 +485,7 @@ bool vdo_finish_resuming_with_result(struct admin_state *state, int result)
/**
* vdo_resume_if_quiescent() - Change the state to normal operation if the current state is
* quiescent.
+ * @state: The current admin state.
*
* Return: VDO_SUCCESS if the state resumed, VDO_INVALID_ADMIN_STATE otherwise.
*/
@@ -479,6 +500,8 @@ int vdo_resume_if_quiescent(struct admin_state *state)
/**
* vdo_start_operation() - Attempt to start an operation.
+ * @state: The current admin state.
+ * @operation: The operation to attempt to start.
*
* Return: VDO_SUCCESS if the operation was started, VDO_INVALID_ADMIN_STATE if not
*/
@@ -490,8 +513,10 @@ int vdo_start_operation(struct admin_state *state,
/**
* vdo_start_operation_with_waiter() - Attempt to start an operation.
- * @waiter the completion to notify when the operation completes or fails to start; may be NULL.
- * @initiator The vdo_admin_initiator_fn to call if the operation may begin; may be NULL.
+ * @state: The current admin state.
+ * @operation: The operation to attempt to start.
+ * @waiter: The completion to notify when the operation completes or fails to start; may be NULL.
+ * @initiator: The vdo_admin_initiator_fn to call if the operation may begin; may be NULL.
*
* Return: VDO_SUCCESS if the operation was started, VDO_INVALID_ADMIN_STATE if not
*/
diff --git a/drivers/md/dm-vdo/block-map.c b/drivers/md/dm-vdo/block-map.c
index baf683cabb1b..a7db5b41155e 100644
--- a/drivers/md/dm-vdo/block-map.c
+++ b/drivers/md/dm-vdo/block-map.c
@@ -174,6 +174,7 @@ static inline struct vdo_page_completion *page_completion_from_waiter(struct vdo
/**
* initialize_info() - Initialize all page info structures and put them on the free list.
+ * @cache: The page cache.
*
* Return: VDO_SUCCESS or an error.
*/
@@ -209,6 +210,7 @@ static int initialize_info(struct vdo_page_cache *cache)
/**
* allocate_cache_components() - Allocate components of the cache which require their own
* allocation.
+ * @cache: The page cache.
*
* The caller is responsible for all clean up on errors.
*
@@ -238,6 +240,8 @@ static int __must_check allocate_cache_components(struct vdo_page_cache *cache)
/**
* assert_on_cache_thread() - Assert that a function has been called on the VDO page cache's
* thread.
+ * @cache: The page cache.
+ * @function_name: The function name to report if the assertion fails.
*/
static inline void assert_on_cache_thread(struct vdo_page_cache *cache,
const char *function_name)
@@ -271,6 +275,7 @@ static void report_cache_pressure(struct vdo_page_cache *cache)
/**
* get_page_state_name() - Return the name of a page state.
+ * @state: The page state to describe.
*
* If the page state is invalid a static string is returned and the invalid state is logged.
*
@@ -342,6 +347,8 @@ static void update_lru(struct page_info *info)
/**
* set_info_state() - Set the state of a page_info and put it on the right list, adjusting
* counters.
+ * @info: The page info to update.
+ * @new_state: The new state to set.
*/
static void set_info_state(struct page_info *info, enum vdo_page_buffer_state new_state)
{
@@ -416,6 +423,7 @@ static int reset_page_info(struct page_info *info)
/**
* find_free_page() - Find a free page.
+ * @cache: The page cache.
*
* Return: A pointer to the page info structure (if found), NULL otherwise.
*/
@@ -433,6 +441,7 @@ static struct page_info * __must_check find_free_page(struct vdo_page_cache *cac
/**
* find_page() - Find the page info (if any) associated with a given pbn.
+ * @cache: The page cache.
* @pbn: The absolute physical block number of the page.
*
* Return: The page info for the page if available, or NULL if not.
@@ -449,6 +458,7 @@ static struct page_info * __must_check find_page(struct vdo_page_cache *cache,
/**
* select_lru_page() - Determine which page is least recently used.
+ * @cache: The page cache.
*
* Picks the least recently used from among the non-busy entries at the front of each of the lru
* list. Since whenever we mark a page busy we also put it to the end of the list it is unlikely
@@ -523,6 +533,8 @@ static void complete_waiter_with_page(struct vdo_waiter *waiter, void *page_info
/**
* distribute_page_over_waitq() - Complete a waitq of VDO page completions with a page result.
+ * @info: The loaded page info.
+ * @waitq: The list of waiting data_vios.
*
* Upon completion the waitq will be empty.
*
@@ -548,7 +560,9 @@ static unsigned int distribute_page_over_waitq(struct page_info *info,
/**
* set_persistent_error() - Set a persistent error which all requests will receive in the future.
+ * @cache: The page cache.
* @context: A string describing what triggered the error.
+ * @result: The error result to set on the cache.
*
* Once triggered, all enqueued completions will get this error. Any future requests will result in
* this error as well.
@@ -581,6 +595,7 @@ static void set_persistent_error(struct vdo_page_cache *cache, const char *conte
/**
* validate_completed_page() - Check that a page completion which is being freed to the cache
* referred to a valid page and is in a valid state.
+ * @completion: The page completion to check.
* @writable: Whether a writable page is required.
*
* Return: VDO_SUCCESS if the page was valid, otherwise as error
@@ -758,6 +773,8 @@ static void load_cache_page_endio(struct bio *bio)
/**
* launch_page_load() - Begin the process of loading a page.
+ * @info: The page info to launch.
+ * @pbn: The absolute physical block number of the page to load.
*
* Return: VDO_SUCCESS or an error code.
*/
@@ -836,6 +853,7 @@ static void save_pages(struct vdo_page_cache *cache)
/**
* schedule_page_save() - Add a page to the outgoing list of pages waiting to be saved.
+ * @info: The page info to save.
*
* Once in the list, a page may not be used until it has been written out.
*/
@@ -854,6 +872,7 @@ static void schedule_page_save(struct page_info *info)
/**
* launch_page_save() - Add a page to outgoing pages waiting to be saved, and then start saving
* pages if another save is not in progress.
+ * @info: The page info to save.
*/
static void launch_page_save(struct page_info *info)
{
@@ -864,6 +883,7 @@ static void launch_page_save(struct page_info *info)
/**
* completion_needs_page() - Determine whether a given vdo_page_completion (as a waiter) is
* requesting a given page number.
+ * @waiter: The page completion waiter to check.
* @context: A pointer to the pbn of the desired page.
*
* Implements waiter_match_fn.
@@ -880,6 +900,7 @@ static bool completion_needs_page(struct vdo_waiter *waiter, void *context)
/**
* allocate_free_page() - Allocate a free page to the first completion in the waiting queue, and
* any other completions that match it in page number.
+ * @info: The page info to allocate a page for.
*/
static void allocate_free_page(struct page_info *info)
{
@@ -925,6 +946,7 @@ static void allocate_free_page(struct page_info *info)
/**
* discard_a_page() - Begin the process of discarding a page.
+ * @cache: The page cache.
*
* If no page is discardable, increments a count of deferred frees so that the next release of a
* page which is no longer busy will kick off another discard cycle. This is an indication that the
@@ -955,10 +977,6 @@ static void discard_a_page(struct vdo_page_cache *cache)
launch_page_save(info);
}
-/**
- * discard_page_for_completion() - Helper used to trigger a discard so that the completion can get
- * a different page.
- */
static void discard_page_for_completion(struct vdo_page_completion *vdo_page_comp)
{
struct vdo_page_cache *cache = vdo_page_comp->cache;
@@ -1132,6 +1150,7 @@ static void write_pages(struct vdo_completion *flush_completion)
/**
* vdo_release_page_completion() - Release a VDO Page Completion.
+ * @completion: The page completion to release.
*
* The page referenced by this completion (if any) will no longer be held busy by this completion.
* If a page becomes discardable and there are completions awaiting free pages then a new round of
@@ -1172,10 +1191,6 @@ void vdo_release_page_completion(struct vdo_completion *completion)
}
}
-/**
- * load_page_for_completion() - Helper function to load a page as described by a VDO Page
- * Completion.
- */
static void load_page_for_completion(struct page_info *info,
struct vdo_page_completion *vdo_page_comp)
{
@@ -1319,6 +1334,7 @@ int vdo_get_cached_page(struct vdo_completion *completion,
/**
* vdo_invalidate_page_cache() - Invalidate all entries in the VDO page cache.
+ * @cache: The page cache.
*
* There must not be any dirty pages in the cache.
*
@@ -1345,6 +1361,10 @@ int vdo_invalidate_page_cache(struct vdo_page_cache *cache)
/**
* get_tree_page_by_index() - Get the tree page for a given height and page index.
+ * @forest: The block map forest.
+ * @root_index: The root index of the tree to search.
+ * @height: The height in the tree.
+ * @page_index: The page index.
*
* Return: The requested page.
*/
@@ -2211,6 +2231,7 @@ static void allocate_block_map_page(struct block_map_zone *zone,
/**
* vdo_find_block_map_slot() - Find the block map slot in which the block map entry for a data_vio
* resides and cache that result in the data_vio.
+ * @data_vio: The data vio.
*
* All ancestors in the tree will be allocated or loaded, as needed.
*/
@@ -2435,6 +2456,7 @@ static void deforest(struct forest *forest, size_t first_page_segment)
/**
* make_forest() - Make a collection of trees for a block_map, expanding the existing forest if
* there is one.
+ * @map: The block map.
* @entries: The number of entries the block map will hold.
*
* Return: VDO_SUCCESS or an error.
@@ -2476,6 +2498,7 @@ static int make_forest(struct block_map *map, block_count_t entries)
/**
* replace_forest() - Replace a block_map's forest with the already-prepared larger forest.
+ * @map: The block map.
*/
static void replace_forest(struct block_map *map)
{
@@ -2492,6 +2515,7 @@ static void replace_forest(struct block_map *map)
/**
* finish_cursor() - Finish the traversal of a single tree. If it was the last cursor, finish the
* traversal.
+ * @cursor: The cursor to complete.
*/
static void finish_cursor(struct cursor *cursor)
{
@@ -2549,6 +2573,7 @@ static void traversal_endio(struct bio *bio)
/**
* traverse() - Traverse a single block map tree.
+ * @cursor: A cursor tracking traversal progress.
*
* This is the recursive heart of the traversal process.
*/
@@ -2619,6 +2644,7 @@ static void traverse(struct cursor *cursor)
/**
* launch_cursor() - Start traversing a single block map tree now that the cursor has a VIO with
* which to load pages.
+ * @waiter: The parent of the cursor to launch.
* @context: The pooled_vio just acquired.
*
* Implements waiter_callback_fn.
@@ -2636,6 +2662,8 @@ static void launch_cursor(struct vdo_waiter *waiter, void *context)
/**
* compute_boundary() - Compute the number of pages used at each level of the given root's tree.
+ * @map: The block map.
+ * @root_index: The tree root index.
*
* Return: The list of page counts as a boundary structure.
*/
@@ -2668,6 +2696,7 @@ static struct boundary compute_boundary(struct block_map *map, root_count_t root
/**
* vdo_traverse_forest() - Walk the entire forest of a block map.
+ * @map: The block map.
* @callback: A function to call with the pbn of each allocated node in the forest.
* @completion: The completion to notify on each traversed PBN, and when traversal completes.
*/
@@ -2707,6 +2736,9 @@ void vdo_traverse_forest(struct block_map *map, vdo_entry_callback_fn callback,
/**
* initialize_block_map_zone() - Initialize the per-zone portions of the block map.
+ * @map: The block map.
+ * @zone_number: The zone to initialize.
+ * @cache_size: The total block map cache size.
* @maximum_age: The number of journal blocks before a dirtied page is considered old and must be
* written out.
*/
@@ -3091,6 +3123,7 @@ static void fetch_mapping_page(struct data_vio *data_vio, bool modifiable,
/**
* clear_mapped_location() - Clear a data_vio's mapped block location, setting it to be unmapped.
+ * @data_vio: The data vio.
*
* This indicates the block map entry for the logical block is either unmapped or corrupted.
*/
@@ -3104,6 +3137,8 @@ static void clear_mapped_location(struct data_vio *data_vio)
/**
* set_mapped_location() - Decode and validate a block map entry, and set the mapped location of a
* data_vio.
+ * @data_vio: The data vio.
+ * @entry: The new mapped entry to set.
*
* Return: VDO_SUCCESS or VDO_BAD_MAPPING if the map entry is invalid or an error code for any
* other failure
diff --git a/drivers/md/dm-vdo/completion.c b/drivers/md/dm-vdo/completion.c
index 5ad85334632d..2f00acbb3b2b 100644
--- a/drivers/md/dm-vdo/completion.c
+++ b/drivers/md/dm-vdo/completion.c
@@ -65,6 +65,8 @@ static inline void assert_incomplete(struct vdo_completion *completion)
/**
* vdo_set_completion_result() - Set the result of a completion.
+ * @completion: The completion to update.
+ * @result: The result to set.
*
* Older errors will not be masked.
*/
@@ -77,6 +79,7 @@ void vdo_set_completion_result(struct vdo_completion *completion, int result)
/**
* vdo_launch_completion_with_priority() - Run or enqueue a completion.
+ * @completion: The completion to launch.
* @priority: The priority at which to enqueue the completion.
*
* If called on the correct thread (i.e. the one specified in the completion's callback_thread_id
@@ -125,6 +128,8 @@ void vdo_enqueue_completion(struct vdo_completion *completion,
/**
* vdo_requeue_completion_if_needed() - Requeue a completion if not called on the specified thread.
+ * @completion: The completion to requeue.
+ * @callback_thread_id: The thread on which to requeue the completion.
*
* Return: True if the completion was requeued; callers may not access the completion in this case.
*/
diff --git a/drivers/md/dm-vdo/data-vio.c b/drivers/md/dm-vdo/data-vio.c
index 262e11581f2d..3333e1e5b02e 100644
--- a/drivers/md/dm-vdo/data-vio.c
+++ b/drivers/md/dm-vdo/data-vio.c
@@ -227,6 +227,7 @@ static inline u64 get_arrival_time(struct bio *bio)
/**
* check_for_drain_complete_locked() - Check whether a data_vio_pool has no outstanding data_vios
* or waiters while holding the pool's lock.
+ * @pool: The data_vio pool.
*/
static bool check_for_drain_complete_locked(struct data_vio_pool *pool)
{
@@ -387,6 +388,7 @@ struct data_vio_compression_status advance_data_vio_compression_stage(struct dat
/**
* cancel_data_vio_compression() - Prevent this data_vio from being compressed or packed.
+ * @data_vio: The data_vio.
*
* Return: true if the data_vio is in the packer and the caller was the first caller to cancel it.
*/
@@ -483,6 +485,8 @@ static void attempt_logical_block_lock(struct vdo_completion *completion)
/**
* launch_data_vio() - (Re)initialize a data_vio to have a new logical block number, keeping the
* same parent and other state and send it on its way.
+ * @data_vio: The data_vio to launch.
+ * @lbn: The logical block number.
*/
static void launch_data_vio(struct data_vio *data_vio, logical_block_number_t lbn)
{
@@ -641,6 +645,7 @@ static void update_limiter(struct limiter *limiter)
/**
* schedule_releases() - Ensure that release processing is scheduled.
+ * @pool: The data_vio pool.
*
* If this call switches the state to processing, enqueue. Otherwise, some other thread has already
* done so.
@@ -768,6 +773,8 @@ static void initialize_limiter(struct limiter *limiter, struct data_vio_pool *po
/**
* initialize_data_vio() - Allocate the components of a data_vio.
+ * @data_vio: The data_vio to initialize.
+ * @vdo: The vdo containing the data_vio.
*
* The caller is responsible for cleaning up the data_vio on error.
*
@@ -880,6 +887,7 @@ int make_data_vio_pool(struct vdo *vdo, data_vio_count_t pool_size,
/**
* free_data_vio_pool() - Free a data_vio_pool and the data_vios in it.
+ * @pool: The data_vio pool to free.
*
* All data_vios must be returned to the pool before calling this function.
*/
@@ -944,6 +952,8 @@ static void wait_permit(struct limiter *limiter, struct bio *bio)
/**
* vdo_launch_bio() - Acquire a data_vio from the pool, assign the bio to it, and launch it.
+ * @pool: The data_vio pool.
+ * @bio: The bio to launch.
*
* This will block if data_vios or discard permits are not available.
*/
@@ -994,6 +1004,7 @@ static void assert_on_vdo_cpu_thread(const struct vdo *vdo, const char *name)
/**
* drain_data_vio_pool() - Wait asynchronously for all data_vios to be returned to the pool.
+ * @pool: The data_vio pool.
* @completion: The completion to notify when the pool has drained.
*/
void drain_data_vio_pool(struct data_vio_pool *pool, struct vdo_completion *completion)
@@ -1005,6 +1016,7 @@ void drain_data_vio_pool(struct data_vio_pool *pool, struct vdo_completion *comp
/**
* resume_data_vio_pool() - Resume a data_vio pool.
+ * @pool: The data_vio pool.
* @completion: The completion to notify when the pool has resumed.
*/
void resume_data_vio_pool(struct data_vio_pool *pool, struct vdo_completion *completion)
@@ -1024,6 +1036,7 @@ static void dump_limiter(const char *name, struct limiter *limiter)
/**
* dump_data_vio_pool() - Dump a data_vio pool to the log.
+ * @pool: The data_vio pool.
* @dump_vios: Whether to dump the details of each busy data_vio as well.
*/
void dump_data_vio_pool(struct data_vio_pool *pool, bool dump_vios)
@@ -1114,6 +1127,7 @@ static void perform_cleanup_stage(struct data_vio *data_vio,
/**
* release_allocated_lock() - Release the PBN lock and/or the reference on the allocated block at
* the end of processing a data_vio.
+ * @completion: The data_vio holding the lock.
*/
static void release_allocated_lock(struct vdo_completion *completion)
{
@@ -1194,6 +1208,7 @@ static void transfer_lock(struct data_vio *data_vio, struct lbn_lock *lock)
/**
* release_logical_lock() - Release the logical block lock and flush generation lock at the end of
* processing a data_vio.
+ * @completion: The data_vio holding the lock.
*/
static void release_logical_lock(struct vdo_completion *completion)
{
@@ -1228,6 +1243,7 @@ static void clean_hash_lock(struct vdo_completion *completion)
/**
* finish_cleanup() - Make some assertions about a data_vio which has finished cleaning up.
+ * @data_vio: The data_vio.
*
* If it is part of a multi-block discard, starts on the next block, otherwise, returns it to the
* pool.
@@ -1342,6 +1358,7 @@ void handle_data_vio_error(struct vdo_completion *completion)
/**
* get_data_vio_operation_name() - Get the name of the last asynchronous operation performed on a
* data_vio.
+ * @data_vio: The data_vio.
*/
const char *get_data_vio_operation_name(struct data_vio *data_vio)
{
@@ -1355,7 +1372,7 @@ const char *get_data_vio_operation_name(struct data_vio *data_vio)
/**
* data_vio_allocate_data_block() - Allocate a data block.
- *
+ * @data_vio: The data_vio.
* @write_lock_type: The type of write lock to obtain on the block.
* @callback: The callback which will attempt an allocation in the current zone and continue if it
* succeeds.
@@ -1379,6 +1396,7 @@ void data_vio_allocate_data_block(struct data_vio *data_vio,
/**
* release_data_vio_allocation_lock() - Release the PBN lock on a data_vio's allocated block.
+ * @data_vio: The data_vio.
* @reset: If true, the allocation will be reset (i.e. any allocated pbn will be forgotten).
*
* If the reference to the locked block is still provisional, it will be released as well.
@@ -1399,6 +1417,7 @@ void release_data_vio_allocation_lock(struct data_vio *data_vio, bool reset)
/**
* uncompress_data_vio() - Uncompress the data a data_vio has just read.
+ * @data_vio: The data_vio.
* @mapping_state: The mapping state indicating which fragment to decompress.
* @buffer: The buffer to receive the uncompressed data.
*/
@@ -1519,6 +1538,7 @@ static void complete_zero_read(struct vdo_completion *completion)
/**
* read_block() - Read a block asynchronously.
+ * @completion: The data_vio doing the read.
*
* This is the callback registered in read_block_mapping().
*/
@@ -1675,6 +1695,7 @@ static void journal_remapping(struct vdo_completion *completion)
/**
* read_old_block_mapping() - Get the previous PBN/LBN mapping of an in-progress write.
+ * @completion: The data_vio doing the read.
*
* Gets the previous PBN mapped to this LBN from the block map, so as to make an appropriate
* journal entry referencing the removal of this LBN->PBN mapping.
@@ -1704,6 +1725,7 @@ void update_metadata_for_data_vio_write(struct data_vio *data_vio, struct pbn_lo
/**
* pack_compressed_data() - Attempt to pack the compressed data_vio into a block.
+ * @completion: The data_vio.
*
* This is the callback registered in launch_compress_data_vio().
*/
@@ -1725,6 +1747,7 @@ static void pack_compressed_data(struct vdo_completion *completion)
/**
* compress_data_vio() - Do the actual work of compressing the data on a CPU queue.
+ * @completion: The data_vio.
*
* This callback is registered in launch_compress_data_vio().
*/
@@ -1754,6 +1777,7 @@ static void compress_data_vio(struct vdo_completion *completion)
/**
* launch_compress_data_vio() - Continue a write by attempting to compress the data.
+ * @data_vio: The data_vio.
*
* This is a re-entry point to vio_write used by hash locks.
*/
@@ -1796,7 +1820,8 @@ void launch_compress_data_vio(struct data_vio *data_vio)
/**
* hash_data_vio() - Hash the data in a data_vio and set the hash zone (which also flags the record
* name as set).
-
+ * @completion: The data_vio.
+ *
* This callback is registered in prepare_for_dedupe().
*/
static void hash_data_vio(struct vdo_completion *completion)
@@ -1832,6 +1857,7 @@ static void prepare_for_dedupe(struct data_vio *data_vio)
/**
* write_bio_finished() - This is the bio_end_io function registered in write_block() to be called
* when a data_vio's write to the underlying storage has completed.
+ * @bio: The bio to update.
*/
static void write_bio_finished(struct bio *bio)
{
@@ -1884,6 +1910,7 @@ void write_data_vio(struct data_vio *data_vio)
/**
* acknowledge_write_callback() - Acknowledge a write to the requestor.
+ * @completion: The data_vio.
*
* This callback is registered in allocate_block() and continue_write_with_block_map_slot().
*/
@@ -1909,6 +1936,7 @@ static void acknowledge_write_callback(struct vdo_completion *completion)
/**
* allocate_block() - Attempt to allocate a block in the current allocation zone.
+ * @completion: The data_vio.
*
* This callback is registered in continue_write_with_block_map_slot().
*/
@@ -1941,6 +1969,7 @@ static void allocate_block(struct vdo_completion *completion)
/**
* handle_allocation_error() - Handle an error attempting to allocate a block.
+ * @completion: The data_vio.
*
* This error handler is registered in continue_write_with_block_map_slot().
*/
@@ -1970,6 +1999,7 @@ static int assert_is_discard(struct data_vio *data_vio)
/**
* continue_data_vio_with_block_map_slot() - Read the data_vio's mapping from the block map.
+ * @completion: The data_vio to continue.
*
* This callback is registered in launch_read_data_vio().
*/
diff --git a/drivers/md/dm-vdo/dedupe.c b/drivers/md/dm-vdo/dedupe.c
index 4d983092a152..75a26f3f4461 100644
--- a/drivers/md/dm-vdo/dedupe.c
+++ b/drivers/md/dm-vdo/dedupe.c
@@ -917,6 +917,8 @@ static int __must_check acquire_lock(struct hash_zone *zone,
/**
* enter_forked_lock() - Bind the data_vio to a new hash lock.
+ * @waiter: The data_vio's waiter link.
+ * @context: The new hash lock.
*
* Implements waiter_callback_fn. Binds the data_vio that was waiting to a new hash lock and waits
* on that lock.
@@ -971,7 +973,7 @@ static void fork_hash_lock(struct hash_lock *old_lock, struct data_vio *new_agen
* path.
* @lock: The hash lock.
* @data_vio: The data_vio to deduplicate using the hash lock.
- * @has_claim: true if the data_vio already has claimed an increment from the duplicate lock.
+ * @has_claim: True if the data_vio already has claimed an increment from the duplicate lock.
*
* If no increments are available, this will roll over to a new hash lock and launch the data_vio
* as the writing agent for that lock.
@@ -996,7 +998,7 @@ static void launch_dedupe(struct hash_lock *lock, struct data_vio *data_vio,
* true copy of their data on disk.
* @lock: The hash lock.
* @agent: The data_vio acting as the agent for the lock.
- * @agent_is_done: true only if the agent has already written or deduplicated against its data.
+ * @agent_is_done: True only if the agent has already written or deduplicated against its data.
*
* If the agent itself needs to deduplicate, an increment for it must already have been claimed
* from the duplicate lock, ensuring the hash lock will still have a data_vio holding it.
@@ -2146,8 +2148,8 @@ static void start_expiration_timer(struct dedupe_context *context)
/**
* report_dedupe_timeouts() - Record and eventually report that some dedupe requests reached their
* expiration time without getting answers, so we timed them out.
- * @zones: the hash zones.
- * @timeouts: the number of newly timed out requests.
+ * @zones: The hash zones.
+ * @timeouts: The number of newly timed out requests.
*/
static void report_dedupe_timeouts(struct hash_zones *zones, unsigned int timeouts)
{
@@ -2509,6 +2511,8 @@ static void initiate_suspend_index(struct admin_state *state)
/**
* suspend_index() - Suspend the UDS index prior to draining hash zones.
+ * @context: Not used.
+ * @completion: The completion for the suspend operation.
*
* Implements vdo_action_preamble_fn
*/
@@ -2521,21 +2525,13 @@ static void suspend_index(void *context, struct vdo_completion *completion)
initiate_suspend_index);
}
-/**
- * initiate_drain() - Initiate a drain.
- *
- * Implements vdo_admin_initiator_fn.
- */
+/** Implements vdo_admin_initiator_fn. */
static void initiate_drain(struct admin_state *state)
{
check_for_drain_complete(container_of(state, struct hash_zone, state));
}
-/**
- * drain_hash_zone() - Drain a hash zone.
- *
- * Implements vdo_zone_action_fn.
- */
+/** Implements vdo_zone_action_fn. */
static void drain_hash_zone(void *context, zone_count_t zone_number,
struct vdo_completion *parent)
{
@@ -2572,6 +2568,8 @@ static void launch_dedupe_state_change(struct hash_zones *zones)
/**
* resume_index() - Resume the UDS index prior to resuming hash zones.
+ * @context: Not used.
+ * @parent: The completion for the resume operation.
*
* Implements vdo_action_preamble_fn
*/
@@ -2602,11 +2600,7 @@ static void resume_index(void *context, struct vdo_completion *parent)
vdo_finish_completion(parent);
}
-/**
- * resume_hash_zone() - Resume a hash zone.
- *
- * Implements vdo_zone_action_fn.
- */
+/** Implements vdo_zone_action_fn. */
static void resume_hash_zone(void *context, zone_count_t zone_number,
struct vdo_completion *parent)
{
@@ -2634,7 +2628,7 @@ void vdo_resume_hash_zones(struct hash_zones *zones, struct vdo_completion *pare
/**
* get_hash_zone_statistics() - Add the statistics for this hash zone to the tally for all zones.
* @zone: The hash zone to query.
- * @tally: The tally
+ * @tally: The tally.
*/
static void get_hash_zone_statistics(const struct hash_zone *zone,
struct hash_lock_statistics *tally)
@@ -2680,8 +2674,8 @@ static void get_index_statistics(struct hash_zones *zones,
/**
* vdo_get_dedupe_statistics() - Tally the statistics from all the hash zones and the UDS index.
- * @zones: The hash zones to query
- * @stats: A structure to store the statistics
+ * @zones: The hash zones to query.
+ * @stats: A structure to store the statistics.
*
* Return: The sum of the hash lock statistics from all hash zones plus the statistics from the UDS
* index
@@ -2856,9 +2850,9 @@ void vdo_set_dedupe_index_min_timer_interval(unsigned int value)
/**
* acquire_context() - Acquire a dedupe context from a hash_zone if any are available.
- * @zone: the hash zone
+ * @zone: The hash zone.
*
- * Return: A dedupe_context or NULL if none are available
+ * Return: A dedupe_context or NULL if none are available.
*/
static struct dedupe_context * __must_check acquire_context(struct hash_zone *zone)
{
diff --git a/drivers/md/dm-vdo/dm-vdo-target.c b/drivers/md/dm-vdo/dm-vdo-target.c
index 0e04c2021682..6af40d40f255 100644
--- a/drivers/md/dm-vdo/dm-vdo-target.c
+++ b/drivers/md/dm-vdo/dm-vdo-target.c
@@ -1144,6 +1144,7 @@ static bool vdo_uses_device(struct vdo *vdo, const void *context)
/**
* get_thread_id_for_phase() - Get the thread id for the current phase of the admin operation in
* progress.
+ * @vdo: The vdo.
*/
static thread_id_t __must_check get_thread_id_for_phase(struct vdo *vdo)
{
@@ -1188,9 +1189,9 @@ static struct vdo_completion *prepare_admin_completion(struct vdo *vdo,
/**
* advance_phase() - Increment the phase of the current admin operation and prepare the admin
* completion to run on the thread for the next phase.
- * @vdo: The on which an admin operation is being performed
+ * @vdo: The vdo on which an admin operation is being performed.
*
- * Return: The current phase
+ * Return: The current phase.
*/
static u32 advance_phase(struct vdo *vdo)
{
diff --git a/drivers/md/dm-vdo/encodings.c b/drivers/md/dm-vdo/encodings.c
index b7cc0f41caca..dd59691be840 100644
--- a/drivers/md/dm-vdo/encodings.c
+++ b/drivers/md/dm-vdo/encodings.c
@@ -432,7 +432,10 @@ static void encode_block_map_state_2_0(u8 *buffer, size_t *offset,
/**
* vdo_compute_new_forest_pages() - Compute the number of pages which must be allocated at each
* level in order to grow the forest to a new number of entries.
+ * @root_count: The number of block map roots.
+ * @old_sizes: The sizes of the old tree segments.
* @entries: The new number of entries the block map must address.
+ * @new_sizes: The sizes of the new tree segments.
*
* Return: The total number of non-leaf pages required.
*/
@@ -462,6 +465,9 @@ block_count_t vdo_compute_new_forest_pages(root_count_t root_count,
/**
* encode_recovery_journal_state_7_0() - Encode the state of a recovery journal.
+ * @buffer: A buffer to store the encoding.
+ * @offset: The offset in the buffer at which to encode.
+ * @state: The recovery journal state to encode.
*
* Return: VDO_SUCCESS or an error code.
*/
@@ -484,6 +490,7 @@ static void encode_recovery_journal_state_7_0(u8 *buffer, size_t *offset,
/**
* decode_recovery_journal_state_7_0() - Decode the state of a recovery journal saved in a buffer.
* @buffer: The buffer containing the saved state.
+ * @offset: The offset to start decoding from.
* @state: A pointer to a recovery journal state to hold the result of a successful decode.
*
* Return: VDO_SUCCESS or an error code.
@@ -544,6 +551,9 @@ const char *vdo_get_journal_operation_name(enum journal_operation operation)
/**
* encode_slab_depot_state_2_0() - Encode the state of a slab depot into a buffer.
+ * @buffer: A buffer to store the encoding.
+ * @offset: The offset in the buffer at which to encode.
+ * @state: The slab depot state to encode.
*/
static void encode_slab_depot_state_2_0(u8 *buffer, size_t *offset,
struct slab_depot_state_2_0 state)
@@ -570,6 +580,9 @@ static void encode_slab_depot_state_2_0(u8 *buffer, size_t *offset,
/**
* decode_slab_depot_state_2_0() - Decode slab depot component state version 2.0 from a buffer.
+ * @buffer: The buffer being decoded.
+ * @offset: The offset to start decoding from.
+ * @state: A pointer to a slab depot state to hold the decoded result.
*
* Return: VDO_SUCCESS or an error code.
*/
@@ -1156,6 +1169,9 @@ static struct vdo_component unpack_vdo_component_41_0(struct packed_vdo_componen
/**
* decode_vdo_component() - Decode the component data for the vdo itself out of the super block.
+ * @buffer: The buffer being decoded.
+ * @offset: The offset to start decoding from.
+ * @component: The vdo component structure to decode into.
*
* Return: VDO_SUCCESS or an error.
*/
@@ -1290,7 +1306,7 @@ void vdo_destroy_component_states(struct vdo_component_states *states)
* understand.
* @buffer: The buffer being decoded.
* @offset: The offset to start decoding from.
- * @geometry: The vdo geometry
+ * @geometry: The vdo geometry.
* @states: An object to hold the successfully decoded state.
*
* Return: VDO_SUCCESS or an error.
@@ -1329,7 +1345,7 @@ static int __must_check decode_components(u8 *buffer, size_t *offset,
/**
* vdo_decode_component_states() - Decode the payload of a super block.
* @buffer: The buffer containing the encoded super block contents.
- * @geometry: The vdo geometry
+ * @geometry: The vdo geometry.
* @states: A pointer to hold the decoded states.
*
* Return: VDO_SUCCESS or an error.
@@ -1383,6 +1399,9 @@ int vdo_validate_component_states(struct vdo_component_states *states,
/**
* vdo_encode_component_states() - Encode the state of all vdo components in the super block.
+ * @buffer: A buffer to store the encoding.
+ * @offset: The offset into the buffer to start the encoding.
+ * @states: The component states to encode.
*/
static void vdo_encode_component_states(u8 *buffer, size_t *offset,
const struct vdo_component_states *states)
@@ -1402,6 +1421,8 @@ static void vdo_encode_component_states(u8 *buffer, size_t *offset,
/**
* vdo_encode_super_block() - Encode a super block into its on-disk representation.
+ * @buffer: A buffer to store the encoding.
+ * @states: The component states to encode.
*/
void vdo_encode_super_block(u8 *buffer, struct vdo_component_states *states)
{
@@ -1426,6 +1447,7 @@ void vdo_encode_super_block(u8 *buffer, struct vdo_component_states *states)
/**
* vdo_decode_super_block() - Decode a super block from its on-disk representation.
+ * @buffer: The buffer to decode from.
*/
int vdo_decode_super_block(u8 *buffer)
{
diff --git a/drivers/md/dm-vdo/flush.c b/drivers/md/dm-vdo/flush.c
index dd4fdee2ca0c..82a259ef1601 100644
--- a/drivers/md/dm-vdo/flush.c
+++ b/drivers/md/dm-vdo/flush.c
@@ -522,11 +522,7 @@ static void vdo_complete_flush(struct vdo_flush *flush)
vdo_enqueue_completion(completion, BIO_Q_FLUSH_PRIORITY);
}
-/**
- * initiate_drain() - Initiate a drain.
- *
- * Implements vdo_admin_initiator_fn.
- */
+/** Implements vdo_admin_initiator_fn. */
static void initiate_drain(struct admin_state *state)
{
check_for_drain_complete(container_of(state, struct flusher, state));
diff --git a/drivers/md/dm-vdo/funnel-workqueue.c b/drivers/md/dm-vdo/funnel-workqueue.c
index 0613c82bbe8e..8a79b33b8b09 100644
--- a/drivers/md/dm-vdo/funnel-workqueue.c
+++ b/drivers/md/dm-vdo/funnel-workqueue.c
@@ -372,6 +372,13 @@ static int make_simple_work_queue(const char *thread_name_prefix, const char *na
/**
* vdo_make_work_queue() - Create a work queue; if multiple threads are requested, completions will
* be distributed to them in round-robin fashion.
+ * @thread_name_prefix: A prefix for the thread names to identify them as a vdo thread.
+ * @name: A base name to identify this queue.
+ * @owner: The vdo_thread structure to manage this queue.
+ * @type: The type of queue to create.
+ * @thread_count: The number of actual threads handling this queue.
+ * @thread_privates: An array of private contexts, one for each thread; may be NULL.
+ * @queue_ptr: A pointer to return the new work queue.
*
* Each queue is associated with a struct vdo_thread which has a single vdo thread id. Regardless
* of the actual number of queues and threads allocated here, code outside of the queue
diff --git a/drivers/md/dm-vdo/io-submitter.c b/drivers/md/dm-vdo/io-submitter.c
index 11d47770b54d..e26d75f8366d 100644
--- a/drivers/md/dm-vdo/io-submitter.c
+++ b/drivers/md/dm-vdo/io-submitter.c
@@ -118,6 +118,7 @@ static void send_bio_to_device(struct vio *vio, struct bio *bio)
/**
* vdo_submit_vio() - Submits a vio's bio to the underlying block device. May block if the device
* is busy. This callback should be used by vios which did not attempt to merge.
+ * @completion: The vio to submit.
*/
void vdo_submit_vio(struct vdo_completion *completion)
{
@@ -133,7 +134,7 @@ void vdo_submit_vio(struct vdo_completion *completion)
* The list will always contain at least one entry (the bio for the vio on which it is called), but
* other bios may have been merged with it as well.
*
- * Return: bio The head of the bio list to submit.
+ * Return: The head of the bio list to submit.
*/
static struct bio *get_bio_list(struct vio *vio)
{
@@ -158,6 +159,7 @@ static struct bio *get_bio_list(struct vio *vio)
/**
* submit_data_vio() - Submit a data_vio's bio to the storage below along with
* any bios that have been merged with it.
+ * @completion: The vio to submit.
*
* Context: This call may block and so should only be called from a bio thread.
*/
@@ -184,7 +186,7 @@ static void submit_data_vio(struct vdo_completion *completion)
* There are two types of merging possible, forward and backward, which are distinguished by a flag
* that uses kernel elevator terminology.
*
- * Return: the vio to merge to, NULL if no merging is possible.
+ * Return: The vio to merge to, NULL if no merging is possible.
*/
static struct vio *get_mergeable_locked(struct int_map *map, struct vio *vio,
bool back_merge)
@@ -262,7 +264,7 @@ static int merge_to_next_head(struct int_map *bio_map, struct vio *vio,
*
* Currently this is only used for data_vios, but is broken out for future use with metadata vios.
*
- * Return: whether or not the vio was merged.
+ * Return: Whether or not the vio was merged.
*/
static bool try_bio_map_merge(struct vio *vio)
{
@@ -306,7 +308,7 @@ static bool try_bio_map_merge(struct vio *vio)
/**
* vdo_submit_data_vio() - Submit I/O for a data_vio.
- * @data_vio: the data_vio for which to issue I/O.
+ * @data_vio: The data_vio for which to issue I/O.
*
 * If possible, this I/O will be merged with other pending I/Os. Otherwise, the data_vio will be sent to
* the appropriate bio zone directly.
@@ -321,13 +323,13 @@ void vdo_submit_data_vio(struct data_vio *data_vio)
/**
* __submit_metadata_vio() - Submit I/O for a metadata vio.
- * @vio: the vio for which to issue I/O
- * @physical: the physical block number to read or write
- * @callback: the bio endio function which will be called after the I/O completes
- * @error_handler: the handler for submission or I/O errors (may be NULL)
- * @operation: the type of I/O to perform
- * @data: the buffer to read or write (may be NULL)
- * @size: the I/O amount in bytes
+ * @vio: The vio for which to issue I/O.
+ * @physical: The physical block number to read or write.
+ * @callback: The bio endio function which will be called after the I/O completes.
+ * @error_handler: The handler for submission or I/O errors; may be NULL.
+ * @operation: The type of I/O to perform.
+ * @data: The buffer to read or write; may be NULL.
+ * @size: The I/O amount in bytes.
*
* The vio is enqueued on a vdo bio queue so that bio submission (which may block) does not block
* other vdo threads.
@@ -441,7 +443,7 @@ int vdo_make_io_submitter(unsigned int thread_count, unsigned int rotation_inter
/**
* vdo_cleanup_io_submitter() - Tear down the io_submitter fields as needed for a physical layer.
- * @io_submitter: The I/O submitter data to tear down (may be NULL).
+ * @io_submitter: The I/O submitter data to tear down; may be NULL.
*/
void vdo_cleanup_io_submitter(struct io_submitter *io_submitter)
{
diff --git a/drivers/md/dm-vdo/logical-zone.c b/drivers/md/dm-vdo/logical-zone.c
index 026f031ffc9e..0a27e60a9dfd 100644
--- a/drivers/md/dm-vdo/logical-zone.c
+++ b/drivers/md/dm-vdo/logical-zone.c
@@ -159,21 +159,13 @@ static void check_for_drain_complete(struct logical_zone *zone)
vdo_finish_draining(&zone->state);
}
-/**
- * initiate_drain() - Initiate a drain.
- *
- * Implements vdo_admin_initiator_fn.
- */
+/** Implements vdo_admin_initiator_fn. */
static void initiate_drain(struct admin_state *state)
{
check_for_drain_complete(container_of(state, struct logical_zone, state));
}
-/**
- * drain_logical_zone() - Drain a logical zone.
- *
- * Implements vdo_zone_action_fn.
- */
+/** Implements vdo_zone_action_fn. */
static void drain_logical_zone(void *context, zone_count_t zone_number,
struct vdo_completion *parent)
{
@@ -192,11 +184,7 @@ void vdo_drain_logical_zones(struct logical_zones *zones,
parent);
}
-/**
- * resume_logical_zone() - Resume a logical zone.
- *
- * Implements vdo_zone_action_fn.
- */
+/** Implements vdo_zone_action_fn. */
static void resume_logical_zone(void *context, zone_count_t zone_number,
struct vdo_completion *parent)
{
@@ -356,7 +344,7 @@ struct physical_zone *vdo_get_next_allocation_zone(struct logical_zone *zone)
/**
* vdo_dump_logical_zone() - Dump information about a logical zone to the log for debugging.
- * @zone: The zone to dump
+ * @zone: The zone to dump.
*
 * Context: The information is dumped in a thread-unsafe fashion.
*
diff --git a/drivers/md/dm-vdo/packer.c b/drivers/md/dm-vdo/packer.c
index f70f5edabc10..666be6d557e1 100644
--- a/drivers/md/dm-vdo/packer.c
+++ b/drivers/md/dm-vdo/packer.c
@@ -35,10 +35,10 @@ static const struct version_number COMPRESSED_BLOCK_1_0 = {
/**
* vdo_get_compressed_block_fragment() - Get a reference to a compressed fragment from a compressed
* block.
- * @mapping_state [in] The mapping state for the look up.
- * @compressed_block [in] The compressed block that was read from disk.
- * @fragment_offset [out] The offset of the fragment within a compressed block.
- * @fragment_size [out] The size of the fragment.
+ * @mapping_state: The mapping state describing the fragment.
+ * @block: The compressed block that was read from disk.
+ * @fragment_offset: The offset of the fragment within the compressed block.
+ * @fragment_size: The size of the fragment.
*
 * Return: VDO_SUCCESS if a valid compressed fragment is found; otherwise, VDO_INVALID_FRAGMENT.
@@ -382,6 +382,7 @@ static void initialize_compressed_block(struct compressed_block *block, u16 size
* @compression: The agent's compression_state to pack in to.
* @data_vio: The data_vio to pack.
* @offset: The offset into the compressed block at which to pack the fragment.
+ * @slot: The slot number in the compressed block.
* @block: The compressed block which will be written out when batch is fully packed.
*
* Return: The new amount of space used.
@@ -705,11 +706,7 @@ void vdo_increment_packer_flush_generation(struct packer *packer)
vdo_flush_packer(packer);
}
-/**
- * initiate_drain() - Initiate a drain.
- *
- * Implements vdo_admin_initiator_fn.
- */
+/** Implements vdo_admin_initiator_fn. */
static void initiate_drain(struct admin_state *state)
{
struct packer *packer = container_of(state, struct packer, state);
diff --git a/drivers/md/dm-vdo/physical-zone.c b/drivers/md/dm-vdo/physical-zone.c
index a43b5c45fab7..686eb7d714e6 100644
--- a/drivers/md/dm-vdo/physical-zone.c
+++ b/drivers/md/dm-vdo/physical-zone.c
@@ -60,7 +60,7 @@ static inline bool has_lock_type(const struct pbn_lock *lock, enum pbn_lock_type
* vdo_is_pbn_read_lock() - Check whether a pbn_lock is a read lock.
* @lock: The lock to check.
*
- * Return: true if the lock is a read lock.
+ * Return: True if the lock is a read lock.
*/
bool vdo_is_pbn_read_lock(const struct pbn_lock *lock)
{
@@ -75,6 +75,7 @@ static inline void set_pbn_lock_type(struct pbn_lock *lock, enum pbn_lock_type t
/**
* vdo_downgrade_pbn_write_lock() - Downgrade a PBN write lock to a PBN read lock.
* @lock: The PBN write lock to downgrade.
+ * @compressed_write: True if the written block was a compressed block.
*
* The lock holder count is cleared and the caller is responsible for setting the new count.
*/
@@ -582,7 +583,7 @@ static bool continue_allocating(struct data_vio *data_vio)
* that fails try the next if possible.
* @data_vio: The data_vio needing an allocation.
*
- * Return: true if a block was allocated, if not the data_vio will have been dispatched so the
+ * Return: True if a block was allocated; if not, the data_vio will have been dispatched so the
* caller must not touch it.
*/
bool vdo_allocate_block_in_zone(struct data_vio *data_vio)
diff --git a/drivers/md/dm-vdo/recovery-journal.c b/drivers/md/dm-vdo/recovery-journal.c
index de58184f538f..9cc0f0ff1664 100644
--- a/drivers/md/dm-vdo/recovery-journal.c
+++ b/drivers/md/dm-vdo/recovery-journal.c
@@ -109,7 +109,7 @@ static atomic_t *get_decrement_counter(struct recovery_journal *journal,
* @journal: The recovery journal.
* @lock_number: The lock to check.
*
- * Return: true if the journal zone is locked.
+ * Return: True if the journal zone is locked.
*/
static bool is_journal_zone_locked(struct recovery_journal *journal,
block_count_t lock_number)
@@ -217,7 +217,7 @@ static struct recovery_journal_block * __must_check pop_free_list(struct recover
 * Indicates whether it has any uncommitted entries, which includes both entries not written and entries
* written but not yet acknowledged.
*
- * Return: true if the block has any uncommitted entries.
+ * Return: True if the block has any uncommitted entries.
*/
static inline bool __must_check is_block_dirty(const struct recovery_journal_block *block)
{
@@ -228,7 +228,7 @@ static inline bool __must_check is_block_dirty(const struct recovery_journal_blo
* is_block_empty() - Check whether a journal block is empty.
* @block: The block to check.
*
- * Return: true if the block has no entries.
+ * Return: True if the block has no entries.
*/
static inline bool __must_check is_block_empty(const struct recovery_journal_block *block)
{
@@ -239,7 +239,7 @@ static inline bool __must_check is_block_empty(const struct recovery_journal_blo
* is_block_full() - Check whether a journal block is full.
* @block: The block to check.
*
- * Return: true if the block is full.
+ * Return: True if the block is full.
*/
static inline bool __must_check is_block_full(const struct recovery_journal_block *block)
{
@@ -260,6 +260,8 @@ static void assert_on_journal_thread(struct recovery_journal *journal,
/**
* continue_waiter() - Release a data_vio from the journal.
+ * @waiter: The data_vio waiting on journal activity.
+ * @context: The result of the journal operation.
*
* Invoked whenever a data_vio is to be released from the journal, either because its entry was
* committed to disk, or because there was an error. Implements waiter_callback_fn.
@@ -273,7 +275,7 @@ static void continue_waiter(struct vdo_waiter *waiter, void *context)
* has_block_waiters() - Check whether the journal has any waiters on any blocks.
* @journal: The journal in question.
*
- * Return: true if any block has a waiter.
+ * Return: True if any block has a waiter.
*/
static inline bool has_block_waiters(struct recovery_journal *journal)
{
@@ -296,7 +298,7 @@ static void notify_commit_waiters(struct recovery_journal *journal);
* suspend_lock_counter() - Prevent the lock counter from notifying.
* @counter: The counter.
*
- * Return: true if the lock counter was not notifying and hence the suspend was efficacious.
+ * Return: True if the lock counter was not notifying and hence the suspend was efficacious.
*/
static bool suspend_lock_counter(struct lock_counter *counter)
{
@@ -416,7 +418,7 @@ sequence_number_t vdo_get_recovery_journal_current_sequence_number(struct recove
*
* The head is the lowest sequence number of the block map head and the slab journal head.
*
- * Return: the head of the journal.
+ * Return: The head of the journal.
*/
static inline sequence_number_t get_recovery_journal_head(const struct recovery_journal *journal)
{
@@ -535,7 +537,7 @@ static void initialize_journal_state(struct recovery_journal *journal)
* vdo_get_recovery_journal_length() - Get the number of usable recovery journal blocks.
* @journal_size: The size of the recovery journal in blocks.
*
- * Return: the number of recovery journal blocks usable for entries.
+ * Return: The number of recovery journal blocks usable for entries.
*/
block_count_t vdo_get_recovery_journal_length(block_count_t journal_size)
{
@@ -1078,6 +1080,8 @@ static void update_usages(struct recovery_journal *journal, struct data_vio *dat
/**
* assign_entry() - Assign an entry waiter to the active block.
+ * @waiter: The data_vio.
+ * @context: The recovery journal block.
*
* Implements waiter_callback_fn.
*/
@@ -1165,6 +1169,8 @@ static void recycle_journal_block(struct recovery_journal_block *block)
/**
 * continue_committed_waiter() - Invoked whenever a VIO is to be released from the journal because
* its entry was committed to disk.
+ * @waiter: The data_vio waiting on a journal write.
+ * @context: A pointer to the recovery journal.
*
* Implements waiter_callback_fn.
*/
@@ -1362,6 +1368,8 @@ static void add_queued_recovery_entries(struct recovery_journal_block *block)
/**
* write_block() - Issue a block for writing.
+ * @waiter: The recovery journal block to write.
+ * @context: Not used.
*
* Implements waiter_callback_fn.
*/
@@ -1611,11 +1619,7 @@ void vdo_release_journal_entry_lock(struct recovery_journal *journal,
smp_mb__after_atomic();
}
-/**
- * initiate_drain() - Initiate a drain.
- *
- * Implements vdo_admin_initiator_fn.
- */
+/** Implements vdo_admin_initiator_fn. */
static void initiate_drain(struct admin_state *state)
{
check_for_drain_complete(container_of(state, struct recovery_journal, state));
diff --git a/drivers/md/dm-vdo/slab-depot.c b/drivers/md/dm-vdo/slab-depot.c
index f3d80ff7bef5..034ecaa51f48 100644
--- a/drivers/md/dm-vdo/slab-depot.c
+++ b/drivers/md/dm-vdo/slab-depot.c
@@ -40,7 +40,7 @@ static const bool NORMAL_OPERATION = true;
/**
* get_lock() - Get the lock object for a slab journal block by sequence number.
- * @journal: vdo_slab journal to retrieve from.
+ * @journal: The vdo_slab journal to retrieve from.
* @sequence_number: Sequence number of the block.
*
* Return: The lock object for the given sequence number.
@@ -110,7 +110,7 @@ static void initialize_journal_state(struct slab_journal *journal)
* block_is_full() - Check whether a journal block is full.
* @journal: The slab journal for the block.
*
- * Return: true if the tail block is full.
+ * Return: True if the tail block is full.
*/
static bool __must_check block_is_full(struct slab_journal *journal)
{
@@ -127,10 +127,11 @@ static void release_journal_locks(struct vdo_waiter *waiter, void *context);
/**
* is_slab_journal_blank() - Check whether a slab's journal is blank.
+ * @slab: The slab to check.
*
* A slab journal is blank if it has never had any entries recorded in it.
*
- * Return: true if the slab's journal has never been modified.
+ * Return: True if the slab's journal has never been modified.
*/
static bool is_slab_journal_blank(const struct vdo_slab *slab)
{
@@ -227,6 +228,7 @@ static u8 __must_check compute_fullness_hint(struct slab_depot *depot,
/**
 * check_summary_drain_complete() - Check whether an allocator's summary has finished draining.
+ * @allocator: The allocator to check.
*/
static void check_summary_drain_complete(struct block_allocator *allocator)
{
@@ -349,7 +351,7 @@ static void launch_write(struct slab_summary_block *block)
/**
* update_slab_summary_entry() - Update the entry for a slab.
- * @slab: The slab whose entry is to be updated
+ * @slab: The slab whose entry is to be updated.
* @waiter: The waiter that is updating the summary.
* @tail_block_offset: The offset of the slab journal's tail block.
* @load_ref_counts: Whether the reference counts must be loaded from disk on the vdo load.
@@ -654,6 +656,7 @@ static void update_tail_block_location(struct slab_journal *journal)
/**
* reopen_slab_journal() - Reopen a slab's journal by emptying it and then adding pending entries.
+ * @slab: The slab to reopen.
*/
static void reopen_slab_journal(struct vdo_slab *slab)
{
@@ -839,8 +842,6 @@ static void commit_tail(struct slab_journal *journal)
* @sbn: The slab block number of the entry to encode.
* @operation: The type of the entry.
* @increment: True if this is an increment.
- *
- * Exposed for unit tests.
*/
static void encode_slab_journal_entry(struct slab_journal_block_header *tail_header,
slab_journal_payload *payload,
@@ -951,7 +952,7 @@ static inline block_count_t journal_length(const struct slab_journal *journal)
* @parent: The completion to notify when there is space to add the entry if the entry could not be
* added immediately.
*
- * Return: true if the entry was added immediately.
+ * Return: True if the entry was added immediately.
*/
bool vdo_attempt_replay_into_slab(struct vdo_slab *slab, physical_block_number_t pbn,
enum journal_operation operation, bool increment,
@@ -1003,7 +1004,7 @@ bool vdo_attempt_replay_into_slab(struct vdo_slab *slab, physical_block_number_t
* requires_reaping() - Check whether the journal must be reaped before adding new entries.
* @journal: The journal to check.
*
- * Return: true if the journal must be reaped.
+ * Return: True if the journal must be reaped.
*/
static bool requires_reaping(const struct slab_journal *journal)
{
@@ -1275,6 +1276,8 @@ static void dirty_block(struct reference_block *block)
/**
* get_reference_block() - Get the reference block that covers the given block index.
+ * @slab: The slab containing the references.
+ * @index: The index of the physical block.
*/
static struct reference_block * __must_check get_reference_block(struct vdo_slab *slab,
slab_block_number index)
@@ -1379,7 +1382,8 @@ static void prioritize_slab(struct vdo_slab *slab)
/**
* adjust_free_block_count() - Adjust the free block count and (if needed) reprioritize the slab.
- * @incremented: true if the free block count went up.
+ * @slab: The slab.
+ * @incremented: True if the free block count went up.
*/
static void adjust_free_block_count(struct vdo_slab *slab, bool incremented)
{
@@ -1885,6 +1889,7 @@ static void add_entries(struct slab_journal *journal)
/**
* reset_search_cursor() - Reset the free block search back to the first reference counter in the
* first reference block of a slab.
+ * @slab: The slab.
*/
static void reset_search_cursor(struct vdo_slab *slab)
{
@@ -1892,17 +1897,17 @@ static void reset_search_cursor(struct vdo_slab *slab)
cursor->block = cursor->first_block;
cursor->index = 0;
- /* Unit tests have slabs with only one reference block (and it's a runt). */
cursor->end_index = min_t(u32, COUNTS_PER_BLOCK, slab->block_count);
}
/**
* advance_search_cursor() - Advance the search cursor to the start of the next reference block in
- * a slab,
+ * a slab.
+ * @slab: The slab.
*
* Wraps around to the first reference block if the current block is the last reference block.
*
- * Return: true unless the cursor was at the last reference block.
+ * Return: True unless the cursor was at the last reference block.
*/
static bool advance_search_cursor(struct vdo_slab *slab)
{
@@ -1933,6 +1938,9 @@ static bool advance_search_cursor(struct vdo_slab *slab)
/**
* vdo_adjust_reference_count_for_rebuild() - Adjust the reference count of a block during rebuild.
+ * @depot: The slab depot.
+ * @pbn: The physical block number to adjust.
+ * @operation: The type of operation.
*
* Return: VDO_SUCCESS or an error.
*/
@@ -2038,9 +2046,7 @@ static inline slab_block_number find_zero_byte_in_word(const u8 *word_ptr,
* @slab: The slab counters to scan.
* @index_ptr: A pointer to hold the array index of the free block.
*
- * Exposed for unit testing.
- *
- * Return: true if a free block was found in the specified range.
+ * Return: True if a free block was found in the specified range.
*/
static bool find_free_block(const struct vdo_slab *slab, slab_block_number *index_ptr)
{
@@ -2097,7 +2103,7 @@ static bool find_free_block(const struct vdo_slab *slab, slab_block_number *inde
* @slab: The slab to search.
* @free_index_ptr: A pointer to receive the array index of the zero reference count.
*
- * Return: true if an unreferenced counter was found.
+ * Return: True if an unreferenced counter was found.
*/
static bool search_current_reference_block(const struct vdo_slab *slab,
slab_block_number *free_index_ptr)
@@ -2116,7 +2122,7 @@ static bool search_current_reference_block(const struct vdo_slab *slab,
* counter index saved in the search cursor and searching up to the end of the last reference
* block. The search does not wrap.
*
- * Return: true if an unreferenced counter was found.
+ * Return: True if an unreferenced counter was found.
*/
static bool search_reference_blocks(struct vdo_slab *slab,
slab_block_number *free_index_ptr)
@@ -2136,6 +2142,8 @@ static bool search_reference_blocks(struct vdo_slab *slab,
/**
* make_provisional_reference() - Do the bookkeeping for making a provisional reference.
+ * @slab: The slab.
+ * @block_number: The index for the physical block to reference.
*/
static void make_provisional_reference(struct vdo_slab *slab,
slab_block_number block_number)
@@ -2155,6 +2163,7 @@ static void make_provisional_reference(struct vdo_slab *slab,
/**
* dirty_all_reference_blocks() - Mark all reference count blocks in a slab as dirty.
+ * @slab: The slab.
*/
static void dirty_all_reference_blocks(struct vdo_slab *slab)
{
@@ -2173,10 +2182,10 @@ static inline bool journal_points_equal(struct journal_point first,
/**
* match_bytes() - Check an 8-byte word for bytes matching the value specified
- * @input: A word to examine the bytes of
- * @match: The byte value sought
+ * @input: A word to examine the bytes of.
+ * @match: The byte value sought.
*
- * Return: 1 in each byte when the corresponding input byte matched, 0 otherwise
+ * Return: 1 in each byte when the corresponding input byte matched, 0 otherwise.
*/
static inline u64 match_bytes(u64 input, u8 match)
{
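The Return line above describes a per-byte equality mask: each byte of the result is 1 where the corresponding byte of the input equals the sought value, and 0 elsewhere. A plain byte-at-a-time model of that contract, for illustration only (the in-kernel helper may use a branch-free bit trick rather than a loop):

#include <stdint.h>

static uint64_t match_bytes_model(uint64_t input, uint8_t match)
{
	uint64_t result = 0;
	int i;

	for (i = 0; i < 8; i++) {
		uint8_t byte = (uint8_t)(input >> (8 * i));

		if (byte == match)
			result |= (uint64_t)1 << (8 * i);
	}
	return result;
}

/* Example: match_bytes_model(0x00AA0000000000AA, 0xAA) == 0x0001000000000001. */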
@@ -2191,12 +2200,12 @@ static inline u64 match_bytes(u64 input, u8 match)
/**
* count_valid_references() - Process a newly loaded refcount array
- * @counters: the array of counters from a metadata block
+ * @counters: The array of counters from a metadata block.
*
- * Scan a 8-byte-aligned array of counters, fixing up any "provisional" values that weren't
- * cleaned up at shutdown, changing them internally to "empty".
+ * Scan an 8-byte-aligned array of counters, fixing up any provisional values that
+ * weren't cleaned up at shutdown, changing them internally to zero.
*
- * Return: the number of blocks that are referenced (counters not "empty")
+ * Return: The number of blocks with a non-zero reference count.
*/
static unsigned int count_valid_references(vdo_refcount_t *counters)
{
@@ -2351,6 +2360,7 @@ static void load_reference_block_group(struct vdo_waiter *waiter, void *context)
/**
* load_reference_blocks() - Load a slab's reference blocks from the underlying storage into a
* pre-allocated reference counter.
+ * @slab: The slab.
*/
static void load_reference_blocks(struct vdo_slab *slab)
{
@@ -2375,6 +2385,7 @@ static void load_reference_blocks(struct vdo_slab *slab)
/**
* drain_slab() - Drain all reference count I/O.
+ * @slab: The slab.
*
* Depending upon the type of drain being performed (as recorded in the ref_count's vdo_slab), the
* reference blocks may be loaded from disk or dirty reference blocks may be written out.
@@ -2564,6 +2575,7 @@ static void read_slab_journal_tail(struct vdo_waiter *waiter, void *context)
/**
* load_slab_journal() - Load a slab's journal by reading the journal's tail.
+ * @slab: The slab.
*/
static void load_slab_journal(struct vdo_slab *slab)
{
@@ -2663,11 +2675,7 @@ static void queue_slab(struct vdo_slab *slab)
prioritize_slab(slab);
}
-/**
- * initiate_slab_action() - Initiate a slab action.
- *
- * Implements vdo_admin_initiator_fn.
- */
+/** Implements vdo_admin_initiator_fn. */
static void initiate_slab_action(struct admin_state *state)
{
struct vdo_slab *slab = container_of(state, struct vdo_slab, state);
@@ -2720,7 +2728,7 @@ static struct vdo_slab *get_next_slab(struct slab_scrubber *scrubber)
* has_slabs_to_scrub() - Check whether a scrubber has slabs to scrub.
* @scrubber: The scrubber to check.
*
- * Return: true if the scrubber has slabs to scrub.
+ * Return: True if the scrubber has slabs to scrub.
*/
static inline bool __must_check has_slabs_to_scrub(struct slab_scrubber *scrubber)
{
@@ -2741,6 +2749,7 @@ static void uninitialize_scrubber_vio(struct slab_scrubber *scrubber)
* finish_scrubbing() - Stop scrubbing, either because there are no more slabs to scrub or because
* there's been an error.
* @scrubber: The scrubber.
+ * @result: The result of the scrubbing operation.
*/
static void finish_scrubbing(struct slab_scrubber *scrubber, int result)
{
@@ -3132,11 +3141,13 @@ static struct vdo_slab *next_slab(struct slab_iterator *iterator)
/**
* abort_waiter() - Abort vios waiting to make journal entries when read-only.
+ * @waiter: A waiting data_vio.
+ * @context: Not used.
*
* This callback is invoked on all vios waiting to make slab journal entries after the VDO has gone
* into read-only mode. Implements waiter_callback_fn.
*/
-static void abort_waiter(struct vdo_waiter *waiter, void *context __always_unused)
+static void abort_waiter(struct vdo_waiter *waiter, void __always_unused *context)
{
struct reference_updater *updater =
container_of(waiter, struct reference_updater, waiter);
@@ -3536,7 +3547,7 @@ static void initiate_load(struct admin_state *state)
/**
* vdo_notify_slab_journals_are_recovered() - Inform a block allocator that its slab journals have
* been recovered from the recovery journal.
- * @completion The allocator completion
+ * @completion: The allocator completion.
*/
void vdo_notify_slab_journals_are_recovered(struct vdo_completion *completion)
{
@@ -3775,7 +3786,7 @@ static int initialize_slab_journal(struct vdo_slab *slab)
* in the slab.
* @allocator: The block allocator to which the slab belongs.
* @slab_number: The slab number of the slab.
- * @is_new: true if this slab is being allocated as part of a resize.
+ * @is_new: True if this slab is being allocated as part of a resize.
* @slab_ptr: A pointer to receive the new slab.
*
* Return: VDO_SUCCESS or an error code.
@@ -3894,11 +3905,7 @@ void vdo_abandon_new_slabs(struct slab_depot *depot)
vdo_free(vdo_forget(depot->new_slabs));
}
-/**
- * get_allocator_thread_id() - Get the ID of the thread on which a given allocator operates.
- *
- * Implements vdo_zone_thread_getter_fn.
- */
+/** Implements vdo_zone_thread_getter_fn. */
static thread_id_t get_allocator_thread_id(void *context, zone_count_t zone_number)
{
return ((struct slab_depot *) context)->allocators[zone_number].thread_id;
@@ -3911,7 +3918,7 @@ static thread_id_t get_allocator_thread_id(void *context, zone_count_t zone_numb
* @recovery_lock: The sequence number of the recovery journal block whose locks should be
* released.
*
- * Return: true if the journal does hold a lock on the specified block (which it will release).
+ * Return: True if the journal released a lock on the specified block.
*/
static bool __must_check release_recovery_journal_lock(struct slab_journal *journal,
sequence_number_t recovery_lock)
@@ -3955,6 +3962,8 @@ static void release_tail_block_locks(void *context, zone_count_t zone_number,
/**
* prepare_for_tail_block_commit() - Prepare to commit oldest tail blocks.
+ * @context: The slab depot.
+ * @parent: The parent operation.
*
* Implements vdo_action_preamble_fn.
*/
@@ -3968,6 +3977,7 @@ static void prepare_for_tail_block_commit(void *context, struct vdo_completion *
/**
* schedule_tail_block_commit() - Schedule a tail block commit if necessary.
+ * @context: The slab depot.
*
* This method should not be called directly. Rather, call vdo_schedule_default_action() on the
* depot's action manager.
@@ -4361,6 +4371,7 @@ struct slab_depot_state_2_0 vdo_record_slab_depot(const struct slab_depot *depot
/**
* vdo_allocate_reference_counters() - Allocate the reference counters for all slabs in the depot.
+ * @depot: The slab depot.
*
* Context: This method may be called only before entering normal operation from the load thread.
*
@@ -4615,7 +4626,9 @@ static void load_summary_endio(struct bio *bio)
}
/**
- * load_slab_summary() - The preamble of a load operation.
+ * load_slab_summary() - Load the slab summary before the slab data.
+ * @context: The slab depot.
+ * @parent: The load operation.
*
* Implements vdo_action_preamble_fn.
*/
@@ -4731,7 +4744,7 @@ void vdo_update_slab_depot_size(struct slab_depot *depot)
* vdo_prepare_to_grow_slab_depot() - Allocate new memory needed for a resize of a slab depot to
* the given size.
* @depot: The depot to prepare to resize.
- * @partition: The new depot partition
+ * @partition: The new depot partition.
*
* Return: VDO_SUCCESS or an error.
*/
@@ -4781,6 +4794,7 @@ int vdo_prepare_to_grow_slab_depot(struct slab_depot *depot,
/**
* finish_registration() - Finish registering new slabs now that all of the allocators have
* received their new slabs.
+ * @context: The slab depot.
*
* Implements vdo_action_conclusion_fn.
*/
diff --git a/drivers/md/dm-vdo/vdo.c b/drivers/md/dm-vdo/vdo.c
index 80b608674022..09fd0628d18c 100644
--- a/drivers/md/dm-vdo/vdo.c
+++ b/drivers/md/dm-vdo/vdo.c
@@ -181,6 +181,8 @@ static void assign_thread_ids(struct thread_config *config,
/**
* initialize_thread_config() - Initialize the thread mapping
+ * @counts: The number and types of threads to create.
+ * @config: The thread_config to initialize.
*
* If the logical, physical, and hash zone counts are all 0, a single thread will be shared by all
* three plus the packer and recovery journal. Otherwise, there must be at least one of each type,
@@ -884,6 +886,7 @@ const struct admin_state_code *vdo_get_admin_state(const struct vdo *vdo)
/**
* record_vdo() - Record the state of the VDO for encoding in the super block.
+ * @vdo: The vdo.
*/
static void record_vdo(struct vdo *vdo)
{
@@ -1277,7 +1280,7 @@ void vdo_enter_read_only_mode(struct vdo *vdo, int error_code)
* vdo_is_read_only() - Check whether the VDO is read-only.
* @vdo: The vdo.
*
- * Return: true if the vdo is read-only.
+ * Return: True if the vdo is read-only.
*
* This method may be called from any thread, as opposed to examining the VDO's state field which
* is only safe to check from the admin thread.
@@ -1291,7 +1294,7 @@ bool vdo_is_read_only(struct vdo *vdo)
* vdo_in_read_only_mode() - Check whether a vdo is in read-only mode.
* @vdo: The vdo to query.
*
- * Return: true if the vdo is in read-only mode.
+ * Return: True if the vdo is in read-only mode.
*/
bool vdo_in_read_only_mode(const struct vdo *vdo)
{
@@ -1302,7 +1305,7 @@ bool vdo_in_read_only_mode(const struct vdo *vdo)
* vdo_in_recovery_mode() - Check whether the vdo is in recovery mode.
* @vdo: The vdo to query.
*
- * Return: true if the vdo is in recovery mode.
+ * Return: True if the vdo is in recovery mode.
*/
bool vdo_in_recovery_mode(const struct vdo *vdo)
{
diff --git a/drivers/md/dm-vdo/vdo.h b/drivers/md/dm-vdo/vdo.h
index 483ae873e002..1aaba73997b7 100644
--- a/drivers/md/dm-vdo/vdo.h
+++ b/drivers/md/dm-vdo/vdo.h
@@ -279,8 +279,10 @@ static inline bool vdo_uses_bio_ack_queue(struct vdo *vdo)
/**
* typedef vdo_filter_fn - Method type for vdo matching methods.
+ * @vdo: The vdo to match.
+ * @context: A parameter for the filter to use.
*
- * A filter function returns false if the vdo doesn't match.
+ * Return: True if the vdo matches the filter criteria, false if it doesn't.
*/
typedef bool (*vdo_filter_fn)(struct vdo *vdo, const void *context);
diff --git a/drivers/md/dm-vdo/vio.c b/drivers/md/dm-vdo/vio.c
index 8fc22fb14196..5ffc867d9c5e 100644
--- a/drivers/md/dm-vdo/vio.c
+++ b/drivers/md/dm-vdo/vio.c
@@ -398,8 +398,9 @@ void free_vio_pool(struct vio_pool *pool)
/**
 * is_vio_pool_busy() - Check whether a vio pool has outstanding entries.
+ * @pool: The vio pool.
*
- * Return: true if the pool is busy.
+ * Return: True if the pool is busy.
*/
bool is_vio_pool_busy(struct vio_pool *pool)
{
diff --git a/drivers/md/dm-vdo/vio.h b/drivers/md/dm-vdo/vio.h
index 4bfcb21901f1..7a8a6819aec4 100644
--- a/drivers/md/dm-vdo/vio.h
+++ b/drivers/md/dm-vdo/vio.h
@@ -156,8 +156,7 @@ static inline enum vdo_completion_priority get_metadata_priority(struct vio *vio
/**
* continue_vio() - Enqueue a vio to run its next callback.
* @vio: The vio to continue.
- *
- * Return: The result of the current operation.
+ * @result: The result of the current operation.
*/
static inline void continue_vio(struct vio *vio, int result)
{
@@ -172,6 +171,9 @@ void vdo_count_completed_bios(struct bio *bio);
/**
* continue_vio_after_io() - Continue a vio now that its I/O has returned.
+ * @vio: The vio to continue.
+ * @callback: The next operation for this vio.
+ * @thread: Which thread to run the next operation on.
*/
static inline void continue_vio_after_io(struct vio *vio, vdo_action_fn callback,
thread_id_t thread)
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index 72047b47a7a0..c79de517afee 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -177,9 +177,11 @@ error:
if (r < 0 && neras)
DMERR_LIMIT("%s: FEC %llu: failed to correct: %d",
v->data_dev->name, (unsigned long long)rsb, r);
- else if (r > 0)
+ else if (r > 0) {
DMWARN_LIMIT("%s: FEC %llu: corrected %d errors",
v->data_dev->name, (unsigned long long)rsb, r);
+ atomic64_inc(&v->fec->corrected);
+ }
return r;
}
@@ -188,14 +190,13 @@ error:
* Locate data block erasures using verity hashes.
*/
static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io,
- u8 *want_digest, u8 *data)
+ const u8 *want_digest, const u8 *data)
{
if (unlikely(verity_hash(v, io, data, 1 << v->data_dev_block_bits,
- verity_io_real_digest(v, io))))
+ io->tmp_digest)))
return 0;
- return memcmp(verity_io_real_digest(v, io), want_digest,
- v->digest_size) != 0;
+ return memcmp(io->tmp_digest, want_digest, v->digest_size) != 0;
}
/*
@@ -328,7 +329,7 @@ static int fec_alloc_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio)
if (fio->bufs[n])
continue;
- fio->bufs[n] = mempool_alloc(&v->fec->extra_pool, GFP_NOWAIT);
+ fio->bufs[n] = kmem_cache_alloc(v->fec->cache, GFP_NOWAIT);
/* we can manage with even one buffer if necessary */
if (unlikely(!fio->bufs[n]))
break;
@@ -362,7 +363,7 @@ static void fec_init_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio)
*/
static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io,
struct dm_verity_fec_io *fio, u64 rsb, u64 offset,
- bool use_erasures)
+ const u8 *want_digest, bool use_erasures)
{
int r, neras = 0;
unsigned int pos;
@@ -388,12 +389,11 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io,
/* Always re-validate the corrected block against the expected hash */
r = verity_hash(v, io, fio->output, 1 << v->data_dev_block_bits,
- verity_io_real_digest(v, io));
+ io->tmp_digest);
if (unlikely(r < 0))
return r;
- if (memcmp(verity_io_real_digest(v, io), verity_io_want_digest(v, io),
- v->digest_size)) {
+ if (memcmp(io->tmp_digest, want_digest, v->digest_size)) {
DMERR_LIMIT("%s: FEC %llu: failed to correct (%d erasures)",
v->data_dev->name, (unsigned long long)rsb, neras);
return -EILSEQ;
@@ -404,7 +404,8 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io,
/* Correct errors in a block. Copies corrected block to dest. */
int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
- enum verity_block_type type, sector_t block, u8 *dest)
+ enum verity_block_type type, const u8 *want_digest,
+ sector_t block, u8 *dest)
{
int r;
struct dm_verity_fec_io *fio = fec_io(io);
@@ -413,10 +414,8 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
if (!verity_fec_is_enabled(v))
return -EOPNOTSUPP;
- if (fio->level >= DM_VERITY_FEC_MAX_RECURSION) {
- DMWARN_LIMIT("%s: FEC: recursion too deep", v->data_dev->name);
+ if (fio->level)
return -EIO;
- }
fio->level++;
@@ -447,9 +446,9 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
* them first. Do a second attempt with erasures if the corruption is
* bad enough.
*/
- r = fec_decode_rsb(v, io, fio, rsb, offset, false);
+ r = fec_decode_rsb(v, io, fio, rsb, offset, want_digest, false);
if (r < 0) {
- r = fec_decode_rsb(v, io, fio, rsb, offset, true);
+ r = fec_decode_rsb(v, io, fio, rsb, offset, want_digest, true);
if (r < 0)
goto done;
}
@@ -479,7 +478,8 @@ void verity_fec_finish_io(struct dm_verity_io *io)
mempool_free(fio->bufs[n], &f->prealloc_pool);
fec_for_each_extra_buffer(fio, n)
- mempool_free(fio->bufs[n], &f->extra_pool);
+ if (fio->bufs[n])
+ kmem_cache_free(f->cache, fio->bufs[n]);
mempool_free(fio->output, &f->output_pool);
}
@@ -531,7 +531,6 @@ void verity_fec_dtr(struct dm_verity *v)
mempool_exit(&f->rs_pool);
mempool_exit(&f->prealloc_pool);
- mempool_exit(&f->extra_pool);
mempool_exit(&f->output_pool);
kmem_cache_destroy(f->cache);
@@ -784,12 +783,6 @@ int verity_fec_ctr(struct dm_verity *v)
return ret;
}
- ret = mempool_init_slab_pool(&f->extra_pool, 0, f->cache);
- if (ret) {
- ti->error = "Cannot allocate FEC buffer extra pool";
- return ret;
- }
-
/* Preallocate an output buffer for each thread */
ret = mempool_init_kmalloc_pool(&f->output_pool, num_online_cpus(),
1 << v->data_dev_block_bits);
diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h
index 09123a612953..5fd267873812 100644
--- a/drivers/md/dm-verity-fec.h
+++ b/drivers/md/dm-verity-fec.h
@@ -23,9 +23,6 @@
#define DM_VERITY_FEC_BUF_MAX \
(1 << (PAGE_SHIFT - DM_VERITY_FEC_BUF_RS_BITS))
-/* maximum recursion level for verity_fec_decode */
-#define DM_VERITY_FEC_MAX_RECURSION 4
-
#define DM_VERITY_OPT_FEC_DEV "use_fec_from_device"
#define DM_VERITY_OPT_FEC_BLOCKS "fec_blocks"
#define DM_VERITY_OPT_FEC_START "fec_start"
@@ -45,9 +42,9 @@ struct dm_verity_fec {
unsigned char rsn; /* N of RS(M, N) */
mempool_t rs_pool; /* mempool for fio->rs */
mempool_t prealloc_pool; /* mempool for preallocated buffers */
- mempool_t extra_pool; /* mempool for extra buffers */
mempool_t output_pool; /* mempool for output */
struct kmem_cache *cache; /* cache for buffers */
+ atomic64_t corrected; /* corrected errors */
};
/* per-bio data */
@@ -68,8 +65,8 @@ struct dm_verity_fec_io {
extern bool verity_fec_is_enabled(struct dm_verity *v);
extern int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
- enum verity_block_type type, sector_t block,
- u8 *dest);
+ enum verity_block_type type, const u8 *want_digest,
+ sector_t block, u8 *dest);
extern unsigned int verity_fec_status_table(struct dm_verity *v, unsigned int sz,
char *result, unsigned int maxlen);
@@ -99,6 +96,7 @@ static inline bool verity_fec_is_enabled(struct dm_verity *v)
static inline int verity_fec_decode(struct dm_verity *v,
struct dm_verity_io *io,
enum verity_block_type type,
+ const u8 *want_digest,
sector_t block, u8 *dest)
{
return -EOPNOTSUPP;
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 66a00a8ccb39..5c17472d7896 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -117,11 +117,25 @@ static sector_t verity_position_at_level(struct dm_verity *v, sector_t block,
int verity_hash(struct dm_verity *v, struct dm_verity_io *io,
const u8 *data, size_t len, u8 *digest)
{
- struct shash_desc *desc = &io->hash_desc;
+ struct shash_desc *desc;
int r;
+ if (likely(v->use_sha256_lib)) {
+ struct sha256_ctx *ctx = &io->hash_ctx.sha256;
+
+ /*
+ * Fast path using SHA-256 library. This is enabled only for
+ * verity version 1, where the salt is at the beginning.
+ */
+ *ctx = *v->initial_hashstate.sha256;
+ sha256_update(ctx, data, len);
+ sha256_final(ctx, digest);
+ return 0;
+ }
+
+ desc = &io->hash_ctx.shash;
desc->tfm = v->shash_tfm;
- if (unlikely(v->initial_hashstate == NULL)) {
+ if (unlikely(v->initial_hashstate.shash == NULL)) {
/* Version 0: salt at end */
r = crypto_shash_init(desc) ?:
crypto_shash_update(desc, data, len) ?:
@@ -129,7 +143,7 @@ int verity_hash(struct dm_verity *v, struct dm_verity_io *io,
crypto_shash_final(desc, digest);
} else {
/* Version 1: salt at beginning */
- r = crypto_shash_import(desc, v->initial_hashstate) ?:
+ r = crypto_shash_import(desc, v->initial_hashstate.shash) ?:
crypto_shash_finup(desc, data, len, digest);
}
if (unlikely(r))
@@ -215,12 +229,12 @@ out:
* Verify hash of a metadata block pertaining to the specified data block
* ("block" argument) at a specified level ("level" argument).
*
- * On successful return, verity_io_want_digest(v, io) contains the hash value
- * for a lower tree level or for the data block (if we're at the lowest level).
+ * On successful return, want_digest contains the hash value for a lower tree
+ * level or for the data block (if we're at the lowest level).
*
* If "skip_unverified" is true, unverified buffer is skipped and 1 is returned.
* If "skip_unverified" is false, unverified buffer is hashed and verified
- * against current value of verity_io_want_digest(v, io).
+ * against current value of want_digest.
*/
static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
sector_t block, int level, bool skip_unverified,
@@ -259,7 +273,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
if (IS_ERR(data))
return r;
if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_METADATA,
- hash_block, data) == 0) {
+ want_digest, hash_block, data) == 0) {
aux = dm_bufio_get_aux_data(buf);
aux->hash_verified = 1;
goto release_ok;
@@ -279,11 +293,11 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
}
r = verity_hash(v, io, data, 1 << v->hash_dev_block_bits,
- verity_io_real_digest(v, io));
+ io->tmp_digest);
if (unlikely(r < 0))
goto release_ret_r;
- if (likely(memcmp(verity_io_real_digest(v, io), want_digest,
+ if (likely(memcmp(io->tmp_digest, want_digest,
v->digest_size) == 0))
aux->hash_verified = 1;
else if (static_branch_unlikely(&use_bh_wq_enabled) && io->in_bh) {
@@ -294,7 +308,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
r = -EAGAIN;
goto release_ret_r;
} else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_METADATA,
- hash_block, data) == 0)
+ want_digest, hash_block, data) == 0)
aux->hash_verified = 1;
else if (verity_handle_err(v,
DM_VERITY_BLOCK_TYPE_METADATA,
@@ -358,7 +372,8 @@ out:
}
static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io,
- sector_t cur_block, u8 *dest)
+ const u8 *want_digest, sector_t cur_block,
+ u8 *dest)
{
struct page *page;
void *buffer;
@@ -382,12 +397,11 @@ static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io,
goto free_ret;
r = verity_hash(v, io, buffer, 1 << v->data_dev_block_bits,
- verity_io_real_digest(v, io));
+ io->tmp_digest);
if (unlikely(r))
goto free_ret;
- if (memcmp(verity_io_real_digest(v, io),
- verity_io_want_digest(v, io), v->digest_size)) {
+ if (memcmp(io->tmp_digest, want_digest, v->digest_size)) {
r = -EIO;
goto free_ret;
}
@@ -402,9 +416,13 @@ free_ret:
static int verity_handle_data_hash_mismatch(struct dm_verity *v,
struct dm_verity_io *io,
- struct bio *bio, sector_t blkno,
- u8 *data)
+ struct bio *bio,
+ struct pending_block *block)
{
+ const u8 *want_digest = block->want_digest;
+ sector_t blkno = block->blkno;
+ u8 *data = block->data;
+
if (static_branch_unlikely(&use_bh_wq_enabled) && io->in_bh) {
/*
* Error handling code (FEC included) cannot be run in the
@@ -412,14 +430,14 @@ static int verity_handle_data_hash_mismatch(struct dm_verity *v,
*/
return -EAGAIN;
}
- if (verity_recheck(v, io, blkno, data) == 0) {
+ if (verity_recheck(v, io, want_digest, blkno, data) == 0) {
if (v->validated_blocks)
set_bit(blkno, v->validated_blocks);
return 0;
}
#if defined(CONFIG_DM_VERITY_FEC)
- if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, blkno,
- data) == 0)
+ if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, want_digest,
+ blkno, data) == 0)
return 0;
#endif
if (bio->bi_status)
@@ -433,6 +451,58 @@ static int verity_handle_data_hash_mismatch(struct dm_verity *v,
return 0;
}
+static void verity_clear_pending_blocks(struct dm_verity_io *io)
+{
+ int i;
+
+ for (i = io->num_pending - 1; i >= 0; i--) {
+ kunmap_local(io->pending_blocks[i].data);
+ io->pending_blocks[i].data = NULL;
+ }
+ io->num_pending = 0;
+}
+
+static int verity_verify_pending_blocks(struct dm_verity *v,
+ struct dm_verity_io *io,
+ struct bio *bio)
+{
+ const unsigned int block_size = 1 << v->data_dev_block_bits;
+ int i, r;
+
+ if (io->num_pending == 2) {
+ /* num_pending == 2 implies that the algorithm is SHA-256 */
+ sha256_finup_2x(v->initial_hashstate.sha256,
+ io->pending_blocks[0].data,
+ io->pending_blocks[1].data, block_size,
+ io->pending_blocks[0].real_digest,
+ io->pending_blocks[1].real_digest);
+ } else {
+ for (i = 0; i < io->num_pending; i++) {
+ r = verity_hash(v, io, io->pending_blocks[i].data,
+ block_size,
+ io->pending_blocks[i].real_digest);
+ if (unlikely(r))
+ return r;
+ }
+ }
+
+ for (i = 0; i < io->num_pending; i++) {
+ struct pending_block *block = &io->pending_blocks[i];
+
+ if (likely(memcmp(block->real_digest, block->want_digest,
+ v->digest_size) == 0)) {
+ if (v->validated_blocks)
+ set_bit(block->blkno, v->validated_blocks);
+ } else {
+ r = verity_handle_data_hash_mismatch(v, io, bio, block);
+ if (unlikely(r))
+ return r;
+ }
+ }
+ verity_clear_pending_blocks(io);
+ return 0;
+}
+
/*
* Verify one "dm_verity_io" structure.
*/
@@ -440,10 +510,14 @@ static int verity_verify_io(struct dm_verity_io *io)
{
struct dm_verity *v = io->v;
const unsigned int block_size = 1 << v->data_dev_block_bits;
+ const int max_pending = v->use_sha256_finup_2x ? 2 : 1;
struct bvec_iter iter_copy;
struct bvec_iter *iter;
struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
unsigned int b;
+ int r;
+
+ io->num_pending = 0;
if (static_branch_unlikely(&use_bh_wq_enabled) && io->in_bh) {
/*
@@ -457,21 +531,22 @@ static int verity_verify_io(struct dm_verity_io *io)
for (b = 0; b < io->n_blocks;
b++, bio_advance_iter(bio, iter, block_size)) {
- int r;
- sector_t cur_block = io->block + b;
+ sector_t blkno = io->block + b;
+ struct pending_block *block;
bool is_zero;
struct bio_vec bv;
void *data;
if (v->validated_blocks && bio->bi_status == BLK_STS_OK &&
- likely(test_bit(cur_block, v->validated_blocks)))
+ likely(test_bit(blkno, v->validated_blocks)))
continue;
- r = verity_hash_for_block(v, io, cur_block,
- verity_io_want_digest(v, io),
+ block = &io->pending_blocks[io->num_pending];
+
+ r = verity_hash_for_block(v, io, blkno, block->want_digest,
&is_zero);
if (unlikely(r < 0))
- return r;
+ goto error;
bv = bio_iter_iovec(bio, *iter);
if (unlikely(bv.bv_len < block_size)) {
@@ -482,7 +557,8 @@ static int verity_verify_io(struct dm_verity_io *io)
* data block size to be greater than PAGE_SIZE.
*/
DMERR_LIMIT("unaligned io (data block spans pages)");
- return -EIO;
+ r = -EIO;
+ goto error;
}
data = bvec_kmap_local(&bv);
@@ -496,29 +572,26 @@ static int verity_verify_io(struct dm_verity_io *io)
kunmap_local(data);
continue;
}
-
- r = verity_hash(v, io, data, block_size,
- verity_io_real_digest(v, io));
- if (unlikely(r < 0)) {
- kunmap_local(data);
- return r;
+ block->data = data;
+ block->blkno = blkno;
+ if (++io->num_pending == max_pending) {
+ r = verity_verify_pending_blocks(v, io, bio);
+ if (unlikely(r))
+ goto error;
}
+ }
- if (likely(memcmp(verity_io_real_digest(v, io),
- verity_io_want_digest(v, io), v->digest_size) == 0)) {
- if (v->validated_blocks)
- set_bit(cur_block, v->validated_blocks);
- kunmap_local(data);
- continue;
- }
- r = verity_handle_data_hash_mismatch(v, io, bio, cur_block,
- data);
- kunmap_local(data);
+ if (io->num_pending) {
+ r = verity_verify_pending_blocks(v, io, bio);
if (unlikely(r))
- return r;
+ goto error;
}
return 0;
+
+error:
+ verity_clear_pending_blocks(io);
+ return r;
}
/*
@@ -775,6 +848,10 @@ static void verity_status(struct dm_target *ti, status_type_t type,
switch (type) {
case STATUSTYPE_INFO:
DMEMIT("%c", v->hash_failed ? 'C' : 'V');
+ if (verity_fec_is_enabled(v))
+ DMEMIT(" %lld", atomic64_read(&v->fec->corrected));
+ else
+ DMEMIT(" -");
break;
case STATUSTYPE_TABLE:
DMEMIT("%u %s %s %u %u %llu %llu %s ",
@@ -1004,7 +1081,7 @@ static void verity_dtr(struct dm_target *ti)
kvfree(v->validated_blocks);
kfree(v->salt);
- kfree(v->initial_hashstate);
+ kfree(v->initial_hashstate.shash);
kfree(v->root_digest);
kfree(v->zero_digest);
verity_free_sig(v);
@@ -1069,8 +1146,7 @@ static int verity_alloc_zero_digest(struct dm_verity *v)
if (!v->zero_digest)
return r;
- io = kmalloc(sizeof(*io) + crypto_shash_descsize(v->shash_tfm),
- GFP_KERNEL);
+ io = kmalloc(v->ti->per_io_data_size, GFP_KERNEL);
if (!io)
return r; /* verity_dtr will free zero_digest */
@@ -1252,11 +1328,26 @@ static int verity_setup_hash_alg(struct dm_verity *v, const char *alg_name)
}
v->shash_tfm = shash;
v->digest_size = crypto_shash_digestsize(shash);
- DMINFO("%s using \"%s\"", alg_name, crypto_shash_driver_name(shash));
if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) {
ti->error = "Digest size too big";
return -EINVAL;
}
+ if (likely(v->version && strcmp(alg_name, "sha256") == 0)) {
+ /*
+ * Fast path: use the library API for reduced overhead and
+ * interleaved hashing support.
+ */
+ v->use_sha256_lib = true;
+ if (sha256_finup_2x_is_optimized())
+ v->use_sha256_finup_2x = true;
+ ti->per_io_data_size =
+ offsetofend(struct dm_verity_io, hash_ctx.sha256);
+ } else {
+ /* Fallback case: use the generic crypto API. */
+ ti->per_io_data_size =
+ offsetofend(struct dm_verity_io, hash_ctx.shash) +
+ crypto_shash_descsize(shash);
+ }
return 0;
}
@@ -1277,7 +1368,18 @@ static int verity_setup_salt_and_hashstate(struct dm_verity *v, const char *arg)
return -EINVAL;
}
}
- if (v->version) { /* Version 1: salt at beginning */
+ if (likely(v->use_sha256_lib)) {
+ /* Implies version 1: salt at beginning */
+ v->initial_hashstate.sha256 =
+ kmalloc(sizeof(struct sha256_ctx), GFP_KERNEL);
+ if (!v->initial_hashstate.sha256) {
+ ti->error = "Cannot allocate initial hash state";
+ return -ENOMEM;
+ }
+ sha256_init(v->initial_hashstate.sha256);
+ sha256_update(v->initial_hashstate.sha256,
+ v->salt, v->salt_size);
+ } else if (v->version) { /* Version 1: salt at beginning */
SHASH_DESC_ON_STACK(desc, v->shash_tfm);
int r;
@@ -1285,16 +1387,16 @@ static int verity_setup_salt_and_hashstate(struct dm_verity *v, const char *arg)
* Compute the pre-salted hash state that can be passed to
* crypto_shash_import() for each block later.
*/
- v->initial_hashstate = kmalloc(
+ v->initial_hashstate.shash = kmalloc(
crypto_shash_statesize(v->shash_tfm), GFP_KERNEL);
- if (!v->initial_hashstate) {
+ if (!v->initial_hashstate.shash) {
ti->error = "Cannot allocate initial hash state";
return -ENOMEM;
}
desc->tfm = v->shash_tfm;
r = crypto_shash_init(desc) ?:
crypto_shash_update(desc, v->salt, v->salt_size) ?:
- crypto_shash_export(desc, v->initial_hashstate);
+ crypto_shash_export(desc, v->initial_hashstate.shash);
if (r) {
ti->error = "Cannot set up initial hash state";
return r;
@@ -1556,9 +1658,6 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
- ti->per_io_data_size = sizeof(struct dm_verity_io) +
- crypto_shash_descsize(v->shash_tfm);
-
r = verity_fec_ctr(v);
if (r)
goto bad;
@@ -1690,7 +1789,7 @@ static struct target_type verity_target = {
.name = "verity",
/* Note: the LSMs depend on the singleton and immutable features */
.features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE,
- .version = {1, 12, 0},
+ .version = {1, 13, 0},
.module = THIS_MODULE,
.ctr = verity_ctr,
.dtr = verity_dtr,
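Taken together, the dm-verity hunks above implement a precompute-then-reuse pattern: the salt is hashed once into a struct sha256_ctx when the table is constructed, each pending data block is then hashed from a copy of that context, and when two blocks are queued and sha256_finup_2x_is_optimized() reports support, both are hashed in a single interleaved pass. A condensed sketch of that flow, assembled only from calls already shown in the hunks above, with error handling and the FEC fallback omitted; it is not a new API:

#include <crypto/sha2.h>

/* Done once at table load: fold the salt into a reusable context. */
static void sketch_prepare_salted_state(struct sha256_ctx *salted,
					const u8 *salt, size_t salt_size)
{
	sha256_init(salted);
	sha256_update(salted, salt, salt_size);
}

/* Done per block: hash the block from a copy of the pre-salted context. */
static void sketch_hash_one_block(const struct sha256_ctx *salted,
				  const u8 *data, size_t len, u8 *digest)
{
	struct sha256_ctx ctx = *salted;

	sha256_update(&ctx, data, len);
	sha256_final(&ctx, digest);
}

/* Done per pair of pending blocks when the two-way finup is optimized. */
static void sketch_hash_block_pair(struct sha256_ctx *salted,
				   const u8 *data1, const u8 *data2, size_t len,
				   u8 *digest1, u8 *digest2)
{
	sha256_finup_2x(salted, data1, data2, len, digest1, digest2);
}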
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index 6d141abd965c..f975a9e5c5d6 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -16,6 +16,7 @@
#include <linux/device-mapper.h>
#include <linux/interrupt.h>
#include <crypto/hash.h>
+#include <crypto/sha2.h>
#define DM_VERITY_MAX_LEVELS 63
@@ -42,7 +43,10 @@ struct dm_verity {
struct crypto_shash *shash_tfm;
u8 *root_digest; /* digest of the root block */
u8 *salt; /* salt: its size is salt_size */
- u8 *initial_hashstate; /* salted initial state, if version >= 1 */
+ union {
+ struct sha256_ctx *sha256; /* for use_sha256_lib=1 */
+ u8 *shash; /* for use_sha256_lib=0 */
+ } initial_hashstate; /* salted initial state, if version >= 1 */
u8 *zero_digest; /* digest for a zero block */
#ifdef CONFIG_SECURITY
u8 *root_digest_sig; /* signature of the root digest */
@@ -59,6 +63,8 @@ struct dm_verity {
unsigned char version;
bool hash_failed:1; /* set if hash of any block failed */
bool use_bh_wq:1; /* try to verify in BH wq before normal work-queue */
+ bool use_sha256_lib:1; /* use SHA-256 library instead of generic crypto API */
+ bool use_sha256_finup_2x:1; /* use interleaved hashing optimization */
unsigned int digest_size; /* digest size for the current hash algorithm */
enum verity_mode mode; /* mode for handling verification errors */
enum verity_mode error_mode;/* mode for handling I/O errors */
@@ -78,6 +84,13 @@ struct dm_verity {
mempool_t recheck_pool;
};
+struct pending_block {
+ void *data;
+ sector_t blkno;
+ u8 want_digest[HASH_MAX_DIGESTSIZE];
+ u8 real_digest[HASH_MAX_DIGESTSIZE];
+};
+
struct dm_verity_io {
struct dm_verity *v;
@@ -94,28 +107,29 @@ struct dm_verity_io {
struct work_struct work;
struct work_struct bh_work;
- u8 real_digest[HASH_MAX_DIGESTSIZE];
- u8 want_digest[HASH_MAX_DIGESTSIZE];
+ u8 tmp_digest[HASH_MAX_DIGESTSIZE];
/*
- * Temporary space for hashing. This is variable-length and must be at
- * the end of the struct. struct shash_desc is just the fixed part;
- * it's followed by a context of size crypto_shash_descsize(shash_tfm).
+ * This is the queue of data blocks that are pending verification. When
+ * the crypto layer supports interleaved hashing, we allow multiple
+ * blocks to be queued up in order to utilize it. This can improve
+ * performance significantly vs. sequential hashing of each block.
*/
- struct shash_desc hash_desc;
-};
+ int num_pending;
+ struct pending_block pending_blocks[2];
-static inline u8 *verity_io_real_digest(struct dm_verity *v,
- struct dm_verity_io *io)
-{
- return io->real_digest;
-}
-
-static inline u8 *verity_io_want_digest(struct dm_verity *v,
- struct dm_verity_io *io)
-{
- return io->want_digest;
-}
+ /*
+ * Temporary space for hashing. Either sha256 or shash is used,
+ * depending on the value of use_sha256_lib. If shash is used,
+ * then this field is variable-length, with total size
+ * sizeof(struct shash_desc) + crypto_shash_descsize(shash_tfm).
+	 * For this reason, this field must be at the end of the struct.
+ */
+ union {
+ struct sha256_ctx sha256;
+ struct shash_desc shash;
+ } hash_ctx;
+};
extern int verity_hash(struct dm_verity *v, struct dm_verity_io *io,
const u8 *data, size_t len, u8 *digest);
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index 5a840c4ae316..c95e417194b3 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -203,8 +203,6 @@ int dm_revalidate_zones(struct dm_table *t, struct request_queue *q)
return ret;
}
- md->nr_zones = disk->nr_zones;
-
return 0;
}
@@ -452,7 +450,6 @@ void dm_finalize_zone_settings(struct dm_table *t, struct queue_limits *lim)
set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
} else {
clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
- md->nr_zones = 0;
md->disk->nr_zones = 0;
}
}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6c83ab940af7..b63279202260 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -272,7 +272,7 @@ static int __init dm_init(void)
int r, i;
#if (IS_ENABLED(CONFIG_IMA) && !IS_ENABLED(CONFIG_IMA_DISABLE_HTABLE))
- DMWARN("CONFIG_IMA_DISABLE_HTABLE is disabled."
+ DMINFO("CONFIG_IMA_DISABLE_HTABLE is disabled."
" Duplicate IMA measurements will not be recorded in the IMA log.");
#endif
@@ -1321,6 +1321,7 @@ void dm_accept_partial_bio(struct bio *bio, unsigned int n_sectors)
BUG_ON(dm_tio_flagged(tio, DM_TIO_IS_DUPLICATE_BIO));
BUG_ON(bio_sectors > *tio->len_ptr);
BUG_ON(n_sectors > bio_sectors);
+ BUG_ON(bio->bi_opf & REQ_ATOMIC);
if (static_branch_unlikely(&zoned_enabled) &&
unlikely(bdev_is_zoned(bio->bi_bdev))) {
@@ -1735,8 +1736,12 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci)
ci->submit_as_polled = !!(ci->bio->bi_opf & REQ_POLLED);
len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count);
- if (ci->bio->bi_opf & REQ_ATOMIC && len != ci->sector_count)
- return BLK_STS_IOERR;
+ if (ci->bio->bi_opf & REQ_ATOMIC) {
+ if (unlikely(!dm_target_supports_atomic_writes(ti->type)))
+ return BLK_STS_IOERR;
+ if (unlikely(len != ci->sector_count))
+ return BLK_STS_IOERR;
+ }
setup_split_accounting(ci, len);
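
The new check rejects REQ_ATOMIC bios on targets that have not opted in to atomic writes. A rough sketch of how a target type would opt in follows; the example_* names are hypothetical, and it assumes dm_target_supports_atomic_writes() tests a DM_TARGET_ATOMIC_WRITES feature bit, as existing atomic-write-capable targets do.

/* Hypothetical target type; the feature flag is the only point here. */
static struct target_type example_atomic_target = {
	.name     = "example-atomic",
	.version  = {1, 0, 0},
	.features = DM_TARGET_ATOMIC_WRITES,	/* opt in to REQ_ATOMIC bios */
	.module   = THIS_MODULE,
	.ctr      = example_atomic_ctr,		/* hypothetical */
	.dtr      = example_atomic_dtr,		/* hypothetical */
	.map      = example_atomic_map,		/* hypothetical */
};
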
@@ -2439,7 +2444,6 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
{
struct dm_table *old_map;
sector_t size, old_size;
- int ret;
lockdep_assert_held(&md->suspend_lock);
@@ -2454,11 +2458,13 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
set_capacity(md->disk, size);
- ret = dm_table_set_restrictions(t, md->queue, limits);
- if (ret) {
- set_capacity(md->disk, old_size);
- old_map = ERR_PTR(ret);
- goto out;
+ if (limits) {
+ int ret = dm_table_set_restrictions(t, md->queue, limits);
+ if (ret) {
+ set_capacity(md->disk, old_size);
+ old_map = ERR_PTR(ret);
+ goto out;
+ }
}
/*
@@ -2836,6 +2842,7 @@ static void dm_wq_work(struct work_struct *work)
static void dm_queue_flush(struct mapped_device *md)
{
+ clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
smp_mb__after_atomic();
queue_work(md->wq, &md->work);
@@ -2848,6 +2855,7 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
{
struct dm_table *live_map = NULL, *map = ERR_PTR(-EINVAL);
struct queue_limits limits;
+ bool update_limits = true;
int r;
mutex_lock(&md->suspend_lock);
@@ -2857,19 +2865,30 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
goto out;
/*
+	 * To avoid a potential deadlock on the queue limits lock, disallow
+	 * updating the queue limits during a table swap of an immutable
+	 * request-based dm device (dm-multipath) while a noflush suspend is
+	 * in progress. It is userspace's responsibility to make sure that the
+	 * new table uses the same limits as the existing table, if it asks
+	 * for a noflush suspend.
+	 */
+ if (dm_request_based(md) && md->immutable_target &&
+ __noflush_suspending(md))
+ update_limits = false;
+ /*
* If the new table has no data devices, retain the existing limits.
* This helps multipath with queue_if_no_path if all paths disappear,
* then new I/O is queued based on these limits, and then some paths
* reappear.
*/
- if (dm_table_has_no_data_devices(table)) {
+ else if (dm_table_has_no_data_devices(table)) {
live_map = dm_get_live_table_fast(md);
if (live_map)
limits = md->queue->limits;
dm_put_live_table_fast(md);
}
- if (!live_map) {
+ if (update_limits && !live_map) {
r = dm_calculate_queue_limits(table, &limits);
if (r) {
map = ERR_PTR(r);
@@ -2877,7 +2896,7 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
}
}
- map = __bind(md, table, &limits);
+ map = __bind(md, table, update_limits ? &limits : NULL);
dm_issue_global_event();
out:
@@ -2930,7 +2949,6 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
/*
* DMF_NOFLUSH_SUSPENDING must be set before presuspend.
- * This flag is cleared before dm_suspend returns.
*/
if (noflush)
set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
@@ -2993,8 +3011,6 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
if (!r)
set_bit(dmf_suspended_flag, &md->flags);
- if (noflush)
- clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
if (map)
synchronize_srcu(&md->io_barrier);
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index f4987f54e01b..4b6182cde859 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -2823,14 +2823,18 @@ static void regulator_ena_gpio_free(struct regulator_dev *rdev)
static int regulator_ena_gpio_ctrl(struct regulator_dev *rdev, bool enable)
{
struct regulator_enable_gpio *pin = rdev->ena_pin;
+ int ret;
if (!pin)
return -EINVAL;
if (enable) {
/* Enable GPIO at initial use */
- if (pin->enable_count == 0)
- gpiod_set_value_cansleep(pin->gpiod, 1);
+ if (pin->enable_count == 0) {
+ ret = gpiod_set_value_cansleep(pin->gpiod, 1);
+ if (ret)
+ return ret;
+ }
pin->enable_count++;
} else {
@@ -2841,7 +2845,10 @@ static int regulator_ena_gpio_ctrl(struct regulator_dev *rdev, bool enable)
/* Disable GPIO if not used */
if (pin->enable_count <= 1) {
- gpiod_set_value_cansleep(pin->gpiod, 0);
+ ret = gpiod_set_value_cansleep(pin->gpiod, 0);
+ if (ret)
+ return ret;
+
pin->enable_count = 0;
}
}
diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c
index a2d16e9abfb5..254c0a8a4555 100644
--- a/drivers/regulator/fixed.c
+++ b/drivers/regulator/fixed.c
@@ -330,13 +330,10 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev)
drvdata->dev = devm_regulator_register(&pdev->dev, &drvdata->desc,
&cfg);
- if (IS_ERR(drvdata->dev)) {
- ret = dev_err_probe(&pdev->dev, PTR_ERR(drvdata->dev),
- "Failed to register regulator: %ld\n",
- PTR_ERR(drvdata->dev));
- gpiod_put(cfg.ena_gpiod);
- return ret;
- }
+ if (IS_ERR(drvdata->dev))
+ return dev_err_probe(&pdev->dev, PTR_ERR(drvdata->dev),
+ "Failed to register regulator: %ld\n",
+ PTR_ERR(drvdata->dev));
platform_set_drvdata(pdev, drvdata);
diff --git a/drivers/regulator/spacemit-p1.c b/drivers/regulator/spacemit-p1.c
index d437e6738ea1..2bf9137e12b1 100644
--- a/drivers/regulator/spacemit-p1.c
+++ b/drivers/regulator/spacemit-p1.c
@@ -87,10 +87,10 @@ static const struct linear_range p1_ldo_ranges[] = {
}
#define P1_BUCK_DESC(_n) \
- P1_REG_DESC(BUCK, buck, _n, "vcc", 0x47, BUCK_MASK, 254, p1_buck_ranges)
+ P1_REG_DESC(BUCK, buck, _n, "vin", 0x47, BUCK_MASK, 254, p1_buck_ranges)
#define P1_ALDO_DESC(_n) \
- P1_REG_DESC(ALDO, aldo, _n, "vcc", 0x5b, LDO_MASK, 117, p1_ldo_ranges)
+ P1_REG_DESC(ALDO, aldo, _n, "vin", 0x5b, LDO_MASK, 117, p1_ldo_ranges)
#define P1_DLDO_DESC(_n) \
P1_REG_DESC(DLDO, dldo, _n, "buck5", 0x67, LDO_MASK, 117, p1_ldo_ranges)
diff --git a/drivers/s390/char/sclp_mem.c b/drivers/s390/char/sclp_mem.c
index 676c085b4f8a..27f0d2f12a8b 100644
--- a/drivers/s390/char/sclp_mem.c
+++ b/drivers/s390/char/sclp_mem.c
@@ -44,6 +44,9 @@ struct sclp_mem {
unsigned int id;
unsigned int memmap_on_memory;
unsigned int config;
+#ifdef CONFIG_KASAN
+ unsigned int early_shadow_mapped;
+#endif
};
struct sclp_mem_arg {
@@ -244,6 +247,16 @@ static ssize_t sclp_config_mem_store(struct kobject *kobj, struct kobj_attribute
put_device(&mem->dev);
sclp_mem_change_state(addr, block_size, 0);
__remove_memory(addr, block_size);
+#ifdef CONFIG_KASAN
+ if (sclp_mem->early_shadow_mapped) {
+ unsigned long start, end;
+
+ start = (unsigned long)kasan_mem_to_shadow(__va(addr));
+ end = start + (block_size >> KASAN_SHADOW_SCALE_SHIFT);
+ vmemmap_free(start, end, NULL);
+ sclp_mem->early_shadow_mapped = 0;
+ }
+#endif
WRITE_ONCE(sclp_mem->config, 0);
}
out_unlock:
@@ -316,6 +329,9 @@ static int sclp_create_mem(struct sclp_mem *sclp_mem, struct kset *kset,
sclp_mem->memmap_on_memory = memmap_on_memory;
sclp_mem->config = config;
+#ifdef CONFIG_KASAN
+ sclp_mem->early_shadow_mapped = config;
+#endif
sclp_mem->id = id;
kobject_init(&sclp_mem->kobj, &ktype);
rc = kobject_add(&sclp_mem->kobj, &kset->kobj, "memory%d", id);
diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c
index e3e0e9f36527..a226ff208eda 100644
--- a/drivers/s390/char/vmur.c
+++ b/drivers/s390/char/vmur.c
@@ -154,7 +154,7 @@ static struct urdev *urdev_get_from_devno(u16 devno)
struct ccw_device *cdev;
struct urdev *urd;
- sprintf(bus_id, "0.0.%04x", devno);
+ scnprintf(bus_id, sizeof(bus_id), "0.0.%04x", devno);
cdev = get_ccwdev_by_busid(&ur_driver, bus_id);
if (!cdev)
return NULL;
@@ -904,11 +904,11 @@ static int ur_set_online(struct ccw_device *cdev)
goto fail_free_cdev;
if (urd->cdev->id.cu_type == READER_PUNCH_DEVTYPE) {
if (urd->class == DEV_CLASS_UR_I)
- sprintf(node_id, "vmrdr-%s", dev_name(&cdev->dev));
+ scnprintf(node_id, sizeof(node_id), "vmrdr-%s", dev_name(&cdev->dev));
if (urd->class == DEV_CLASS_UR_O)
- sprintf(node_id, "vmpun-%s", dev_name(&cdev->dev));
+ scnprintf(node_id, sizeof(node_id), "vmpun-%s", dev_name(&cdev->dev));
} else if (urd->cdev->id.cu_type == PRINTER_DEVTYPE) {
- sprintf(node_id, "vmprt-%s", dev_name(&cdev->dev));
+ scnprintf(node_id, sizeof(node_id), "vmprt-%s", dev_name(&cdev->dev));
} else {
rc = -EOPNOTSUPP;
goto fail_free_cdev;
diff --git a/drivers/spi/spi-microchip-core-spi.c b/drivers/spi/spi-microchip-core-spi.c
index 98bf0e6cd00e..89e40fc45d73 100644
--- a/drivers/spi/spi-microchip-core-spi.c
+++ b/drivers/spi/spi-microchip-core-spi.c
@@ -387,6 +387,7 @@ static int mchp_corespi_probe(struct platform_device *pdev)
ret = devm_spi_register_controller(dev, host);
if (ret) {
+ mchp_corespi_disable_ints(spi);
mchp_corespi_disable(spi);
return dev_err_probe(dev, ret, "unable to register host for CoreSPI controller\n");
}
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 2ceda49c609f..aa36a0d1d9df 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -90,7 +90,7 @@
*/
#define DMA_MAPPING_ERROR (~(dma_addr_t)0)
-#define DMA_BIT_MASK(n) GENMASK_ULL(n - 1, 0)
+#define DMA_BIT_MASK(n) GENMASK_ULL((n) - 1, 0)
struct dma_iova_state {
dma_addr_t addr;
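
The extra parentheses around n matter when the macro argument is an expression with lower precedence than the subtraction. A minimal user-space illustration of the hazard (GENMASK_ULL_DEMO is a stand-in for the kernel's GENMASK_ULL; the shift-expression caller is hypothetical):

#include <stdio.h>

/* stand-in for the kernel's GENMASK_ULL(h, l) */
#define GENMASK_ULL_DEMO(h, l) \
	(((~0ULL) >> (63 - (h))) & ((~0ULL) << (l)))

#define DMA_BIT_MASK_OLD(n)	GENMASK_ULL_DEMO(n - 1, 0)
#define DMA_BIT_MASK_NEW(n)	GENMASK_ULL_DEMO((n) - 1, 0)

int main(void)
{
	/*
	 * With a shift expression as the argument, the old macro expands to
	 * GENMASK_ULL_DEMO(1 << 5 - 1, 0), i.e. bits 0..16, while the fixed
	 * macro yields the intended bits 0..31.
	 */
	printf("old: %#llx\n", DMA_BIT_MASK_OLD(1 << 5));	/* 0x1ffff */
	printf("new: %#llx\n", DMA_BIT_MASK_NEW(1 << 5));	/* 0xffffffff */
	return 0;
}
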
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 37e0b5b5600a..17902861de76 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -182,9 +182,9 @@ int generic_handle_irq_safe(unsigned int irq);
* and handle the result interrupt number. Return -EINVAL if
* conversion failed.
*/
-int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq);
-int generic_handle_domain_irq_safe(struct irq_domain *domain, unsigned int hwirq);
-int generic_handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq);
+int generic_handle_domain_irq(struct irq_domain *domain, irq_hw_number_t hwirq);
+int generic_handle_domain_irq_safe(struct irq_domain *domain, irq_hw_number_t hwirq);
+int generic_handle_domain_nmi(struct irq_domain *domain, irq_hw_number_t hwirq);
#endif
/* Test to see if a driver has successfully requested an irq */
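
Widening hwirq to irq_hw_number_t lets demultiplexing handlers forward hardware interrupt numbers that do not fit in 32 bits. A sketch of such a handler is below; the driver itself, the example_read_*() register accessors, and the way the hwirq is composed are assumptions for illustration only.

static void example_demux_handler(struct irq_desc *desc)
{
	struct irq_chip *chip = irq_desc_get_chip(desc);
	struct irq_domain *domain = irq_desc_get_handler_data(desc);
	irq_hw_number_t hwirq;

	chained_irq_enter(chip, desc);

	/* compose a >32-bit hardware interrupt number (hypothetical layout) */
	hwirq = ((irq_hw_number_t)example_read_fid() << 32) |
		example_read_vector();

	if (generic_handle_domain_irq(domain, hwirq))
		pr_warn_once("no mapping for hwirq 0x%lx\n",
			     (unsigned long)hwirq);

	chained_irq_exit(chip, desc);
}
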
diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h
index 36ddfa1ec301..67d2bf579942 100644
--- a/include/linux/restart_block.h
+++ b/include/linux/restart_block.h
@@ -32,7 +32,7 @@ struct restart_block {
u32 val;
u32 flags;
u32 bitset;
- u64 time;
+ ktime_t time;
u32 __user *uaddr2;
} futex;
/* For nanosleep */
diff --git a/include/linux/slab.h b/include/linux/slab.h
index cf443f064a66..2482992248dc 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -1150,10 +1150,17 @@ static inline void kvfree_rcu_barrier(void)
rcu_barrier();
}
+static inline void kvfree_rcu_barrier_on_cache(struct kmem_cache *s)
+{
+ rcu_barrier();
+}
+
static inline void kfree_rcu_scheduler_running(void) { }
#else
void kvfree_rcu_barrier(void);
+void kvfree_rcu_barrier_on_cache(struct kmem_cache *s);
+
void kfree_rcu_scheduler_running(void);
#endif
diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
index ee45dee33d49..26392badc36b 100644
--- a/kernel/dma/pool.c
+++ b/kernel/dma/pool.c
@@ -93,7 +93,7 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size,
page = dma_alloc_from_contiguous(NULL, 1 << order,
order, false);
if (!page)
- page = alloc_pages(gfp, order);
+ page = alloc_pages(gfp | __GFP_NOWARN, order);
} while (!page && order-- > 0);
if (!page)
goto out;
diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c
index e2bbe5509ec2..1c2dd03f11ec 100644
--- a/kernel/futex/waitwake.c
+++ b/kernel/futex/waitwake.c
@@ -738,12 +738,11 @@ int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time
static long futex_wait_restart(struct restart_block *restart)
{
u32 __user *uaddr = restart->futex.uaddr;
- ktime_t t, *tp = NULL;
+ ktime_t *tp = NULL;
+
+ if (restart->futex.flags & FLAGS_HAS_TIMEOUT)
+ tp = &restart->futex.time;
- if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
- t = restart->futex.time;
- tp = &t;
- }
restart->fn = do_no_restart_syscall;
return (long)futex_wait(uaddr, restart->futex.flags,
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 6acf268f005b..f8e4e13dbe33 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -720,7 +720,7 @@ EXPORT_SYMBOL_GPL(generic_handle_irq_safe);
* This function must be called from an IRQ context with irq regs
* initialized.
*/
-int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq)
+int generic_handle_domain_irq(struct irq_domain *domain, irq_hw_number_t hwirq)
{
return handle_irq_desc(irq_resolve_mapping(domain, hwirq));
}
@@ -738,7 +738,7 @@ EXPORT_SYMBOL_GPL(generic_handle_domain_irq);
* context). If the interrupt is marked as 'enforce IRQ-context only' then
* the function must be invoked from hard interrupt context.
*/
-int generic_handle_domain_irq_safe(struct irq_domain *domain, unsigned int hwirq)
+int generic_handle_domain_irq_safe(struct irq_domain *domain, irq_hw_number_t hwirq)
{
unsigned long flags;
int ret;
@@ -761,7 +761,7 @@ EXPORT_SYMBOL_GPL(generic_handle_domain_irq_safe);
* This function must be called from an NMI context with irq regs
* initialized.
**/
-int generic_handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq)
+int generic_handle_domain_nmi(struct irq_domain *domain, irq_hw_number_t hwirq)
{
WARN_ON_ONCE(!in_nmi());
return handle_irq_desc(irq_resolve_mapping(domain, hwirq));
diff --git a/mm/slab.h b/mm/slab.h
index f730e012553c..e767aa7e91b0 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -422,6 +422,7 @@ static inline bool is_kmalloc_normal(struct kmem_cache *s)
bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj);
void flush_all_rcu_sheaves(void);
+void flush_rcu_sheaves_on_cache(struct kmem_cache *s);
#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \
SLAB_CACHE_DMA32 | SLAB_PANIC | \
diff --git a/mm/slab_common.c b/mm/slab_common.c
index b613533b29e7..eed7ea556cb1 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -492,7 +492,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
return;
/* in-flight kfree_rcu()'s may include objects from our cache */
- kvfree_rcu_barrier();
+ kvfree_rcu_barrier_on_cache(s);
if (IS_ENABLED(CONFIG_SLUB_RCU_DEBUG) &&
(s->flags & SLAB_TYPESAFE_BY_RCU)) {
@@ -2038,25 +2038,13 @@ unlock_return:
}
EXPORT_SYMBOL_GPL(kvfree_call_rcu);
-/**
- * kvfree_rcu_barrier - Wait until all in-flight kvfree_rcu() complete.
- *
- * Note that a single argument of kvfree_rcu() call has a slow path that
- * triggers synchronize_rcu() following by freeing a pointer. It is done
- * before the return from the function. Therefore for any single-argument
- * call that will result in a kfree() to a cache that is to be destroyed
- * during module exit, it is developer's responsibility to ensure that all
- * such calls have returned before the call to kmem_cache_destroy().
- */
-void kvfree_rcu_barrier(void)
+static inline void __kvfree_rcu_barrier(void)
{
struct kfree_rcu_cpu_work *krwp;
struct kfree_rcu_cpu *krcp;
bool queued;
int i, cpu;
- flush_all_rcu_sheaves();
-
/*
* Firstly we detach objects and queue them over an RCU-batch
* for all CPUs. Finally queued works are flushed for each CPU.
@@ -2118,8 +2106,43 @@ void kvfree_rcu_barrier(void)
}
}
}
+
+/**
+ * kvfree_rcu_barrier - Wait until all in-flight kvfree_rcu() complete.
+ *
+ * Note that a single-argument kvfree_rcu() call has a slow path that
+ * triggers synchronize_rcu() followed by freeing the pointer. This is done
+ * before the return from the function. Therefore, for any single-argument
+ * call that will result in a kfree() to a cache that is to be destroyed
+ * during module exit, it is the developer's responsibility to ensure that
+ * all such calls have returned before the call to kmem_cache_destroy().
+ */
+void kvfree_rcu_barrier(void)
+{
+ flush_all_rcu_sheaves();
+ __kvfree_rcu_barrier();
+}
EXPORT_SYMBOL_GPL(kvfree_rcu_barrier);
+/**
+ * kvfree_rcu_barrier_on_cache - Wait for in-flight kvfree_rcu() calls on a
+ * specific slab cache.
+ * @s: slab cache to wait for
+ *
+ * See the description of kvfree_rcu_barrier() for details.
+ */
+void kvfree_rcu_barrier_on_cache(struct kmem_cache *s)
+{
+ if (s->cpu_sheaves)
+ flush_rcu_sheaves_on_cache(s);
+ /*
+ * TODO: Introduce a version of __kvfree_rcu_barrier() that works
+ * on a specific slab cache.
+ */
+ __kvfree_rcu_barrier();
+}
+EXPORT_SYMBOL_GPL(kvfree_rcu_barrier_on_cache);
+
static unsigned long
kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
@@ -2215,4 +2238,3 @@ void __init kvfree_rcu_init(void)
}
#endif /* CONFIG_KVFREE_RCU_BATCHED */
-
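
A sketch of the pattern this targets, assuming a hypothetical module that frees cache objects with kvfree_rcu(): kmem_cache_destroy() now only drains the in-flight kvfree_rcu() work relevant to the cache being destroyed (via kvfree_rcu_barrier_on_cache()) instead of flushing every cache's RCU sheaves with the global barrier.

struct example_obj {				/* hypothetical object type */
	int payload;
	struct rcu_head rcu;
};

static struct kmem_cache *example_cache;	/* hypothetical cache */

static void example_release(struct example_obj *obj)
{
	/* queues the free; the object may still be in flight at exit time */
	kvfree_rcu(obj, rcu);
}

static void __exit example_exit(void)
{
	/*
	 * kmem_cache_destroy() now calls kvfree_rcu_barrier_on_cache(), so
	 * pending kvfree_rcu() frees of example_obj objects are drained
	 * without waiting on unrelated caches.
	 */
	kmem_cache_destroy(example_cache);
}
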
diff --git a/mm/slub.c b/mm/slub.c
index e6a330e24145..f21b2f0c6f5a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4122,42 +4122,47 @@ static void flush_rcu_sheaf(struct work_struct *w)
/* needed for kvfree_rcu_barrier() */
-void flush_all_rcu_sheaves(void)
+void flush_rcu_sheaves_on_cache(struct kmem_cache *s)
{
struct slub_flush_work *sfw;
- struct kmem_cache *s;
unsigned int cpu;
- cpus_read_lock();
- mutex_lock(&slab_mutex);
+ mutex_lock(&flush_lock);
- list_for_each_entry(s, &slab_caches, list) {
- if (!s->cpu_sheaves)
- continue;
+ for_each_online_cpu(cpu) {
+ sfw = &per_cpu(slub_flush, cpu);
- mutex_lock(&flush_lock);
+		/*
+		 * We don't check whether an rcu_free sheaf exists - a racing
+		 * __kfree_rcu_sheaf() might have just removed it. By executing
+		 * flush_rcu_sheaf() on the cpu we make sure that
+		 * __kfree_rcu_sheaf() has finished its call_rcu().
+		 */
- for_each_online_cpu(cpu) {
- sfw = &per_cpu(slub_flush, cpu);
+ INIT_WORK(&sfw->work, flush_rcu_sheaf);
+ sfw->s = s;
+ queue_work_on(cpu, flushwq, &sfw->work);
+ }
- /*
- * we don't check if rcu_free sheaf exists - racing
- * __kfree_rcu_sheaf() might have just removed it.
- * by executing flush_rcu_sheaf() on the cpu we make
- * sure the __kfree_rcu_sheaf() finished its call_rcu()
- */
+ for_each_online_cpu(cpu) {
+ sfw = &per_cpu(slub_flush, cpu);
+ flush_work(&sfw->work);
+ }
- INIT_WORK(&sfw->work, flush_rcu_sheaf);
- sfw->s = s;
- queue_work_on(cpu, flushwq, &sfw->work);
- }
+ mutex_unlock(&flush_lock);
+}
- for_each_online_cpu(cpu) {
- sfw = &per_cpu(slub_flush, cpu);
- flush_work(&sfw->work);
- }
+void flush_all_rcu_sheaves(void)
+{
+ struct kmem_cache *s;
+
+ cpus_read_lock();
+ mutex_lock(&slab_mutex);
- mutex_unlock(&flush_lock);
+ list_for_each_entry(s, &slab_caches, list) {
+ if (!s->cpu_sheaves)
+ continue;
+ flush_rcu_sheaves_on_cache(s);
}
mutex_unlock(&slab_mutex);
diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
index 776ad658f75e..23b9fea8d190 100644
--- a/tools/testing/selftests/futex/functional/.gitignore
+++ b/tools/testing/selftests/futex/functional/.gitignore
@@ -12,3 +12,4 @@ futex_wait_uninitialized_heap
futex_wait_wouldblock
futex_waitv
futex_numa
+robust_list
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index 490ace1f017e..af7ec309ea78 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -22,7 +22,8 @@ TEST_GEN_PROGS := \
futex_priv_hash \
futex_numa_mpol \
futex_waitv \
- futex_numa
+ futex_numa \
+ robust_list
TEST_PROGS := run.sh
diff --git a/tools/testing/selftests/futex/functional/futex_numa_mpol.c b/tools/testing/selftests/futex/functional/futex_numa_mpol.c
index ab8555752137..220ef219c823 100644
--- a/tools/testing/selftests/futex/functional/futex_numa_mpol.c
+++ b/tools/testing/selftests/futex/functional/futex_numa_mpol.c
@@ -131,11 +131,6 @@ static void test_futex(void *futex_ptr, int err_value)
__test_futex(futex_ptr, err_value, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA);
}
-static void test_futex_mpol(void *futex_ptr, int err_value)
-{
- __test_futex(futex_ptr, err_value, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL);
-}
-
TEST(futex_numa_mpol)
{
struct futex32_numa *futex_numa;
diff --git a/tools/testing/selftests/futex/functional/futex_wait.c b/tools/testing/selftests/futex/functional/futex_wait.c
index 0e69c53524c1..7b8879409007 100644
--- a/tools/testing/selftests/futex/functional/futex_wait.c
+++ b/tools/testing/selftests/futex/functional/futex_wait.c
@@ -71,6 +71,8 @@ TEST(anon_page)
/* Testing an anon page shared memory */
shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666);
if (shm_id < 0) {
+ if (errno == ENOSYS)
+ ksft_exit_skip("shmget syscall not supported\n");
perror("shmget");
exit(1);
}
@@ -108,14 +110,14 @@ TEST(file_backed)
/* Testing a file backed shared memory */
fd = open(SHM_PATH, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
if (fd < 0)
- ksft_exit_fail_msg("open");
+ ksft_exit_fail_msg("open\n");
if (ftruncate(fd, sizeof(f_private)))
- ksft_exit_fail_msg("ftruncate");
+ ksft_exit_fail_msg("ftruncate\n");
shm = mmap(NULL, sizeof(f_private), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (shm == MAP_FAILED)
- ksft_exit_fail_msg("mmap");
+ ksft_exit_fail_msg("mmap\n");
memcpy(shm, &f_private, sizeof(f_private));
diff --git a/tools/testing/selftests/futex/functional/futex_waitv.c b/tools/testing/selftests/futex/functional/futex_waitv.c
index d60876164d4b..b5ada9fdb26f 100644
--- a/tools/testing/selftests/futex/functional/futex_waitv.c
+++ b/tools/testing/selftests/futex/functional/futex_waitv.c
@@ -86,6 +86,8 @@ TEST(shared_waitv)
int shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666);
if (shm_id < 0) {
+ if (errno == ENOSYS)
+ ksft_exit_skip("shmget syscall not supported\n");
perror("shmget");
exit(1);
}
diff --git a/tools/testing/selftests/futex/functional/robust_list.c b/tools/testing/selftests/futex/functional/robust_list.c
new file mode 100644
index 000000000000..e7d1254e18ca
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/robust_list.c
@@ -0,0 +1,552 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2025 Igalia S.L.
+ *
+ * Robust list test by André Almeida <andrealmeid@igalia.com>
+ *
+ * The robust list uAPI allows userspace to create "robust" locks, in the sense
+ * that if the lock holder thread dies, the remaining threads that are waiting
+ * for the lock won't block forever, waiting for a lock that will never be
+ * released.
+ *
+ * This is achieved by userspace maintaining a list in which a thread records all
+ * the locks (futexes) that it is holding. The robust list is a linked list, and
+ * userspace registers the head of the list with the set_robust_list() syscall.
+ * If such a thread eventually dies, the kernel will walk this list, waking up one
+ * thread waiting for each futex and marking the futex word with the flag
+ * FUTEX_OWNER_DIED.
+ *
+ * See also
+ * man set_robust_list
+ *	Documentation/locking/robust-futex-ABI.rst
+ *	Documentation/locking/robust-futexes.rst
+ */
+
+#define _GNU_SOURCE
+
+#include "futextest.h"
+#include "../../kselftest_harness.h"
+
+#include <errno.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdatomic.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+
+#define STACK_SIZE (1024 * 1024)
+
+#define FUTEX_TIMEOUT 3
+
+#define SLEEP_US 100
+
+static pthread_barrier_t barrier, barrier2;
+
+static int set_robust_list(struct robust_list_head *head, size_t len)
+{
+ return syscall(SYS_set_robust_list, head, len);
+}
+
+static int get_robust_list(int pid, struct robust_list_head **head, size_t *len_ptr)
+{
+ return syscall(SYS_get_robust_list, pid, head, len_ptr);
+}
+
+/*
+ * Basic lock struct; it contains just the futex word and the robust list element.
+ * Real implementations also have a *prev pointer to make walking the list easier.
+ */
+struct lock_struct {
+ _Atomic(unsigned int) futex;
+ struct robust_list list;
+};
+
+/*
+ * Helper function to spawn a child thread. Returns -1 on error, pid on success
+ */
+static int create_child(int (*fn)(void *arg), void *arg)
+{
+ char *stack;
+ pid_t pid;
+
+ stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+ if (stack == MAP_FAILED)
+ return -1;
+
+ stack += STACK_SIZE;
+
+ pid = clone(fn, stack, CLONE_VM | SIGCHLD, arg);
+
+ if (pid == -1)
+ return -1;
+
+ return pid;
+}
+
+/*
+ * Helper function to prepare and register a robust list
+ */
+static int set_list(struct robust_list_head *head)
+{
+ int ret;
+
+ ret = set_robust_list(head, sizeof(*head));
+ if (ret)
+ return ret;
+
+ head->futex_offset = (size_t) offsetof(struct lock_struct, futex) -
+ (size_t) offsetof(struct lock_struct, list);
+ head->list.next = &head->list;
+ head->list_op_pending = NULL;
+
+ return 0;
+}
+
+/*
+ * A basic (and incomplete) mutex lock function with robustness
+ */
+static int mutex_lock(struct lock_struct *lock, struct robust_list_head *head, bool error_inject)
+{
+ _Atomic(unsigned int) *futex = &lock->futex;
+ unsigned int zero = 0;
+ pid_t tid = gettid();
+ int ret = -1;
+
+ /*
+ * Set list_op_pending before starting the lock, so the kernel can catch
+ * the case where the thread died during the lock operation
+ */
+ head->list_op_pending = &lock->list;
+
+ if (atomic_compare_exchange_strong(futex, &zero, tid)) {
+ /*
+ * We took the lock, insert it in the robust list
+ */
+ struct robust_list *list = &head->list;
+
+ /* Error injection to test list_op_pending */
+ if (error_inject)
+ return 0;
+
+ while (list->next != &head->list)
+ list = list->next;
+
+ list->next = &lock->list;
+ lock->list.next = &head->list;
+
+ ret = 0;
+ } else {
+ /*
+		 * We didn't take the lock, so wait until the owner wakes us (or dies)
+ */
+ struct timespec to;
+
+ to.tv_sec = FUTEX_TIMEOUT;
+ to.tv_nsec = 0;
+
+ tid = atomic_load(futex);
+ /* Kernel ignores futexes without the waiters flag */
+ tid |= FUTEX_WAITERS;
+ atomic_store(futex, tid);
+
+ ret = futex_wait((futex_t *) futex, tid, &to, 0);
+
+ /*
+ * A real mutex_lock() implementation would loop here to finally
+ * take the lock. We don't care about that, so we stop here.
+ */
+ }
+
+ head->list_op_pending = NULL;
+
+ return ret;
+}
+
+/*
+ * This child thread will succeed in taking the lock, and will then exit while still holding it
+ */
+static int child_fn_lock(void *arg)
+{
+ struct lock_struct *lock = arg;
+ struct robust_list_head head;
+ int ret;
+
+ ret = set_list(&head);
+ if (ret) {
+ ksft_test_result_fail("set_robust_list error\n");
+ return ret;
+ }
+
+ ret = mutex_lock(lock, &head, false);
+ if (ret) {
+ ksft_test_result_fail("mutex_lock error\n");
+ return ret;
+ }
+
+ pthread_barrier_wait(&barrier);
+
+ /*
+ * There's a race here: the parent thread needs to be inside
+ * futex_wait() before the child thread dies, otherwise it will miss the
+ * wakeup from handle_futex_death() that this child will emit. We wait a
+ * little bit just to make sure that this happens.
+ */
+ usleep(SLEEP_US);
+
+ return 0;
+}
+
+/*
+ * Spawns a child thread that will set up a robust list, take the lock, register
+ * it in the robust list and die. The parent thread will wait on this futex, and
+ * should be woken up when the child exits.
+ */
+TEST(test_robustness)
+{
+ struct lock_struct lock = { .futex = 0 };
+ _Atomic(unsigned int) *futex = &lock.futex;
+ struct robust_list_head head;
+ int ret, pid, wstatus;
+
+ ret = set_list(&head);
+ ASSERT_EQ(ret, 0);
+
+ /*
+	 * Let's use a barrier to ensure that the child thread takes the lock
+	 * before the parent does
+ */
+ ret = pthread_barrier_init(&barrier, NULL, 2);
+ ASSERT_EQ(ret, 0);
+
+ pid = create_child(&child_fn_lock, &lock);
+ ASSERT_NE(pid, -1);
+
+ pthread_barrier_wait(&barrier);
+ ret = mutex_lock(&lock, &head, false);
+
+ /*
+ * futex_wait() should return 0 and the futex word should be marked with
+ * FUTEX_OWNER_DIED
+ */
+ ASSERT_EQ(ret, 0);
+
+ ASSERT_TRUE(*futex & FUTEX_OWNER_DIED);
+
+ wait(&wstatus);
+ pthread_barrier_destroy(&barrier);
+
+	/* Pass only if the child hasn't returned an error */
+ if (!WEXITSTATUS(wstatus))
+ ksft_test_result_pass("%s\n", __func__);
+}
+
+/*
+ * The only valid value for len is sizeof(*head)
+ */
+TEST(test_set_robust_list_invalid_size)
+{
+ struct robust_list_head head;
+ size_t head_size = sizeof(head);
+ int ret;
+
+ ret = set_robust_list(&head, head_size);
+ ASSERT_EQ(ret, 0);
+
+ ret = set_robust_list(&head, head_size * 2);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EINVAL);
+
+ ret = set_robust_list(&head, head_size - 1);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EINVAL);
+
+ ret = set_robust_list(&head, 0);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EINVAL);
+
+ ksft_test_result_pass("%s\n", __func__);
+}
+
+/*
+ * Test get_robust_list() with pid = 0, getting the list of the calling thread
+ */
+TEST(test_get_robust_list_self)
+{
+ struct robust_list_head head, head2, *get_head;
+ size_t head_size = sizeof(head), len_ptr;
+ int ret;
+
+ ret = set_robust_list(&head, head_size);
+ ASSERT_EQ(ret, 0);
+
+ ret = get_robust_list(0, &get_head, &len_ptr);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(get_head, &head);
+ ASSERT_EQ(head_size, len_ptr);
+
+ ret = set_robust_list(&head2, head_size);
+ ASSERT_EQ(ret, 0);
+
+ ret = get_robust_list(0, &get_head, &len_ptr);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(get_head, &head2);
+ ASSERT_EQ(head_size, len_ptr);
+
+ ksft_test_result_pass("%s\n", __func__);
+}
+
+static int child_list(void *arg)
+{
+ struct robust_list_head *head = arg;
+ int ret;
+
+ ret = set_robust_list(head, sizeof(*head));
+ if (ret) {
+ ksft_test_result_fail("set_robust_list error\n");
+ return -1;
+ }
+
+ /*
+ * After setting the list head, wait until the main thread can call
+ * get_robust_list() for this thread before exiting.
+ */
+ pthread_barrier_wait(&barrier);
+ pthread_barrier_wait(&barrier2);
+
+ return 0;
+}
+
+/*
+ * Test get_robust_list from another thread. We use two barriers here to ensure
+ * that:
+ * 1) the child thread has set the list before we try to get it from the
+ *    parent
+ * 2) the child thread is still alive when we try to get the list from it
+ */
+TEST(test_get_robust_list_child)
+{
+ struct robust_list_head head, *get_head;
+ int ret, wstatus;
+ size_t len_ptr;
+ pid_t tid;
+
+	ret = pthread_barrier_init(&barrier, NULL, 2);
+	ASSERT_EQ(ret, 0);
+	ret = pthread_barrier_init(&barrier2, NULL, 2);
+	ASSERT_EQ(ret, 0);
+
+ tid = create_child(&child_list, &head);
+ ASSERT_NE(tid, -1);
+
+ pthread_barrier_wait(&barrier);
+
+ ret = get_robust_list(tid, &get_head, &len_ptr);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(&head, get_head);
+
+ pthread_barrier_wait(&barrier2);
+
+ wait(&wstatus);
+ pthread_barrier_destroy(&barrier);
+ pthread_barrier_destroy(&barrier2);
+
+	/* Pass only if the child hasn't returned an error */
+ if (!WEXITSTATUS(wstatus))
+ ksft_test_result_pass("%s\n", __func__);
+}
+
+static int child_fn_lock_with_error(void *arg)
+{
+ struct lock_struct *lock = arg;
+ struct robust_list_head head;
+ int ret;
+
+ ret = set_list(&head);
+ if (ret) {
+ ksft_test_result_fail("set_robust_list error\n");
+ return -1;
+ }
+
+ ret = mutex_lock(lock, &head, true);
+ if (ret) {
+ ksft_test_result_fail("mutex_lock error\n");
+ return -1;
+ }
+
+ pthread_barrier_wait(&barrier);
+
+ /* See comment at child_fn_lock() */
+ usleep(SLEEP_US);
+
+ return 0;
+}
+
+/*
+ * Same as the robustness test, but inject an error so that mutex_lock() returns
+ * early, just after setting list_op_pending and taking the lock, to test the
+ * list_op_pending mechanism
+ */
+TEST(test_set_list_op_pending)
+{
+ struct lock_struct lock = { .futex = 0 };
+ _Atomic(unsigned int) *futex = &lock.futex;
+ struct robust_list_head head;
+ int ret, wstatus;
+
+ ret = set_list(&head);
+ ASSERT_EQ(ret, 0);
+
+ ret = pthread_barrier_init(&barrier, NULL, 2);
+ ASSERT_EQ(ret, 0);
+
+ ret = create_child(&child_fn_lock_with_error, &lock);
+ ASSERT_NE(ret, -1);
+
+ pthread_barrier_wait(&barrier);
+ ret = mutex_lock(&lock, &head, false);
+
+ ASSERT_EQ(ret, 0);
+
+ ASSERT_TRUE(*futex & FUTEX_OWNER_DIED);
+
+ wait(&wstatus);
+ pthread_barrier_destroy(&barrier);
+
+	/* Pass only if the child hasn't returned an error */
+ if (!WEXITSTATUS(wstatus))
+ ksft_test_result_pass("%s\n", __func__);
+ else
+ ksft_test_result_fail("%s\n", __func__);
+}
+
+#define CHILD_NR 10
+
+static int child_lock_holder(void *arg)
+{
+ struct lock_struct *locks = arg;
+ struct robust_list_head head;
+ int i;
+
+ set_list(&head);
+
+ for (i = 0; i < CHILD_NR; i++) {
+ locks[i].futex = 0;
+ mutex_lock(&locks[i], &head, false);
+ }
+
+ pthread_barrier_wait(&barrier);
+ pthread_barrier_wait(&barrier2);
+
+ /* See comment at child_fn_lock() */
+ usleep(SLEEP_US);
+
+ return 0;
+}
+
+static int child_wait_lock(void *arg)
+{
+ struct lock_struct *lock = arg;
+ struct robust_list_head head;
+ int ret;
+
+ pthread_barrier_wait(&barrier2);
+ ret = mutex_lock(lock, &head, false);
+
+ if (ret) {
+ ksft_test_result_fail("mutex_lock error\n");
+ return -1;
+ }
+
+ if (!(lock->futex & FUTEX_OWNER_DIED)) {
+ ksft_test_result_fail("futex not marked with FUTEX_OWNER_DIED\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Test a robust list of more than one element. All the waiters should wake when
+ * the holder dies
+ */
+TEST(test_robust_list_multiple_elements)
+{
+ struct lock_struct locks[CHILD_NR];
+ pid_t pids[CHILD_NR + 1];
+ int i, ret, wstatus;
+
+ ret = pthread_barrier_init(&barrier, NULL, 2);
+ ASSERT_EQ(ret, 0);
+ ret = pthread_barrier_init(&barrier2, NULL, CHILD_NR + 1);
+ ASSERT_EQ(ret, 0);
+
+ pids[0] = create_child(&child_lock_holder, &locks);
+
+	/* Wait until the holder thread has taken the locks */
+ pthread_barrier_wait(&barrier);
+
+ for (i = 0; i < CHILD_NR; i++)
+ pids[i+1] = create_child(&child_wait_lock, &locks[i]);
+
+ /* Wait for all children to return */
+ ret = 0;
+
+	for (i = 0; i < CHILD_NR + 1; i++) {
+ waitpid(pids[i], &wstatus, 0);
+ if (WEXITSTATUS(wstatus))
+ ret = -1;
+ }
+
+ pthread_barrier_destroy(&barrier);
+ pthread_barrier_destroy(&barrier2);
+
+	/* Pass only if no child has returned an error */
+ if (!ret)
+ ksft_test_result_pass("%s\n", __func__);
+}
+
+static int child_circular_list(void *arg)
+{
+ static struct robust_list_head head;
+ struct lock_struct a, b, c;
+ int ret;
+
+ ret = set_list(&head);
+ if (ret) {
+ ksft_test_result_fail("set_list error\n");
+ return -1;
+ }
+
+ head.list.next = &a.list;
+
+ /*
+	 * The last element should point back to the list head, but we short-circuit it
+ */
+ a.list.next = &b.list;
+ b.list.next = &c.list;
+ c.list.next = &a.list;
+
+ return 0;
+}
+
+/*
+ * Create a circular robust list. The kernel should be able to stop walking the
+ * list so it won't be trapped in an infinite loop while handling a process
+ * exit
+ */
+TEST(test_circular_list)
+{
+ int wstatus;
+
+ create_child(child_circular_list, NULL);
+
+ wait(&wstatus);
+
+	/* Pass only if the child hasn't returned an error */
+ if (!WEXITSTATUS(wstatus))
+ ksft_test_result_pass("%s\n", __func__);
+}
+
+TEST_HARNESS_MAIN