summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/binfmts.h4
-rw-r--r--include/linux/bio-integrity.h25
-rw-r--r--include/linux/bio.h5
-rw-r--r--include/linux/blk-mq-pci.h11
-rw-r--r--include/linux/blk-mq-virtio.h11
-rw-r--r--include/linux/blk-mq.h35
-rw-r--r--include/linux/blkdev.h36
-rw-r--r--include/linux/bvec.h7
-rw-r--r--include/linux/cc_platform.h8
-rw-r--r--include/linux/cgroup_dmem.h66
-rw-r--r--include/linux/cgroup_subsys.h4
-rw-r--r--include/linux/compiler-gcc.h12
-rw-r--r--include/linux/compiler.h37
-rw-r--r--include/linux/coredump.h4
-rw-r--r--include/linux/cpuhotplug.h1
-rw-r--r--include/linux/cred.h43
-rw-r--r--include/linux/device/bus.h3
-rw-r--r--include/linux/exportfs.h20
-rw-r--r--include/linux/fiemap.h16
-rw-r--r--include/linux/folio_queue.h12
-rw-r--r--include/linux/fprobe.h62
-rw-r--r--include/linux/fs.h31
-rw-r--r--include/linux/fs_parser.h2
-rw-r--r--include/linux/ftrace.h116
-rw-r--r--include/linux/ftrace_regs.h2
-rw-r--r--include/linux/hrtimer.h1
-rw-r--r--include/linux/instrumentation.h11
-rw-r--r--include/linux/io_uring_types.h26
-rw-r--r--include/linux/irq.h17
-rw-r--r--include/linux/kref.h48
-rw-r--r--include/linux/kthread.h56
-rw-r--r--include/linux/libata.h4
-rw-r--r--include/linux/libgcc.h4
-rw-r--r--include/linux/lockref.h26
-rw-r--r--include/linux/lsm_audit.h16
-rw-r--r--include/linux/lsm_hook_defs.h13
-rw-r--r--include/linux/min_heap.h4
-rw-r--r--include/linux/mm.h12
-rw-r--r--include/linux/mm_types.h7
-rw-r--r--include/linux/mmap_lock.h84
-rw-r--r--include/linux/mmu_context.h1
-rw-r--r--include/linux/mod_devicetable.h2
-rw-r--r--include/linux/mount.h4
-rw-r--r--include/linux/netfs.h54
-rw-r--r--include/linux/nvme.h42
-rw-r--r--include/linux/objtool.h126
-rw-r--r--include/linux/objtool_types.h12
-rw-r--r--include/linux/page_counter.h2
-rw-r--r--include/linux/perf_event.h32
-rw-r--r--include/linux/pid.h5
-rw-r--r--include/linux/pid_namespace.h10
-rw-r--r--include/linux/pidfs.h3
-rw-r--r--include/linux/platform_data/cros_ec_proto.h7
-rw-r--r--include/linux/pm_opp.h7
-rw-r--r--include/linux/pruss_driver.h12
-rw-r--r--include/linux/pseudo_fs.h1
-rw-r--r--include/linux/rbtree.h37
-rw-r--r--include/linux/rculist.h44
-rw-r--r--include/linux/rcupdate_wait.h11
-rw-r--r--include/linux/rolling_buffer.h61
-rw-r--r--include/linux/sched.h19
-rw-r--r--include/linux/sched/isolation.h21
-rw-r--r--include/linux/sched/topology.h13
-rw-r--r--include/linux/sched/wake_q.h34
-rw-r--r--include/linux/security.h37
-rw-r--r--include/linux/seqlock.h24
-rw-r--r--include/linux/slab.h1
-rw-r--r--include/linux/srcu.h21
-rw-r--r--include/linux/srcutree.h8
-rw-r--r--include/linux/stat.h1
-rw-r--r--include/linux/timekeeping.h15
-rw-r--r--include/linux/torture.h2
-rw-r--r--include/linux/tty_driver.h2
-rw-r--r--include/linux/uio.h9
-rw-r--r--include/linux/uprobes.h16
75 files changed, 1152 insertions, 446 deletions
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index e6c00e860951..3305c849abd6 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -42,7 +42,9 @@ struct linux_binprm {
* Set when errors can no longer be returned to the
* original userspace.
*/
- point_of_no_return:1;
+ point_of_no_return:1,
+ /* Set when "comm" must come from the dentry. */
+ comm_from_dentry:1;
struct file *executable; /* Executable to pass to the interpreter */
struct file *interpreter;
struct file *file;
diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h
index dbf0f74c1529..802f52e38efd 100644
--- a/include/linux/bio-integrity.h
+++ b/include/linux/bio-integrity.h
@@ -7,10 +7,12 @@
enum bip_flags {
BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */
BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */
- BIP_CTRL_NOCHECK = 1 << 2, /* disable HBA integrity checking */
- BIP_DISK_NOCHECK = 1 << 3, /* disable disk integrity checking */
- BIP_IP_CHECKSUM = 1 << 4, /* IP checksum */
- BIP_COPY_USER = 1 << 5, /* Kernel bounce buffer in use */
+ BIP_DISK_NOCHECK = 1 << 2, /* disable disk integrity checking */
+ BIP_IP_CHECKSUM = 1 << 3, /* IP checksum */
+ BIP_COPY_USER = 1 << 4, /* Kernel bounce buffer in use */
+ BIP_CHECK_GUARD = 1 << 5, /* guard check */
+ BIP_CHECK_REFTAG = 1 << 6, /* reftag check */
+ BIP_CHECK_APPTAG = 1 << 7, /* apptag check */
};
struct bio_integrity_payload {
@@ -21,6 +23,7 @@ struct bio_integrity_payload {
unsigned short bip_vcnt; /* # of integrity bio_vecs */
unsigned short bip_max_vcnt; /* integrity bio_vec slots */
unsigned short bip_flags; /* control flags */
+ u16 app_tag; /* application tag value */
struct bvec_iter bio_iter; /* for rewinding parent bio */
@@ -30,6 +33,9 @@ struct bio_integrity_payload {
struct bio_vec bip_inline_vecs[];/* embedded bvec array */
};
+#define BIP_CLONE_FLAGS (BIP_MAPPED_INTEGRITY | BIP_IP_CHECKSUM | \
+ BIP_CHECK_GUARD | BIP_CHECK_REFTAG | BIP_CHECK_APPTAG)
+
#ifdef CONFIG_BLK_DEV_INTEGRITY
#define bip_for_each_vec(bvl, bip, iter) \
@@ -72,7 +78,8 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, gfp_t gfp,
unsigned int nr);
int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len,
unsigned int offset);
-int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len);
+int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter);
+int bio_integrity_map_iter(struct bio *bio, struct uio_meta *meta);
void bio_integrity_unmap_user(struct bio *bio);
bool bio_integrity_prep(struct bio *bio);
void bio_integrity_advance(struct bio *bio, unsigned int bytes_done);
@@ -98,8 +105,12 @@ static inline void bioset_integrity_free(struct bio_set *bs)
{
}
-static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf,
- ssize_t len)
+static inline int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter)
+{
+ return -EINVAL;
+}
+
+static inline int bio_integrity_map_iter(struct bio *bio, struct uio_meta *meta)
{
return -EINVAL;
}
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 7a1b3b1a8fed..4b79bf50f4f0 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -19,9 +19,6 @@ static inline unsigned int bio_max_segs(unsigned int nr_segs)
return min(nr_segs, BIO_MAX_VECS);
}
-#define bio_prio(bio) (bio)->bi_ioprio
-#define bio_set_prio(bio, prio) ((bio)->bi_ioprio = prio)
-
#define bio_iter_iovec(bio, iter) \
bvec_iter_bvec((bio)->bi_io_vec, (iter))
@@ -416,8 +413,6 @@ int __must_check bio_add_page(struct bio *bio, struct page *page, unsigned len,
unsigned off);
bool __must_check bio_add_folio(struct bio *bio, struct folio *folio,
size_t len, size_t off);
-extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
- unsigned int, unsigned int);
void __bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int off);
void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
diff --git a/include/linux/blk-mq-pci.h b/include/linux/blk-mq-pci.h
deleted file mode 100644
index ca544e1d3508..000000000000
--- a/include/linux/blk-mq-pci.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_BLK_MQ_PCI_H
-#define _LINUX_BLK_MQ_PCI_H
-
-struct blk_mq_queue_map;
-struct pci_dev;
-
-void blk_mq_pci_map_queues(struct blk_mq_queue_map *qmap, struct pci_dev *pdev,
- int offset);
-
-#endif /* _LINUX_BLK_MQ_PCI_H */
diff --git a/include/linux/blk-mq-virtio.h b/include/linux/blk-mq-virtio.h
deleted file mode 100644
index 13226e9b22dd..000000000000
--- a/include/linux/blk-mq-virtio.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_BLK_MQ_VIRTIO_H
-#define _LINUX_BLK_MQ_VIRTIO_H
-
-struct blk_mq_queue_map;
-struct virtio_device;
-
-void blk_mq_virtio_map_queues(struct blk_mq_queue_map *qmap,
- struct virtio_device *vdev, int first_vec);
-
-#endif /* _LINUX_BLK_MQ_VIRTIO_H */
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index c596e0e4cb75..a0a9007cc1e3 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -296,13 +296,6 @@ enum blk_eh_timer_return {
BLK_EH_RESET_TIMER,
};
-/* Keep alloc_policy_name[] in sync with the definitions below */
-enum {
- BLK_TAG_ALLOC_FIFO, /* allocate starting from 0 */
- BLK_TAG_ALLOC_RR, /* allocate starting from last allocated tag */
- BLK_TAG_ALLOC_MAX
-};
-
/**
* struct blk_mq_hw_ctx - State for a hardware queue facing the hardware
* block device
@@ -668,7 +661,6 @@ struct blk_mq_ops {
/* Keep hctx_flag_name[] in sync with the definitions below */
enum {
- BLK_MQ_F_SHOULD_MERGE = 1 << 0,
BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1,
/*
* Set when this device requires underlying blk-mq device for
@@ -677,23 +669,20 @@ enum {
BLK_MQ_F_STACKING = 1 << 2,
BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3,
BLK_MQ_F_BLOCKING = 1 << 4,
- /* Do not allow an I/O scheduler to be configured. */
- BLK_MQ_F_NO_SCHED = 1 << 5,
+
+ /*
+ * Alloc tags on a round-robin base instead of the first available one.
+ */
+ BLK_MQ_F_TAG_RR = 1 << 5,
/*
* Select 'none' during queue registration in case of a single hwq
* or shared hwqs instead of 'mq-deadline'.
*/
BLK_MQ_F_NO_SCHED_BY_DEFAULT = 1 << 6,
- BLK_MQ_F_ALLOC_POLICY_START_BIT = 7,
- BLK_MQ_F_ALLOC_POLICY_BITS = 1,
+
+ BLK_MQ_F_MAX = 1 << 7,
};
-#define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
- ((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \
- ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1))
-#define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \
- ((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
- << BLK_MQ_F_ALLOC_POLICY_START_BIT)
#define BLK_MQ_MAX_DEPTH (10240)
#define BLK_MQ_NO_HCTX_IDX (-1U)
@@ -921,6 +910,8 @@ void blk_mq_unfreeze_queue_non_owner(struct request_queue *q);
void blk_freeze_queue_start_non_owner(struct request_queue *q);
void blk_mq_map_queues(struct blk_mq_queue_map *qmap);
+void blk_mq_map_hw_queues(struct blk_mq_queue_map *qmap,
+ struct device *dev, unsigned int offset);
void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
void blk_mq_quiesce_queue_nowait(struct request_queue *q);
@@ -977,14 +968,6 @@ static inline void blk_mq_cleanup_rq(struct request *rq)
rq->q->mq_ops->cleanup_rq(rq);
}
-static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio,
- unsigned int nr_segs)
-{
- rq->nr_phys_segments = nr_segs;
- rq->__data_len = bio->bi_iter.bi_size;
- rq->bio = rq->biotail = bio;
-}
-
void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx,
struct lock_class_key *key);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 378d3a1a22fc..76f0a4e7c2e5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -331,8 +331,8 @@ typedef unsigned int __bitwise blk_features_t;
#define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \
((__force blk_features_t)(1u << 15))
-/* stacked device can/does support atomic writes */
-#define BLK_FEAT_ATOMIC_WRITES_STACKED \
+/* atomic writes enabled */
+#define BLK_FEAT_ATOMIC_WRITES \
((__force blk_features_t)(1u << 16))
/*
@@ -581,6 +581,12 @@ struct request_queue {
#ifdef CONFIG_LOCKDEP
struct task_struct *mq_freeze_owner;
int mq_freeze_owner_depth;
+ /*
+ * Records disk & queue state in current context, used in unfreeze
+ * queue
+ */
+ bool mq_freeze_disk_dead;
+ bool mq_freeze_queue_dying;
#endif
wait_queue_head_t mq_freeze_wq;
/*
@@ -938,8 +944,7 @@ static inline unsigned int blk_boundary_sectors_left(sector_t offset,
* the caller can modify. The caller must call queue_limits_commit_update()
* to finish the update.
*
- * Context: process context. The caller must have frozen the queue or ensured
- * that there is outstanding I/O by other means.
+ * Context: process context.
*/
static inline struct queue_limits
queue_limits_start_update(struct request_queue *q)
@@ -947,6 +952,8 @@ queue_limits_start_update(struct request_queue *q)
mutex_lock(&q->limits_lock);
return q->limits;
}
+int queue_limits_commit_update_frozen(struct request_queue *q,
+ struct queue_limits *lim);
int queue_limits_commit_update(struct request_queue *q,
struct queue_limits *lim);
int queue_limits_set(struct request_queue *q, struct queue_limits *lim);
@@ -1699,6 +1706,15 @@ struct io_comp_batch {
void (*complete)(struct io_comp_batch *);
};
+static inline bool blk_atomic_write_start_sect_aligned(sector_t sector,
+ struct queue_limits *limits)
+{
+ unsigned int alignment = max(limits->atomic_write_hw_unit_min,
+ limits->atomic_write_hw_boundary);
+
+ return IS_ALIGNED(sector, alignment >> SECTOR_SHIFT);
+}
+
static inline bool bdev_can_atomic_write(struct block_device *bdev)
{
struct request_queue *bd_queue = bdev->bd_queue;
@@ -1707,15 +1723,9 @@ static inline bool bdev_can_atomic_write(struct block_device *bdev)
if (!limits->atomic_write_unit_min)
return false;
- if (bdev_is_partition(bdev)) {
- sector_t bd_start_sect = bdev->bd_start_sect;
- unsigned int alignment =
- max(limits->atomic_write_unit_min,
- limits->atomic_write_hw_boundary);
-
- if (!IS_ALIGNED(bd_start_sect, alignment >> SECTOR_SHIFT))
- return false;
- }
+ if (bdev_is_partition(bdev))
+ return blk_atomic_write_start_sect_aligned(bdev->bd_start_sect,
+ limits);
return true;
}
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index f41c7f0ef91e..ba8f52d48b94 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -286,12 +286,7 @@ static inline void *bvec_virt(struct bio_vec *bvec)
*/
static inline phys_addr_t bvec_phys(const struct bio_vec *bvec)
{
- /*
- * Note this open codes page_to_phys because page_to_phys is defined in
- * <asm/io.h>, which we don't want to pull in here. If it ever moves to
- * a sensible place we should start using it.
- */
- return PFN_PHYS(page_to_pfn(bvec->bv_page)) + bvec->bv_offset;
+ return page_to_phys(bvec->bv_page) + bvec->bv_offset;
}
#endif /* __LINUX_BVEC_H */
diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h
index caa4b4430634..0bf7d33a1048 100644
--- a/include/linux/cc_platform.h
+++ b/include/linux/cc_platform.h
@@ -82,6 +82,14 @@ enum cc_attr {
CC_ATTR_GUEST_SEV_SNP,
/**
+ * @CC_ATTR_GUEST_SNP_SECURE_TSC: SNP Secure TSC is active.
+ *
+ * The platform/OS is running as a guest/virtual machine and actively
+ * using AMD SEV-SNP Secure TSC feature.
+ */
+ CC_ATTR_GUEST_SNP_SECURE_TSC,
+
+ /**
* @CC_ATTR_HOST_SEV_SNP: AMD SNP enabled on the host.
*
* The host kernel is running with the necessary features
diff --git a/include/linux/cgroup_dmem.h b/include/linux/cgroup_dmem.h
new file mode 100644
index 000000000000..dd4869f1d736
--- /dev/null
+++ b/include/linux/cgroup_dmem.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _CGROUP_DMEM_H
+#define _CGROUP_DMEM_H
+
+#include <linux/types.h>
+#include <linux/llist.h>
+
+struct dmem_cgroup_pool_state;
+
+/* Opaque definition of a cgroup region, used internally */
+struct dmem_cgroup_region;
+
+#if IS_ENABLED(CONFIG_CGROUP_DMEM)
+struct dmem_cgroup_region *dmem_cgroup_register_region(u64 size, const char *name_fmt, ...) __printf(2,3);
+void dmem_cgroup_unregister_region(struct dmem_cgroup_region *region);
+int dmem_cgroup_try_charge(struct dmem_cgroup_region *region, u64 size,
+ struct dmem_cgroup_pool_state **ret_pool,
+ struct dmem_cgroup_pool_state **ret_limit_pool);
+void dmem_cgroup_uncharge(struct dmem_cgroup_pool_state *pool, u64 size);
+bool dmem_cgroup_state_evict_valuable(struct dmem_cgroup_pool_state *limit_pool,
+ struct dmem_cgroup_pool_state *test_pool,
+ bool ignore_low, bool *ret_hit_low);
+
+void dmem_cgroup_pool_state_put(struct dmem_cgroup_pool_state *pool);
+#else
+static inline __printf(2,3) struct dmem_cgroup_region *
+dmem_cgroup_register_region(u64 size, const char *name_fmt, ...)
+{
+ return NULL;
+}
+
+static inline void dmem_cgroup_unregister_region(struct dmem_cgroup_region *region)
+{ }
+
+static inline int dmem_cgroup_try_charge(struct dmem_cgroup_region *region, u64 size,
+ struct dmem_cgroup_pool_state **ret_pool,
+ struct dmem_cgroup_pool_state **ret_limit_pool)
+{
+ *ret_pool = NULL;
+
+ if (ret_limit_pool)
+ *ret_limit_pool = NULL;
+
+ return 0;
+}
+
+static inline void dmem_cgroup_uncharge(struct dmem_cgroup_pool_state *pool, u64 size)
+{ }
+
+static inline
+bool dmem_cgroup_state_evict_valuable(struct dmem_cgroup_pool_state *limit_pool,
+ struct dmem_cgroup_pool_state *test_pool,
+ bool ignore_low, bool *ret_hit_low)
+{
+ return true;
+}
+
+static inline void dmem_cgroup_pool_state_put(struct dmem_cgroup_pool_state *pool)
+{ }
+
+#endif
+#endif /* _CGROUP_DMEM_H */
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 445235487230..3fd0bcbf3080 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -65,6 +65,10 @@ SUBSYS(rdma)
SUBSYS(misc)
#endif
+#if IS_ENABLED(CONFIG_CGROUP_DMEM)
+SUBSYS(dmem)
+#endif
+
/*
* The following subsystems are not supported on the default hierarchy.
*/
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index d0ed9583743f..c9b58188ec61 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -52,18 +52,6 @@
*/
#define barrier_before_unreachable() asm volatile("")
-/*
- * Mark a position in code as unreachable. This can be used to
- * suppress control flow warnings after asm blocks that transfer
- * control elsewhere.
- */
-#define unreachable() \
- do { \
- annotate_unreachable(); \
- barrier_before_unreachable(); \
- __builtin_unreachable(); \
- } while (0)
-
#if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP)
#define __HAVE_BUILTIN_BSWAP32__
#define __HAVE_BUILTIN_BSWAP64__
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 240c632c5b95..efd43df3a99a 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -109,44 +109,21 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
/* Unreachable code */
#ifdef CONFIG_OBJTOOL
-/*
- * These macros help objtool understand GCC code flow for unreachable code.
- * The __COUNTER__ based labels are a hack to make each instance of the macros
- * unique, to convince GCC not to merge duplicate inline asm statements.
- */
-#define __stringify_label(n) #n
-
-#define __annotate_reachable(c) ({ \
- asm volatile(__stringify_label(c) ":\n\t" \
- ".pushsection .discard.reachable\n\t" \
- ".long " __stringify_label(c) "b - .\n\t" \
- ".popsection\n\t"); \
-})
-#define annotate_reachable() __annotate_reachable(__COUNTER__)
-
-#define __annotate_unreachable(c) ({ \
- asm volatile(__stringify_label(c) ":\n\t" \
- ".pushsection .discard.unreachable\n\t" \
- ".long " __stringify_label(c) "b - .\n\t" \
- ".popsection\n\t" : : "i" (c)); \
-})
-#define annotate_unreachable() __annotate_unreachable(__COUNTER__)
-
/* Annotate a C jump table to allow objtool to follow the code flow */
#define __annotate_jump_table __section(".rodata..c_jump_table,\"a\",@progbits #")
-
#else /* !CONFIG_OBJTOOL */
-#define annotate_reachable()
-#define annotate_unreachable()
#define __annotate_jump_table
#endif /* CONFIG_OBJTOOL */
-#ifndef unreachable
-# define unreachable() do { \
- annotate_unreachable(); \
+/*
+ * Mark a position in code as unreachable. This can be used to
+ * suppress control flow warnings after asm blocks that transfer
+ * control elsewhere.
+ */
+#define unreachable() do { \
+ barrier_before_unreachable(); \
__builtin_unreachable(); \
} while (0)
-#endif
/*
* KENTRY - kernel entry point
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 45e598fe3476..77e6e195d1d6 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -52,8 +52,8 @@ extern void do_coredump(const kernel_siginfo_t *siginfo);
#define __COREDUMP_PRINTK(Level, Format, ...) \
do { \
char comm[TASK_COMM_LEN]; \
- \
- get_task_comm(comm, current); \
+ /* This will always be NUL terminated. */ \
+ memcpy(comm, current->comm, sizeof(comm)); \
printk_ratelimited(Level "coredump: %d(%*pE): " Format "\n", \
task_tgid_vnr(current), (int)strlen(comm), comm, ##__VA_ARGS__); \
} while (0) \
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index a04b73c40173..6cc5e484547c 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -240,6 +240,7 @@ enum cpuhp_state {
CPUHP_AP_WORKQUEUE_ONLINE,
CPUHP_AP_RANDOM_ONLINE,
CPUHP_AP_RCUTREE_ONLINE,
+ CPUHP_AP_KTHREADS_ONLINE,
CPUHP_AP_BASE_CACHEINFO_ONLINE,
CPUHP_AP_ONLINE_DYN,
CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 40,
diff --git a/include/linux/cred.h b/include/linux/cred.h
index e4a3155fe409..0c3c4b16b469 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -155,8 +155,6 @@ extern struct cred *prepare_creds(void);
extern struct cred *prepare_exec_creds(void);
extern int commit_creds(struct cred *);
extern void abort_creds(struct cred *);
-extern const struct cred *override_creds(const struct cred *);
-extern void revert_creds(const struct cred *);
extern struct cred *prepare_kernel_cred(struct task_struct *);
extern int set_security_override(struct cred *, u32);
extern int set_security_override_from_ctx(struct cred *, const char *);
@@ -172,12 +170,7 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred)
cred->cap_inheritable));
}
-/*
- * Override creds without bumping reference count. Caller must ensure
- * reference remains valid or has taken reference. Almost always not the
- * interface you want. Use override_creds()/revert_creds() instead.
- */
-static inline const struct cred *override_creds_light(const struct cred *override_cred)
+static inline const struct cred *override_creds(const struct cred *override_cred)
{
const struct cred *old = current->cred;
@@ -185,35 +178,12 @@ static inline const struct cred *override_creds_light(const struct cred *overrid
return old;
}
-static inline void revert_creds_light(const struct cred *revert_cred)
-{
- rcu_assign_pointer(current->cred, revert_cred);
-}
-
-/**
- * get_new_cred_many - Get references on a new set of credentials
- * @cred: The new credentials to reference
- * @nr: Number of references to acquire
- *
- * Get references on the specified set of new credentials. The caller must
- * release all acquired references.
- */
-static inline struct cred *get_new_cred_many(struct cred *cred, int nr)
+static inline const struct cred *revert_creds(const struct cred *revert_cred)
{
- atomic_long_add(nr, &cred->usage);
- return cred;
-}
+ const struct cred *override_cred = current->cred;
-/**
- * get_new_cred - Get a reference on a new set of credentials
- * @cred: The new credentials to reference
- *
- * Get a reference on the specified set of new credentials. The caller must
- * release the reference.
- */
-static inline struct cred *get_new_cred(struct cred *cred)
-{
- return get_new_cred_many(cred, 1);
+ rcu_assign_pointer(current->cred, revert_cred);
+ return override_cred;
}
/**
@@ -236,7 +206,8 @@ static inline const struct cred *get_cred_many(const struct cred *cred, int nr)
if (!cred)
return cred;
nonconst_cred->non_rcu = 0;
- return get_new_cred_many(nonconst_cred, nr);
+ atomic_long_add(nr, &nonconst_cred->usage);
+ return cred;
}
/*
diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h
index cdc4757217f9..b18658bce2c3 100644
--- a/include/linux/device/bus.h
+++ b/include/linux/device/bus.h
@@ -48,6 +48,7 @@ struct fwnode_handle;
* will never get called until they do.
* @remove: Called when a device removed from this bus.
* @shutdown: Called at shut-down time to quiesce the device.
+ * @irq_get_affinity: Get IRQ affinity mask for the device on this bus.
*
* @online: Called to put the device back online (after offlining it).
* @offline: Called to put the device offline for hot-removal. May fail.
@@ -87,6 +88,8 @@ struct bus_type {
void (*sync_state)(struct device *dev);
void (*remove)(struct device *dev);
void (*shutdown)(struct device *dev);
+ const struct cpumask *(*irq_get_affinity)(struct device *dev,
+ unsigned int irq_vec);
int (*online)(struct device *dev);
int (*offline)(struct device *dev);
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 4cc8801e50e3..a087606ace19 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -3,6 +3,7 @@
#define LINUX_EXPORTFS_H 1
#include <linux/types.h>
+#include <linux/path.h>
struct dentry;
struct iattr;
@@ -156,6 +157,17 @@ struct fid {
};
};
+enum handle_to_path_flags {
+ HANDLE_CHECK_PERMS = (1 << 0),
+ HANDLE_CHECK_SUBTREE = (1 << 1),
+};
+
+struct handle_to_path_ctx {
+ struct path root;
+ enum handle_to_path_flags flags;
+ unsigned int fh_flags;
+};
+
#define EXPORT_FH_CONNECTABLE 0x1 /* Encode file handle with parent */
#define EXPORT_FH_FID 0x2 /* File handle may be non-decodeable */
#define EXPORT_FH_DIR_ONLY 0x4 /* Only decode file handle for a directory */
@@ -225,6 +237,12 @@ struct fid {
* is also a directory. In the event that it cannot be found, or storage
* space cannot be allocated, a %ERR_PTR should be returned.
*
+ * permission:
+ * Allow filesystems to specify a custom permission function.
+ *
+ * open:
+ * Allow filesystems to specify a custom open function.
+ *
* commit_metadata:
* @commit_metadata should commit metadata changes to stable storage.
*
@@ -251,6 +269,8 @@ struct export_operations {
bool write, u32 *device_generation);
int (*commit_blocks)(struct inode *inode, struct iomap *iomaps,
int nr_iomaps, struct iattr *iattr);
+ int (*permission)(struct handle_to_path_ctx *ctx, unsigned int oflags);
+ struct file * (*open)(struct path *path, unsigned int oflags);
#define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */
#define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */
#define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */
diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h
index c50882f19235..966092ffa89a 100644
--- a/include/linux/fiemap.h
+++ b/include/linux/fiemap.h
@@ -5,12 +5,18 @@
#include <uapi/linux/fiemap.h>
#include <linux/fs.h>
+/**
+ * struct fiemap_extent_info - fiemap request to a filesystem
+ * @fi_flags: Flags as passed from user
+ * @fi_extents_mapped: Number of mapped extents
+ * @fi_extents_max: Size of fiemap_extent array
+ * @fi_extents_start: Start of fiemap_extent array
+ */
struct fiemap_extent_info {
- unsigned int fi_flags; /* Flags as passed from user */
- unsigned int fi_extents_mapped; /* Number of mapped extents */
- unsigned int fi_extents_max; /* Size of fiemap_extent array */
- struct fiemap_extent __user *fi_extents_start; /* Start of
- fiemap_extent array */
+ unsigned int fi_flags;
+ unsigned int fi_extents_mapped;
+ unsigned int fi_extents_max;
+ struct fiemap_extent __user *fi_extents_start;
};
int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo,
diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h
index 3abe614ef5f0..4d3f8074c137 100644
--- a/include/linux/folio_queue.h
+++ b/include/linux/folio_queue.h
@@ -37,16 +37,20 @@ struct folio_queue {
#if PAGEVEC_SIZE > BITS_PER_LONG
#error marks is not big enough
#endif
+ unsigned int rreq_id;
+ unsigned int debug_id;
};
/**
* folioq_init - Initialise a folio queue segment
* @folioq: The segment to initialise
+ * @rreq_id: The request identifier to use in tracelines.
*
- * Initialise a folio queue segment. Note that the folio pointers are
- * left uninitialised.
+ * Initialise a folio queue segment and set an identifier to be used in traces.
+ *
+ * Note that the folio pointers are left uninitialised.
*/
-static inline void folioq_init(struct folio_queue *folioq)
+static inline void folioq_init(struct folio_queue *folioq, unsigned int rreq_id)
{
folio_batch_init(&folioq->vec);
folioq->next = NULL;
@@ -54,6 +58,8 @@ static inline void folioq_init(struct folio_queue *folioq)
folioq->marks = 0;
folioq->marks2 = 0;
folioq->marks3 = 0;
+ folioq->rreq_id = rreq_id;
+ folioq->debug_id = 0;
}
/**
diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h
index f39869588117..702099f08929 100644
--- a/include/linux/fprobe.h
+++ b/include/linux/fprobe.h
@@ -5,47 +5,68 @@
#include <linux/compiler.h>
#include <linux/ftrace.h>
-#include <linux/rethook.h>
+#include <linux/rcupdate.h>
+#include <linux/refcount.h>
+#include <linux/slab.h>
struct fprobe;
-
typedef int (*fprobe_entry_cb)(struct fprobe *fp, unsigned long entry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *regs,
void *entry_data);
typedef void (*fprobe_exit_cb)(struct fprobe *fp, unsigned long entry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *regs,
void *entry_data);
/**
+ * struct fprobe_hlist_node - address based hash list node for fprobe.
+ *
+ * @hlist: The hlist node for address search hash table.
+ * @addr: One of the probing address of @fp.
+ * @fp: The fprobe which owns this.
+ */
+struct fprobe_hlist_node {
+ struct hlist_node hlist;
+ unsigned long addr;
+ struct fprobe *fp;
+};
+
+/**
+ * struct fprobe_hlist - hash list nodes for fprobe.
+ *
+ * @hlist: The hlist node for existence checking hash table.
+ * @rcu: rcu_head for RCU deferred release.
+ * @fp: The fprobe which owns this fprobe_hlist.
+ * @size: The size of @array.
+ * @array: The fprobe_hlist_node for each address to probe.
+ */
+struct fprobe_hlist {
+ struct hlist_node hlist;
+ struct rcu_head rcu;
+ struct fprobe *fp;
+ int size;
+ struct fprobe_hlist_node array[] __counted_by(size);
+};
+
+/**
* struct fprobe - ftrace based probe.
- * @ops: The ftrace_ops.
+ *
* @nmissed: The counter for missing events.
* @flags: The status flag.
- * @rethook: The rethook data structure. (internal data)
* @entry_data_size: The private data storage size.
- * @nr_maxactive: The max number of active functions.
* @entry_handler: The callback function for function entry.
* @exit_handler: The callback function for function exit.
+ * @hlist_array: The fprobe_hlist for fprobe search from IP hash table.
*/
struct fprobe {
-#ifdef CONFIG_FUNCTION_TRACER
- /*
- * If CONFIG_FUNCTION_TRACER is not set, CONFIG_FPROBE is disabled too.
- * But user of fprobe may keep embedding the struct fprobe on their own
- * code. To avoid build error, this will keep the fprobe data structure
- * defined here, but remove ftrace_ops data structure.
- */
- struct ftrace_ops ops;
-#endif
unsigned long nmissed;
unsigned int flags;
- struct rethook *rethook;
size_t entry_data_size;
- int nr_maxactive;
fprobe_entry_cb entry_handler;
fprobe_exit_cb exit_handler;
+
+ struct fprobe_hlist *hlist_array;
};
/* This fprobe is soft-disabled. */
@@ -121,4 +142,9 @@ static inline void enable_fprobe(struct fprobe *fp)
fp->flags &= ~FPROBE_FL_DISABLED;
}
+/* The entry data size is 4 bits (=16) * sizeof(long) in maximum */
+#define FPROBE_DATA_SIZE_BITS 4
+#define MAX_FPROBE_DATA_SIZE_WORD ((1L << FPROBE_DATA_SIZE_BITS) - 1)
+#define MAX_FPROBE_DATA_SIZE (MAX_FPROBE_DATA_SIZE_WORD * sizeof(long))
+
#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7e29433c5ecc..a4af70367f8a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -322,6 +322,7 @@ struct readahead_control;
#define IOCB_NOWAIT (__force int) RWF_NOWAIT
#define IOCB_APPEND (__force int) RWF_APPEND
#define IOCB_ATOMIC (__force int) RWF_ATOMIC
+#define IOCB_DONTCACHE (__force int) RWF_DONTCACHE
/* non-RWF related bits - start at 16 */
#define IOCB_EVENTFD (1 << 16)
@@ -348,6 +349,7 @@ struct readahead_control;
#define IOCB_DIO_CALLER_COMP (1 << 22)
/* kiocb is a read or write operation submitted by fs/aio.c. */
#define IOCB_AIO_RW (1 << 23)
+#define IOCB_HAS_METADATA (1 << 24)
/* for use in trace events */
#define TRACE_IOCB_STRINGS \
@@ -356,7 +358,8 @@ struct readahead_control;
{ IOCB_SYNC, "SYNC" }, \
{ IOCB_NOWAIT, "NOWAIT" }, \
{ IOCB_APPEND, "APPEND" }, \
- { IOCB_ATOMIC, "ATOMIC"}, \
+ { IOCB_ATOMIC, "ATOMIC" }, \
+ { IOCB_DONTCACHE, "DONTCACHE" }, \
{ IOCB_EVENTFD, "EVENTFD"}, \
{ IOCB_DIRECT, "DIRECT" }, \
{ IOCB_WRITE, "WRITE" }, \
@@ -626,6 +629,7 @@ is_uncached_acl(struct posix_acl *acl)
#define IOP_XATTR 0x0008
#define IOP_DEFAULT_READLINK 0x0010
#define IOP_MGTIME 0x0020
+#define IOP_CACHED_LINK 0x0040
/*
* Keep mostly read-only and often accessed (especially for
@@ -723,7 +727,10 @@ struct inode {
};
struct file_lock_context *i_flctx;
struct address_space i_data;
- struct list_head i_devices;
+ union {
+ struct list_head i_devices;
+ int i_linklen;
+ };
union {
struct pipe_inode_info *i_pipe;
struct cdev *i_cdev;
@@ -749,6 +756,13 @@ struct inode {
void *i_private; /* fs or device private pointer */
} __randomize_layout;
+static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen)
+{
+ inode->i_link = link;
+ inode->i_linklen = linklen;
+ inode->i_opflags |= IOP_CACHED_LINK;
+}
+
/*
* Get bit address from inode->i_state to use with wait_var_event()
* infrastructre.
@@ -2127,6 +2141,8 @@ struct file_operations {
#define FOP_UNSIGNED_OFFSET ((__force fop_flags_t)(1 << 5))
/* Supports asynchronous lock callbacks */
#define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6))
+/* File system supports uncached read/write buffered IO */
+#define FOP_DONTCACHE ((__force fop_flags_t)(1 << 7))
/* Wrap a directory iterator that needs exclusive inode access */
int wrap_directory_iterator(struct file *, struct dir_context *,
@@ -3351,7 +3367,7 @@ extern const struct file_operations generic_ro_fops;
#define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
-extern int readlink_copy(char __user *, int, const char *);
+extern int readlink_copy(char __user *, int, const char *, int);
extern int page_readlink(struct dentry *, char __user *, int);
extern const char *page_get_link(struct dentry *, struct inode *,
struct delayed_call *);
@@ -3468,7 +3484,6 @@ struct offset_ctx {
void simple_offset_init(struct offset_ctx *octx);
int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry);
void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry);
-int simple_offset_empty(struct dentry *dentry);
int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry);
int simple_offset_rename_exchange(struct inode *old_dir,
@@ -3614,6 +3629,14 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags,
if (!(ki->ki_filp->f_mode & FMODE_CAN_ATOMIC_WRITE))
return -EOPNOTSUPP;
}
+ if (flags & RWF_DONTCACHE) {
+ /* file system must support it */
+ if (!(ki->ki_filp->f_op->fop_flags & FOP_DONTCACHE))
+ return -EOPNOTSUPP;
+ /* DAX mappings not supported */
+ if (IS_DAX(ki->ki_filp->f_mapping->host))
+ return -EOPNOTSUPP;
+ }
kiocb_flags |= (__force int) (flags & RWF_SUPPORTED);
if (flags & RWF_SYNC)
kiocb_flags |= IOCB_DSYNC;
diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h
index 3cef566088fc..53e566efd5fd 100644
--- a/include/linux/fs_parser.h
+++ b/include/linux/fs_parser.h
@@ -84,6 +84,8 @@ extern int fs_lookup_param(struct fs_context *fc,
extern int lookup_constant(const struct constant_table tbl[], const char *name, int not_found);
+extern const struct constant_table bool_names[];
+
#ifdef CONFIG_VALIDATE_FS_PARSER
extern bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size,
int low, int high, int special);
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index aa9ddd1e4bb6..07092dfb21a4 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -43,9 +43,8 @@ struct dyn_ftrace;
char *arch_ftrace_match_adjust(char *str, const char *search);
-#ifdef CONFIG_HAVE_FUNCTION_GRAPH_RETVAL
-struct fgraph_ret_regs;
-unsigned long ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs);
+#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FREGS
+unsigned long ftrace_return_to_handler(struct ftrace_regs *fregs);
#else
unsigned long ftrace_return_to_handler(unsigned long frame_pointer);
#endif
@@ -134,6 +133,13 @@ extern int ftrace_enabled;
* Also, architecture dependent fields can be used for internal process.
* (e.g. orig_ax on x86_64)
*
+ * Basically, ftrace_regs stores the registers related to the context.
+ * On function entry, registers for function parameters and hooking the
+ * function call are stored, and on function exit, registers for function
+ * return value and frame pointers are stored.
+ *
+ * And also, it dpends on the context that which registers are restored
+ * from the ftrace_regs.
* On the function entry, those registers will be restored except for
* the stack pointer, so that user can change the function parameters
* and instruction pointer (e.g. live patching.)
@@ -170,6 +176,12 @@ static inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs)
#define ftrace_regs_set_instruction_pointer(fregs, ip) do { } while (0)
#endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
+#ifdef CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS
+
+static_assert(sizeof(struct pt_regs) == ftrace_regs_size());
+
+#endif /* CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS */
+
static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs)
{
if (!fregs)
@@ -178,6 +190,54 @@ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs
return arch_ftrace_get_regs(fregs);
}
+#if !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) || \
+ defined(CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS)
+
+static __always_inline struct pt_regs *
+ftrace_partial_regs(struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+ /*
+ * If CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS=y, ftrace_regs memory
+ * layout is including pt_regs. So always returns that address.
+ * Since arch_ftrace_get_regs() will check some members and may return
+ * NULL, we can not use it.
+ */
+ return &arch_ftrace_regs(fregs)->regs;
+}
+
+#endif /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS || CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS */
+
+#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
+
+/*
+ * Please define arch dependent pt_regs which compatible to the
+ * perf_arch_fetch_caller_regs() but based on ftrace_regs.
+ * This requires
+ * - user_mode(_regs) returns false (always kernel mode).
+ * - able to use the _regs for stack trace.
+ */
+#ifndef arch_ftrace_fill_perf_regs
+/* As same as perf_arch_fetch_caller_regs(), do nothing by default */
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do {} while (0)
+#endif
+
+static __always_inline struct pt_regs *
+ftrace_fill_perf_regs(struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+ arch_ftrace_fill_perf_regs(fregs, regs);
+ return regs;
+}
+
+#else /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
+
+static __always_inline struct pt_regs *
+ftrace_fill_perf_regs(struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+ return &arch_ftrace_regs(fregs)->regs;
+}
+
+#endif
+
/*
* When true, the ftrace_regs_{get,set}_*() functions may be used on fregs.
* Note: this can be true even when ftrace_get_regs() cannot provide a pt_regs.
@@ -190,6 +250,23 @@ static __always_inline bool ftrace_regs_has_args(struct ftrace_regs *fregs)
return ftrace_get_regs(fregs) != NULL;
}
+#ifdef CONFIG_HAVE_REGS_AND_STACK_ACCESS_API
+static __always_inline unsigned long
+ftrace_regs_get_kernel_stack_nth(struct ftrace_regs *fregs, unsigned int nth)
+{
+ unsigned long *stackp;
+
+ stackp = (unsigned long *)ftrace_regs_get_stack_pointer(fregs);
+ if (((unsigned long)(stackp + nth) & ~(THREAD_SIZE - 1)) ==
+ ((unsigned long)stackp & ~(THREAD_SIZE - 1)))
+ return *(stackp + nth);
+
+ return 0;
+}
+#else /* !CONFIG_HAVE_REGS_AND_STACK_ACCESS_API */
+#define ftrace_regs_get_kernel_stack_nth(fregs, nth) (0L)
+#endif /* CONFIG_HAVE_REGS_AND_STACK_ACCESS_API */
+
typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs);
@@ -545,6 +622,19 @@ enum {
FTRACE_MAY_SLEEP = (1 << 5),
};
+/* Arches can override ftrace_get_symaddr() to convert fentry_ip to symaddr. */
+#ifndef ftrace_get_symaddr
+/**
+ * ftrace_get_symaddr - return the symbol address from fentry_ip
+ * @fentry_ip: the address of ftrace location
+ *
+ * Get the symbol address from @fentry_ip (fast path). If there is no fast
+ * search path, this returns 0.
+ * User may need to use kallsyms API to find the symbol address.
+ */
+#define ftrace_get_symaddr(fentry_ip) (0)
+#endif
+
#ifdef CONFIG_DYNAMIC_FTRACE
void ftrace_arch_code_modify_prepare(void);
@@ -1069,12 +1159,15 @@ struct fgraph_ops;
/* Type of the callback handlers for tracing function graph*/
typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *,
- struct fgraph_ops *); /* return */
+ struct fgraph_ops *,
+ struct ftrace_regs *); /* return */
typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *,
- struct fgraph_ops *); /* entry */
+ struct fgraph_ops *,
+ struct ftrace_regs *); /* entry */
extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace,
- struct fgraph_ops *gops);
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs);
bool ftrace_pids_enabled(struct ftrace_ops *ops);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -1114,8 +1207,15 @@ struct ftrace_ret_stack {
extern void return_to_handler(void);
extern int
-function_graph_enter(unsigned long ret, unsigned long func,
- unsigned long frame_pointer, unsigned long *retp);
+function_graph_enter_regs(unsigned long ret, unsigned long func,
+ unsigned long frame_pointer, unsigned long *retp,
+ struct ftrace_regs *fregs);
+
+static inline int function_graph_enter(unsigned long ret, unsigned long func,
+ unsigned long fp, unsigned long *retp)
+{
+ return function_graph_enter_regs(ret, func, fp, retp, NULL);
+}
struct ftrace_ret_stack *
ftrace_graph_get_ret_stack(struct task_struct *task, int skip);
diff --git a/include/linux/ftrace_regs.h b/include/linux/ftrace_regs.h
index be1ed0c891d0..bbc1873ca6b8 100644
--- a/include/linux/ftrace_regs.h
+++ b/include/linux/ftrace_regs.h
@@ -30,6 +30,8 @@ struct ftrace_regs;
override_function_with_return(&arch_ftrace_regs(fregs)->regs)
#define ftrace_regs_query_register_offset(name) \
regs_query_register_offset(name)
+#define ftrace_regs_get_frame_pointer(fregs) \
+ frame_pointer(&arch_ftrace_regs(fregs)->regs)
#endif /* HAVE_ARCH_FTRACE_REGS */
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 7ef5f7ef31a9..f7bfdcf0dda3 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -386,6 +386,7 @@ extern void __init hrtimers_init(void);
extern void sysrq_timer_list_show(void);
int hrtimers_prepare_cpu(unsigned int cpu);
+int hrtimers_cpu_starting(unsigned int cpu);
#ifdef CONFIG_HOTPLUG_CPU
int hrtimers_cpu_dying(unsigned int cpu);
#else
diff --git a/include/linux/instrumentation.h b/include/linux/instrumentation.h
index bc7babe91b2e..bf675a8aef8a 100644
--- a/include/linux/instrumentation.h
+++ b/include/linux/instrumentation.h
@@ -4,14 +4,14 @@
#ifdef CONFIG_NOINSTR_VALIDATION
+#include <linux/objtool.h>
#include <linux/stringify.h>
/* Begin/end of an instrumentation safe region */
#define __instrumentation_begin(c) ({ \
asm volatile(__stringify(c) ": nop\n\t" \
- ".pushsection .discard.instr_begin\n\t" \
- ".long " __stringify(c) "b - .\n\t" \
- ".popsection\n\t" : : "i" (c)); \
+ ANNOTATE_INSTR_BEGIN(__ASM_BREF(c)) \
+ : : "i" (c)); \
})
#define instrumentation_begin() __instrumentation_begin(__COUNTER__)
@@ -48,9 +48,8 @@
*/
#define __instrumentation_end(c) ({ \
asm volatile(__stringify(c) ": nop\n\t" \
- ".pushsection .discard.instr_end\n\t" \
- ".long " __stringify(c) "b - .\n\t" \
- ".popsection\n\t" : : "i" (c)); \
+ ANNOTATE_INSTR_END(__ASM_BREF(c)) \
+ : : "i" (c)); \
})
#define instrumentation_end() __instrumentation_end(__COUNTER__)
#else /* !CONFIG_NOINSTR_VALIDATION */
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index fd4cdb0860a2..623d8e798a11 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -78,8 +78,9 @@ struct io_hash_table {
struct io_mapped_region {
struct page **pages;
- void *vmap_ptr;
- size_t nr_pages;
+ void *ptr;
+ unsigned nr_pages;
+ unsigned flags;
};
/*
@@ -293,6 +294,11 @@ struct io_ring_ctx {
struct io_submit_state submit_state;
+ /*
+ * Modifications are protected by ->uring_lock and ->mmap_lock.
+ * The flags, buf_pages and buf_nr_pages fields should be stable
+ * once published.
+ */
struct xarray io_bl_xa;
struct io_hash_table cancel_table;
@@ -424,17 +430,10 @@ struct io_ring_ctx {
* side will need to grab this lock, to prevent either side from
* being run concurrently with the other.
*/
- struct mutex resize_lock;
-
- /*
- * If IORING_SETUP_NO_MMAP is used, then the below holds
- * the gup'ed pages for the two rings, and the sqes.
- */
- unsigned short n_ring_pages;
- unsigned short n_sqe_pages;
- struct page **ring_pages;
- struct page **sqe_pages;
+ struct mutex mmap_lock;
+ struct io_mapped_region sq_region;
+ struct io_mapped_region ring_region;
/* used for optimised request parameter and wait argument passing */
struct io_mapped_region param_region;
};
@@ -481,6 +480,7 @@ enum {
REQ_F_BL_NO_RECYCLE_BIT,
REQ_F_BUFFERS_COMMIT_BIT,
REQ_F_BUF_NODE_BIT,
+ REQ_F_HAS_METADATA_BIT,
/* not a real bit, just to check we're not overflowing the space */
__REQ_F_LAST_BIT,
@@ -561,6 +561,8 @@ enum {
REQ_F_BUFFERS_COMMIT = IO_REQ_FLAG(REQ_F_BUFFERS_COMMIT_BIT),
/* buf node is valid */
REQ_F_BUF_NODE = IO_REQ_FLAG(REQ_F_BUF_NODE_BIT),
+ /* request has read/write metadata assigned */
+ REQ_F_HAS_METADATA = IO_REQ_FLAG(REQ_F_HAS_METADATA_BIT),
};
typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
diff --git a/include/linux/irq.h b/include/linux/irq.h
index fa711f80957b..8daa17f0107a 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -64,7 +64,6 @@ enum irqchip_irq_state;
* IRQ_NOAUTOEN - Interrupt is not automatically enabled in
* request/setup_irq()
* IRQ_NO_BALANCING - Interrupt cannot be balanced (affinity set)
- * IRQ_MOVE_PCNTXT - Interrupt can be migrated from process context
* IRQ_NESTED_THREAD - Interrupt nests into another thread
* IRQ_PER_CPU_DEVID - Dev_id is a per-cpu variable
* IRQ_IS_POLLED - Always polled by another interrupt. Exclude
@@ -93,7 +92,6 @@ enum {
IRQ_NOREQUEST = (1 << 11),
IRQ_NOAUTOEN = (1 << 12),
IRQ_NO_BALANCING = (1 << 13),
- IRQ_MOVE_PCNTXT = (1 << 14),
IRQ_NESTED_THREAD = (1 << 15),
IRQ_NOTHREAD = (1 << 16),
IRQ_PER_CPU_DEVID = (1 << 17),
@@ -105,7 +103,7 @@ enum {
#define IRQF_MODIFY_MASK \
(IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \
- IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \
+ IRQ_NOAUTOEN | IRQ_LEVEL | IRQ_NO_BALANCING | \
IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \
IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY | IRQ_HIDDEN)
@@ -201,8 +199,6 @@ struct irq_data {
* IRQD_LEVEL - Interrupt is level triggered
* IRQD_WAKEUP_STATE - Interrupt is configured for wakeup
* from suspend
- * IRQD_MOVE_PCNTXT - Interrupt can be moved in process
- * context
* IRQD_IRQ_DISABLED - Disabled state of the interrupt
* IRQD_IRQ_MASKED - Masked state of the interrupt
* IRQD_IRQ_INPROGRESS - In progress state of the interrupt
@@ -233,7 +229,6 @@ enum {
IRQD_AFFINITY_SET = BIT(12),
IRQD_LEVEL = BIT(13),
IRQD_WAKEUP_STATE = BIT(14),
- IRQD_MOVE_PCNTXT = BIT(15),
IRQD_IRQ_DISABLED = BIT(16),
IRQD_IRQ_MASKED = BIT(17),
IRQD_IRQ_INPROGRESS = BIT(18),
@@ -338,11 +333,6 @@ static inline bool irqd_is_wakeup_set(struct irq_data *d)
return __irqd_to_state(d) & IRQD_WAKEUP_STATE;
}
-static inline bool irqd_can_move_in_process_context(struct irq_data *d)
-{
- return __irqd_to_state(d) & IRQD_MOVE_PCNTXT;
-}
-
static inline bool irqd_irq_disabled(struct irq_data *d)
{
return __irqd_to_state(d) & IRQD_IRQ_DISABLED;
@@ -567,6 +557,7 @@ struct irq_chip {
* in the suspend path if they are in disabled state
* IRQCHIP_AFFINITY_PRE_STARTUP: Default affinity update before startup
* IRQCHIP_IMMUTABLE: Don't ever change anything in this chip
+ * IRQCHIP_MOVE_DEFERRED: Move the interrupt in actual interrupt context
*/
enum {
IRQCHIP_SET_TYPE_MASKED = (1 << 0),
@@ -581,6 +572,7 @@ enum {
IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND = (1 << 9),
IRQCHIP_AFFINITY_PRE_STARTUP = (1 << 10),
IRQCHIP_IMMUTABLE = (1 << 11),
+ IRQCHIP_MOVE_DEFERRED = (1 << 12),
};
#include <linux/irqdesc.h>
@@ -694,6 +686,9 @@ extern int irq_chip_request_resources_parent(struct irq_data *data);
extern void irq_chip_release_resources_parent(struct irq_data *data);
#endif
+/* Disable or mask interrupts during a kernel kexec */
+extern void machine_kexec_mask_interrupts(void);
+
/* Handling of unhandled and spurious interrupts: */
extern void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret);
diff --git a/include/linux/kref.h b/include/linux/kref.h
index d32e21a2538c..88e82ab1367c 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -46,18 +46,18 @@ static inline void kref_get(struct kref *kref)
}
/**
- * kref_put - decrement refcount for object.
- * @kref: object.
- * @release: pointer to the function that will clean up the object when the
+ * kref_put - Decrement refcount for object
+ * @kref: Object
+ * @release: Pointer to the function that will clean up the object when the
* last reference to the object is released.
- * This pointer is required, and it is not acceptable to pass kfree
- * in as this function.
*
- * Decrement the refcount, and if 0, call release().
- * Return 1 if the object was removed, otherwise return 0. Beware, if this
- * function returns 0, you still can not count on the kref from remaining in
- * memory. Only use the return value if you want to see if the kref is now
- * gone, not present.
+ * Decrement the refcount, and if 0, call @release. The caller may not
+ * pass NULL or kfree() as the release function.
+ *
+ * Return: 1 if this call removed the object, otherwise return 0. Beware,
+ * if this function returns 0, another caller may have removed the object
+ * by the time this function returns. The return value is only certain
+ * if you want to see if the object is definitely released.
*/
static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref))
{
@@ -68,17 +68,37 @@ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)
return 0;
}
+/**
+ * kref_put_mutex - Decrement refcount for object
+ * @kref: Object
+ * @release: Pointer to the function that will clean up the object when the
+ * last reference to the object is released.
+ * @mutex: Mutex which protects the release function.
+ *
+ * This variant of kref_lock() calls the @release function with the @mutex
+ * held. The @release function will release the mutex.
+ */
static inline int kref_put_mutex(struct kref *kref,
void (*release)(struct kref *kref),
- struct mutex *lock)
+ struct mutex *mutex)
{
- if (refcount_dec_and_mutex_lock(&kref->refcount, lock)) {
+ if (refcount_dec_and_mutex_lock(&kref->refcount, mutex)) {
release(kref);
return 1;
}
return 0;
}
+/**
+ * kref_put_lock - Decrement refcount for object
+ * @kref: Object
+ * @release: Pointer to the function that will clean up the object when the
+ * last reference to the object is released.
+ * @lock: Spinlock which protects the release function.
+ *
+ * This variant of kref_lock() calls the @release function with the @lock
+ * held. The @release function will release the lock.
+ */
static inline int kref_put_lock(struct kref *kref,
void (*release)(struct kref *kref),
spinlock_t *lock)
@@ -94,8 +114,6 @@ static inline int kref_put_lock(struct kref *kref,
* kref_get_unless_zero - Increment refcount for object unless it is zero.
* @kref: object.
*
- * Return non-zero if the increment succeeded. Otherwise return 0.
- *
* This function is intended to simplify locking around refcounting for
* objects that can be looked up from a lookup structure, and which are
* removed from that lookup structure in the object destructor.
@@ -105,6 +123,8 @@ static inline int kref_put_lock(struct kref *kref,
* With a lookup followed by a kref_get_unless_zero *with return value check*
* locking in the kref_put path can be deferred to the actual removal from
* the lookup structure and RCU lookups become trivial.
+ *
+ * Return: non-zero if the increment succeeded. Otherwise return 0.
*/
static inline int __must_check kref_get_unless_zero(struct kref *kref)
{
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index b11f53c1ba2e..8d27403888ce 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -85,6 +85,7 @@ kthread_run_on_cpu(int (*threadfn)(void *data), void *data,
void free_kthread_struct(struct task_struct *k);
void kthread_bind(struct task_struct *k, unsigned int cpu);
void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask);
+int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask);
int kthread_stop(struct task_struct *k);
int kthread_stop_put(struct task_struct *k);
bool kthread_should_stop(void);
@@ -186,13 +187,58 @@ extern void __kthread_init_worker(struct kthread_worker *worker,
int kthread_worker_fn(void *worker_ptr);
-__printf(2, 3)
-struct kthread_worker *
-kthread_create_worker(unsigned int flags, const char namefmt[], ...);
+__printf(3, 4)
+struct kthread_worker *kthread_create_worker_on_node(unsigned int flags,
+ int node,
+ const char namefmt[], ...);
+
+#define kthread_create_worker(flags, namefmt, ...) \
+ kthread_create_worker_on_node(flags, NUMA_NO_NODE, namefmt, ## __VA_ARGS__);
+
+/**
+ * kthread_run_worker - create and wake a kthread worker.
+ * @flags: flags modifying the default behavior of the worker
+ * @namefmt: printf-style name for the thread.
+ *
+ * Description: Convenient wrapper for kthread_create_worker() followed by
+ * wake_up_process(). Returns the kthread_worker or ERR_PTR(-ENOMEM).
+ */
+#define kthread_run_worker(flags, namefmt, ...) \
+({ \
+ struct kthread_worker *__kw \
+ = kthread_create_worker(flags, namefmt, ## __VA_ARGS__); \
+ if (!IS_ERR(__kw)) \
+ wake_up_process(__kw->task); \
+ __kw; \
+})
-__printf(3, 4) struct kthread_worker *
+struct kthread_worker *
kthread_create_worker_on_cpu(int cpu, unsigned int flags,
- const char namefmt[], ...);
+ const char namefmt[]);
+
+/**
+ * kthread_run_worker_on_cpu - create and wake a cpu bound kthread worker.
+ * @cpu: CPU number
+ * @flags: flags modifying the default behavior of the worker
+ * @namefmt: printf-style name for the thread. Format is restricted
+ * to "name.*%u". Code fills in cpu number.
+ *
+ * Description: Convenient wrapper for kthread_create_worker_on_cpu()
+ * followed by wake_up_process(). Returns the kthread_worker or
+ * ERR_PTR(-ENOMEM).
+ */
+static inline struct kthread_worker *
+kthread_run_worker_on_cpu(int cpu, unsigned int flags,
+ const char namefmt[])
+{
+ struct kthread_worker *kw;
+
+ kw = kthread_create_worker_on_cpu(cpu, flags, namefmt);
+ if (!IS_ERR(kw))
+ wake_up_process(kw->task);
+
+ return kw;
+}
bool kthread_queue_work(struct kthread_worker *worker,
struct kthread_work *work);
diff --git a/include/linux/libata.h b/include/linux/libata.h
index c1a85d46eba6..be5183d75736 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1467,13 +1467,13 @@ extern const struct attribute_group *ata_common_sdev_groups[];
#define ATA_SUBBASE_SHT(drv_name) \
__ATA_BASE_SHT(drv_name), \
.can_queue = ATA_DEF_QUEUE, \
- .tag_alloc_policy = BLK_TAG_ALLOC_RR, \
+ .tag_alloc_policy_rr = true, \
.device_configure = ata_scsi_device_configure
#define ATA_SUBBASE_SHT_QD(drv_name, drv_qd) \
__ATA_BASE_SHT(drv_name), \
.can_queue = drv_qd, \
- .tag_alloc_policy = BLK_TAG_ALLOC_RR, \
+ .tag_alloc_policy_rr = true, \
.device_configure = ata_scsi_device_configure
#define ATA_BASE_SHT(drv_name) \
diff --git a/include/linux/libgcc.h b/include/linux/libgcc.h
index fc388da6a027..0d68f9d6a6a7 100644
--- a/include/linux/libgcc.h
+++ b/include/linux/libgcc.h
@@ -34,4 +34,8 @@ long long notrace __lshrdi3(long long u, word_type b);
long long notrace __muldi3(long long u, long long v);
word_type notrace __ucmpdi2(unsigned long long a, unsigned long long b);
+#ifdef CONFIG_HAVE_ARCH_LIBGCC_H
+#include <asm/libgcc.h>
+#endif
+
#endif /* __ASM_LIBGCC_H */
diff --git a/include/linux/lockref.h b/include/linux/lockref.h
index c3a1f78bc884..c39f119659ba 100644
--- a/include/linux/lockref.h
+++ b/include/linux/lockref.h
@@ -34,14 +34,24 @@ struct lockref {
};
};
-extern void lockref_get(struct lockref *);
-extern int lockref_put_return(struct lockref *);
-extern int lockref_get_not_zero(struct lockref *);
-extern int lockref_put_not_zero(struct lockref *);
-extern int lockref_put_or_lock(struct lockref *);
-
-extern void lockref_mark_dead(struct lockref *);
-extern int lockref_get_not_dead(struct lockref *);
+/**
+ * lockref_init - Initialize a lockref
+ * @lockref: pointer to lockref structure
+ * @count: initial count
+ */
+static inline void lockref_init(struct lockref *lockref, unsigned int count)
+{
+ spin_lock_init(&lockref->lock);
+ lockref->count = count;
+}
+
+void lockref_get(struct lockref *lockref);
+int lockref_put_return(struct lockref *lockref);
+bool lockref_get_not_zero(struct lockref *lockref);
+bool lockref_put_or_lock(struct lockref *lockref);
+
+void lockref_mark_dead(struct lockref *lockref);
+bool lockref_get_not_dead(struct lockref *lockref);
/* Must be called under spinlock for reliable results */
static inline bool __lockref_is_dead(const struct lockref *l)
diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
index 97a8b21eb033..e13d2f947b51 100644
--- a/include/linux/lsm_audit.h
+++ b/include/linux/lsm_audit.h
@@ -77,6 +77,7 @@ struct common_audit_data {
#define LSM_AUDIT_DATA_LOCKDOWN 15
#define LSM_AUDIT_DATA_NOTIFICATION 16
#define LSM_AUDIT_DATA_ANONINODE 17
+#define LSM_AUDIT_DATA_NLMSGTYPE 18
union {
struct path path;
struct dentry *dentry;
@@ -98,6 +99,7 @@ struct common_audit_data {
struct lsm_ibendport_audit *ibendport;
int reason;
const char *anonclass;
+ u16 nlmsg_type;
} u;
/* this union contains LSM specific data */
union {
@@ -116,14 +118,28 @@ struct common_audit_data {
#define v4info fam.v4
#define v6info fam.v6
+#ifdef CONFIG_AUDIT
+
int ipv4_skb_to_auditdata(struct sk_buff *skb,
struct common_audit_data *ad, u8 *proto);
+#if IS_ENABLED(CONFIG_IPV6)
int ipv6_skb_to_auditdata(struct sk_buff *skb,
struct common_audit_data *ad, u8 *proto);
+#endif /* IS_ENABLED(CONFIG_IPV6) */
void common_lsm_audit(struct common_audit_data *a,
void (*pre_audit)(struct audit_buffer *, void *),
void (*post_audit)(struct audit_buffer *, void *));
+#else /* CONFIG_AUDIT */
+
+static inline void common_lsm_audit(struct common_audit_data *a,
+ void (*pre_audit)(struct audit_buffer *, void *),
+ void (*post_audit)(struct audit_buffer *, void *))
+{
+}
+
+#endif /* CONFIG_AUDIT */
+
#endif
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index eb2937599cb0..e2f1ce37c41e 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -83,7 +83,7 @@ LSM_HOOK(int, 0, move_mount, const struct path *from_path,
const struct path *to_path)
LSM_HOOK(int, -EOPNOTSUPP, dentry_init_security, struct dentry *dentry,
int mode, const struct qstr *name, const char **xattr_name,
- void **ctx, u32 *ctxlen)
+ struct lsm_context *cp)
LSM_HOOK(int, 0, dentry_create_files_as, struct dentry *dentry, int mode,
struct qstr *name, const struct cred *old, struct cred *new)
@@ -295,17 +295,16 @@ LSM_HOOK(int, -EINVAL, getprocattr, struct task_struct *p, const char *name,
char **value)
LSM_HOOK(int, -EINVAL, setprocattr, const char *name, void *value, size_t size)
LSM_HOOK(int, 0, ismaclabel, const char *name)
-LSM_HOOK(int, -EOPNOTSUPP, secid_to_secctx, u32 secid, char **secdata,
- u32 *seclen)
+LSM_HOOK(int, -EOPNOTSUPP, secid_to_secctx, u32 secid, struct lsm_context *cp)
LSM_HOOK(int, -EOPNOTSUPP, lsmprop_to_secctx, struct lsm_prop *prop,
- char **secdata, u32 *seclen)
+ struct lsm_context *cp)
LSM_HOOK(int, 0, secctx_to_secid, const char *secdata, u32 seclen, u32 *secid)
-LSM_HOOK(void, LSM_RET_VOID, release_secctx, char *secdata, u32 seclen)
+LSM_HOOK(void, LSM_RET_VOID, release_secctx, struct lsm_context *cp)
LSM_HOOK(void, LSM_RET_VOID, inode_invalidate_secctx, struct inode *inode)
LSM_HOOK(int, 0, inode_notifysecctx, struct inode *inode, void *ctx, u32 ctxlen)
LSM_HOOK(int, 0, inode_setsecctx, struct dentry *dentry, void *ctx, u32 ctxlen)
-LSM_HOOK(int, -EOPNOTSUPP, inode_getsecctx, struct inode *inode, void **ctx,
- u32 *ctxlen)
+LSM_HOOK(int, -EOPNOTSUPP, inode_getsecctx, struct inode *inode,
+ struct lsm_context *cp)
#if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE)
LSM_HOOK(int, 0, post_notification, const struct cred *w_cred,
diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
index e781727c8916..6325f6ffb895 100644
--- a/include/linux/min_heap.h
+++ b/include/linux/min_heap.h
@@ -15,8 +15,8 @@
*/
#define MIN_HEAP_PREALLOCATED(_type, _name, _nr) \
struct _name { \
- int nr; \
- int size; \
+ size_t nr; \
+ size_t size; \
_type *data; \
_type preallocated[_nr]; \
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b1c3db9cf355..f02925447e59 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -711,7 +711,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
* we don't rely on for anything - the mm_lock_seq read against which we
* need ordering is below.
*/
- if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq))
+ if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq.sequence))
return false;
if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0))
@@ -728,7 +728,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
* after it has been unlocked.
* This pairs with RELEASE semantics in vma_end_write_all().
*/
- if (unlikely(vma->vm_lock_seq == smp_load_acquire(&vma->vm_mm->mm_lock_seq))) {
+ if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&vma->vm_mm->mm_lock_seq))) {
up_read(&vma->vm_lock->lock);
return false;
}
@@ -743,7 +743,7 @@ static inline void vma_end_read(struct vm_area_struct *vma)
}
/* WARNING! Can only be used if mmap_lock is expected to be write-locked */
-static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
+static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq)
{
mmap_assert_write_locked(vma->vm_mm);
@@ -751,7 +751,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
* current task is holding mmap_write_lock, both vma->vm_lock_seq and
* mm->mm_lock_seq can't be concurrently modified.
*/
- *mm_lock_seq = vma->vm_mm->mm_lock_seq;
+ *mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence;
return (vma->vm_lock_seq == *mm_lock_seq);
}
@@ -762,7 +762,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
*/
static inline void vma_start_write(struct vm_area_struct *vma)
{
- int mm_lock_seq;
+ unsigned int mm_lock_seq;
if (__is_vma_write_locked(vma, &mm_lock_seq))
return;
@@ -780,7 +780,7 @@ static inline void vma_start_write(struct vm_area_struct *vma)
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
{
- int mm_lock_seq;
+ unsigned int mm_lock_seq;
VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
}
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 332cee285662..825c04b56403 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -727,7 +727,7 @@ struct vm_area_struct {
* counter reuse can only lead to occasional unnecessary use of the
* slowpath.
*/
- int vm_lock_seq;
+ unsigned int vm_lock_seq;
/* Unstable RCU readers are allowed to read this. */
struct vma_lock *vm_lock;
#endif
@@ -921,6 +921,9 @@ struct mm_struct {
* Roughly speaking, incrementing the sequence number is
* equivalent to releasing locks on VMAs; reading the sequence
* number can be part of taking a read lock on a VMA.
+ * Incremented every time mmap_lock is write-locked/unlocked.
+ * Initialized to 0, therefore odd values indicate mmap_lock
+ * is write-locked and even values that it's released.
*
* Can be modified under write mmap_lock using RELEASE
* semantics.
@@ -929,7 +932,7 @@ struct mm_struct {
* Can be read with ACQUIRE semantics if not holding write
* mmap_lock.
*/
- int mm_lock_seq;
+ seqcount_t mm_lock_seq;
#endif
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index de9dc20b01ba..45a21faa3ff6 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -71,39 +71,68 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm)
}
#ifdef CONFIG_PER_VMA_LOCK
-/*
- * Drop all currently-held per-VMA locks.
- * This is called from the mmap_lock implementation directly before releasing
- * a write-locked mmap_lock (or downgrading it to read-locked).
- * This should normally NOT be called manually from other places.
- * If you want to call this manually anyway, keep in mind that this will release
- * *all* VMA write locks, including ones from further up the stack.
- */
-static inline void vma_end_write_all(struct mm_struct *mm)
+
+static inline void mm_lock_seqcount_init(struct mm_struct *mm)
+{
+ seqcount_init(&mm->mm_lock_seq);
+}
+
+static inline void mm_lock_seqcount_begin(struct mm_struct *mm)
+{
+ do_raw_write_seqcount_begin(&mm->mm_lock_seq);
+}
+
+static inline void mm_lock_seqcount_end(struct mm_struct *mm)
+{
+ ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq);
+ do_raw_write_seqcount_end(&mm->mm_lock_seq);
+}
+
+static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
{
- mmap_assert_write_locked(mm);
/*
- * Nobody can concurrently modify mm->mm_lock_seq due to exclusive
- * mmap_lock being held.
- * We need RELEASE semantics here to ensure that preceding stores into
- * the VMA take effect before we unlock it with this store.
- * Pairs with ACQUIRE semantics in vma_start_read().
+ * Since mmap_lock is a sleeping lock, and waiting for it to become
+ * unlocked is more or less equivalent with taking it ourselves, don't
+ * bother with the speculative path if mmap_lock is already write-locked
+ * and take the slow path, which takes the lock.
*/
- smp_store_release(&mm->mm_lock_seq, mm->mm_lock_seq + 1);
+ return raw_seqcount_try_begin(&mm->mm_lock_seq, *seq);
+}
+
+static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
+{
+ return read_seqcount_retry(&mm->mm_lock_seq, seq);
}
-#else
-static inline void vma_end_write_all(struct mm_struct *mm) {}
-#endif
+
+#else /* CONFIG_PER_VMA_LOCK */
+
+static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
+static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
+static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}
+
+static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
+{
+ return false;
+}
+
+static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
+{
+ return true;
+}
+
+#endif /* CONFIG_PER_VMA_LOCK */
static inline void mmap_init_lock(struct mm_struct *mm)
{
init_rwsem(&mm->mmap_lock);
+ mm_lock_seqcount_init(mm);
}
static inline void mmap_write_lock(struct mm_struct *mm)
{
__mmap_lock_trace_start_locking(mm, true);
down_write(&mm->mmap_lock);
+ mm_lock_seqcount_begin(mm);
__mmap_lock_trace_acquire_returned(mm, true, true);
}
@@ -111,6 +140,7 @@ static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
{
__mmap_lock_trace_start_locking(mm, true);
down_write_nested(&mm->mmap_lock, subclass);
+ mm_lock_seqcount_begin(mm);
__mmap_lock_trace_acquire_returned(mm, true, true);
}
@@ -120,10 +150,26 @@ static inline int mmap_write_lock_killable(struct mm_struct *mm)
__mmap_lock_trace_start_locking(mm, true);
ret = down_write_killable(&mm->mmap_lock);
+ if (!ret)
+ mm_lock_seqcount_begin(mm);
__mmap_lock_trace_acquire_returned(mm, true, ret == 0);
return ret;
}
+/*
+ * Drop all currently-held per-VMA locks.
+ * This is called from the mmap_lock implementation directly before releasing
+ * a write-locked mmap_lock (or downgrading it to read-locked).
+ * This should normally NOT be called manually from other places.
+ * If you want to call this manually anyway, keep in mind that this will release
+ * *all* VMA write locks, including ones from further up the stack.
+ */
+static inline void vma_end_write_all(struct mm_struct *mm)
+{
+ mmap_assert_write_locked(mm);
+ mm_lock_seqcount_end(mm);
+}
+
static inline void mmap_write_unlock(struct mm_struct *mm)
{
__mmap_lock_trace_released(mm, true);
diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h
index bbaec80c78c5..ac01dc4eb2ce 100644
--- a/include/linux/mmu_context.h
+++ b/include/linux/mmu_context.h
@@ -24,6 +24,7 @@ static inline void leave_mm(void) { }
#ifndef task_cpu_possible_mask
# define task_cpu_possible_mask(p) cpu_possible_mask
# define task_cpu_possible(cpu, p) true
+# define task_cpu_fallback_mask(p) housekeeping_cpumask(HK_TYPE_TICK)
#else
# define task_cpu_possible(cpu, p) cpumask_test_cpu((cpu), task_cpu_possible_mask(p))
#endif
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 4338b1b4ac44..d67614f7b7f1 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -700,6 +700,8 @@ struct x86_cpu_id {
#define X86_FAMILY_ANY 0
#define X86_MODEL_ANY 0
#define X86_STEPPING_ANY 0
+#define X86_STEP_MIN 0
+#define X86_STEP_MAX 0xf
#define X86_FEATURE_ANY 0 /* Same as FPU, you can't test for that */
/*
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 04213d8ef837..dcc17ce8a959 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -75,7 +75,7 @@ struct vfsmount {
static inline struct mnt_idmap *mnt_idmap(const struct vfsmount *mnt)
{
/* Pairs with smp_store_release() in do_idmap_mount(). */
- return smp_load_acquire(&mnt->mnt_idmap);
+ return READ_ONCE(mnt->mnt_idmap);
}
extern int mnt_want_write(struct vfsmount *mnt);
@@ -113,7 +113,7 @@ extern struct vfsmount *kern_mount(struct file_system_type *);
extern void kern_unmount(struct vfsmount *mnt);
extern int may_umount_tree(struct vfsmount *);
extern int may_umount(struct vfsmount *);
-extern long do_mount(const char *, const char __user *,
+int do_mount(const char *, const char __user *,
const char *, unsigned long, void *);
extern struct vfsmount *collect_mounts(const struct path *);
extern void drop_collected_mounts(struct vfsmount *);
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index ecdd5ced16a8..071d05d81d38 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -18,9 +18,11 @@
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/uio.h>
+#include <linux/rolling_buffer.h>
enum netfs_sreq_ref_trace;
typedef struct mempool_s mempool_t;
+struct folio_queue;
/**
* folio_start_private_2 - Start an fscache write on a folio. [DEPRECATED]
@@ -71,6 +73,7 @@ struct netfs_inode {
#define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */
#define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */
#define NETFS_ICTX_MODIFIED_ATTR 3 /* Indicate change in mtime/ctime */
+#define NETFS_ICTX_SINGLE_NO_UPLOAD 4 /* Monolithic payload, cache but no upload */
};
/*
@@ -178,9 +181,6 @@ struct netfs_io_subrequest {
unsigned long long start; /* Where to start the I/O */
size_t len; /* Size of the I/O */
size_t transferred; /* Amount of data transferred */
- size_t consumed; /* Amount of read data consumed */
- size_t prev_donated; /* Amount of data donated from previous subreq */
- size_t next_donated; /* Amount of data donated from next subreq */
refcount_t ref;
short error; /* 0 or error that occurred */
unsigned short debug_index; /* Index in list (for debugging output) */
@@ -188,9 +188,6 @@ struct netfs_io_subrequest {
u8 retry_count; /* The number of retries (0 on initial pass) */
enum netfs_io_source source; /* Where to read from/write to */
unsigned char stream_nr; /* I/O stream this belongs to */
- unsigned char curr_folioq_slot; /* Folio currently being read */
- unsigned char curr_folio_order; /* Order of folio */
- struct folio_queue *curr_folioq; /* Queue segment in which current folio resides */
unsigned long flags;
#define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */
#define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */
@@ -208,9 +205,11 @@ enum netfs_io_origin {
NETFS_READAHEAD, /* This read was triggered by readahead */
NETFS_READPAGE, /* This read is a synchronous read */
NETFS_READ_GAPS, /* This read is a synchronous read to fill gaps */
+ NETFS_READ_SINGLE, /* This read should be treated as a single object */
NETFS_READ_FOR_WRITE, /* This read is to prepare a write */
NETFS_DIO_READ, /* This is a direct I/O read */
NETFS_WRITEBACK, /* This write was triggered by writepages */
+ NETFS_WRITEBACK_SINGLE, /* This monolithic write was triggered by writepages */
NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */
NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */
NETFS_DIO_WRITE, /* This is a direct I/O write */
@@ -231,16 +230,16 @@ struct netfs_io_request {
struct address_space *mapping; /* The mapping being accessed */
struct kiocb *iocb; /* AIO completion vector */
struct netfs_cache_resources cache_resources;
+ struct netfs_io_request *copy_to_cache; /* Request to write just-read data to the cache */
struct readahead_control *ractl; /* Readahead descriptor */
struct list_head proc_link; /* Link in netfs_iorequests */
- struct list_head subrequests; /* Contributory I/O operations */
struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */
#define NR_IO_STREAMS 2 //wreq->nr_io_streams
struct netfs_group *group; /* Writeback group being written back */
- struct folio_queue *buffer; /* Head of I/O buffer */
- struct folio_queue *buffer_tail; /* Tail of I/O buffer */
- struct iov_iter iter; /* Unencrypted-side iterator */
- struct iov_iter io_iter; /* I/O (Encrypted-side) iterator */
+ struct rolling_buffer buffer; /* Unencrypted buffer */
+#define NETFS_ROLLBUF_PUT_MARK ROLLBUF_MARK_1
+#define NETFS_ROLLBUF_PAGECACHE_MARK ROLLBUF_MARK_2
+ wait_queue_head_t waitq; /* Processor waiter */
void *netfs_priv; /* Private data for the netfs */
void *netfs_priv2; /* Private data for the netfs */
struct bio_vec *direct_bv; /* DIO buffer list (when handling iovec-iter) */
@@ -251,28 +250,28 @@ struct netfs_io_request {
atomic_t subreq_counter; /* Next subreq->debug_index */
unsigned int nr_group_rel; /* Number of refs to release on ->group */
spinlock_t lock; /* Lock for queuing subreqs */
- atomic_t nr_outstanding; /* Number of ops in progress */
unsigned long long submitted; /* Amount submitted for I/O so far */
unsigned long long len; /* Length of the request */
size_t transferred; /* Amount to be indicated as transferred */
long error; /* 0 or error that occurred */
enum netfs_io_origin origin; /* Origin of the request */
bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */
- u8 buffer_head_slot; /* First slot in ->buffer */
- u8 buffer_tail_slot; /* Next slot in ->buffer_tail */
unsigned long long i_size; /* Size of the file */
unsigned long long start; /* Start position */
atomic64_t issued_to; /* Write issuer folio cursor */
unsigned long long collected_to; /* Point we've collected to */
unsigned long long cleaned_to; /* Position we've cleaned folios to */
+ unsigned long long abandon_to; /* Position to abandon folios to */
pgoff_t no_unlock_folio; /* Don't unlock this folio after read */
- size_t prev_donated; /* Fallback for subreq->prev_donated */
+ unsigned char front_folio_order; /* Order (size) of front folio */
refcount_t ref;
unsigned long flags;
+#define NETFS_RREQ_OFFLOAD_COLLECTION 0 /* Offload collection to workqueue */
#define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */
#define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */
#define NETFS_RREQ_FAILED 4 /* The request failed */
#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */
+#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 6 /* Copy current folio to cache from read */
#define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */
#define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */
#define NETFS_RREQ_BLOCKED 10 /* We blocked */
@@ -409,6 +408,13 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
struct netfs_group *netfs_group);
ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from);
+/* Single, monolithic object read/write API. */
+void netfs_single_mark_inode_dirty(struct inode *inode);
+ssize_t netfs_read_single(struct inode *inode, struct file *file, struct iov_iter *iter);
+int netfs_writeback_single(struct address_space *mapping,
+ struct writeback_control *wbc,
+ struct iov_iter *iter);
+
/* Address operations API */
struct readahead_control;
void netfs_readahead(struct readahead_control *);
@@ -428,10 +434,8 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp);
vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group);
/* (Sub)request management API. */
-void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq,
- bool was_async);
-void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq,
- int error, bool was_async);
+void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq);
+void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq);
void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
enum netfs_sreq_ref_trace what);
void netfs_put_subrequest(struct netfs_io_subrequest *subreq,
@@ -453,6 +457,18 @@ void netfs_end_io_write(struct inode *inode);
int netfs_start_io_direct(struct inode *inode);
void netfs_end_io_direct(struct inode *inode);
+/* Miscellaneous APIs. */
+struct folio_queue *netfs_folioq_alloc(unsigned int rreq_id, gfp_t gfp,
+ unsigned int trace /*enum netfs_folioq_trace*/);
+void netfs_folioq_free(struct folio_queue *folioq,
+ unsigned int trace /*enum netfs_trace_folioq*/);
+
+/* Buffer wrangling helpers API. */
+int netfs_alloc_folioq_buffer(struct address_space *mapping,
+ struct folio_queue **_buffer,
+ size_t *_cur_size, ssize_t size, gfp_t gfp);
+void netfs_free_folioq_buffer(struct folio_queue *fq);
+
/**
* netfs_inode - Get the netfs inode context from the inode
* @inode: The inode to query
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 13377dde4527..fe3b60818fdc 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -64,6 +64,7 @@ enum {
/* Transport Type codes for Discovery Log Page entry TRTYPE field */
enum {
+ NVMF_TRTYPE_PCI = 0, /* PCI */
NVMF_TRTYPE_RDMA = 1, /* RDMA */
NVMF_TRTYPE_FC = 2, /* Fibre Channel */
NVMF_TRTYPE_TCP = 3, /* TCP/IP */
@@ -275,6 +276,7 @@ enum nvme_ctrl_attr {
NVME_CTRL_ATTR_HID_128_BIT = (1 << 0),
NVME_CTRL_ATTR_TBKAS = (1 << 6),
NVME_CTRL_ATTR_ELBAS = (1 << 15),
+ NVME_CTRL_ATTR_RHII = (1 << 18),
};
struct nvme_id_ctrl {
@@ -1896,6 +1898,46 @@ static inline bool nvme_is_fabrics(const struct nvme_command *cmd)
return cmd->common.opcode == nvme_fabrics_command;
}
+#ifdef CONFIG_NVME_VERBOSE_ERRORS
+const char *nvme_get_error_status_str(u16 status);
+const char *nvme_get_opcode_str(u8 opcode);
+const char *nvme_get_admin_opcode_str(u8 opcode);
+const char *nvme_get_fabrics_opcode_str(u8 opcode);
+#else /* CONFIG_NVME_VERBOSE_ERRORS */
+static inline const char *nvme_get_error_status_str(u16 status)
+{
+ return "I/O Error";
+}
+static inline const char *nvme_get_opcode_str(u8 opcode)
+{
+ return "I/O Cmd";
+}
+static inline const char *nvme_get_admin_opcode_str(u8 opcode)
+{
+ return "Admin Cmd";
+}
+
+static inline const char *nvme_get_fabrics_opcode_str(u8 opcode)
+{
+ return "Fabrics Cmd";
+}
+#endif /* CONFIG_NVME_VERBOSE_ERRORS */
+
+static inline const char *nvme_opcode_str(int qid, u8 opcode)
+{
+ return qid ? nvme_get_opcode_str(opcode) :
+ nvme_get_admin_opcode_str(opcode);
+}
+
+static inline const char *nvme_fabrics_opcode_str(
+ int qid, const struct nvme_command *cmd)
+{
+ if (nvme_is_fabrics(cmd))
+ return nvme_get_fabrics_opcode_str(cmd->fabrics.fctype);
+
+ return nvme_opcode_str(qid, cmd->common.opcode);
+}
+
struct nvme_error_slot {
__le64 error_count;
__le16 sqid;
diff --git a/include/linux/objtool.h b/include/linux/objtool.h
index b3b8d3dab52d..c722a921165b 100644
--- a/include/linux/objtool.h
+++ b/include/linux/objtool.h
@@ -45,29 +45,25 @@
#define STACK_FRAME_NON_STANDARD_FP(func)
#endif
-#define ANNOTATE_NOENDBR \
- "986: \n\t" \
- ".pushsection .discard.noendbr\n\t" \
- ".long 986b\n\t" \
- ".popsection\n\t"
-
#define ASM_REACHABLE \
"998:\n\t" \
".pushsection .discard.reachable\n\t" \
".long 998b\n\t" \
".popsection\n\t"
-#else /* __ASSEMBLY__ */
+#define __ASM_BREF(label) label ## b
-/*
- * This macro indicates that the following intra-function call is valid.
- * Any non-annotated intra-function call will cause objtool to issue a warning.
- */
-#define ANNOTATE_INTRA_FUNCTION_CALL \
- 999: \
- .pushsection .discard.intra_function_calls; \
- .long 999b; \
- .popsection;
+#define __ASM_ANNOTATE(label, type) \
+ ".pushsection .discard.annotate_insn,\"M\",@progbits,8\n\t" \
+ ".long " __stringify(label) " - .\n\t" \
+ ".long " __stringify(type) "\n\t" \
+ ".popsection\n\t"
+
+#define ASM_ANNOTATE(type) \
+ "911:\n\t" \
+ __ASM_ANNOTATE(911b, type)
+
+#else /* __ASSEMBLY__ */
/*
* In asm, there are two kinds of code: normal C-type callable functions and
@@ -115,34 +111,11 @@
#endif
.endm
-.macro ANNOTATE_NOENDBR
+.macro ANNOTATE type:req
.Lhere_\@:
- .pushsection .discard.noendbr
- .long .Lhere_\@
- .popsection
-.endm
-
-/*
- * Use objtool to validate the entry requirement that all code paths do
- * VALIDATE_UNRET_END before RET.
- *
- * NOTE: The macro must be used at the beginning of a global symbol, otherwise
- * it will be ignored.
- */
-.macro VALIDATE_UNRET_BEGIN
-#if defined(CONFIG_NOINSTR_VALIDATION) && \
- (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO))
-.Lhere_\@:
- .pushsection .discard.validate_unret
+ .pushsection .discard.annotate_insn,"M",@progbits,8
.long .Lhere_\@ - .
- .popsection
-#endif
-.endm
-
-.macro REACHABLE
-.Lhere_\@:
- .pushsection .discard.reachable
- .long .Lhere_\@
+ .long \type
.popsection
.endm
@@ -155,20 +128,77 @@
#define UNWIND_HINT(type, sp_reg, sp_offset, signal) "\n\t"
#define STACK_FRAME_NON_STANDARD(func)
#define STACK_FRAME_NON_STANDARD_FP(func)
-#define ANNOTATE_NOENDBR
-#define ASM_REACHABLE
+#define __ASM_ANNOTATE(label, type)
+#define ASM_ANNOTATE(type)
#else
-#define ANNOTATE_INTRA_FUNCTION_CALL
.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0
.endm
.macro STACK_FRAME_NON_STANDARD func:req
.endm
-.macro ANNOTATE_NOENDBR
-.endm
-.macro REACHABLE
+.macro ANNOTATE type:req
.endm
#endif
#endif /* CONFIG_OBJTOOL */
+#ifndef __ASSEMBLY__
+/*
+ * Annotate away the various 'relocation to !ENDBR` complaints; knowing that
+ * these relocations will never be used for indirect calls.
+ */
+#define ANNOTATE_NOENDBR ASM_ANNOTATE(ANNOTYPE_NOENDBR)
+/*
+ * This should be used immediately before an indirect jump/call. It tells
+ * objtool the subsequent indirect jump/call is vouched safe for retpoline
+ * builds.
+ */
+#define ANNOTATE_RETPOLINE_SAFE ASM_ANNOTATE(ANNOTYPE_RETPOLINE_SAFE)
+/*
+ * See linux/instrumentation.h
+ */
+#define ANNOTATE_INSTR_BEGIN(label) __ASM_ANNOTATE(label, ANNOTYPE_INSTR_BEGIN)
+#define ANNOTATE_INSTR_END(label) __ASM_ANNOTATE(label, ANNOTYPE_INSTR_END)
+/*
+ * objtool annotation to ignore the alternatives and only consider the original
+ * instruction(s).
+ */
+#define ANNOTATE_IGNORE_ALTERNATIVE ASM_ANNOTATE(ANNOTYPE_IGNORE_ALTS)
+/*
+ * This macro indicates that the following intra-function call is valid.
+ * Any non-annotated intra-function call will cause objtool to issue a warning.
+ */
+#define ANNOTATE_INTRA_FUNCTION_CALL ASM_ANNOTATE(ANNOTYPE_INTRA_FUNCTION_CALL)
+/*
+ * Use objtool to validate the entry requirement that all code paths do
+ * VALIDATE_UNRET_END before RET.
+ *
+ * NOTE: The macro must be used at the beginning of a global symbol, otherwise
+ * it will be ignored.
+ */
+#define ANNOTATE_UNRET_BEGIN ASM_ANNOTATE(ANNOTYPE_UNRET_BEGIN)
+/*
+ * This should be used to refer to an instruction that is considered
+ * terminating, like a noreturn CALL or UD2 when we know they are not -- eg
+ * WARN using UD2.
+ */
+#define ANNOTATE_REACHABLE(label) __ASM_ANNOTATE(label, ANNOTYPE_REACHABLE)
+
+#else
+#define ANNOTATE_NOENDBR ANNOTATE type=ANNOTYPE_NOENDBR
+#define ANNOTATE_RETPOLINE_SAFE ANNOTATE type=ANNOTYPE_RETPOLINE_SAFE
+/* ANNOTATE_INSTR_BEGIN ANNOTATE type=ANNOTYPE_INSTR_BEGIN */
+/* ANNOTATE_INSTR_END ANNOTATE type=ANNOTYPE_INSTR_END */
+#define ANNOTATE_IGNORE_ALTERNATIVE ANNOTATE type=ANNOTYPE_IGNORE_ALTS
+#define ANNOTATE_INTRA_FUNCTION_CALL ANNOTATE type=ANNOTYPE_INTRA_FUNCTION_CALL
+#define ANNOTATE_UNRET_BEGIN ANNOTATE type=ANNOTYPE_UNRET_BEGIN
+#define ANNOTATE_REACHABLE ANNOTATE type=ANNOTYPE_REACHABLE
+#endif
+
+#if defined(CONFIG_NOINSTR_VALIDATION) && \
+ (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO))
+#define VALIDATE_UNRET_BEGIN ANNOTATE_UNRET_BEGIN
+#else
+#define VALIDATE_UNRET_BEGIN
+#endif
+
#endif /* _LINUX_OBJTOOL_H */
diff --git a/include/linux/objtool_types.h b/include/linux/objtool_types.h
index 453a4f4ef39d..df5d9fa84dba 100644
--- a/include/linux/objtool_types.h
+++ b/include/linux/objtool_types.h
@@ -54,4 +54,16 @@ struct unwind_hint {
#define UNWIND_HINT_TYPE_SAVE 6
#define UNWIND_HINT_TYPE_RESTORE 7
+/*
+ * Annotate types
+ */
+#define ANNOTYPE_NOENDBR 1
+#define ANNOTYPE_RETPOLINE_SAFE 2
+#define ANNOTYPE_INSTR_BEGIN 3
+#define ANNOTYPE_INSTR_END 4
+#define ANNOTYPE_UNRET_BEGIN 5
+#define ANNOTYPE_IGNORE_ALTS 6
+#define ANNOTYPE_INTRA_FUNCTION_CALL 7
+#define ANNOTYPE_REACHABLE 8
+
#endif /* _LINUX_OBJTOOL_TYPES_H */
diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
index 79dbd8bc35a7..46406f3fe34d 100644
--- a/include/linux/page_counter.h
+++ b/include/linux/page_counter.h
@@ -96,7 +96,7 @@ static inline void page_counter_reset_watermark(struct page_counter *counter)
counter->watermark = usage;
}
-#ifdef CONFIG_MEMCG
+#if IS_ENABLED(CONFIG_MEMCG) || IS_ENABLED(CONFIG_CGROUP_DMEM)
void page_counter_calculate_protection(struct page_counter *root,
struct page_counter *counter,
bool recursive_protection);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index cb99ec8c9e96..8333f132f4a9 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1279,6 +1279,11 @@ static inline void perf_sample_save_callchain(struct perf_sample_data *data,
{
int size = 1;
+ if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
+ return;
+ if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_CALLCHAIN))
+ return;
+
data->callchain = perf_callchain(event, regs);
size += data->callchain->nr;
@@ -1287,12 +1292,18 @@ static inline void perf_sample_save_callchain(struct perf_sample_data *data,
}
static inline void perf_sample_save_raw_data(struct perf_sample_data *data,
+ struct perf_event *event,
struct perf_raw_record *raw)
{
struct perf_raw_frag *frag = &raw->frag;
u32 sum = 0;
int size;
+ if (!(event->attr.sample_type & PERF_SAMPLE_RAW))
+ return;
+ if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_RAW))
+ return;
+
do {
sum += frag->size;
if (perf_raw_frag_last(frag))
@@ -1309,6 +1320,11 @@ static inline void perf_sample_save_raw_data(struct perf_sample_data *data,
data->sample_flags |= PERF_SAMPLE_RAW;
}
+static inline bool has_branch_stack(struct perf_event *event)
+{
+ return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
+}
+
static inline void perf_sample_save_brstack(struct perf_sample_data *data,
struct perf_event *event,
struct perf_branch_stack *brs,
@@ -1316,6 +1332,11 @@ static inline void perf_sample_save_brstack(struct perf_sample_data *data,
{
int size = sizeof(u64); /* nr */
+ if (!has_branch_stack(event))
+ return;
+ if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_BRANCH_STACK))
+ return;
+
if (branch_sample_hw_index(event))
size += sizeof(u64);
size += brs->nr * sizeof(struct perf_branch_entry);
@@ -1669,6 +1690,8 @@ static inline int perf_allow_tracepoint(struct perf_event_attr *attr)
return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT);
}
+extern int perf_exclude_event(struct perf_event *event, struct pt_regs *regs);
+
extern void perf_event_init(void);
extern void perf_tp_event(u16 event_type, u64 count, void *record,
int entry_size, struct pt_regs *regs,
@@ -1705,11 +1728,6 @@ static inline unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs)
# define perf_arch_guest_misc_flags(regs) perf_arch_guest_misc_flags(regs)
#endif
-static inline bool has_branch_stack(struct perf_event *event)
-{
- return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
-}
-
static inline bool needs_branch_stack(struct perf_event *event)
{
return event->attr.branch_sample_type != 0;
@@ -1879,6 +1897,10 @@ static inline u64 perf_event_pause(struct perf_event *event, bool reset)
{
return 0;
}
+static inline int perf_exclude_event(struct perf_event *event, struct pt_regs *regs)
+{
+ return 0;
+}
#endif
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
diff --git a/include/linux/pid.h b/include/linux/pid.h
index a3aad9b4074c..98837a1ff0f3 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -59,6 +59,7 @@ struct pid
spinlock_t lock;
struct dentry *stashed;
u64 ino;
+ struct rb_node pidfs_node;
/* lists of tasks that use this pid */
struct hlist_head tasks[PIDTYPE_MAX];
struct hlist_head inodes;
@@ -68,6 +69,7 @@ struct pid
struct upid numbers[];
};
+extern seqcount_spinlock_t pidmap_lock_seq;
extern struct pid init_struct_pid;
struct file;
@@ -106,9 +108,6 @@ extern void exchange_tids(struct task_struct *task, struct task_struct *old);
extern void transfer_pid(struct task_struct *old, struct task_struct *new,
enum pid_type);
-extern int pid_max;
-extern int pid_max_min, pid_max_max;
-
/*
* look up a PID in the hash table. Must be called with the tasklist_lock
* or rcu_read_lock() held.
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index f9f9931e02d6..7c67a5811199 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -30,6 +30,7 @@ struct pid_namespace {
struct task_struct *child_reaper;
struct kmem_cache *pid_cachep;
unsigned int level;
+ int pid_max;
struct pid_namespace *parent;
#ifdef CONFIG_BSD_PROCESS_ACCT
struct fs_pin *bacct;
@@ -38,9 +39,14 @@ struct pid_namespace {
struct ucounts *ucounts;
int reboot; /* group exit code if this pidns was rebooted */
struct ns_common ns;
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
+ struct work_struct work;
+#ifdef CONFIG_SYSCTL
+ struct ctl_table_set set;
+ struct ctl_table_header *sysctls;
+#if defined(CONFIG_MEMFD_CREATE)
int memfd_noexec_scope;
#endif
+#endif
} __randomize_layout;
extern struct pid_namespace init_pid_ns;
@@ -117,6 +123,8 @@ static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk);
void pidhash_init(void);
void pid_idr_init(void);
+int register_pidns_sysctls(struct pid_namespace *pidns);
+void unregister_pidns_sysctls(struct pid_namespace *pidns);
static inline bool task_is_in_init_pid_ns(struct task_struct *tsk)
{
diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h
index 75bdf9807802..7c830d0dec9a 100644
--- a/include/linux/pidfs.h
+++ b/include/linux/pidfs.h
@@ -4,5 +4,8 @@
struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags);
void __init pidfs_init(void);
+void pidfs_add_pid(struct pid *pid);
+void pidfs_remove_pid(struct pid *pid);
+extern const struct dentry_operations pidfs_dentry_operations;
#endif /* _LINUX_PID_FS_H */
diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h
index b34ed0cc1f8d..3ec24f445c29 100644
--- a/include/linux/platform_data/cros_ec_proto.h
+++ b/include/linux/platform_data/cros_ec_proto.h
@@ -42,6 +42,11 @@
#define EC_MAX_RESPONSE_OVERHEAD 32
/*
+ * ACPI notify value for MKBP host event.
+ */
+#define ACPI_NOTIFY_CROS_EC_MKBP 0x80
+
+/*
* EC panic is not covered by the standard (0-F) ACPI notify values.
* Arbitrarily choosing B0 to notify ec panic, which is in the 84-BF
* device specific ACPI notify range.
@@ -246,6 +251,8 @@ int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev,
int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev,
struct cros_ec_command *msg);
+int cros_ec_rwsig_continue(struct cros_ec_device *ec_dev);
+
int cros_ec_query_all(struct cros_ec_device *ec_dev);
int cros_ec_get_next_event(struct cros_ec_device *ec_dev,
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 568183e3e641..414146abfe81 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -102,6 +102,8 @@ struct dev_pm_opp_data {
struct opp_table *dev_pm_opp_get_opp_table(struct device *dev);
void dev_pm_opp_put_opp_table(struct opp_table *opp_table);
+unsigned long dev_pm_opp_get_bw(struct dev_pm_opp *opp, bool peak, int index);
+
unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp);
int dev_pm_opp_get_supplies(struct dev_pm_opp *opp, struct dev_pm_opp_supply *supplies);
@@ -205,6 +207,11 @@ static inline struct opp_table *dev_pm_opp_get_opp_table_indexed(struct device *
static inline void dev_pm_opp_put_opp_table(struct opp_table *opp_table) {}
+static inline unsigned long dev_pm_opp_get_bw(struct dev_pm_opp *opp, bool peak, int index)
+{
+ return 0;
+}
+
static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
{
return 0;
diff --git a/include/linux/pruss_driver.h b/include/linux/pruss_driver.h
index c9a31c567e85..2e18fef1a2e1 100644
--- a/include/linux/pruss_driver.h
+++ b/include/linux/pruss_driver.h
@@ -144,32 +144,32 @@ static inline int pruss_release_mem_region(struct pruss *pruss,
static inline int pruss_cfg_get_gpmux(struct pruss *pruss,
enum pruss_pru_id pru_id, u8 *mux)
{
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
}
static inline int pruss_cfg_set_gpmux(struct pruss *pruss,
enum pruss_pru_id pru_id, u8 mux)
{
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
}
static inline int pruss_cfg_gpimode(struct pruss *pruss,
enum pruss_pru_id pru_id,
enum pruss_gpi_mode mode)
{
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
}
static inline int pruss_cfg_miirt_enable(struct pruss *pruss, bool enable)
{
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
}
static inline int pruss_cfg_xfr_enable(struct pruss *pruss,
enum pru_type pru_type,
- bool enable);
+ bool enable)
{
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
}
#endif /* CONFIG_TI_PRUSS */
diff --git a/include/linux/pseudo_fs.h b/include/linux/pseudo_fs.h
index 730f77381d55..2503f7625d65 100644
--- a/include/linux/pseudo_fs.h
+++ b/include/linux/pseudo_fs.h
@@ -5,6 +5,7 @@
struct pseudo_fs_context {
const struct super_operations *ops;
+ const struct export_operations *eops;
const struct xattr_handler * const *xattr;
const struct dentry_operations *dops;
unsigned long magic;
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 7c173aa64e1e..8d2ba3749866 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -211,6 +211,43 @@ rb_add(struct rb_node *node, struct rb_root *tree,
}
/**
+ * rb_find_add_cached() - find equivalent @node in @tree, or add @node
+ * @node: node to look-for / insert
+ * @tree: tree to search / modify
+ * @cmp: operator defining the node order
+ *
+ * Returns the rb_node matching @node, or NULL when no match is found and @node
+ * is inserted.
+ */
+static __always_inline struct rb_node *
+rb_find_add_cached(struct rb_node *node, struct rb_root_cached *tree,
+ int (*cmp)(const struct rb_node *new, const struct rb_node *exist))
+{
+ bool leftmost = true;
+ struct rb_node **link = &tree->rb_root.rb_node;
+ struct rb_node *parent = NULL;
+ int c;
+
+ while (*link) {
+ parent = *link;
+ c = cmp(node, parent);
+
+ if (c < 0) {
+ link = &parent->rb_left;
+ } else if (c > 0) {
+ link = &parent->rb_right;
+ leftmost = false;
+ } else {
+ return parent;
+ }
+ }
+
+ rb_link_node(node, parent, link);
+ rb_insert_color_cached(node, tree, leftmost);
+ return NULL;
+}
+
+/**
* rb_find_add() - find equivalent @node in @tree, or add @node
* @node: node to look-for / insert
* @tree: tree to search / modify
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 14dfa6008467..1b11926ddd47 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -30,6 +30,17 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list)
* way, we must not access it directly
*/
#define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next)))
+/*
+ * Return the ->prev pointer of a list_head in an rcu safe way. Don't
+ * access it directly.
+ *
+ * Any list traversed with list_bidir_prev_rcu() must never use
+ * list_del_rcu(). Doing so will poison the ->prev pointer that
+ * list_bidir_prev_rcu() relies on, which will result in segfaults.
+ * To prevent these segfaults, use list_bidir_del_rcu() instead
+ * of list_del_rcu().
+ */
+#define list_bidir_prev_rcu(list) (*((struct list_head __rcu **)(&(list)->prev)))
/**
* list_tail_rcu - returns the prev pointer of the head of the list
@@ -159,6 +170,39 @@ static inline void list_del_rcu(struct list_head *entry)
}
/**
+ * list_bidir_del_rcu - deletes entry from list without re-initialization
+ * @entry: the element to delete from the list.
+ *
+ * In contrast to list_del_rcu() doesn't poison the prev pointer thus
+ * allowing backwards traversal via list_bidir_prev_rcu().
+ *
+ * Note: list_empty() on entry does not return true after this because
+ * the entry is in a special undefined state that permits RCU-based
+ * lockfree reverse traversal. In particular this means that we can not
+ * poison the forward and backwards pointers that may still be used for
+ * walking the list.
+ *
+ * The caller must take whatever precautions are necessary (such as
+ * holding appropriate locks) to avoid racing with another list-mutation
+ * primitive, such as list_bidir_del_rcu() or list_add_rcu(), running on
+ * this same list. However, it is perfectly legal to run concurrently
+ * with the _rcu list-traversal primitives, such as
+ * list_for_each_entry_rcu().
+ *
+ * Note that list_del_rcu() and list_bidir_del_rcu() must not be used on
+ * the same list.
+ *
+ * Note that the caller is not permitted to immediately free
+ * the newly deleted entry. Instead, either synchronize_rcu()
+ * or call_rcu() must be used to defer freeing until an RCU
+ * grace period has elapsed.
+ */
+static inline void list_bidir_del_rcu(struct list_head *entry)
+{
+ __list_del_entry(entry);
+}
+
+/**
* hlist_del_init_rcu - deletes entry from hash list with re-initialization
* @n: the element to delete from the hash list.
*
diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h
index 303ab9bee155..f9bed3d3f78d 100644
--- a/include/linux/rcupdate_wait.h
+++ b/include/linux/rcupdate_wait.h
@@ -65,4 +65,15 @@ static inline void cond_resched_rcu(void)
#endif
}
+// Has the current task blocked within its current RCU read-side
+// critical section?
+static inline bool has_rcu_reader_blocked(void)
+{
+#ifdef CONFIG_PREEMPT_RCU
+ return !list_empty(&current->rcu_node_entry);
+#else
+ return false;
+#endif
+}
+
#endif /* _LINUX_SCHED_RCUPDATE_WAIT_H */
diff --git a/include/linux/rolling_buffer.h b/include/linux/rolling_buffer.h
new file mode 100644
index 000000000000..ac15b1ffdd83
--- /dev/null
+++ b/include/linux/rolling_buffer.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Rolling buffer of folios
+ *
+ * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#ifndef _ROLLING_BUFFER_H
+#define _ROLLING_BUFFER_H
+
+#include <linux/folio_queue.h>
+#include <linux/uio.h>
+
+/*
+ * Rolling buffer. Whilst the buffer is live and in use, folios and folio
+ * queue segments can be added to one end by one thread and removed from the
+ * other end by another thread. The buffer isn't allowed to be empty; it must
+ * always have at least one folio_queue in it so that neither side has to
+ * modify both queue pointers.
+ *
+ * The iterator in the buffer is extended as buffers are inserted. It can be
+ * snapshotted to use a segment of the buffer.
+ */
+struct rolling_buffer {
+ struct folio_queue *head; /* Producer's insertion point */
+ struct folio_queue *tail; /* Consumer's removal point */
+ struct iov_iter iter; /* Iterator tracking what's left in the buffer */
+ u8 next_head_slot; /* Next slot in ->head */
+ u8 first_tail_slot; /* First slot in ->tail */
+};
+
+/*
+ * Snapshot of a rolling buffer.
+ */
+struct rolling_buffer_snapshot {
+ struct folio_queue *curr_folioq; /* Queue segment in which current folio resides */
+ unsigned char curr_slot; /* Folio currently being read */
+ unsigned char curr_order; /* Order of folio */
+};
+
+/* Marks to store per-folio in the internal folio_queue structs. */
+#define ROLLBUF_MARK_1 BIT(0)
+#define ROLLBUF_MARK_2 BIT(1)
+
+int rolling_buffer_init(struct rolling_buffer *roll, unsigned int rreq_id,
+ unsigned int direction);
+int rolling_buffer_make_space(struct rolling_buffer *roll);
+ssize_t rolling_buffer_load_from_ra(struct rolling_buffer *roll,
+ struct readahead_control *ractl,
+ struct folio_batch *put_batch);
+ssize_t rolling_buffer_append(struct rolling_buffer *roll, struct folio *folio,
+ unsigned int flags);
+struct folio_queue *rolling_buffer_delete_spent(struct rolling_buffer *roll);
+void rolling_buffer_clear(struct rolling_buffer *roll);
+
+static inline void rolling_buffer_advance(struct rolling_buffer *roll, size_t amount)
+{
+ iov_iter_advance(&roll->iter, amount);
+}
+
+#endif /* _ROLLING_BUFFER_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 64934e0830af..ac08431e238f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -944,6 +944,7 @@ struct task_struct {
unsigned sched_reset_on_fork:1;
unsigned sched_contributes_to_load:1;
unsigned sched_migrated:1;
+ unsigned sched_task_hot:1;
/* Force alignment to the next boundary: */
unsigned :0;
@@ -1374,6 +1375,15 @@ struct task_struct {
* with respect to preemption.
*/
unsigned long rseq_event_mask;
+# ifdef CONFIG_DEBUG_RSEQ
+ /*
+ * This is a place holder to save a copy of the rseq fields for
+ * validation of read-only fields. The struct rseq has a
+ * variable-length array at the end, so it cannot be used
+ * directly. Reserve a size large enough for the known fields.
+ */
+ char rseq_fields[sizeof(struct rseq)];
+# endif
#endif
#ifdef CONFIG_SCHED_MM_CID
@@ -1944,11 +1954,10 @@ static inline void kick_process(struct task_struct *tsk) { }
#endif
extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec);
-
-static inline void set_task_comm(struct task_struct *tsk, const char *from)
-{
- __set_task_comm(tsk, from, false);
-}
+#define set_task_comm(tsk, from) ({ \
+ BUILD_BUG_ON(sizeof(from) != TASK_COMM_LEN); \
+ __set_task_comm(tsk, from, false); \
+})
/*
* - Why not use task_lock()?
diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
index 2b461129d1fa..d8501f4709b5 100644
--- a/include/linux/sched/isolation.h
+++ b/include/linux/sched/isolation.h
@@ -7,16 +7,21 @@
#include <linux/tick.h>
enum hk_type {
- HK_TYPE_TIMER,
- HK_TYPE_RCU,
- HK_TYPE_MISC,
- HK_TYPE_SCHED,
- HK_TYPE_TICK,
HK_TYPE_DOMAIN,
- HK_TYPE_WQ,
HK_TYPE_MANAGED_IRQ,
- HK_TYPE_KTHREAD,
- HK_TYPE_MAX
+ HK_TYPE_KERNEL_NOISE,
+ HK_TYPE_MAX,
+
+ /*
+ * The following housekeeping types are only set by the nohz_full
+ * boot commandline option. So they can share the same value.
+ */
+ HK_TYPE_TICK = HK_TYPE_KERNEL_NOISE,
+ HK_TYPE_TIMER = HK_TYPE_KERNEL_NOISE,
+ HK_TYPE_RCU = HK_TYPE_KERNEL_NOISE,
+ HK_TYPE_MISC = HK_TYPE_KERNEL_NOISE,
+ HK_TYPE_WQ = HK_TYPE_KERNEL_NOISE,
+ HK_TYPE_KTHREAD = HK_TYPE_KERNEL_NOISE
};
#ifdef CONFIG_CPU_ISOLATION
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 4237daa5ac7a..7f3dbafe1817 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -114,7 +114,10 @@ struct sched_domain {
unsigned int lb_count[CPU_MAX_IDLE_TYPES];
unsigned int lb_failed[CPU_MAX_IDLE_TYPES];
unsigned int lb_balanced[CPU_MAX_IDLE_TYPES];
- unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_imbalance_load[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_imbalance_util[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_imbalance_task[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_imbalance_misfit[CPU_MAX_IDLE_TYPES];
unsigned int lb_gained[CPU_MAX_IDLE_TYPES];
unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES];
unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES];
@@ -140,9 +143,7 @@ struct sched_domain {
unsigned int ttwu_move_affine;
unsigned int ttwu_move_balance;
#endif
-#ifdef CONFIG_SCHED_DEBUG
char *name;
-#endif
union {
void *private; /* used during construction */
struct rcu_head rcu; /* used during destruction */
@@ -198,18 +199,12 @@ struct sched_domain_topology_level {
int flags;
int numa_level;
struct sd_data data;
-#ifdef CONFIG_SCHED_DEBUG
char *name;
-#endif
};
extern void __init set_sched_topology(struct sched_domain_topology_level *tl);
-#ifdef CONFIG_SCHED_DEBUG
# define SD_INIT_NAME(type) .name = #type
-#else
-# define SD_INIT_NAME(type)
-#endif
#else /* CONFIG_SMP */
diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h
index 06cd8fb2f409..0f28b4623ad4 100644
--- a/include/linux/sched/wake_q.h
+++ b/include/linux/sched/wake_q.h
@@ -63,4 +63,38 @@ extern void wake_q_add(struct wake_q_head *head, struct task_struct *task);
extern void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task);
extern void wake_up_q(struct wake_q_head *head);
+/* Spin unlock helpers to unlock and call wake_up_q with preempt disabled */
+static inline
+void raw_spin_unlock_wake(raw_spinlock_t *lock, struct wake_q_head *wake_q)
+{
+ guard(preempt)();
+ raw_spin_unlock(lock);
+ if (wake_q) {
+ wake_up_q(wake_q);
+ wake_q_init(wake_q);
+ }
+}
+
+static inline
+void raw_spin_unlock_irq_wake(raw_spinlock_t *lock, struct wake_q_head *wake_q)
+{
+ guard(preempt)();
+ raw_spin_unlock_irq(lock);
+ if (wake_q) {
+ wake_up_q(wake_q);
+ wake_q_init(wake_q);
+ }
+}
+
+static inline
+void raw_spin_unlock_irqrestore_wake(raw_spinlock_t *lock, unsigned long flags,
+ struct wake_q_head *wake_q)
+{
+ guard(preempt)();
+ raw_spin_unlock_irqrestore(lock, flags);
+ if (wake_q) {
+ wake_up_q(wake_q);
+ wake_q_init(wake_q);
+ }
+}
#endif /* _LINUX_SCHED_WAKE_Q_H */
diff --git a/include/linux/security.h b/include/linux/security.h
index cbdba435b798..980b6c207cad 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -226,6 +226,18 @@ extern unsigned long dac_mmap_min_addr;
#endif
/*
+ * A "security context" is the text representation of
+ * the information used by LSMs.
+ * This structure contains the string, its length, and which LSM
+ * it is useful for.
+ */
+struct lsm_context {
+ char *context; /* Provided by the module */
+ u32 len;
+ int id; /* Identifies the module */
+};
+
+/*
* Values used in the task_security_ops calls
*/
/* setuid or setgid, id0 == uid or gid */
@@ -378,8 +390,8 @@ int security_sb_clone_mnt_opts(const struct super_block *oldsb,
int security_move_mount(const struct path *from_path, const struct path *to_path);
int security_dentry_init_security(struct dentry *dentry, int mode,
const struct qstr *name,
- const char **xattr_name, void **ctx,
- u32 *ctxlen);
+ const char **xattr_name,
+ struct lsm_context *lsmcxt);
int security_dentry_create_files_as(struct dentry *dentry, int mode,
struct qstr *name,
const struct cred *old,
@@ -553,14 +565,14 @@ int security_getprocattr(struct task_struct *p, int lsmid, const char *name,
int security_setprocattr(int lsmid, const char *name, void *value, size_t size);
int security_netlink_send(struct sock *sk, struct sk_buff *skb);
int security_ismaclabel(const char *name);
-int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen);
-int security_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, u32 *seclen);
+int security_secid_to_secctx(u32 secid, struct lsm_context *cp);
+int security_lsmprop_to_secctx(struct lsm_prop *prop, struct lsm_context *cp);
int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid);
-void security_release_secctx(char *secdata, u32 seclen);
+void security_release_secctx(struct lsm_context *cp);
void security_inode_invalidate_secctx(struct inode *inode);
int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen);
int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen);
-int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen);
+int security_inode_getsecctx(struct inode *inode, struct lsm_context *cp);
int security_locked_down(enum lockdown_reason what);
int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, u32 *uctx_len,
void *val, size_t val_len, u64 id, u64 flags);
@@ -852,8 +864,7 @@ static inline int security_dentry_init_security(struct dentry *dentry,
int mode,
const struct qstr *name,
const char **xattr_name,
- void **ctx,
- u32 *ctxlen)
+ struct lsm_context *lsmcxt)
{
return -EOPNOTSUPP;
}
@@ -1526,14 +1537,13 @@ static inline int security_ismaclabel(const char *name)
return 0;
}
-static inline int security_secid_to_secctx(u32 secid, char **secdata,
- u32 *seclen)
+static inline int security_secid_to_secctx(u32 secid, struct lsm_context *cp)
{
return -EOPNOTSUPP;
}
static inline int security_lsmprop_to_secctx(struct lsm_prop *prop,
- char **secdata, u32 *seclen)
+ struct lsm_context *cp)
{
return -EOPNOTSUPP;
}
@@ -1545,7 +1555,7 @@ static inline int security_secctx_to_secid(const char *secdata,
return -EOPNOTSUPP;
}
-static inline void security_release_secctx(char *secdata, u32 seclen)
+static inline void security_release_secctx(struct lsm_context *cp)
{
}
@@ -1561,7 +1571,8 @@ static inline int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32
{
return -EOPNOTSUPP;
}
-static inline int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
+static inline int security_inode_getsecctx(struct inode *inode,
+ struct lsm_context *cp)
{
return -EOPNOTSUPP;
}
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 5298765d6ca4..d1a2346cf0f8 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -272,7 +272,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex)
({ \
unsigned __seq; \
\
- while ((__seq = seqprop_sequence(s)) & 1) \
+ while (unlikely((__seq = seqprop_sequence(s)) & 1)) \
cpu_relax(); \
\
kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \
@@ -319,6 +319,28 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex)
})
/**
+ * raw_seqcount_try_begin() - begin a seqcount_t read critical section
+ * w/o lockdep and w/o counter stabilization
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
+ *
+ * Similar to raw_seqcount_begin(), except it enables eliding the critical
+ * section entirely if odd, instead of doing the speculation knowing it will
+ * fail.
+ *
+ * Useful when counter stabilization is more or less equivalent to taking
+ * the lock and there is a slowpath that does that.
+ *
+ * If true, start will be set to the (even) sequence count read.
+ *
+ * Return: true when a read critical section is started.
+ */
+#define raw_seqcount_try_begin(s, start) \
+({ \
+ start = raw_read_seqcount(s); \
+ !(start & 1); \
+})
+
+/**
* raw_seqcount_begin() - begin a seqcount_t read critical section w/o
* lockdep and w/o counter stabilization
* @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 10a971c2bde3..09eedaecf120 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -1099,5 +1099,6 @@ unsigned int kmem_cache_size(struct kmem_cache *s);
size_t kmalloc_size_roundup(size_t size);
void __init kmem_cache_init_late(void);
+void __init kvfree_rcu_init(void);
#endif /* _LINUX_SLAB_H */
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 08339eb8a01c..d7ba46e74f58 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -43,6 +43,12 @@ int init_srcu_struct(struct srcu_struct *ssp);
#define __SRCU_DEP_MAP_INIT(srcu_name)
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+/* Values for SRCU Tree srcu_data ->srcu_reader_flavor, but also used by rcutorture. */
+#define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock().
+#define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe().
+#define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite().
+#define SRCU_READ_FLAVOR_ALL 0x7 // All of the above.
+
#ifdef CONFIG_TINY_SRCU
#include <linux/srcutiny.h>
#elif defined(CONFIG_TREE_SRCU)
@@ -232,13 +238,14 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp)
* a mutex that is held elsewhere while calling synchronize_srcu() or
* synchronize_srcu_expedited().
*
- * The return value from srcu_read_lock() must be passed unaltered
- * to the matching srcu_read_unlock(). Note that srcu_read_lock() and
- * the matching srcu_read_unlock() must occur in the same context, for
- * example, it is illegal to invoke srcu_read_unlock() in an irq handler
- * if the matching srcu_read_lock() was invoked in process context. Or,
- * for that matter to invoke srcu_read_unlock() from one task and the
- * matching srcu_read_lock() from another.
+ * The return value from srcu_read_lock() is guaranteed to be
+ * non-negative. This value must be passed unaltered to the matching
+ * srcu_read_unlock(). Note that srcu_read_lock() and the matching
+ * srcu_read_unlock() must occur in the same context, for example, it is
+ * illegal to invoke srcu_read_unlock() in an irq handler if the matching
+ * srcu_read_lock() was invoked in process context. Or, for that matter to
+ * invoke srcu_read_unlock() from one task and the matching srcu_read_lock()
+ * from another.
*/
static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp)
{
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 490aeecc6bb4..b17814c9d1c7 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -26,6 +26,7 @@ struct srcu_data {
atomic_long_t srcu_lock_count[2]; /* Locks per CPU. */
atomic_long_t srcu_unlock_count[2]; /* Unlocks per CPU. */
int srcu_reader_flavor; /* Reader flavor for srcu_struct structure? */
+ /* Values: SRCU_READ_FLAVOR_.* */
/* Update-side state. */
spinlock_t __private lock ____cacheline_internodealigned_in_smp;
@@ -43,11 +44,6 @@ struct srcu_data {
struct srcu_struct *ssp;
};
-/* Values for ->srcu_reader_flavor. */
-#define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock().
-#define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe().
-#define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite().
-
/*
* Node in SRCU combining tree, similar in function to rcu_data.
*/
@@ -258,7 +254,7 @@ static inline void srcu_check_read_flavor_lite(struct srcu_struct *ssp)
if (likely(READ_ONCE(sdp->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE))
return;
- // Note that the cmpxchg() in srcu_check_read_flavor() is fully ordered.
+ // Note that the cmpxchg() in __srcu_check_read_flavor() is fully ordered.
__srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE);
}
diff --git a/include/linux/stat.h b/include/linux/stat.h
index 3d900c86981c..9d8382e23a9c 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -52,6 +52,7 @@ struct kstat {
u64 mnt_id;
u32 dio_mem_align;
u32 dio_offset_align;
+ u32 dio_read_offset_align;
u64 change_cookie;
u64 subvol;
u32 atomic_write_unit_min;
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 0e035f675efe..542773650200 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -264,18 +264,6 @@ extern bool timekeeping_rtc_skipresume(void);
extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta);
/**
- * struct ktime_timestamps - Simultaneous mono/boot/real timestamps
- * @mono: Monotonic timestamp
- * @boot: Boottime timestamp
- * @real: Realtime timestamp
- */
-struct ktime_timestamps {
- u64 mono;
- u64 boot;
- u64 real;
-};
-
-/**
* struct system_time_snapshot - simultaneous raw/real time capture with
* counter value
* @cycles: Clocksource counter value to produce the system times
@@ -345,9 +333,6 @@ extern int get_device_system_crosststamp(
*/
extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot);
-/* NMI safe mono/boot/realtime timestamps */
-extern void ktime_get_fast_timestamps(struct ktime_timestamps *snap);
-
/*
* Persistent clock related interfaces
*/
diff --git a/include/linux/torture.h b/include/linux/torture.h
index c2e979f82f8d..0134e7221cae 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -130,7 +130,7 @@ void _torture_stop_kthread(char *m, struct task_struct **tp);
#endif
#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST) || IS_ENABLED(CONFIG_LOCK_TORTURE_TEST) || IS_MODULE(CONFIG_LOCK_TORTURE_TEST)
-long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask);
+long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask, bool dowarn);
#endif
#endif /* __LINUX_TORTURE_H */
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index dd4b31ce6d5d..d4cdc089f6c3 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -320,7 +320,7 @@ struct serial_struct;
*
* @poll_init: ``int ()(struct tty_driver *driver, int line, char *options)``
*
- * kgdboc support (Documentation/dev-tools/kgdb.rst). This routine is
+ * kgdboc support (Documentation/process/debugging/kgdb.rst). This routine is
* called to initialize the HW for later use by calling @poll_get_char or
* @poll_put_char.
*
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 853f9de5aa05..8ada84e85447 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -82,6 +82,15 @@ struct iov_iter {
};
};
+typedef __u16 uio_meta_flags_t;
+
+struct uio_meta {
+ uio_meta_flags_t flags;
+ u16 app_tag;
+ u64 seed;
+ struct iov_iter iter;
+};
+
static inline const struct iovec *iter_iov(const struct iov_iter *iter)
{
if (iter->iter_type == ITER_UBUF)
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index e0a4c2082245..b1df7d792fa1 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -16,6 +16,7 @@
#include <linux/types.h>
#include <linux/wait.h>
#include <linux/timer.h>
+#include <linux/seqlock.h>
struct uprobe;
struct vm_area_struct;
@@ -124,6 +125,10 @@ struct uprobe_task {
unsigned int depth;
struct return_instance *return_instances;
+ struct return_instance *ri_pool;
+ struct timer_list ri_timer;
+ seqcount_t ri_seqcount;
+
union {
struct {
struct arch_uprobe_task autask;
@@ -137,7 +142,6 @@ struct uprobe_task {
};
struct uprobe *active_uprobe;
- struct timer_list ri_timer;
unsigned long xol_vaddr;
struct arch_uprobe *auprobe;
@@ -154,12 +158,18 @@ struct return_instance {
unsigned long stack; /* stack pointer */
unsigned long orig_ret_vaddr; /* original return address */
bool chained; /* true, if instance is nested */
- int consumers_cnt;
+ int cons_cnt; /* total number of session consumers */
struct return_instance *next; /* keep as stack */
struct rcu_head rcu;
- struct return_consumer consumers[] __counted_by(consumers_cnt);
+ /* singular pre-allocated return_consumer instance for common case */
+ struct return_consumer consumer;
+ /*
+ * extra return_consumer instances for rare cases of multiple session consumers,
+ * contains (cons_cnt - 1) elements
+ */
+ struct return_consumer *extra_consumers;
} ____cacheline_aligned;
enum rp_check {