Diffstat (limited to 'include/linux')
99 files changed, 2575 insertions, 920 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 44975c1bbe12..7e7a33b6c8d7 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -105,6 +105,7 @@ enum acpi_irq_model_id {
 	ACPI_IRQ_MODEL_IOSAPIC,
 	ACPI_IRQ_MODEL_PLATFORM,
 	ACPI_IRQ_MODEL_GIC,
+	ACPI_IRQ_MODEL_LPIC,
 	ACPI_IRQ_MODEL_COUNT
 };
@@ -356,7 +357,8 @@ int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
 int acpi_isa_irq_to_gsi (unsigned isa_irq, u32 *gsi);
 void acpi_set_irq_model(enum acpi_irq_model_id model,
-			struct fwnode_handle *fwnode);
+			struct fwnode_handle *(*)(u32));
+void acpi_set_gsi_to_irq_fallback(u32 (*)(u32));
 struct irq_domain *acpi_irq_create_hierarchy(unsigned int flags,
 					     unsigned int size,
diff --git a/include/linux/acpi_viot.h b/include/linux/acpi_viot.h
index 1eb8ee5b0e5f..a5a122431563 100644
--- a/include/linux/acpi_viot.h
+++ b/include/linux/acpi_viot.h
@@ -6,9 +6,11 @@
 #include <linux/acpi.h>
 #ifdef CONFIG_ACPI_VIOT
+void __init acpi_viot_early_init(void);
 void __init acpi_viot_init(void);
 int viot_iommu_configure(struct device *dev);
 #else
+static inline void acpi_viot_early_init(void) {}
 static inline void acpi_viot_init(void) {}
 static inline int viot_iommu_configure(struct device *dev)
 {
diff --git a/include/linux/audit.h b/include/linux/audit.h
index cece70231138..00f7a80f1a3e 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -119,8 +119,6 @@ enum audit_nfcfgop {
 	AUDIT_NFT_OP_INVALID,
 };
-extern int is_audit_feature_set(int which);
-
 extern int __init audit_register_class(int class, unsigned *list);
 extern int audit_classify_syscall(int abi, unsigned syscall);
 extern int audit_classify_arch(int arch);
diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h
index edb7f6d41faa..5ca2d5699620 100644
--- a/include/linux/balloon_compaction.h
+++ b/include/linux/balloon_compaction.h
@@ -57,7 +57,6 @@ struct balloon_dev_info {
 	struct list_head pages;		/* Pages enqueued & handled to Host */
 	int (*migratepage)(struct balloon_dev_info *, struct page *newpage,
 			struct page *page, enum migrate_mode mode);
-	struct inode *inode;
 };
 extern struct page *balloon_page_alloc(void);
@@ -75,11 +74,10 @@ static inline void balloon_devinfo_init(struct balloon_dev_info *balloon)
 	spin_lock_init(&balloon->pages_lock);
 	INIT_LIST_HEAD(&balloon->pages);
 	balloon->migratepage = NULL;
-	balloon->inode = NULL;
 }
 #ifdef CONFIG_BALLOON_COMPACTION
-extern const struct address_space_operations balloon_aops;
+extern const struct movable_operations balloon_mops;
 /*
  * balloon_page_insert - insert a page into the balloon's page list and make
@@ -94,7 +92,7 @@ static inline void balloon_page_insert(struct balloon_dev_info *balloon,
 				       struct page *page)
 {
 	__SetPageOffline(page);
-	__SetPageMovable(page, balloon->inode->i_mapping);
+	__SetPageMovable(page, &balloon_mops);
 	set_page_private(page, (unsigned long)balloon);
 	list_add(&page->lru, &balloon->pages);
 }
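With the inode gone, movable pages are registered with a callback table rather
than an address_space. A minimal sketch of the new wiring — the struct
movable_operations callback names (isolate_page, migrate_page, putback_page)
come from elsewhere in this series and are assumed here, as is the hypothetical
drv_* naming:

    #include <linux/balloon_compaction.h>

    static bool drv_isolate_page(struct page *page, isolate_mode_t mode)
    {
        /* Remove the page from the driver's own lists; it may migrate now. */
        return true;
    }

    static const struct movable_operations drv_movable_ops = {
        .isolate_page = drv_isolate_page,
        /* .migrate_page and .putback_page omitted from this sketch */
    };

    static void drv_track_page(struct balloon_dev_info *balloon, struct page *page)
    {
        __SetPageOffline(page);
        __SetPageMovable(page, &drv_movable_ops);  /* ops table, not a mapping */
        set_page_private(page, (unsigned long)balloon);
    }

Balloon drivers themselves simply rely on the shared balloon_mops exported above.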
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 992ee987f273..ca22b06700a9 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -405,7 +405,7 @@ extern void bioset_exit(struct bio_set *);
 extern int biovec_init_pool(mempool_t *pool, int pool_entries);
 struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
-			     unsigned int opf, gfp_t gfp_mask,
+			     blk_opf_t opf, gfp_t gfp_mask,
 			     struct bio_set *bs);
 struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask);
 extern void bio_put(struct bio *);
@@ -418,7 +418,7 @@ int bio_init_clone(struct block_device *bdev, struct bio *bio,
 extern struct bio_set fs_bio_set;
 static inline struct bio *bio_alloc(struct block_device *bdev,
-		unsigned short nr_vecs, unsigned int opf, gfp_t gfp_mask)
+		unsigned short nr_vecs, blk_opf_t opf, gfp_t gfp_mask)
 {
 	return bio_alloc_bioset(bdev, nr_vecs, opf, gfp_mask, &fs_bio_set);
 }
@@ -456,9 +456,9 @@ struct request_queue;
 extern int submit_bio_wait(struct bio *bio);
 void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
-	      unsigned short max_vecs, unsigned int opf);
+	      unsigned short max_vecs, blk_opf_t opf);
 extern void bio_uninit(struct bio *);
-void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf);
+void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf);
 void bio_chain(struct bio *, struct bio *);
 int bio_add_page(struct bio *, struct page *, unsigned len, unsigned off);
@@ -789,6 +789,6 @@ static inline void bio_clear_polled(struct bio *bio)
 }
 struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
-		unsigned int nr_pages, unsigned int opf, gfp_t gfp);
+		unsigned int nr_pages, blk_opf_t opf, gfp_t gfp);
 #endif /* __LINUX_BIO_H */
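Callers are unchanged apart from the stronger type: REQ_OP_* and REQ_* values
now carry the __bitwise blk_opf_t annotation, so mixing them with unrelated
integers gets flagged by sparse. A small usage sketch (the helper name is
hypothetical; the calls are the ones declared above):

    static int write_page_sync(struct block_device *bdev, sector_t sector,
                               struct page *page)
    {
        /* opf combines the operation and flags in one blk_opf_t value */
        struct bio *bio = bio_alloc(bdev, 1, REQ_OP_WRITE | REQ_SYNC, GFP_NOIO);
        int ret;

        bio->bi_iter.bi_sector = sector;
        bio_add_page(bio, page, PAGE_SIZE, 0);
        ret = submit_bio_wait(bio);
        bio_put(bio);
        return ret;
    }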
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index e2d9daf7e8dd..effee1dc715a 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -57,6 +57,7 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_TIMED_OUT		((__force req_flags_t)(1 << 21))
 /* queue has elevator attached */
 #define RQF_ELV			((__force req_flags_t)(1 << 22))
+#define RQF_RESV		((__force req_flags_t)(1 << 23))
 /* flags that prevent us from merging requests: */
 #define RQF_NOMERGE_FLAGS \
@@ -79,7 +80,7 @@ struct request {
 	struct blk_mq_ctx *mq_ctx;
 	struct blk_mq_hw_ctx *mq_hctx;
-	unsigned int cmd_flags;		/* op and common flags */
+	blk_opf_t cmd_flags;		/* op and common flags */
 	req_flags_t rq_flags;
 	int tag;
@@ -197,8 +198,10 @@ struct request {
 	void *end_io_data;
 };
-#define req_op(req) \
-	((req)->cmd_flags & REQ_OP_MASK)
+static inline enum req_op req_op(const struct request *req)
+{
+	return req->cmd_flags & REQ_OP_MASK;
+}
 static inline bool blk_rq_is_passthrough(struct request *rq)
 {
@@ -519,7 +522,7 @@ struct blk_mq_queue_data {
 	bool last;
 };
-typedef bool (busy_tag_iter_fn)(struct request *, void *, bool);
+typedef bool (busy_tag_iter_fn)(struct request *, void *);
 /**
  * struct blk_mq_ops - Callback functions that implements block driver
@@ -574,7 +577,7 @@ struct blk_mq_ops {
 	/**
 	 * @timeout: Called on request timeout.
 	 */
-	enum blk_eh_timer_return (*timeout)(struct request *, bool);
+	enum blk_eh_timer_return (*timeout)(struct request *);
 	/**
 	 * @poll: Called to poll for completion of a specific tag.
@@ -686,10 +689,12 @@ struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata,
 								\
 	__blk_mq_alloc_disk(set, queuedata, &__key);		\
 })
+struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q,
+		struct lock_class_key *lkclass);
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
 int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 		struct request_queue *q);
-void blk_mq_unregister_dev(struct device *, struct request_queue *);
+void blk_mq_destroy_queue(struct request_queue *);
 int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set);
 int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set,
@@ -710,10 +715,10 @@ enum {
 	BLK_MQ_REQ_PM		= (__force blk_mq_req_flags_t)(1 << 2),
 };
-struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
+struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf,
 		blk_mq_req_flags_t flags);
 struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
-		unsigned int op, blk_mq_req_flags_t flags,
+		blk_opf_t opf, blk_mq_req_flags_t flags,
 		unsigned int hctx_idx);
 /*
@@ -823,6 +828,11 @@ static inline bool blk_mq_need_time_stamp(struct request *rq)
 	return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV));
 }
+static inline bool blk_mq_is_reserved_rq(struct request *rq)
+{
+	return rq->rq_flags & RQF_RESV;
+}
+
 /*
  * Batched completions only work when there is no I/O error and no special
  * ->end_io handler.
@@ -1121,12 +1131,12 @@ void blk_dump_rq_flags(struct request *, char *);
 #ifdef CONFIG_BLK_DEV_ZONED
 static inline unsigned int blk_rq_zone_no(struct request *rq)
 {
-	return blk_queue_zone_no(rq->q, blk_rq_pos(rq));
+	return disk_zone_no(rq->q->disk, blk_rq_pos(rq));
 }
 static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
 {
-	return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq));
+	return disk_zone_is_seq(rq->q->disk, blk_rq_pos(rq));
 }
 bool blk_req_needs_zone_write_lock(struct request *rq);
@@ -1148,8 +1158,8 @@ static inline void blk_req_zone_write_unlock(struct request *rq)
 static inline bool blk_req_zone_is_write_locked(struct request *rq)
 {
-	return rq->q->seq_zones_wlock &&
-		test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock);
+	return rq->q->disk->seq_zones_wlock &&
+		test_bit(blk_rq_zone_no(rq), rq->q->disk->seq_zones_wlock);
 }
 static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
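Note the knock-on effect for drivers: busy_tag_iter_fn and ->timeout() lose
their trailing 'bool reserved' argument; callbacks that still need it read
RQF_RESV via blk_mq_is_reserved_rq() instead. A hedged sketch of an iterator
callback under the new signature (the cancel logic is illustrative only):

    static bool drv_cancel_rq(struct request *rq, void *data)
    {
        /* Previously a third 'bool reserved' parameter carried this. */
        if (blk_mq_is_reserved_rq(rq))
            return true;    /* skip reserved commands, keep iterating */

        blk_mq_end_request(rq, BLK_STS_IOERR);
        return true;
    }

    /* e.g. blk_mq_tagset_busy_iter(&drv->tag_set, drv_cancel_rq, NULL); */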
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index a24d4078fb21..1ef99790f6ed 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -240,6 +240,8 @@ static inline void bio_issue_init(struct bio_issue *issue,
 			((u64)size << BIO_ISSUE_SIZE_SHIFT));
 }
+typedef __u32 __bitwise blk_opf_t;
+
 typedef unsigned int blk_qc_t;
 #define BLK_QC_T_NONE		-1U
@@ -250,7 +252,7 @@ typedef unsigned int blk_qc_t;
 struct bio {
 	struct bio		*bi_next;	/* request queue link */
 	struct block_device	*bi_bdev;
-	unsigned int		bi_opf;		/* bottom bits REQ_OP, top bits
+	blk_opf_t		bi_opf;		/* bottom bits REQ_OP, top bits
 						 * req_flags.
 						 */
 	unsigned short		bi_flags;	/* BIO_* below */
@@ -337,8 +339,12 @@ enum {
 typedef __u32 __bitwise blk_mq_req_flags_t;
-/*
- * Operations and flags common to the bio and request structures.
+#define REQ_OP_BITS	8
+#define REQ_OP_MASK	(__force blk_opf_t)((1 << REQ_OP_BITS) - 1)
+#define REQ_FLAG_BITS	24
+
+/**
+ * enum req_op - Operations common to the bio and request structures.
  * We use 8 bits for encoding the operation, and the remaining 24 for flags.
  *
  * The least significant bit of the operation number indicates the data
@@ -350,41 +356,37 @@ typedef __u32 __bitwise blk_mq_req_flags_t;
  * If a operation does not transfer data the least significant bit has no
  * meaning.
  */
-#define REQ_OP_BITS	8
-#define REQ_OP_MASK	((1 << REQ_OP_BITS) - 1)
-#define REQ_FLAG_BITS	24
-
-enum req_opf {
+enum req_op {
 	/* read sectors from the device */
-	REQ_OP_READ		= 0,
+	REQ_OP_READ		= (__force blk_opf_t)0,
 	/* write sectors to the device */
-	REQ_OP_WRITE		= 1,
+	REQ_OP_WRITE		= (__force blk_opf_t)1,
 	/* flush the volatile write cache */
-	REQ_OP_FLUSH		= 2,
+	REQ_OP_FLUSH		= (__force blk_opf_t)2,
 	/* discard sectors */
-	REQ_OP_DISCARD		= 3,
+	REQ_OP_DISCARD		= (__force blk_opf_t)3,
 	/* securely erase sectors */
-	REQ_OP_SECURE_ERASE	= 5,
+	REQ_OP_SECURE_ERASE	= (__force blk_opf_t)5,
 	/* write the zero filled sector many times */
-	REQ_OP_WRITE_ZEROES	= 9,
+	REQ_OP_WRITE_ZEROES	= (__force blk_opf_t)9,
 	/* Open a zone */
-	REQ_OP_ZONE_OPEN	= 10,
+	REQ_OP_ZONE_OPEN	= (__force blk_opf_t)10,
 	/* Close a zone */
-	REQ_OP_ZONE_CLOSE	= 11,
+	REQ_OP_ZONE_CLOSE	= (__force blk_opf_t)11,
 	/* Transition a zone to full */
-	REQ_OP_ZONE_FINISH	= 12,
+	REQ_OP_ZONE_FINISH	= (__force blk_opf_t)12,
 	/* write data at the current zone write pointer */
-	REQ_OP_ZONE_APPEND	= 13,
+	REQ_OP_ZONE_APPEND	= (__force blk_opf_t)13,
 	/* reset a zone write pointer */
-	REQ_OP_ZONE_RESET	= 15,
+	REQ_OP_ZONE_RESET	= (__force blk_opf_t)15,
 	/* reset all the zone present on the device */
-	REQ_OP_ZONE_RESET_ALL	= 17,
+	REQ_OP_ZONE_RESET_ALL	= (__force blk_opf_t)17,
 	/* Driver private requests */
-	REQ_OP_DRV_IN		= 34,
-	REQ_OP_DRV_OUT		= 35,
+	REQ_OP_DRV_IN		= (__force blk_opf_t)34,
+	REQ_OP_DRV_OUT		= (__force blk_opf_t)35,
-	REQ_OP_LAST,
+	REQ_OP_LAST		= (__force blk_opf_t)36,
 };
 enum req_flag_bits {
@@ -425,28 +427,31 @@ enum req_flag_bits {
 	__REQ_NR_BITS,		/* stops here */
 };
-#define REQ_FAILFAST_DEV	(1ULL << __REQ_FAILFAST_DEV)
-#define REQ_FAILFAST_TRANSPORT	(1ULL << __REQ_FAILFAST_TRANSPORT)
-#define REQ_FAILFAST_DRIVER	(1ULL << __REQ_FAILFAST_DRIVER)
-#define REQ_SYNC		(1ULL << __REQ_SYNC)
-#define REQ_META		(1ULL << __REQ_META)
-#define REQ_PRIO		(1ULL << __REQ_PRIO)
-#define REQ_NOMERGE		(1ULL << __REQ_NOMERGE)
-#define REQ_IDLE		(1ULL << __REQ_IDLE)
-#define REQ_INTEGRITY		(1ULL << __REQ_INTEGRITY)
-#define REQ_FUA			(1ULL << __REQ_FUA)
-#define REQ_PREFLUSH		(1ULL << __REQ_PREFLUSH)
-#define REQ_RAHEAD		(1ULL << __REQ_RAHEAD)
-#define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
-#define REQ_NOWAIT		(1ULL << __REQ_NOWAIT)
-#define REQ_CGROUP_PUNT		(1ULL << __REQ_CGROUP_PUNT)
-
-#define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
-#define REQ_POLLED		(1ULL << __REQ_POLLED)
-#define REQ_ALLOC_CACHE		(1ULL << __REQ_ALLOC_CACHE)
-
-#define REQ_DRV			(1ULL << __REQ_DRV)
-#define REQ_SWAP		(1ULL << __REQ_SWAP)
+#define REQ_FAILFAST_DEV	\
+			(__force blk_opf_t)(1ULL << __REQ_FAILFAST_DEV)
+#define REQ_FAILFAST_TRANSPORT	\
+			(__force blk_opf_t)(1ULL << __REQ_FAILFAST_TRANSPORT)
+#define REQ_FAILFAST_DRIVER	\
+			(__force blk_opf_t)(1ULL << __REQ_FAILFAST_DRIVER)
+#define REQ_SYNC	(__force blk_opf_t)(1ULL << __REQ_SYNC)
+#define REQ_META	(__force blk_opf_t)(1ULL << __REQ_META)
+#define REQ_PRIO	(__force blk_opf_t)(1ULL << __REQ_PRIO)
+#define REQ_NOMERGE	(__force blk_opf_t)(1ULL << __REQ_NOMERGE)
+#define REQ_IDLE	(__force blk_opf_t)(1ULL << __REQ_IDLE)
+#define REQ_INTEGRITY	(__force blk_opf_t)(1ULL << __REQ_INTEGRITY)
+#define REQ_FUA		(__force blk_opf_t)(1ULL << __REQ_FUA)
+#define REQ_PREFLUSH	(__force blk_opf_t)(1ULL << __REQ_PREFLUSH)
+#define REQ_RAHEAD	(__force blk_opf_t)(1ULL << __REQ_RAHEAD)
+#define REQ_BACKGROUND	(__force blk_opf_t)(1ULL << __REQ_BACKGROUND)
+#define REQ_NOWAIT	(__force blk_opf_t)(1ULL << __REQ_NOWAIT)
+#define REQ_CGROUP_PUNT	(__force blk_opf_t)(1ULL << __REQ_CGROUP_PUNT)
+
+#define REQ_NOUNMAP	(__force blk_opf_t)(1ULL << __REQ_NOUNMAP)
+#define REQ_POLLED	(__force blk_opf_t)(1ULL << __REQ_POLLED)
+#define REQ_ALLOC_CACHE	(__force blk_opf_t)(1ULL << __REQ_ALLOC_CACHE)
+
+#define REQ_DRV		(__force blk_opf_t)(1ULL << __REQ_DRV)
+#define REQ_SWAP	(__force blk_opf_t)(1ULL << __REQ_SWAP)
 #define REQ_FAILFAST_MASK \
 	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
@@ -463,26 +468,28 @@ enum stat_group {
 	NR_STAT_GROUPS
 };
-#define bio_op(bio) \
-	((bio)->bi_opf & REQ_OP_MASK)
+static inline enum req_op bio_op(const struct bio *bio)
+{
+	return bio->bi_opf & REQ_OP_MASK;
+}
 /* obsolete, don't use in new code */
-static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
-				    unsigned op_flags)
+static inline void bio_set_op_attrs(struct bio *bio, enum req_op op,
+				    blk_opf_t op_flags)
 {
 	bio->bi_opf = op | op_flags;
 }
-static inline bool op_is_write(unsigned int op)
+static inline bool op_is_write(blk_opf_t op)
 {
-	return (op & 1);
+	return !!(op & (__force blk_opf_t)1);
 }
 /*
  * Check if the bio or request is one that needs special treatment in the
  * flush state machine.
  */
-static inline bool op_is_flush(unsigned int op)
+static inline bool op_is_flush(blk_opf_t op)
 {
 	return op & (REQ_FUA | REQ_PREFLUSH);
 }
@@ -492,13 +499,13 @@ static inline bool op_is_flush(unsigned int op)
  * PREFLUSH flag. Other operations may be marked as synchronous using the
  * REQ_SYNC flag.
  */
-static inline bool op_is_sync(unsigned int op)
+static inline bool op_is_sync(blk_opf_t op)
 {
 	return (op & REQ_OP_MASK) == REQ_OP_READ ||
 		(op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH));
 }
-static inline bool op_is_discard(unsigned int op)
+static inline bool op_is_discard(blk_opf_t op)
 {
 	return (op & REQ_OP_MASK) == REQ_OP_DISCARD;
 }
@@ -509,7 +516,7 @@ static inline bool op_is_discard(unsigned int op)
  * due to its different handling in the block layer and device response in
  * case of command failure.
  */
-static inline bool op_is_zone_mgmt(enum req_opf op)
+static inline bool op_is_zone_mgmt(enum req_op op)
 {
 	switch (op & REQ_OP_MASK) {
 	case REQ_OP_ZONE_RESET:
@@ -522,7 +529,7 @@ static inline bool op_is_zone_mgmt(enum req_opf op)
 	}
 }
-static inline int op_stat_group(unsigned int op)
+static inline int op_stat_group(enum req_op op)
 {
 	if (op_is_discard(op))
 		return STAT_DISCARD;
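The practical effect of the typed helpers: operation checks keep working
unchanged, but sparse now distinguishes a bare operation (enum req_op) from an
operation-plus-flags word (blk_opf_t). A small accounting-style sketch using
only names declared above (the function itself is hypothetical):

    static void drv_note_bio(struct bio *bio, unsigned long counters[NR_STAT_GROUPS])
    {
        enum req_op op = bio_op(bio);       /* low 8 bits only */
        blk_opf_t opf = bio->bi_opf;        /* op and flags together */

        counters[op_stat_group(op)]++;
        if (op_is_write(opf) && op_is_flush(opf))
            pr_debug("FUA/preflush write of %u bytes\n",
                     bio->bi_iter.bi_size);
    }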
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2f7b43444c5f..dccdf1551c62 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -148,6 +148,7 @@ struct gendisk {
 #define GD_NATIVE_CAPACITY		3
 #define GD_ADDED			4
 #define GD_SUPPRESS_PART_SCAN		5
+#define GD_OWNS_QUEUE			6
 	struct mutex open_mutex;	/* open/close mutex */
 	unsigned open_partitions;	/* number of open partitions */
@@ -163,6 +164,29 @@ struct gendisk {
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 	struct kobject integrity_kobj;
 #endif	/* CONFIG_BLK_DEV_INTEGRITY */
+
+#ifdef CONFIG_BLK_DEV_ZONED
+	/*
+	 * Zoned block device information for request dispatch control.
+	 * nr_zones is the total number of zones of the device. This is always
+	 * 0 for regular block devices. conv_zones_bitmap is a bitmap of nr_zones
+	 * bits which indicates if a zone is conventional (bit set) or
+	 * sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones
+	 * bits which indicates if a zone is write locked, that is, if a write
+	 * request targeting the zone was dispatched.
+	 *
+	 * Reads of this information must be protected with blk_queue_enter() /
+	 * blk_queue_exit(). Modifying this information is only allowed while
+	 * no requests are being processed. See also blk_mq_freeze_queue() and
+	 * blk_mq_unfreeze_queue().
+	 */
+	unsigned int		nr_zones;
+	unsigned int		max_open_zones;
+	unsigned int		max_active_zones;
+	unsigned long		*conv_zones_bitmap;
+	unsigned long		*seq_zones_wlock;
+#endif /* CONFIG_BLK_DEV_ZONED */
+
 #if IS_ENABLED(CONFIG_CDROM)
 	struct cdrom_device_info *cdi;
 #endif
@@ -170,6 +194,12 @@ struct gendisk {
 	struct badblocks *bb;
 	struct lockdep_map lockdep_map;
 	u64 diskseq;
+
+	/*
+	 * Independent sector access ranges. This is always NULL for
+	 * devices that do not have multiple independent access ranges.
+	 */
+	struct blk_independent_access_ranges *ia_ranges;
 };
 static inline bool disk_live(struct gendisk *disk)
@@ -220,7 +250,7 @@ static inline int blk_validate_block_size(unsigned long bsize)
 	return 0;
 }
-static inline bool blk_op_is_passthrough(unsigned int op)
+static inline bool blk_op_is_passthrough(blk_opf_t op)
 {
 	op &= REQ_OP_MASK;
 	return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT;
@@ -284,15 +314,15 @@ struct queue_limits {
 typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx,
 			       void *data);
-void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model);
+void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model);
 #ifdef CONFIG_BLK_DEV_ZONED
 #define BLK_ALL_ZONES  ((unsigned int)-1)
 int blkdev_report_zones(struct block_device *bdev, sector_t sector,
 			unsigned int nr_zones, report_zones_cb cb, void *data);
-unsigned int blkdev_nr_zones(struct gendisk *disk);
-extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
+unsigned int bdev_nr_zones(struct block_device *bdev);
+extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
 			    sector_t sectors, sector_t nr_sectors,
 			    gfp_t gfp_mask);
 int blk_revalidate_disk_zones(struct gendisk *disk,
@@ -305,7 +335,7 @@ extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
 #else /* CONFIG_BLK_DEV_ZONED */
-static inline unsigned int blkdev_nr_zones(struct gendisk *disk)
+static inline unsigned int bdev_nr_zones(struct block_device *bdev)
 {
 	return 0;
 }
@@ -424,6 +454,11 @@ struct request_queue {
 	unsigned long		nr_requests;	/* Max # of requests */
 	unsigned int		dma_pad_mask;
+	/*
+	 * Drivers that set dma_alignment to less than 511 must be prepared to
+	 * handle individual bvec's that are not a multiple of a SECTOR_SIZE
+	 * due to possible offsets.
+	 */
 	unsigned int		dma_alignment;
 #ifdef CONFIG_BLK_INLINE_ENCRYPTION
@@ -455,31 +490,6 @@ struct request_queue {
 	unsigned int		required_elevator_features;
-#ifdef CONFIG_BLK_DEV_ZONED
-	/*
-	 * Zoned block device information for request dispatch control.
-	 * nr_zones is the total number of zones of the device. This is always
-	 * 0 for regular block devices. conv_zones_bitmap is a bitmap of nr_zones
-	 * bits which indicates if a zone is conventional (bit set) or
-	 * sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones
-	 * bits which indicates if a zone is write locked, that is, if a write
-	 * request targeting the zone was dispatched. All three fields are
-	 * initialized by the low level device driver (e.g. scsi/sd.c).
-	 * Stacking drivers (device mappers) may or may not initialize
-	 * these fields.
-	 *
-	 * Reads of this information must be protected with blk_queue_enter() /
-	 * blk_queue_exit(). Modifying this information is only allowed while
-	 * no requests are being processed. See also blk_mq_freeze_queue() and
-	 * blk_mq_unfreeze_queue().
-	 */
-	unsigned int		nr_zones;
-	unsigned long		*conv_zones_bitmap;
-	unsigned long		*seq_zones_wlock;
-	unsigned int		max_open_zones;
-	unsigned int		max_active_zones;
-#endif /* CONFIG_BLK_DEV_ZONED */
-
 	int			node;
 #ifdef CONFIG_BLK_DEV_IO_TRACE
 	struct blk_trace __rcu	*blk_trace;
@@ -533,12 +543,6 @@ struct request_queue {
 	bool			mq_sysfs_init_done;
-	/*
-	 * Independent sector access ranges. This is always NULL for
-	 * devices that do not have multiple independent access ranges.
-	 */
-	struct blk_independent_access_ranges *ia_ranges;
-
 	/**
 	 * @srcu: Sleepable RCU. Use as lock when type of the request queue
 	 * is blocking (BLK_MQ_F_BLOCKING). Must be the last member
@@ -559,7 +563,6 @@ struct request_queue {
 #define QUEUE_FLAG_NOXMERGES	9	/* No extended merges */
 #define QUEUE_FLAG_ADD_RANDOM	10	/* Contributes to random pool */
 #define QUEUE_FLAG_SAME_FORCE	12	/* force complete on same CPU */
-#define QUEUE_FLAG_DEAD		13	/* queue tear-down finished */
 #define QUEUE_FLAG_INIT_DONE	14	/* queue is initialized */
 #define QUEUE_FLAG_STABLE_WRITES 15	/* don't modify blks until WB is done */
 #define QUEUE_FLAG_POLL		16	/* IO polling enabled if set */
@@ -587,7 +590,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_dying(q)	test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
 #define blk_queue_has_srcu(q)	test_bit(QUEUE_FLAG_HAS_SRCU, &(q)->queue_flags)
-#define blk_queue_dead(q)	test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
 #define blk_queue_init_done(q)	test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_noxmerges(q)	\
@@ -663,76 +665,69 @@ static inline bool blk_queue_is_zoned(struct request_queue *q)
 	}
 }
-static inline sector_t blk_queue_zone_sectors(struct request_queue *q)
-{
-	return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
-}
-
 #ifdef CONFIG_BLK_DEV_ZONED
-static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
+static inline unsigned int disk_nr_zones(struct gendisk *disk)
 {
-	return blk_queue_is_zoned(q) ? q->nr_zones : 0;
+	return blk_queue_is_zoned(disk->queue) ? disk->nr_zones : 0;
 }
-static inline unsigned int blk_queue_zone_no(struct request_queue *q,
-					     sector_t sector)
+static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector)
 {
-	if (!blk_queue_is_zoned(q))
+	if (!blk_queue_is_zoned(disk->queue))
 		return 0;
-	return sector >> ilog2(q->limits.chunk_sectors);
+	return sector >> ilog2(disk->queue->limits.chunk_sectors);
 }
-static inline bool blk_queue_zone_is_seq(struct request_queue *q,
-					 sector_t sector)
+static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector)
 {
-	if (!blk_queue_is_zoned(q))
+	if (!blk_queue_is_zoned(disk->queue))
 		return false;
-	if (!q->conv_zones_bitmap)
+	if (!disk->conv_zones_bitmap)
 		return true;
-	return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap);
+	return !test_bit(disk_zone_no(disk, sector), disk->conv_zones_bitmap);
 }
-static inline void blk_queue_max_open_zones(struct request_queue *q,
+static inline void disk_set_max_open_zones(struct gendisk *disk,
 		unsigned int max_open_zones)
 {
-	q->max_open_zones = max_open_zones;
+	disk->max_open_zones = max_open_zones;
 }
-static inline unsigned int queue_max_open_zones(const struct request_queue *q)
+static inline void disk_set_max_active_zones(struct gendisk *disk,
+		unsigned int max_active_zones)
 {
-	return q->max_open_zones;
+	disk->max_active_zones = max_active_zones;
 }
-static inline void blk_queue_max_active_zones(struct request_queue *q,
-		unsigned int max_active_zones)
+static inline unsigned int bdev_max_open_zones(struct block_device *bdev)
 {
-	q->max_active_zones = max_active_zones;
+	return bdev->bd_disk->max_open_zones;
 }
-static inline unsigned int queue_max_active_zones(const struct request_queue *q)
+static inline unsigned int bdev_max_active_zones(struct block_device *bdev)
 {
-	return q->max_active_zones;
+	return bdev->bd_disk->max_active_zones;
 }
+
 #else /* CONFIG_BLK_DEV_ZONED */
-static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
+static inline unsigned int disk_nr_zones(struct gendisk *disk)
 {
 	return 0;
 }
-static inline bool blk_queue_zone_is_seq(struct request_queue *q,
-					 sector_t sector)
+static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector)
 {
 	return false;
 }
-static inline unsigned int blk_queue_zone_no(struct request_queue *q,
-					     sector_t sector)
+static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector)
 {
 	return 0;
 }
-static inline unsigned int queue_max_open_zones(const struct request_queue *q)
+static inline unsigned int bdev_max_open_zones(struct block_device *bdev)
 {
 	return 0;
 }
-static inline unsigned int queue_max_active_zones(const struct request_queue *q)
+
+static inline unsigned int bdev_max_active_zones(struct block_device *bdev)
 {
 	return 0;
 }
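Since the zone bookkeeping now lives in struct gendisk, lookups take a disk (or
a bdev) rather than a request queue. A sketch of a caller-side conversion,
assuming a driver that previously used the blk_queue_zone_*() helpers (the
function name is hypothetical):

    static bool drv_sector_needs_zone_lock(struct block_device *bdev,
                                           sector_t sector)
    {
        struct gendisk *disk = bdev->bd_disk;

        if (!disk_nr_zones(disk))            /* was blk_queue_nr_zones(q) */
            return false;
        /* Conventional zones accept random writes and need no write lock. */
        return disk_zone_is_seq(disk, sector); /* was blk_queue_zone_is_seq(q, ...) */
    }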
@@ -812,8 +807,6 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb)
 int bdev_disk_changed(struct gendisk *disk, bool invalidate);
-struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
-		struct lock_class_key *lkclass);
 void put_disk(struct gendisk *disk);
 struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass);
@@ -832,7 +825,6 @@ struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass);
 								\
 	__blk_alloc_disk(node_id, &__key);			\
 })
-void blk_cleanup_disk(struct gendisk *disk);
 int __register_blkdev(unsigned int major, const char *name,
 		void (*probe)(dev_t devt));
@@ -880,7 +872,7 @@ extern void blk_queue_exit(struct request_queue *q);
 extern void blk_sync_queue(struct request_queue *q);
 /* Helper to convert REQ_OP_XXX to its string format XXX */
-extern const char *blk_op_str(unsigned int op);
+extern const char *blk_op_str(enum req_op op);
 int blk_status_to_errno(blk_status_t status);
 blk_status_t errno_to_blk_status(int errno);
@@ -898,64 +890,33 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
 	return bdev->bd_queue;	/* this is never NULL */
 }
-#ifdef CONFIG_BLK_DEV_ZONED
-
 /* Helper to convert BLK_ZONE_ZONE_XXX to its string format XXX */
 const char *blk_zone_cond_str(enum blk_zone_cond zone_cond);
 static inline unsigned int bio_zone_no(struct bio *bio)
 {
-	return blk_queue_zone_no(bdev_get_queue(bio->bi_bdev),
-				 bio->bi_iter.bi_sector);
+	return disk_zone_no(bio->bi_bdev->bd_disk, bio->bi_iter.bi_sector);
 }
 static inline unsigned int bio_zone_is_seq(struct bio *bio)
 {
-	return blk_queue_zone_is_seq(bdev_get_queue(bio->bi_bdev),
-				     bio->bi_iter.bi_sector);
-}
-#endif /* CONFIG_BLK_DEV_ZONED */
-
-static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
-						     int op)
-{
-	if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE))
-		return min(q->limits.max_discard_sectors,
-			   UINT_MAX >> SECTOR_SHIFT);
-
-	if (unlikely(op == REQ_OP_WRITE_ZEROES))
-		return q->limits.max_write_zeroes_sectors;
-
-	return q->limits.max_sectors;
+	return disk_zone_is_seq(bio->bi_bdev->bd_disk, bio->bi_iter.bi_sector);
 }
 /*
- * Return maximum size of a request at given offset. Only valid for
- * file system requests.
+ * Return how much of the chunk is left to be used for I/O at a given offset.
  */
-static inline unsigned int blk_max_size_offset(struct request_queue *q,
-					       sector_t offset,
-					       unsigned int chunk_sectors)
-{
-	if (!chunk_sectors) {
-		if (q->limits.chunk_sectors)
-			chunk_sectors = q->limits.chunk_sectors;
-		else
-			return q->limits.max_sectors;
-	}
-
-	if (likely(is_power_of_2(chunk_sectors)))
-		chunk_sectors -= offset & (chunk_sectors - 1);
-	else
-		chunk_sectors -= sector_div(offset, chunk_sectors);
-
-	return min(q->limits.max_sectors, chunk_sectors);
+static inline unsigned int blk_chunk_sectors_left(sector_t offset,
+		unsigned int chunk_sectors)
+{
+	if (unlikely(!is_power_of_2(chunk_sectors)))
+		return chunk_sectors - sector_div(offset, chunk_sectors);
+	return chunk_sectors - (offset & (chunk_sectors - 1));
 }
 /*
  * Access functions for manipulating queue properties
  */
-extern void blk_cleanup_queue(struct request_queue *);
 void blk_queue_bounce_limit(struct request_queue *q, enum blk_bounce limit);
 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
@@ -1206,6 +1167,11 @@ bdev_max_zone_append_sectors(struct block_device *bdev)
 	return queue_max_zone_append_sectors(bdev_get_queue(bdev));
 }
+static inline unsigned int bdev_max_segments(struct block_device *bdev)
+{
+	return queue_max_segments(bdev_get_queue(bdev));
+}
+
 static inline unsigned queue_logical_block_size(const struct request_queue *q)
 {
 	int retval = 512;
@@ -1337,32 +1303,26 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
-	if (q)
-		return blk_queue_zone_sectors(q);
-	return 0;
+	if (!blk_queue_is_zoned(q))
+		return 0;
+	return q->limits.chunk_sectors;
 }
-static inline unsigned int bdev_max_open_zones(struct block_device *bdev)
+static inline int queue_dma_alignment(const struct request_queue *q)
 {
-	struct request_queue *q = bdev_get_queue(bdev);
-
-	if (q)
-		return queue_max_open_zones(q);
-	return 0;
+	return q ? q->dma_alignment : 511;
 }
-static inline unsigned int bdev_max_active_zones(struct block_device *bdev)
+static inline unsigned int bdev_dma_alignment(struct block_device *bdev)
 {
-	struct request_queue *q = bdev_get_queue(bdev);
-
-	if (q)
-		return queue_max_active_zones(q);
-	return 0;
+	return queue_dma_alignment(bdev_get_queue(bdev));
 }
-static inline int queue_dma_alignment(const struct request_queue *q)
+static inline bool bdev_iter_is_aligned(struct block_device *bdev,
+					struct iov_iter *iter)
 {
-	return q ? q->dma_alignment : 511;
+	return iov_iter_is_aligned(iter, bdev_dma_alignment(bdev),
+				   bdev_logical_block_size(bdev) - 1);
 }
 static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr,
@@ -1426,7 +1386,7 @@ struct block_device_operations {
 			unsigned int flags);
 	int (*open) (struct block_device *, fmode_t);
 	void (*release) (struct gendisk *, fmode_t);
-	int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int);
+	int (*rw_page)(struct block_device *, sector_t, struct page *, enum req_op);
 	int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	unsigned int (*check_events) (struct gendisk *disk,
@@ -1479,9 +1439,9 @@ static inline void blk_wake_io_task(struct task_struct *waiter)
 }
 unsigned long bdev_start_io_acct(struct block_device *bdev,
-				 unsigned int sectors, unsigned int op,
+				 unsigned int sectors, enum req_op op,
 				 unsigned long start_time);
-void bdev_end_io_acct(struct block_device *bdev, unsigned int op,
+void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
 		      unsigned long start_time);
 void bio_start_io_acct_time(struct bio *bio, unsigned long start_time);
@@ -1502,7 +1462,6 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time)
 int bdev_read_only(struct block_device *bdev);
 int set_blocksize(struct block_device *bdev, int size);
-const char *bdevname(struct block_device *bdev, char *buffer);
 int lookup_bdev(const char *pathname, dev_t *dev);
 void blkdev_show(struct seq_file *seqf, off_t offset);
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 623e22492afa..cfbda114348c 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -7,6 +7,7 @@
 #include <linux/compat.h>
 #include <uapi/linux/blktrace_api.h>
 #include <linux/list.h>
+#include <linux/blk_types.h>
 #if defined(CONFIG_BLK_DEV_IO_TRACE)
@@ -77,10 +78,6 @@ extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 			   char __user *arg);
 extern int blk_trace_startstop(struct request_queue *q, int start);
 extern int blk_trace_remove(struct request_queue *q);
-extern void blk_trace_remove_sysfs(struct device *dev);
-extern int blk_trace_init_sysfs(struct device *dev);
-
-extern struct attribute_group blk_trace_attr_group;
 #else /* !CONFIG_BLK_DEV_IO_TRACE */
 # define blk_trace_ioctl(bdev, cmd, arg)		(-ENOTTY)
@@ -91,13 +88,7 @@ extern struct attribute_group blk_trace_attr_group;
 # define blk_trace_remove(q)				(-ENOTTY)
 # define blk_add_trace_msg(q, fmt, ...)			do { } while (0)
 # define blk_add_cgroup_trace_msg(q, cg, fmt, ...)	do { } while (0)
-# define blk_trace_remove_sysfs(dev)			do { } while (0)
 # define blk_trace_note_message_enabled(q)		(false)
-static inline int blk_trace_init_sysfs(struct device *dev)
-{
-	return 0;
-}
-
 #endif /* CONFIG_BLK_DEV_IO_TRACE */
 #ifdef CONFIG_COMPAT
@@ -115,7 +106,7 @@ struct compat_blk_user_trace_setup {
 #endif
-void blk_fill_rwbs(char *rwbs, unsigned int op);
+void blk_fill_rwbs(char *rwbs, blk_opf_t opf);
 static inline sector_t blk_rq_trace_sector(struct request *rq)
 {
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index c9d1463bb20f..307445d1c69e 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -9,6 +9,7 @@
 #define _LINUX_BUFFER_HEAD_H
 #include <linux/types.h>
+#include <linux/blk_types.h>
 #include <linux/fs.h>
 #include <linux/linkage.h>
 #include <linux/pagemap.h>
@@ -201,11 +202,11 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
 void free_buffer_head(struct buffer_head * bh);
 void unlock_buffer(struct buffer_head *bh);
 void __lock_buffer(struct buffer_head *bh);
-void ll_rw_block(int, int, int, struct buffer_head * bh[]);
+void ll_rw_block(blk_opf_t, int, struct buffer_head * bh[]);
 int sync_dirty_buffer(struct buffer_head *bh);
-int __sync_dirty_buffer(struct buffer_head *bh, int op_flags);
-void write_dirty_buffer(struct buffer_head *bh, int op_flags);
-int submit_bh(int, int, struct buffer_head *);
+int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags);
+void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags);
+int submit_bh(blk_opf_t, struct buffer_head *);
 void write_boundary_block(struct block_device *bdev,
 			sector_t bblock, unsigned blocksize);
 int bh_uptodate_or_lock(struct buffer_head *bh);
@@ -258,14 +259,16 @@ static inline vm_fault_t block_page_mkwrite_return(int err)
 }
 sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
 int block_truncate_page(struct address_space *, loff_t, get_block_t *);
-int nobh_write_begin(struct address_space *, loff_t, unsigned len,
-		struct page **, void **, get_block_t*);
-int nobh_write_end(struct file *, struct address_space *,
-		loff_t, unsigned, unsigned,
-		struct page *, void *);
-int nobh_truncate_page(struct address_space *, loff_t, get_block_t *);
-int nobh_writepage(struct page *page, get_block_t *get_block,
-			struct writeback_control *wbc);
+
+#ifdef CONFIG_MIGRATION
+extern int buffer_migrate_folio(struct address_space *,
+		struct folio *dst, struct folio *src, enum migrate_mode);
+extern int buffer_migrate_folio_norefs(struct address_space *,
+		struct folio *dst, struct folio *src, enum migrate_mode);
+#else
+#define buffer_migrate_folio NULL
+#define buffer_migrate_folio_norefs NULL
+#endif
 void buffer_init(void);
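submit_bh() and friends now take a single blk_opf_t instead of a separate
op/op_flags pair, so call sites collapse the two arguments into one. A
synchronous-read sketch of the classic pattern (the helper name is
hypothetical; end_buffer_read_sync() comes from this same header's
surroundings):

    static int drv_read_bh_sync(struct buffer_head *bh)
    {
        lock_buffer(bh);
        if (buffer_uptodate(bh)) {
            unlock_buffer(bh);
            return 0;
        }
        get_bh(bh);
        bh->b_end_io = end_buffer_read_sync;
        submit_bh(REQ_OP_READ | REQ_PRIO, bh); /* was submit_bh(REQ_OP_READ, REQ_PRIO, bh) */
        wait_on_buffer(bh);
        return buffer_uptodate(bh) ? 0 : -EIO;
    }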
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index d4427d0a0e18..4bcf56b3491c 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -89,19 +89,32 @@ enum {
 	CGRP_ROOT_NS_DELEGATE	= (1 << 3),
 	/*
+	 * Reduce latencies on dynamic cgroup modifications such as task
+	 * migrations and controller on/offs by disabling percpu operation on
+	 * cgroup_threadgroup_rwsem. This makes hot path operations such as
+	 * forks and exits into the slow path and more expensive.
+	 *
+	 * The static usage pattern of creating a cgroup, enabling controllers,
+	 * and then seeding it with CLONE_INTO_CGROUP doesn't require write
+	 * locking cgroup_threadgroup_rwsem and thus doesn't benefit from
+	 * favordynmod.
+	 */
+	CGRP_ROOT_FAVOR_DYNMODS = (1 << 4),
+
+	/*
 	 * Enable cpuset controller in v1 cgroup to use v2 behavior.
 	 */
-	CGRP_ROOT_CPUSET_V2_MODE = (1 << 4),
+	CGRP_ROOT_CPUSET_V2_MODE = (1 << 16),
 	/*
 	 * Enable legacy local memory.events.
 	 */
-	CGRP_ROOT_MEMORY_LOCAL_EVENTS = (1 << 5),
+	CGRP_ROOT_MEMORY_LOCAL_EVENTS = (1 << 17),
 	/*
 	 * Enable recursive subtree protection
 	 */
-	CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 6),
+	CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 18),
 };
 /* cftype->flags */
@@ -288,6 +301,10 @@ struct css_set {
 struct cgroup_base_stat {
 	struct task_cputime cputime;
+
+#ifdef CONFIG_SCHED_CORE
+	u64 forceidle_sum;
+#endif
 };
 /*
@@ -476,7 +493,7 @@ struct cgroup {
 	struct work_struct release_agent_work;
 	/* used to track pressure stalls */
-	struct psi_group psi;
+	struct psi_group *psi;
 	/* used to store eBPF programs */
 	struct cgroup_bpf bpf;
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 0d1ada8968d7..ed53bfe7c46c 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -674,7 +674,7 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
 static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
 {
-	return &cgrp->psi;
+	return cgrp->psi;
 }
 bool cgroup_psi_enabled(void);
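For context: to the best of my reading of this series, CGRP_ROOT_FAVOR_DYNMODS
is selected through a new cgroup2 mount option named after the flag, along the
lines of "mount -t cgroup2 -o favordynmods none /sys/fs/cgroup"; the
renumbering above simply moves the existing v1-oriented flags into a higher
bit range to make room.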
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index 7a14807c9d1a..dcef4a9e4d63 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -10,71 +10,72 @@
 #include <asm/ptrace.h>
-#ifdef CONFIG_CONTEXT_TRACKING
-extern void context_tracking_cpu_set(int cpu);
+#ifdef CONFIG_CONTEXT_TRACKING_USER
+extern void ct_cpu_track_user(int cpu);
 /* Called with interrupts disabled. */
-extern void __context_tracking_enter(enum ctx_state state);
-extern void __context_tracking_exit(enum ctx_state state);
+extern void __ct_user_enter(enum ctx_state state);
+extern void __ct_user_exit(enum ctx_state state);
-extern void context_tracking_enter(enum ctx_state state);
-extern void context_tracking_exit(enum ctx_state state);
-extern void context_tracking_user_enter(void);
-extern void context_tracking_user_exit(void);
+extern void ct_user_enter(enum ctx_state state);
+extern void ct_user_exit(enum ctx_state state);
+
+extern void user_enter_callable(void);
+extern void user_exit_callable(void);
 static inline void user_enter(void)
 {
 	if (context_tracking_enabled())
-		context_tracking_enter(CONTEXT_USER);
+		ct_user_enter(CONTEXT_USER);
 }
 static inline void user_exit(void)
 {
 	if (context_tracking_enabled())
-		context_tracking_exit(CONTEXT_USER);
+		ct_user_exit(CONTEXT_USER);
 }
 /* Called with interrupts disabled. */
 static __always_inline void user_enter_irqoff(void)
 {
 	if (context_tracking_enabled())
-		__context_tracking_enter(CONTEXT_USER);
+		__ct_user_enter(CONTEXT_USER);
 }
 static __always_inline void user_exit_irqoff(void)
 {
 	if (context_tracking_enabled())
-		__context_tracking_exit(CONTEXT_USER);
+		__ct_user_exit(CONTEXT_USER);
 }
 static inline enum ctx_state exception_enter(void)
 {
 	enum ctx_state prev_ctx;
-	if (IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) ||
+	if (IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_USER_OFFSTACK) ||
 	    !context_tracking_enabled())
 		return 0;
-	prev_ctx = this_cpu_read(context_tracking.state);
+	prev_ctx = __ct_state();
 	if (prev_ctx != CONTEXT_KERNEL)
-		context_tracking_exit(prev_ctx);
+		ct_user_exit(prev_ctx);
 	return prev_ctx;
 }
 static inline void exception_exit(enum ctx_state prev_ctx)
 {
-	if (!IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) &&
+	if (!IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_USER_OFFSTACK) &&
 	    context_tracking_enabled()) {
 		if (prev_ctx != CONTEXT_KERNEL)
-			context_tracking_enter(prev_ctx);
+			ct_user_enter(prev_ctx);
 	}
 }
 static __always_inline bool context_tracking_guest_enter(void)
 {
 	if (context_tracking_enabled())
-		__context_tracking_enter(CONTEXT_GUEST);
+		__ct_user_enter(CONTEXT_GUEST);
 	return context_tracking_enabled_this_cpu();
 }
@@ -82,40 +83,56 @@ static __always_inline bool context_tracking_guest_enter(void)
 static __always_inline void context_tracking_guest_exit(void)
 {
 	if (context_tracking_enabled())
-		__context_tracking_exit(CONTEXT_GUEST);
+		__ct_user_exit(CONTEXT_GUEST);
 }
-/**
- * ct_state() - return the current context tracking state if known
- *
- * Returns the current cpu's context tracking state if context tracking
- * is enabled.  If context tracking is disabled, returns
- * CONTEXT_DISABLED. This should be used primarily for debugging.
- */
-static __always_inline enum ctx_state ct_state(void)
-{
-	return context_tracking_enabled() ?
-		this_cpu_read(context_tracking.state) : CONTEXT_DISABLED;
-}
+#define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond))
+
 #else
 static inline void user_enter(void) { }
 static inline void user_exit(void) { }
 static inline void user_enter_irqoff(void) { }
 static inline void user_exit_irqoff(void) { }
-static inline enum ctx_state exception_enter(void) { return 0; }
+static inline int exception_enter(void) { return 0; }
 static inline void exception_exit(enum ctx_state prev_ctx) { }
-static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; }
+static inline int ct_state(void) { return -1; }
 static __always_inline bool context_tracking_guest_enter(void) { return false; }
 static inline void context_tracking_guest_exit(void) { }
+#define CT_WARN_ON(cond) do { } while (0)
+#endif /* !CONFIG_CONTEXT_TRACKING_USER */
-#endif /* !CONFIG_CONTEXT_TRACKING */
-
-#define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond))
-
-#ifdef CONFIG_CONTEXT_TRACKING_FORCE
+#ifdef CONFIG_CONTEXT_TRACKING_USER_FORCE
 extern void context_tracking_init(void);
 #else
 static inline void context_tracking_init(void) { }
-#endif /* CONFIG_CONTEXT_TRACKING_FORCE */
+#endif /* CONFIG_CONTEXT_TRACKING_USER_FORCE */
+
+#ifdef CONFIG_CONTEXT_TRACKING_IDLE
+extern void ct_idle_enter(void);
+extern void ct_idle_exit(void);
+
+/*
+ * Is the current CPU in an extended quiescent state?
+ *
+ * No ordering, as we are sampling CPU-local information.
+ */
+static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void)
+{
+	return !(arch_atomic_read(this_cpu_ptr(&context_tracking.state)) & RCU_DYNTICKS_IDX);
+}
+
+/*
+ * Increment the current CPU's context_tracking structure's ->state field
+ * with ordering.  Return the new value.
+ */
+static __always_inline unsigned long ct_state_inc(int incby)
+{
+	return arch_atomic_add_return(incby, this_cpu_ptr(&context_tracking.state));
+}
+
+#else
+static inline void ct_idle_enter(void) { }
+static inline void ct_idle_exit(void) { }
+#endif /* !CONFIG_CONTEXT_TRACKING_IDLE */
 #endif
diff --git a/include/linux/context_tracking_irq.h b/include/linux/context_tracking_irq.h
new file mode 100644
index 000000000000..c50b5670c4a5
--- /dev/null
+++ b/include/linux/context_tracking_irq.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CONTEXT_TRACKING_IRQ_H
+#define _LINUX_CONTEXT_TRACKING_IRQ_H
+
+#ifdef CONFIG_CONTEXT_TRACKING_IDLE
+void ct_irq_enter(void);
+void ct_irq_exit(void);
+void ct_irq_enter_irqson(void);
+void ct_irq_exit_irqson(void);
+void ct_nmi_enter(void);
+void ct_nmi_exit(void);
+#else
+static inline void ct_irq_enter(void) { }
+static inline void ct_irq_exit(void) { }
+static inline void ct_irq_enter_irqson(void) { }
+static inline void ct_irq_exit_irqson(void) { }
+static inline void ct_nmi_enter(void) { }
+static inline void ct_nmi_exit(void) { }
+#endif
+
+#endif
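The new header gives irq/NMI entry hooks a home that does not drag in the rest
of the context-tracking machinery. A hedged sketch of how an arch NMI path
might use the renamed hooks (previously the rcu_nmi_*()/rcu_irq_*() names, per
the entry-common.h comment updates further down; the handler itself is
hypothetical):

    #include <linux/context_tracking_irq.h>

    static void drv_arch_nmi_handler(void)
    {
        ct_nmi_enter();   /* make sure RCU is watching this CPU */
        /* ... inspect hardware state, no sleeping ... */
        ct_nmi_exit();    /* restore the previous RCU mode */
    }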
diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h
index ae1e63e26947..4a4d56f77180 100644
--- a/include/linux/context_tracking_state.h
+++ b/include/linux/context_tracking_state.h
@@ -4,8 +4,28 @@
 #include <linux/percpu.h>
 #include <linux/static_key.h>
+#include <linux/context_tracking_irq.h>
+
+/* Offset to allow distinguishing irq vs. task-based idle entry/exit. */
+#define DYNTICK_IRQ_NONIDLE	((LONG_MAX / 2) + 1)
+
+enum ctx_state {
+	CONTEXT_DISABLED	= -1,	/* returned by ct_state() if unknown */
+	CONTEXT_KERNEL		= 0,
+	CONTEXT_IDLE		= 1,
+	CONTEXT_USER		= 2,
+	CONTEXT_GUEST		= 3,
+	CONTEXT_MAX		= 4,
+};
+
+/* Even value for idle, else odd. */
+#define RCU_DYNTICKS_IDX CONTEXT_MAX
+
+#define CT_STATE_MASK (CONTEXT_MAX - 1)
+#define CT_DYNTICKS_MASK (~CT_STATE_MASK)
 struct context_tracking {
+#ifdef CONFIG_CONTEXT_TRACKING_USER
 	/*
 	 * When active is false, probes are unset in order
 	 * to minimize overhead: TIF flags are cleared
@@ -14,18 +34,73 @@ struct context_tracking {
 	 */
 	bool active;
 	int recursion;
-	enum ctx_state {
-		CONTEXT_DISABLED = -1,	/* returned by ct_state() if unknown */
-		CONTEXT_KERNEL = 0,
-		CONTEXT_USER,
-		CONTEXT_GUEST,
-	} state;
+#endif
+#ifdef CONFIG_CONTEXT_TRACKING
+	atomic_t state;
+#endif
+#ifdef CONFIG_CONTEXT_TRACKING_IDLE
+	long dynticks_nesting;		/* Track process nesting level. */
+	long dynticks_nmi_nesting;	/* Track irq/NMI nesting level. */
+#endif
 };
 #ifdef CONFIG_CONTEXT_TRACKING
-extern struct static_key_false context_tracking_key;
 DECLARE_PER_CPU(struct context_tracking, context_tracking);
+static __always_inline int __ct_state(void)
+{
+	return arch_atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_STATE_MASK;
+}
+#endif
+
+#ifdef CONFIG_CONTEXT_TRACKING_IDLE
+static __always_inline int ct_dynticks(void)
+{
+	return atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_DYNTICKS_MASK;
+}
+
+static __always_inline int ct_dynticks_cpu(int cpu)
+{
+	struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu);
+
+	return atomic_read(&ct->state) & CT_DYNTICKS_MASK;
+}
+
+static __always_inline int ct_dynticks_cpu_acquire(int cpu)
+{
+	struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu);
+
+	return atomic_read_acquire(&ct->state) & CT_DYNTICKS_MASK;
+}
+
+static __always_inline long ct_dynticks_nesting(void)
+{
+	return __this_cpu_read(context_tracking.dynticks_nesting);
+}
+
+static __always_inline long ct_dynticks_nesting_cpu(int cpu)
+{
+	struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu);
+
+	return ct->dynticks_nesting;
+}
+
+static __always_inline long ct_dynticks_nmi_nesting(void)
+{
+	return __this_cpu_read(context_tracking.dynticks_nmi_nesting);
+}
+
+static __always_inline long ct_dynticks_nmi_nesting_cpu(int cpu)
+{
+	struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu);
+
+	return ct->dynticks_nmi_nesting;
+}
+#endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */
+
+#ifdef CONFIG_CONTEXT_TRACKING_USER
+extern struct static_key_false context_tracking_key;
+
 static __always_inline bool context_tracking_enabled(void)
 {
 	return static_branch_unlikely(&context_tracking_key);
@@ -41,15 +116,31 @@ static inline bool context_tracking_enabled_this_cpu(void)
 	return context_tracking_enabled() && __this_cpu_read(context_tracking.active);
 }
-static __always_inline bool context_tracking_in_user(void)
+/**
+ * ct_state() - return the current context tracking state if known
+ *
+ * Returns the current cpu's context tracking state if context tracking
+ * is enabled.  If context tracking is disabled, returns
+ * CONTEXT_DISABLED. This should be used primarily for debugging.
+ */
+static __always_inline int ct_state(void)
 {
-	return __this_cpu_read(context_tracking.state) == CONTEXT_USER;
+	int ret;
+
+	if (!context_tracking_enabled())
+		return CONTEXT_DISABLED;
+
+	preempt_disable();
+	ret = __ct_state();
+	preempt_enable();
+
+	return ret;
 }
+
 #else
-static __always_inline bool context_tracking_in_user(void) { return false; }
 static __always_inline bool context_tracking_enabled(void) { return false; }
 static __always_inline bool context_tracking_enabled_cpu(int cpu) { return false; }
 static __always_inline bool context_tracking_enabled_this_cpu(void) { return false; }
-#endif /* CONFIG_CONTEXT_TRACKING */
+#endif /* CONFIG_CONTEXT_TRACKING_USER */
 #endif
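ct_state() is the debug-facing accessor, while __ct_state() is the raw,
preemption-unsafe read used by the entry code itself. The typical consumer is
an assertion, as in this sketch of existing usage (not new API):

    /* e.g. in generic entry code, before returning to user space */
    CT_WARN_ON(ct_state() != CONTEXT_USER);

    /* or on the way in */
    CT_WARN_ON(ct_state() != CONTEXT_KERNEL);

CT_WARN_ON() folds in the static-key check, so the assertion only fires when
context tracking is actually enabled.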
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 19f0dbfdd7fe..f61447913db9 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -130,7 +130,6 @@ enum cpuhp_state {
 	CPUHP_ZCOMP_PREPARE,
 	CPUHP_TIMERS_PREPARE,
 	CPUHP_MIPS_SOC_PREPARE,
-	CPUHP_LOONGARCH_SOC_PREPARE,
 	CPUHP_BP_PREPARE_DYN,
 	CPUHP_BP_PREPARE_DYN_END	= CPUHP_BP_PREPARE_DYN + 20,
 	CPUHP_BRINGUP_CPU,
@@ -151,6 +150,7 @@ enum cpuhp_state {
 	CPUHP_AP_IRQ_BCM2836_STARTING,
 	CPUHP_AP_IRQ_MIPS_GIC_STARTING,
 	CPUHP_AP_IRQ_RISCV_STARTING,
+	CPUHP_AP_IRQ_LOONGARCH_STARTING,
 	CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
 	CPUHP_AP_ARM_MVEBU_COHERENCY,
 	CPUHP_AP_MICROCODE_LOADER,
@@ -230,6 +230,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
 	CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
 	CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE,
+	CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
 	CPUHP_AP_PERF_ARM_L2X0_ONLINE,
 	CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
 	CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index f5bba51480b2..c73e5e327e76 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -349,7 +349,7 @@ static inline void dont_mount(struct dentry *dentry)
 	spin_unlock(&dentry->d_lock);
 }
-extern void __d_lookup_done(struct dentry *);
+extern void __d_lookup_unhash_wake(struct dentry *dentry);
 static inline int d_in_lookup(const struct dentry *dentry)
 {
@@ -358,11 +358,8 @@ static inline int d_in_lookup(const struct dentry *dentry)
 static inline void d_lookup_done(struct dentry *dentry)
 {
-	if (unlikely(d_in_lookup(dentry))) {
-		spin_lock(&dentry->d_lock);
-		__d_lookup_done(dentry);
-		spin_unlock(&dentry->d_lock);
-	}
+	if (unlikely(d_in_lookup(dentry)))
+		__d_lookup_unhash_wake(dentry);
 }
 extern void dput(struct dentry *);
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 47a01c7cffdf..04c6acf7faaa 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -373,6 +373,12 @@ struct dm_target {
 	 * after returning DM_MAPIO_SUBMITTED from its map function.
 	 */
 	bool accounts_remapped_io:1;
+
+	/*
+	 * Set if the target will submit the DM bio without first calling
+	 * bio_set_dev(). NOTE: ideally a target should _not_ need this.
+	 */
+	bool needs_bio_set_dev:1;
 };
 void *dm_per_bio_data(struct bio *bio, size_t data_size);
@@ -561,7 +567,6 @@ void dm_sync_table(struct mapped_device *md);
  * Queries
  */
 sector_t dm_table_get_size(struct dm_table *t);
-unsigned int dm_table_get_num_targets(struct dm_table *t);
 fmode_t dm_table_get_mode(struct dm_table *t);
 struct mapped_device *dm_table_get_md(struct dm_table *t);
 const char *dm_table_device_name(struct dm_table *t);
diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h
index a52c6580cc9a..8e1c4ab5df04 100644
--- a/include/linux/dm-io.h
+++ b/include/linux/dm-io.h
@@ -13,6 +13,7 @@
 #ifdef __KERNEL__
 #include <linux/types.h>
+#include <linux/blk_types.h>
 struct dm_io_region {
 	struct block_device *bdev;
@@ -57,8 +58,7 @@ struct dm_io_notify {
  */
 struct dm_io_client;
 struct dm_io_request {
-	int bi_op;			/* REQ_OP */
-	int bi_op_flags;		/* req_flag_bits */
+	blk_opf_t	    bi_opf;	/* Request type and flags */
 	struct dm_io_memory mem;	/* Memory to use for io */
 	struct dm_io_notify notify;	/* Synchronous if notify.fn is NULL */
 	struct dm_io_client *client;	/* Client memory handler */
diff --git a/include/linux/dm-verity-loadpin.h b/include/linux/dm-verity-loadpin.h
new file mode 100644
index 000000000000..552b817ab102
--- /dev/null
+++ b/include/linux/dm-verity-loadpin.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __LINUX_DM_VERITY_LOADPIN_H
+#define __LINUX_DM_VERITY_LOADPIN_H
+
+#include <linux/list.h>
+
+struct block_device;
+
+extern struct list_head dm_verity_loadpin_trusted_root_digests;
+
+struct dm_verity_loadpin_trusted_root_digest {
+	struct list_head node;
+	unsigned int len;
+	u8 data[];
+};
+
+#if IS_ENABLED(CONFIG_SECURITY_LOADPIN_VERITY)
+bool dm_verity_loadpin_is_bdev_trusted(struct block_device *bdev);
+#else
+static inline bool dm_verity_loadpin_is_bdev_trusted(struct block_device *bdev)
+{
+	return false;
+}
+#endif
+
+#endif /* __LINUX_DM_VERITY_LOADPIN_H */
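With bi_op/bi_op_flags merged, a dm_io() caller now fills a single bi_opf
field. A synchronous-read sketch under that assumption (client setup is
omitted and the names are hypothetical; dm_io() itself is declared elsewhere
in this header):

    static int drv_read_superblock(struct dm_io_client *client,
                                   struct dm_dev *dev, void *buf)
    {
        struct dm_io_region where = {
            .bdev   = dev->bdev,
            .sector = 0,
            .count  = 8,                       /* 4 KiB at the start */
        };
        struct dm_io_request req = {
            .bi_opf        = REQ_OP_READ | REQ_META, /* one field now */
            .mem.type      = DM_IO_KMEM,
            .mem.ptr.addr  = buf,
            .notify.fn     = NULL,             /* synchronous */
            .client        = client,
        };

        return dm_io(&req, 1, &where, NULL);
    }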
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 7d9b0bb47eb3..d2b84c2fec39 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -872,6 +872,7 @@ static inline bool efi_rt_services_supported(unsigned int mask)
 {
 	return (efi.runtime_supported_mask & mask) == mask;
 }
+extern void efi_find_mirror(void);
 #else
 static inline bool efi_enabled(int feature)
 {
@@ -889,6 +890,8 @@ static inline bool efi_rt_services_supported(unsigned int mask)
 {
 	return false;
 }
+
+static inline void efi_find_mirror(void) {}
 #endif
 extern int efi_status_to_err(efi_status_t status);
@@ -1027,29 +1030,6 @@ struct efivars {
 #define EFI_VAR_NAME_LEN	1024
-struct efi_variable {
-	efi_char16_t  VariableName[EFI_VAR_NAME_LEN/sizeof(efi_char16_t)];
-	efi_guid_t    VendorGuid;
-	unsigned long DataSize;
-	__u8          Data[1024];
-	efi_status_t  Status;
-	__u32         Attributes;
-} __attribute__((packed));
-
-struct efivar_entry {
-	struct efi_variable var;
-	struct list_head list;
-	struct kobject kobj;
-	bool scanning;
-	bool deleting;
-};
-
-static inline void
-efivar_unregister(struct efivar_entry *var)
-{
-	kobject_put(&var->kobj);
-}
-
 int efivars_register(struct efivars *efivars,
 		     const struct efivar_operations *ops,
 		     struct kobject *kobject);
@@ -1057,43 +1037,26 @@ int efivars_unregister(struct efivars *efivars);
 struct kobject *efivars_kobject(void);
 int efivar_supports_writes(void);
-int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *),
-		void *data, bool duplicates, struct list_head *head);
-
-int efivar_entry_add(struct efivar_entry *entry, struct list_head *head);
-int efivar_entry_remove(struct efivar_entry *entry);
-
-int __efivar_entry_delete(struct efivar_entry *entry);
-int efivar_entry_delete(struct efivar_entry *entry);
-
-int efivar_entry_size(struct efivar_entry *entry, unsigned long *size);
-int __efivar_entry_get(struct efivar_entry *entry, u32 *attributes,
-		       unsigned long *size, void *data);
-int efivar_entry_get(struct efivar_entry *entry, u32 *attributes,
-		     unsigned long *size, void *data);
-int efivar_entry_set(struct efivar_entry *entry, u32 attributes,
-		     unsigned long size, void *data, struct list_head *head);
-int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes,
-			      unsigned long *size, void *data, bool *set);
-int efivar_entry_set_safe(efi_char16_t *name, efi_guid_t vendor, u32 attributes,
-			  bool block, unsigned long size, void *data);
-
-int efivar_entry_iter_begin(void);
-void efivar_entry_iter_end(void);
-
-int __efivar_entry_iter(int (*func)(struct efivar_entry *, void *),
-			struct list_head *head, void *data,
-			struct efivar_entry **prev);
-int efivar_entry_iter(int (*func)(struct efivar_entry *, void *),
-		      struct list_head *head, void *data);
-
-struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid,
-				       struct list_head *head, bool remove);
-
-bool efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data,
-		     unsigned long data_size);
-bool efivar_variable_is_removable(efi_guid_t vendor, const char *name,
-				  size_t len);
+
+int efivar_lock(void);
+int efivar_trylock(void);
+void efivar_unlock(void);
+
+efi_status_t efivar_get_variable(efi_char16_t *name, efi_guid_t *vendor,
+				 u32 *attr, unsigned long *size, void *data);
+
+efi_status_t efivar_get_next_variable(unsigned long *name_size,
+				      efi_char16_t *name, efi_guid_t *vendor);
+
+efi_status_t efivar_set_variable_locked(efi_char16_t *name, efi_guid_t *vendor,
+					u32 attr, unsigned long data_size,
+					void *data, bool nonblocking);
+
+efi_status_t efivar_set_variable(efi_char16_t *name, efi_guid_t *vendor,
+				 u32 attr, unsigned long data_size, void *data);
+
+efi_status_t check_var_size(u32 attributes, unsigned long size);
+efi_status_t check_var_size_nonblocking(u32 attributes, unsigned long size);
 #if IS_ENABLED(CONFIG_EFI_CAPSULE_LOADER)
 extern bool efi_capsule_pending(int *reset_type);
@@ -1181,6 +1144,8 @@ static inline void efi_check_for_embedded_firmwares(void) { }
 efi_status_t efi_random_get_seed(void);
+#define arch_efi_call_virt(p, f, args...)	((p)->f(args))
+
 /*
  * Arch code can implement the following three template macros, avoiding
  * reptition for the void/non-void return cases of {__,}efi_call_virt():
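The efivar_entry machinery moves out of the header in favor of thin,
lock-explicit wrappers around the variable services. A read sketch under the
new API (the variable name and buffer are hypothetical, efi_char16_t strings
are UCS-2 hence the array literal, and pairing efivar_lock() with a read is my
assumption from how the new efivarfs code appears to use it):

    static efi_status_t drv_read_var(void *buf, unsigned long *size)
    {
        static efi_char16_t name[] = { 'M', 'y', 'V', 'a', 'r', 0 };
        efi_guid_t vendor = EFI_GLOBAL_VARIABLE_GUID;
        efi_status_t status;
        u32 attr;

        if (efivar_lock())          /* serializes against concurrent writers */
            return EFI_NOT_READY;
        status = efivar_get_variable(name, &vendor, &attr, size, buf);
        efivar_unlock();
        return status;
    }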
* * EM_PERF_DOMAIN_SKIP_INEFFICIENCIES: Skip inefficient states when estimating @@ -71,7 +71,7 @@ struct em_perf_domain { * EM_PERF_DOMAIN_ARTIFICIAL: The power values are artificial and might be * created by platform missing real power information */ -#define EM_PERF_DOMAIN_MILLIWATTS BIT(0) +#define EM_PERF_DOMAIN_MICROWATTS BIT(0) #define EM_PERF_DOMAIN_SKIP_INEFFICIENCIES BIT(1) #define EM_PERF_DOMAIN_ARTIFICIAL BIT(2) @@ -79,22 +79,44 @@ struct em_perf_domain { #define em_is_artificial(em) ((em)->flags & EM_PERF_DOMAIN_ARTIFICIAL) #ifdef CONFIG_ENERGY_MODEL -#define EM_MAX_POWER 0xFFFF +/* + * The max power value in micro-Watts. The limit of 64 Watts is set as + * a safety net to avoid overflowing multiplications on 32bit platforms. The + * 32bit limit on total Perf Domain power implies a maximum of 64 CPUs in + * such a domain. + */ +#define EM_MAX_POWER (64000000) /* 64 Watts */ + +/* + * To avoid possible energy estimation overflow on 32bit machines, add + * a limit on the number of CPUs in the Perf. Domain. + * 64bit machines are safe, thus the limit can be a big number. + */ +#ifdef CONFIG_64BIT +#define EM_MAX_NUM_CPUS 4096 +#else +#define EM_MAX_NUM_CPUS 16 +#endif /* - * Increase resolution of energy estimation calculations for 64-bit - * architectures. The extra resolution improves decision made by EAS for the - * task placement when two Performance Domains might provide similar energy - * estimation values (w/o better resolution the values could be equal). + * To avoid an overflow on 32bit machines while calculating the energy, + * use a different order of operations. First divide by the 'scale_cpu', + * which reduces the big value stored in the 'cost' field, then multiply by + * the 'sum_util'. This allows handling existing platforms, which have + * e.g. power ~1.3 Watt at max freq, so a 'cost' value > 1 million micro-Watts. + * In such a scenario, where there are 4 CPUs in the Perf. Domain, the + * 'sum_util' could be 4096, and the multiplication 'cost' * 'sum_util' would + * overflow. This reordering of operations has a limitation: we lose a little + * precision in the estimation (compared to a 64bit platform w/o reordering). * - * We increase resolution only if we have enough bits to allow this increased - * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit - * are pretty high and the returns do not justify the increased costs. + * 64bit machines are safe. */ #ifdef CONFIG_64BIT -#define em_scale_power(p) ((p) * 1000) +#define em_estimate_energy(cost, sum_util, scale_cpu) \ + (((cost) * (sum_util)) / (scale_cpu)) #else -#define em_scale_power(p) (p) +#define em_estimate_energy(cost, sum_util, scale_cpu) \ + (((cost) / (scale_cpu)) * (sum_util)) #endif struct em_data_callback { @@ -112,7 +134,7 @@ struct em_data_callback { * and frequency. * * In case of CPUs, the power is the one of a single CPU in the domain, - * expressed in milli-Watts or an abstract scale. It is expected to + * expressed in micro-Watts or an abstract scale. It is expected to * fit in the [0, EM_MAX_POWER] range. * * Return 0 on success. 
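To make the divide-first ordering used by em_estimate_energy() above concrete, here is a small standalone C sketch, illustrative only and not part of the kernel, with the hypothetical numbers from the comment (power ~1.3 W, 4 CPUs at full utilization):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical Perf Domain values, as in the comment above */
	uint32_t cost = 1300000;	/* ~1.3 W at max freq, in micro-Watts */
	uint32_t sum_util = 4096;
	uint32_t scale_cpu = 1024;

	/* 64bit order: multiply first; cost * sum_util is ~5.3e9, which
	 * exceeds UINT32_MAX and only works held in a 64-bit intermediate */
	uint64_t e64 = ((uint64_t)cost * sum_util) / scale_cpu;

	/* 32bit order: divide first so every intermediate fits in 32 bits;
	 * cost / scale_cpu truncates from 1269.53 to 1269 */
	uint32_t e32 = (cost / scale_cpu) * sum_util;

	printf("multiply-first: %llu, divide-first: %u\n",
	       (unsigned long long)e64, e32);
	return 0;
}

With these inputs the multiply-first order yields 5200000 while the divide-first order yields 5197824, a ~0.04% difference, which is the small precision loss the comment refers to.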
@@ -148,7 +170,7 @@ struct em_perf_domain *em_cpu_get(int cpu); struct em_perf_domain *em_pd_get(struct device *dev); int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, struct em_data_callback *cb, cpumask_t *span, - bool milliwatts); + bool microwatts); void em_dev_unregister_perf_domain(struct device *dev); /** @@ -273,7 +295,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * pd_nrg = ------------------------ (4) * scale_cpu */ - return ps->cost * sum_util / scale_cpu; + return em_estimate_energy(ps->cost, sum_util, scale_cpu); } /** @@ -297,7 +319,7 @@ struct em_data_callback {}; static inline int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, struct em_data_callback *cb, cpumask_t *span, - bool milliwatts) + bool microwatts) { return -EINVAL; } diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index c92ac75d6556..84a466b176cf 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -357,7 +357,7 @@ void irqentry_exit_to_user_mode(struct pt_regs *regs); /** * struct irqentry_state - Opaque object for exception state storage * @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the - * exit path has to invoke rcu_irq_exit(). + * exit path has to invoke ct_irq_exit(). * @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that * lockdep state is restored correctly on exit from nmi. * @@ -395,12 +395,12 @@ typedef struct irqentry_state { * * For kernel mode entries RCU handling is done conditional. If RCU is * watching then the only RCU requirement is to check whether the tick has - * to be restarted. If RCU is not watching then rcu_irq_enter() has to be - * invoked on entry and rcu_irq_exit() on exit. + * to be restarted. If RCU is not watching then ct_irq_enter() has to be + * invoked on entry and ct_irq_exit() on exit. * - * Avoiding the rcu_irq_enter/exit() calls is an optimization but also + * Avoiding the ct_irq_enter/exit() calls is an optimization but also * solves the problem of kernel mode pagefaults which can schedule, which - * is not possible after invoking rcu_irq_enter() without undoing it. + * is not possible after invoking ct_irq_enter() without undoing it. * * For user mode entries irqentry_enter_from_user_mode() is invoked to * establish the proper context for NOHZ_FULL. 
Otherwise scheduling on exit diff --git a/include/linux/evm.h b/include/linux/evm.h index 4c374be70247..aa63e0b3c0a2 100644 --- a/include/linux/evm.h +++ b/include/linux/evm.h @@ -21,7 +21,8 @@ extern enum integrity_status evm_verifyxattr(struct dentry *dentry, void *xattr_value, size_t xattr_value_len, struct integrity_iint_cache *iint); -extern int evm_inode_setattr(struct dentry *dentry, struct iattr *attr); +extern int evm_inode_setattr(struct user_namespace *mnt_userns, + struct dentry *dentry, struct iattr *attr); extern void evm_inode_post_setattr(struct dentry *dentry, int ia_valid); extern int evm_inode_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, const char *name, @@ -68,7 +69,8 @@ static inline enum integrity_status evm_verifyxattr(struct dentry *dentry, } #endif -static inline int evm_inode_setattr(struct dentry *dentry, struct iattr *attr) +static inline int evm_inode_setattr(struct user_namespace *mnt_userns, + struct dentry *dentry, struct iattr *attr) { return 0; } diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index e517dbcf74ed..8ad743def6f3 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -59,15 +59,19 @@ #define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \ FAN_MARK_FILESYSTEM) +#define FANOTIFY_MARK_CMD_BITS (FAN_MARK_ADD | FAN_MARK_REMOVE | \ + FAN_MARK_FLUSH) + +#define FANOTIFY_MARK_IGNORE_BITS (FAN_MARK_IGNORED_MASK | \ + FAN_MARK_IGNORE) + #define FANOTIFY_MARK_FLAGS (FANOTIFY_MARK_TYPE_BITS | \ - FAN_MARK_ADD | \ - FAN_MARK_REMOVE | \ + FANOTIFY_MARK_CMD_BITS | \ + FANOTIFY_MARK_IGNORE_BITS | \ FAN_MARK_DONT_FOLLOW | \ FAN_MARK_ONLYDIR | \ - FAN_MARK_IGNORED_MASK | \ FAN_MARK_IGNORED_SURV_MODIFY | \ - FAN_MARK_EVICTABLE | \ - FAN_MARK_FLUSH) + FAN_MARK_EVICTABLE) /* * Events that can be reported with data type FSNOTIFY_EVENT_PATH. diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 1ec73d5352c3..cbde3b1fa414 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -34,6 +34,7 @@ #define PM_API_VERSION_2 2 /* ATF only commands */ +#define TF_A_PM_REGISTER_SGI 0xa04 #define PM_GET_TRUSTZONE_VERSION 0xa03 #define PM_SET_SUSPEND_MODE 0xa02 #define GET_CALLBACK_DATA 0xa01 @@ -468,6 +469,7 @@ int zynqmp_pm_feature(const u32 api_id); int zynqmp_pm_is_function_supported(const u32 api_id, const u32 id); int zynqmp_pm_set_feature_config(enum pm_feature_config_id id, u32 value); int zynqmp_pm_get_feature_config(enum pm_feature_config_id id, u32 *payload); +int zynqmp_pm_register_sgi(u32 sgi_num, u32 reset); #else static inline int zynqmp_pm_get_api_version(u32 *version) { @@ -733,6 +735,11 @@ static inline int zynqmp_pm_get_feature_config(enum pm_feature_config_id id, { return -ENODEV; } + +static inline int zynqmp_pm_register_sgi(u32 sgi_num, u32 reset) +{ + return -ENODEV; +} #endif #endif /* __FIRMWARE_ZYNQMP_H__ */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 9ad5e3520fae..cc64873d76c5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -180,6 +180,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File supports async buffered reads */ #define FMODE_BUF_RASYNC ((__force fmode_t)0x40000000) +/* File supports async nowait buffered writes */ +#define FMODE_BUF_WASYNC ((__force fmode_t)0x80000000) + /* * Attribute flags. These should be or-ed together to figure out what * has been changed! 
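As a brief sketch of how callers or these attribute flags together (a hypothetical helper; notify_change() and the ATTR_* constants are the existing VFS interfaces of this era, and the iattr layout they fill in follows in the next hunk):

/* Hypothetical chown-like helper: ia_valid flags which fields are used. */
static int example_set_owner(struct user_namespace *mnt_userns,
			     struct dentry *dentry, kuid_t uid, kgid_t gid)
{
	struct iattr attr = {
		.ia_valid = ATTR_UID | ATTR_GID, /* only these are consulted */
		.ia_uid = uid,
		.ia_gid = gid,
	};

	/* the caller must hold the inode lock around notify_change() */
	return notify_change(mnt_userns, dentry, &attr, NULL);
}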
@@ -221,8 +224,26 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, struct iattr { unsigned int ia_valid; umode_t ia_mode; - kuid_t ia_uid; - kgid_t ia_gid; + /* + * The two anonymous unions wrap structures with the same member. + * + * Filesystems raising FS_ALLOW_IDMAP need to use ia_vfs{g,u}id which + * are dedicated types requiring the filesystem to use the dedicated + * helpers. Other filesystems can continue to use ia_{g,u}id until they + * have been ported. + * + * They always contain the same value. In other words, filesystems + * raising FS_ALLOW_IDMAP pass down the same value on idmapped mounts + * as they would on regular mounts. + */ + union { + kuid_t ia_uid; + vfsuid_t ia_vfsuid; + }; + union { + kgid_t ia_gid; + vfsgid_t ia_vfsgid; + }; loff_t ia_size; struct timespec64 ia_atime; struct timespec64 ia_mtime; @@ -362,13 +383,11 @@ struct address_space_operations { void (*free_folio)(struct folio *folio); ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); /* - * migrate the contents of a page to the specified target. If + * migrate the contents of a folio to the specified target. If * migrate_mode is MIGRATE_ASYNC, it must not block. */ - int (*migratepage) (struct address_space *, - struct page *, struct page *, enum migrate_mode); - bool (*isolate_page)(struct page *, isolate_mode_t); - void (*putback_page)(struct page *); + int (*migrate_folio)(struct address_space *, struct folio *dst, + struct folio *src, enum migrate_mode); int (*launder_folio)(struct folio *); bool (*is_partially_uptodate) (struct folio *, size_t from, size_t count); @@ -924,9 +943,10 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) struct file { union { - struct llist_node fu_llist; - struct rcu_head fu_rcuhead; - } f_u; + struct llist_node f_llist; + struct rcu_head f_rcuhead; + unsigned int f_iocb_flags; + }; struct path f_path; struct inode *f_inode; /* cached value */ const struct file_operations *f_op; @@ -1600,13 +1620,68 @@ static inline void i_gid_write(struct inode *inode, gid_t gid) * @mnt_userns: user namespace of the mount the inode was found from * @inode: inode to map * + * Note, this will eventually be removed completely in favor of the type-safe + * i_uid_into_vfsuid(). + * * Return: the inode's i_uid mapped down according to @mnt_userns. * If the inode's i_uid has no mapping INVALID_UID is returned. */ static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns, const struct inode *inode) { - return mapped_kuid_fs(mnt_userns, i_user_ns(inode), inode->i_uid); + return AS_KUIDT(make_vfsuid(mnt_userns, i_user_ns(inode), inode->i_uid)); +} + +/** + * i_uid_into_vfsuid - map an inode's i_uid down into a mnt_userns + * @mnt_userns: user namespace of the mount the inode was found from + * @inode: inode to map + * + * Return: the inode's i_uid mapped down according to @mnt_userns. + * If the inode's i_uid has no mapping INVALID_VFSUID is returned. + */ +static inline vfsuid_t i_uid_into_vfsuid(struct user_namespace *mnt_userns, + const struct inode *inode) +{ + return make_vfsuid(mnt_userns, i_user_ns(inode), inode->i_uid); +} + +/** + * i_uid_needs_update - check whether inode's i_uid needs to be updated + * @mnt_userns: user namespace of the mount the inode was found from + * @attr: the new attributes of @inode + * @inode: the inode to update + * + * Check whether the @inode's i_uid field needs to be updated taking idmapped + * mounts into account if the filesystem supports it. 
+ * + * Return: true if @inode's i_uid field needs to be updated, false if not. + */ +static inline bool i_uid_needs_update(struct user_namespace *mnt_userns, + const struct iattr *attr, + const struct inode *inode) +{ + return ((attr->ia_valid & ATTR_UID) && + !vfsuid_eq(attr->ia_vfsuid, + i_uid_into_vfsuid(mnt_userns, inode))); +} + +/** + * i_uid_update - update @inode's i_uid field + * @mnt_userns: user namespace of the mount the inode was found from + * @attr: the new attributes of @inode + * @inode: the inode to update + * + * Safely update @inode's i_uid field translating the vfsuid of any idmapped + * mount into the filesystem kuid. + */ +static inline void i_uid_update(struct user_namespace *mnt_userns, + const struct iattr *attr, + struct inode *inode) +{ + if (attr->ia_valid & ATTR_UID) + inode->i_uid = from_vfsuid(mnt_userns, i_user_ns(inode), + attr->ia_vfsuid); } /** @@ -1614,13 +1689,68 @@ static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns, * @mnt_userns: user namespace of the mount the inode was found from * @inode: inode to map * + * Note, this will eventually be removed completely in favor of the type-safe + * i_gid_into_vfsgid(). + * * Return: the inode's i_gid mapped down according to @mnt_userns. * If the inode's i_gid has no mapping INVALID_GID is returned. */ static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns, const struct inode *inode) { - return mapped_kgid_fs(mnt_userns, i_user_ns(inode), inode->i_gid); + return AS_KGIDT(make_vfsgid(mnt_userns, i_user_ns(inode), inode->i_gid)); +} + +/** + * i_gid_into_vfsgid - map an inode's i_gid down into a mnt_userns + * @mnt_userns: user namespace of the mount the inode was found from + * @inode: inode to map + * + * Return: the inode's i_gid mapped down according to @mnt_userns. + * If the inode's i_gid has no mapping INVALID_VFSGID is returned. + */ +static inline vfsgid_t i_gid_into_vfsgid(struct user_namespace *mnt_userns, + const struct inode *inode) +{ + return make_vfsgid(mnt_userns, i_user_ns(inode), inode->i_gid); +} + +/** + * i_gid_needs_update - check whether inode's i_gid needs to be updated + * @mnt_userns: user namespace of the mount the inode was found from + * @attr: the new attributes of @inode + * @inode: the inode to update + * + * Check whether the @inode's i_gid field needs to be updated taking idmapped + * mounts into account if the filesystem supports it. + * + * Return: true if @inode's i_gid field needs to be updated, false if not. + */ +static inline bool i_gid_needs_update(struct user_namespace *mnt_userns, + const struct iattr *attr, + const struct inode *inode) +{ + return ((attr->ia_valid & ATTR_GID) && + !vfsgid_eq(attr->ia_vfsgid, + i_gid_into_vfsgid(mnt_userns, inode))); +} + +/** + * i_gid_update - update @inode's i_gid field + * @mnt_userns: user namespace of the mount the inode was found from + * @attr: the new attributes of @inode + * @inode: the inode to update + * + * Safely update @inode's i_gid field translating the vfsgid of any idmapped + * mount into the filesystem kgid. 
+ */ +static inline void i_gid_update(struct user_namespace *mnt_userns, + const struct iattr *attr, + struct inode *inode) +{ + if (attr->ia_valid & ATTR_GID) + inode->i_gid = from_vfsgid(mnt_userns, i_user_ns(inode), + attr->ia_vfsgid); } /** @@ -2195,17 +2325,15 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags static inline bool HAS_UNMAPPED_ID(struct user_namespace *mnt_userns, struct inode *inode) { - return !uid_valid(i_uid_into_mnt(mnt_userns, inode)) || - !gid_valid(i_gid_into_mnt(mnt_userns, inode)); + return !vfsuid_valid(i_uid_into_vfsuid(mnt_userns, inode)) || + !vfsgid_valid(i_gid_into_vfsgid(mnt_userns, inode)); } -static inline int iocb_flags(struct file *file); - static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) { *kiocb = (struct kiocb) { .ki_filp = filp, - .ki_flags = iocb_flags(filp), + .ki_flags = filp->f_iocb_flags, .ki_ioprio = get_current_ioprio(), }; } @@ -2387,6 +2515,7 @@ static inline void file_accessed(struct file *file) } extern int file_modified(struct file *file); +int kiocb_modified(struct kiocb *iocb); int sync_inode_metadata(struct inode *inode, int wait); @@ -2720,6 +2849,12 @@ extern int vfs_fsync(struct file *file, int datasync); extern int sync_file_range(struct file *file, loff_t offset, loff_t nbytes, unsigned int flags); +static inline bool iocb_is_dsync(const struct kiocb *iocb) +{ + return (iocb->ki_flags & IOCB_DSYNC) || + IS_SYNC(iocb->ki_filp->f_mapping->host); +} + /* * Sync the bytes written if this was a synchronous write. Expect ki_pos * to already be updated for the write, and will return either the amount @@ -2727,7 +2862,7 @@ extern int sync_file_range(struct file *file, loff_t offset, loff_t nbytes, */ static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count) { - if (iocb->ki_flags & IOCB_DSYNC) { + if (iocb_is_dsync(iocb)) { int ret = vfs_fsync_range(iocb->ki_filp, iocb->ki_pos - count, iocb->ki_pos - 1, (iocb->ki_flags & IOCB_SYNC) ? 
0 : 1); @@ -3022,7 +3157,7 @@ extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); extern loff_t noop_llseek(struct file *file, loff_t offset, int whence); -extern loff_t no_llseek(struct file *file, loff_t offset, int whence); +#define no_llseek NULL extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence); extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, @@ -3215,18 +3350,6 @@ extern int generic_check_addressable(unsigned, u64); extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry); -#ifdef CONFIG_MIGRATION -extern int buffer_migrate_page(struct address_space *, - struct page *, struct page *, - enum migrate_mode); -extern int buffer_migrate_page_norefs(struct address_space *, - struct page *, struct page *, - enum migrate_mode); -#else -#define buffer_migrate_page NULL -#define buffer_migrate_page_norefs NULL -#endif - int may_setattr(struct user_namespace *mnt_userns, struct inode *inode, unsigned int ia_valid); int setattr_prepare(struct user_namespace *, struct dentry *, struct iattr *); @@ -3262,7 +3385,7 @@ static inline int iocb_flags(struct file *file) res |= IOCB_APPEND; if (file->f_flags & O_DIRECT) res |= IOCB_DIRECT; - if ((file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host)) + if (file->f_flags & O_DSYNC) res |= IOCB_DSYNC; if (file->f_flags & __O_SYNC) res |= IOCB_SYNC; diff --git a/include/linux/fscache.h b/include/linux/fscache.h index b86265664879..720874e6ee94 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -379,7 +379,7 @@ void fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data, * * Request that the size of an object be changed. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 9560734759fa..d7d96c806bff 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -518,8 +518,8 @@ struct fsnotify_mark { struct hlist_node obj_list; /* Head of list of marks for an object [mark ref] */ struct fsnotify_mark_connector *connector; - /* Events types to ignore [mark->lock, group->mark_mutex] */ - __u32 ignored_mask; + /* Event types and flags to ignore [mark->lock, group->mark_mutex] */ + __u32 ignore_mask; /* General fsnotify mark flags */ #define FSNOTIFY_MARK_FLAG_ALIVE 0x0001 #define FSNOTIFY_MARK_FLAG_ATTACHED 0x0002 @@ -529,6 +529,7 @@ struct fsnotify_mark { /* fanotify mark flags */ #define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x0100 #define FSNOTIFY_MARK_FLAG_NO_IREF 0x0200 +#define FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS 0x0400 unsigned int flags; /* flags [mark->lock] */ }; @@ -655,15 +656,91 @@ extern void fsnotify_remove_queued_event(struct fsnotify_group *group, /* functions used to manipulate the marks attached to inodes */ -/* Get mask for calculating object interest taking ignored mask into account */ +/* + * Canonical "ignore mask" including event flags. + * + * Note the subtle semantic difference from the legacy ->ignored_mask. + * ->ignored_mask traditionally only meant which events should be ignored, + * while ->ignore_mask also includes flags regarding the type of objects on + * which events should be ignored. 
+ */ +static inline __u32 fsnotify_ignore_mask(struct fsnotify_mark *mark) +{ + __u32 ignore_mask = mark->ignore_mask; + + /* The event flags in ignore mask take effect */ + if (mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS) + return ignore_mask; + + /* + * Legacy behavior: + * - Always ignore events on dir + * - Ignore events on child if parent is watching children + */ + ignore_mask |= FS_ISDIR; + ignore_mask &= ~FS_EVENT_ON_CHILD; + ignore_mask |= mark->mask & FS_EVENT_ON_CHILD; + + return ignore_mask; +} + +/* Legacy ignored_mask - only event types to ignore */ +static inline __u32 fsnotify_ignored_events(struct fsnotify_mark *mark) +{ + return mark->ignore_mask & ALL_FSNOTIFY_EVENTS; +} + +/* + * Check if mask (or ignore mask) should be applied depending on whether the + * victim is a directory and whether it is reported to a watching parent. + */ +static inline bool fsnotify_mask_applicable(__u32 mask, bool is_dir, + int iter_type) +{ + /* Should mask be applied to a directory? */ + if (is_dir && !(mask & FS_ISDIR)) + return false; + + /* Should mask be applied to a child? */ + if (iter_type == FSNOTIFY_ITER_TYPE_PARENT && + !(mask & FS_EVENT_ON_CHILD)) + return false; + + return true; +} + +/* + * Effective ignore mask taking into account whether the event victim is a + * directory and whether it is reported to a watching parent. + */ +static inline __u32 fsnotify_effective_ignore_mask(struct fsnotify_mark *mark, + bool is_dir, int iter_type) +{ + __u32 ignore_mask = fsnotify_ignored_events(mark); + + if (!ignore_mask) + return 0; + + /* For non-dir and non-child, no need to consult the event flags */ + if (!is_dir && iter_type != FSNOTIFY_ITER_TYPE_PARENT) + return ignore_mask; + + ignore_mask = fsnotify_ignore_mask(mark); + if (!fsnotify_mask_applicable(ignore_mask, is_dir, iter_type)) + return 0; + + return ignore_mask & ALL_FSNOTIFY_EVENTS; +} + +/* Get mask for calculating object interest taking ignore mask into account */ static inline __u32 fsnotify_calc_mask(struct fsnotify_mark *mark) { __u32 mask = mark->mask; - if (!mark->ignored_mask) + if (!fsnotify_ignored_events(mark)) return mask; - /* Interest in FS_MODIFY may be needed for clearing ignored mask */ + /* Interest in FS_MODIFY may be needed for clearing ignore mask */ if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) mask |= FS_MODIFY; @@ -671,7 +748,7 @@ static inline __u32 fsnotify_calc_mask(struct fsnotify_mark *mark) * If mark is interested in ignoring events on children, the object must * show interest in those events for fsnotify_parent() to notice it. 
*/ - return mask | (mark->ignored_mask & ALL_FSNOTIFY_EVENTS); + return mask | mark->ignore_mask; } /* Get mask of events for a list of marks */ diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 2d2ccae933c2..0ace7759acd2 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -348,7 +348,7 @@ struct vm_area_struct; #define GFP_DMA32 __GFP_DMA32 #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE | \ - __GFP_SKIP_KASAN_POISON) + __GFP_SKIP_KASAN_POISON | __GFP_SKIP_KASAN_UNPOISON) #define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM) #define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM) diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 54c3c6506503..6aeea1071b1b 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -12,6 +12,8 @@ #include <linux/property.h> #include <linux/types.h> +#include <asm/msi.h> + struct gpio_desc; struct of_phandle_args; struct device_node; @@ -23,6 +25,13 @@ enum gpio_lookup_flags; struct gpio_chip; +union gpio_irq_fwspec { + struct irq_fwspec fwspec; +#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN + msi_alloc_info_t msiinfo; +#endif +}; + #define GPIO_LINE_DIRECTION_IN 1 #define GPIO_LINE_DIRECTION_OUT 0 @@ -103,9 +112,10 @@ struct gpio_irq_chip { * variant named &gpiochip_populate_parent_fwspec_fourcell is also * available. */ - void *(*populate_parent_alloc_arg)(struct gpio_chip *gc, - unsigned int parent_hwirq, - unsigned int parent_type); + int (*populate_parent_alloc_arg)(struct gpio_chip *gc, + union gpio_irq_fwspec *fwspec, + unsigned int parent_hwirq, + unsigned int parent_type); /** * @child_offset_to_irq: @@ -649,28 +659,14 @@ struct bgpio_pdata { #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY -void *gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *gc, +int gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *gc, + union gpio_irq_fwspec *gfwspec, + unsigned int parent_hwirq, + unsigned int parent_type); +int gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *gc, + union gpio_irq_fwspec *gfwspec, unsigned int parent_hwirq, unsigned int parent_type); -void *gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *gc, - unsigned int parent_hwirq, - unsigned int parent_type); - -#else - -static inline void *gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *gc, - unsigned int parent_hwirq, - unsigned int parent_type) -{ - return NULL; -} - -static inline void *gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *gc, - unsigned int parent_hwirq, - unsigned int parent_type) -{ - return NULL; -} #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 76878b357ffa..d57cab4d4c06 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -92,14 +92,6 @@ void irq_exit_rcu(void); #define arch_nmi_exit() do { } while (0) #endif -#ifdef CONFIG_TINY_RCU -static inline void rcu_nmi_enter(void) { } -static inline void rcu_nmi_exit(void) { } -#else -extern void rcu_nmi_enter(void); -extern void rcu_nmi_exit(void); -#endif - /* * NMI vs Tracing * -------------- @@ -124,7 +116,7 @@ extern void rcu_nmi_exit(void); do { \ __nmi_enter(); \ lockdep_hardirq_enter(); \ - rcu_nmi_enter(); \ + ct_nmi_enter(); \ instrumentation_begin(); \ ftrace_nmi_enter(); \ instrumentation_end(); \ @@ -143,7 +135,7 @@ extern void rcu_nmi_exit(void); instrumentation_begin(); \ ftrace_nmi_exit(); \ instrumentation_end(); 
\ - rcu_nmi_exit(); \ + ct_nmi_exit(); \ lockdep_hardirq_exit(); \ __nmi_exit(); \ } while (0) diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index cddb42ff0473..034b1106d022 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -8,7 +8,7 @@ #ifdef CONFIG_KMAP_LOCAL void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot); void *__kmap_local_page_prot(struct page *page, pgprot_t prot); -void kunmap_local_indexed(void *vaddr); +void kunmap_local_indexed(const void *vaddr); void kmap_local_fork(struct task_struct *tsk); void __kmap_local_sched_out(void); void __kmap_local_sched_in(void); @@ -89,7 +89,7 @@ static inline void *kmap_local_pfn(unsigned long pfn) return __kmap_local_pfn_prot(pfn, kmap_prot); } -static inline void __kunmap_local(void *vaddr) +static inline void __kunmap_local(const void *vaddr) { kunmap_local_indexed(vaddr); } @@ -121,7 +121,7 @@ static inline void *kmap_atomic_pfn(unsigned long pfn) return __kmap_local_pfn_prot(pfn, kmap_prot); } -static inline void __kunmap_atomic(void *addr) +static inline void __kunmap_atomic(const void *addr) { kunmap_local_indexed(addr); pagefault_enable(); @@ -197,7 +197,7 @@ static inline void *kmap_local_pfn(unsigned long pfn) return kmap_local_page(pfn_to_page(pfn)); } -static inline void __kunmap_local(void *addr) +static inline void __kunmap_local(const void *addr) { #ifdef ARCH_HAS_FLUSH_ON_KUNMAP kunmap_flush_on_unmap(addr); @@ -224,7 +224,7 @@ static inline void *kmap_atomic_pfn(unsigned long pfn) return kmap_atomic(pfn_to_page(pfn)); } -static inline void __kunmap_atomic(void *addr) +static inline void __kunmap_atomic(const void *addr) { #ifdef ARCH_HAS_FLUSH_ON_KUNMAP kunmap_flush_on_unmap(addr); diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 6cabafffd0dd..116e8bd68c99 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -265,6 +265,12 @@ struct hisi_qm_list { void (*unregister_from_crypto)(struct hisi_qm *qm); }; +struct hisi_qm_poll_data { + struct hisi_qm *qm; + struct work_struct work; + u16 *qp_finish_id; +}; + struct hisi_qm { enum qm_hw_ver ver; enum qm_fun_type fun_type; @@ -302,6 +308,7 @@ struct hisi_qm { struct rw_semaphore qps_lock; struct idr qp_idr; struct hisi_qp *qp_array; + struct hisi_qm_poll_data *poll_data; struct mutex mailbox_lock; @@ -312,7 +319,6 @@ struct hisi_qm { u32 error_mask; struct workqueue_struct *wq; - struct work_struct work; struct work_struct rst_work; struct work_struct cmd_process; diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index de29821231c9..4ddaf6ad73ef 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -461,4 +461,16 @@ static inline int split_folio_to_list(struct folio *folio, return split_huge_page_to_list(&folio->page, list); } +/* + * Architectures that select ARCH_WANTS_THP_SWAP but don't support THP_SWP + * due to limitations in the implementation (e.g. arm64 MTE) can override + * this to false. + */ +#ifndef arch_thp_swp_supported +static inline bool arch_thp_swp_supported(void) +{ + return true; +} +#endif + #endif /* _LINUX_HUGE_MM_H */ diff --git a/include/linux/ima.h b/include/linux/ima.h index 426b1744215e..81708ca0ebc7 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -140,6 +140,11 @@ static inline int ima_measure_critical_data(const char *event_label, #endif /* CONFIG_IMA */ +#ifdef CONFIG_HAVE_IMA_KEXEC +int __init ima_free_kexec_buffer(void); +int __init ima_get_kexec_buffer(void **addr, size_t *size); 
+#endif + #ifdef CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT extern bool arch_ima_get_secureboot(void); extern const char * const *arch_get_ima_policy(void); diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h new file mode 100644 index 000000000000..f7fab3758cb9 --- /dev/null +++ b/include/linux/io_uring_types.h @@ -0,0 +1,574 @@ +#ifndef IO_URING_TYPES_H +#define IO_URING_TYPES_H + +#include <linux/blkdev.h> +#include <linux/task_work.h> +#include <linux/bitmap.h> +#include <linux/llist.h> +#include <uapi/linux/io_uring.h> + +struct io_wq_work_node { + struct io_wq_work_node *next; +}; + +struct io_wq_work_list { + struct io_wq_work_node *first; + struct io_wq_work_node *last; +}; + +struct io_wq_work { + struct io_wq_work_node list; + unsigned flags; + /* place it here instead of io_kiocb as it fills padding and saves 4B */ + int cancel_seq; +}; + +struct io_fixed_file { + /* file * with additional FFS_* flags */ + unsigned long file_ptr; +}; + +struct io_file_table { + struct io_fixed_file *files; + unsigned long *bitmap; + unsigned int alloc_hint; +}; + +struct io_notif; +struct io_notif_slot; + +struct io_hash_bucket { + spinlock_t lock; + struct hlist_head list; +} ____cacheline_aligned_in_smp; + +struct io_hash_table { + struct io_hash_bucket *hbs; + unsigned hash_bits; +}; + +/* + * Arbitrary limit, can be raised if need be + */ +#define IO_RINGFD_REG_MAX 16 + +struct io_uring_task { + /* submission side */ + int cached_refs; + const struct io_ring_ctx *last; + struct io_wq *io_wq; + struct file *registered_rings[IO_RINGFD_REG_MAX]; + + struct xarray xa; + struct wait_queue_head wait; + atomic_t in_idle; + atomic_t inflight_tracked; + struct percpu_counter inflight; + + struct { /* task_work */ + struct llist_head task_list; + struct callback_head task_work; + } ____cacheline_aligned_in_smp; +}; + +struct io_uring { + u32 head ____cacheline_aligned_in_smp; + u32 tail ____cacheline_aligned_in_smp; +}; + +/* + * This data is shared with the application through the mmap at offsets + * IORING_OFF_SQ_RING and IORING_OFF_CQ_RING. + * + * The offsets to the member fields are published through struct + * io_sqring_offsets when calling io_uring_setup. + */ +struct io_rings { + /* + * Head and tail offsets into the ring; the offsets need to be + * masked to get valid indices. + * + * The kernel controls head of the sq ring and the tail of the cq ring, + * and the application controls tail of the sq ring and the head of the + * cq ring. + */ + struct io_uring sq, cq; + /* + * Bitmasks to apply to head and tail offsets (constant, equals + * ring_entries - 1) + */ + u32 sq_ring_mask, cq_ring_mask; + /* Ring sizes (constant, power of 2) */ + u32 sq_ring_entries, cq_ring_entries; + /* + * Number of invalid entries dropped by the kernel due to + * invalid index stored in array + * + * Written by the kernel, shouldn't be modified by the + * application (i.e. get number of "new events" by comparing to + * cached value). + * + * After a new SQ head value was read by the application this + * counter includes all submissions that were dropped reaching + * the new SQ head (and possibly more). + */ + u32 sq_dropped; + /* + * Runtime SQ flags + * + * Written by the kernel, shouldn't be modified by the + * application. + * + * The application needs a full memory barrier before checking + * for IORING_SQ_NEED_WAKEUP after updating the sq tail. + */ + atomic_t sq_flags; + /* + * Runtime CQ flags + * + * Written by the application, shouldn't be modified by the + * kernel. 
+ */ + u32 cq_flags; + /* + * Number of completion events lost because the queue was full; + * this should be avoided by the application by making sure + * there are not more requests pending than there is space in + * the completion queue. + * + * Written by the kernel, shouldn't be modified by the + * application (i.e. get number of "new events" by comparing to + * cached value). + * + * As completion events come in out of order this counter is not + * ordered with any other data. + */ + u32 cq_overflow; + /* + * Ring buffer of completion events. + * + * The kernel writes completion events fresh every time they are + * produced, so the application is allowed to modify pending + * entries. + */ + struct io_uring_cqe cqes[] ____cacheline_aligned_in_smp; +}; + +struct io_restriction { + DECLARE_BITMAP(register_op, IORING_REGISTER_LAST); + DECLARE_BITMAP(sqe_op, IORING_OP_LAST); + u8 sqe_flags_allowed; + u8 sqe_flags_required; + bool registered; +}; + +struct io_submit_link { + struct io_kiocb *head; + struct io_kiocb *last; +}; + +struct io_submit_state { + /* inline/task_work completion list, under ->uring_lock */ + struct io_wq_work_node free_list; + /* batch completion logic */ + struct io_wq_work_list compl_reqs; + struct io_submit_link link; + + bool plug_started; + bool need_plug; + unsigned short submit_nr; + struct blk_plug plug; +}; + +struct io_ev_fd { + struct eventfd_ctx *cq_ev_fd; + unsigned int eventfd_async: 1; + struct rcu_head rcu; +}; + +struct io_alloc_cache { + struct hlist_head list; + unsigned int nr_cached; +}; + +struct io_ring_ctx { + /* const or read-mostly hot data */ + struct { + struct percpu_ref refs; + + struct io_rings *rings; + unsigned int flags; + enum task_work_notify_mode notify_method; + unsigned int compat: 1; + unsigned int drain_next: 1; + unsigned int restricted: 1; + unsigned int off_timeout_used: 1; + unsigned int drain_active: 1; + unsigned int drain_disabled: 1; + unsigned int has_evfd: 1; + unsigned int syscall_iopoll: 1; + } ____cacheline_aligned_in_smp; + + /* submission data */ + struct { + struct mutex uring_lock; + + /* + * Ring buffer of indices into array of io_uring_sqe, which is + * mmapped by the application using the IORING_OFF_SQES offset. + * + * This indirection could e.g. be used to assign fixed + * io_uring_sqe entries to operations and only submit them to + * the queue when needed. + * + * The kernel modifies neither the indices array nor the entries + * array. 
+ */ + u32 *sq_array; + struct io_uring_sqe *sq_sqes; + unsigned cached_sq_head; + unsigned sq_entries; + + /* + * Fixed resources fast path, should be accessed only under + * uring_lock, and updated through io_uring_register(2) + */ + struct io_rsrc_node *rsrc_node; + int rsrc_cached_refs; + atomic_t cancel_seq; + struct io_file_table file_table; + unsigned nr_user_files; + unsigned nr_user_bufs; + struct io_mapped_ubuf **user_bufs; + struct io_notif_slot *notif_slots; + unsigned nr_notif_slots; + + struct io_submit_state submit_state; + + struct io_buffer_list *io_bl; + struct xarray io_bl_xa; + struct list_head io_buffers_cache; + + struct io_hash_table cancel_table_locked; + struct list_head cq_overflow_list; + struct io_alloc_cache apoll_cache; + struct io_alloc_cache netmsg_cache; + } ____cacheline_aligned_in_smp; + + /* IRQ completion list, under ->completion_lock */ + struct io_wq_work_list locked_free_list; + unsigned int locked_free_nr; + + const struct cred *sq_creds; /* cred used for __io_sq_thread() */ + struct io_sq_data *sq_data; /* if using sq thread polling */ + + struct wait_queue_head sqo_sq_wait; + struct list_head sqd_list; + + unsigned long check_cq; + + unsigned int file_alloc_start; + unsigned int file_alloc_end; + + struct xarray personalities; + u32 pers_next; + + struct { + /* + * We cache a range of free CQEs we can use, once exhausted it + * should go through a slower range setup, see __io_get_cqe() + */ + struct io_uring_cqe *cqe_cached; + struct io_uring_cqe *cqe_sentinel; + + unsigned cached_cq_tail; + unsigned cq_entries; + struct io_ev_fd __rcu *io_ev_fd; + struct wait_queue_head cq_wait; + unsigned cq_extra; + } ____cacheline_aligned_in_smp; + + struct { + spinlock_t completion_lock; + + /* + * ->iopoll_list is protected by the ctx->uring_lock for + * io_uring instances that don't use IORING_SETUP_SQPOLL. + * For SQPOLL, only the single threaded io_sq_thread() will + * manipulate the list, hence no extra locking is needed there. + */ + struct io_wq_work_list iopoll_list; + struct io_hash_table cancel_table; + bool poll_multi_queue; + + struct list_head io_buffers_comp; + } ____cacheline_aligned_in_smp; + + /* timeouts */ + struct { + spinlock_t timeout_lock; + atomic_t cq_timeouts; + struct list_head timeout_list; + struct list_head ltimeout_list; + unsigned cq_last_tm_flush; + } ____cacheline_aligned_in_smp; + + /* Keep this last, we don't need it for the fast path */ + + struct io_restriction restrictions; + struct task_struct *submitter_task; + + /* slow path rsrc auxiliary data, used by update/register */ + struct io_rsrc_node *rsrc_backup_node; + struct io_mapped_ubuf *dummy_ubuf; + struct io_rsrc_data *file_data; + struct io_rsrc_data *buf_data; + + struct delayed_work rsrc_put_work; + struct llist_head rsrc_put_llist; + struct list_head rsrc_ref_list; + spinlock_t rsrc_ref_lock; + + struct list_head io_buffers_pages; + + #if defined(CONFIG_UNIX) + struct socket *ring_sock; + #endif + /* hashed buffered write serialization */ + struct io_wq_hash *hash_map; + + /* Only used for accounting purposes */ + struct user_struct *user; + struct mm_struct *mm_account; + + /* ctx exit and cancelation */ + struct llist_head fallback_llist; + struct delayed_work fallback_work; + struct work_struct exit_work; + struct list_head tctx_list; + struct completion ref_comp; + + /* io-wq management, e.g. 
thread count */ + u32 iowq_limits[2]; + bool iowq_limits_set; + + struct list_head defer_list; + unsigned sq_thread_idle; + /* protected by ->completion_lock */ + unsigned evfd_last_cq_tail; +}; + +enum { + REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT, + REQ_F_IO_DRAIN_BIT = IOSQE_IO_DRAIN_BIT, + REQ_F_LINK_BIT = IOSQE_IO_LINK_BIT, + REQ_F_HARDLINK_BIT = IOSQE_IO_HARDLINK_BIT, + REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT, + REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT, + REQ_F_CQE_SKIP_BIT = IOSQE_CQE_SKIP_SUCCESS_BIT, + + /* first byte is taken by user flags, shift it to not overlap */ + REQ_F_FAIL_BIT = 8, + REQ_F_INFLIGHT_BIT, + REQ_F_CUR_POS_BIT, + REQ_F_NOWAIT_BIT, + REQ_F_LINK_TIMEOUT_BIT, + REQ_F_NEED_CLEANUP_BIT, + REQ_F_POLLED_BIT, + REQ_F_BUFFER_SELECTED_BIT, + REQ_F_BUFFER_RING_BIT, + REQ_F_REISSUE_BIT, + REQ_F_CREDS_BIT, + REQ_F_REFCOUNT_BIT, + REQ_F_ARM_LTIMEOUT_BIT, + REQ_F_ASYNC_DATA_BIT, + REQ_F_SKIP_LINK_CQES_BIT, + REQ_F_SINGLE_POLL_BIT, + REQ_F_DOUBLE_POLL_BIT, + REQ_F_PARTIAL_IO_BIT, + REQ_F_CQE32_INIT_BIT, + REQ_F_APOLL_MULTISHOT_BIT, + REQ_F_CLEAR_POLLIN_BIT, + REQ_F_HASH_LOCKED_BIT, + /* keep async read/write and isreg together and in order */ + REQ_F_SUPPORT_NOWAIT_BIT, + REQ_F_ISREG_BIT, + + /* not a real bit, just to check we're not overflowing the space */ + __REQ_F_LAST_BIT, +}; + +enum { + /* ctx owns file */ + REQ_F_FIXED_FILE = BIT(REQ_F_FIXED_FILE_BIT), + /* drain existing IO first */ + REQ_F_IO_DRAIN = BIT(REQ_F_IO_DRAIN_BIT), + /* linked sqes */ + REQ_F_LINK = BIT(REQ_F_LINK_BIT), + /* doesn't sever on completion < 0 */ + REQ_F_HARDLINK = BIT(REQ_F_HARDLINK_BIT), + /* IOSQE_ASYNC */ + REQ_F_FORCE_ASYNC = BIT(REQ_F_FORCE_ASYNC_BIT), + /* IOSQE_BUFFER_SELECT */ + REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT), + /* IOSQE_CQE_SKIP_SUCCESS */ + REQ_F_CQE_SKIP = BIT(REQ_F_CQE_SKIP_BIT), + + /* fail rest of links */ + REQ_F_FAIL = BIT(REQ_F_FAIL_BIT), + /* on inflight list, should be cancelled and waited on exit reliably */ + REQ_F_INFLIGHT = BIT(REQ_F_INFLIGHT_BIT), + /* read/write uses file position */ + REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT), + /* must not punt to workers */ + REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT), + /* has or had linked timeout */ + REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT), + /* needs cleanup */ + REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT), + /* already went through poll handler */ + REQ_F_POLLED = BIT(REQ_F_POLLED_BIT), + /* buffer already selected */ + REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT), + /* buffer selected from ring, needs commit */ + REQ_F_BUFFER_RING = BIT(REQ_F_BUFFER_RING_BIT), + /* caller should reissue async */ + REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT), + /* supports async reads/writes */ + REQ_F_SUPPORT_NOWAIT = BIT(REQ_F_SUPPORT_NOWAIT_BIT), + /* regular file */ + REQ_F_ISREG = BIT(REQ_F_ISREG_BIT), + /* has creds assigned */ + REQ_F_CREDS = BIT(REQ_F_CREDS_BIT), + /* skip refcounting if not set */ + REQ_F_REFCOUNT = BIT(REQ_F_REFCOUNT_BIT), + /* there is a linked timeout that has to be armed */ + REQ_F_ARM_LTIMEOUT = BIT(REQ_F_ARM_LTIMEOUT_BIT), + /* ->async_data allocated */ + REQ_F_ASYNC_DATA = BIT(REQ_F_ASYNC_DATA_BIT), + /* don't post CQEs while failing linked requests */ + REQ_F_SKIP_LINK_CQES = BIT(REQ_F_SKIP_LINK_CQES_BIT), + /* single poll may be active */ + REQ_F_SINGLE_POLL = BIT(REQ_F_SINGLE_POLL_BIT), + /* double poll may be active */ + REQ_F_DOUBLE_POLL = BIT(REQ_F_DOUBLE_POLL_BIT), + /* request has already done partial IO */ + REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT), + /* fast 
poll multishot mode */ + REQ_F_APOLL_MULTISHOT = BIT(REQ_F_APOLL_MULTISHOT_BIT), + /* ->extra1 and ->extra2 are initialised */ + REQ_F_CQE32_INIT = BIT(REQ_F_CQE32_INIT_BIT), + /* recvmsg special flag, clear EPOLLIN */ + REQ_F_CLEAR_POLLIN = BIT(REQ_F_CLEAR_POLLIN_BIT), + /* hashed into ->cancel_hash_locked, protected by ->uring_lock */ + REQ_F_HASH_LOCKED = BIT(REQ_F_HASH_LOCKED_BIT), +}; + +typedef void (*io_req_tw_func_t)(struct io_kiocb *req, bool *locked); + +struct io_task_work { + struct llist_node node; + io_req_tw_func_t func; +}; + +struct io_cqe { + __u64 user_data; + __s32 res; + /* fd initially, then cflags for completion */ + union { + __u32 flags; + int fd; + }; +}; + +/* + * Each request type overlays its private data structure on top of this one. + * They must not exceed this one in size. + */ +struct io_cmd_data { + struct file *file; + /* each command gets 56 bytes of data */ + __u8 data[56]; +}; + +#define io_kiocb_to_cmd(req) ((void *) &(req)->cmd) +#define cmd_to_io_kiocb(ptr) ((struct io_kiocb *) ptr) + +struct io_kiocb { + union { + /* + * NOTE! Each of the io_kiocb union members has the file pointer + * as the first entry in their struct definition. So you can + * access the file pointer through any of the sub-structs, + * or directly as just 'file' in this struct. + */ + struct file *file; + struct io_cmd_data cmd; + }; + + u8 opcode; + /* polled IO has completed */ + u8 iopoll_completed; + /* + * Can be either a fixed buffer index, or used with provided buffers. + * For the latter, before issue it points to the buffer group ID, + * and after selection it points to the buffer ID itself. + */ + u16 buf_index; + unsigned int flags; + + struct io_cqe cqe; + + struct io_ring_ctx *ctx; + struct task_struct *task; + + struct io_rsrc_node *rsrc_node; + + union { + /* store used ubuf, so we can prevent reloading */ + struct io_mapped_ubuf *imu; + + /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */ + struct io_buffer *kbuf; + + /* + * stores buffer ID for ring provided buffers, valid IFF + * REQ_F_BUFFER_RING is set. + */ + struct io_buffer_list *buf_list; + }; + + union { + /* used by request caches, completion batching and iopoll */ + struct io_wq_work_node comp_list; + /* cache ->apoll->events */ + __poll_t apoll_events; + }; + atomic_t refs; + atomic_t poll_refs; + struct io_task_work io_task_work; + /* for polled requests, i.e. 
IORING_OP_POLL_ADD and async armed poll */ + union { + struct hlist_node hash_node; + struct { + u64 extra1; + u64 extra2; + }; + }; + /* internal polling, see IORING_FEAT_FAST_POLL */ + struct async_poll *apoll; + /* opcode allocated if it needs to store data for async defer */ + void *async_data; + /* linked requests, IFF REQ_F_HARDLINK or REQ_F_LINK are set */ + struct io_kiocb *link; + /* custom credentials, valid IFF REQ_F_CREDS is set */ + const struct cred *creds; + struct io_wq_work work; +}; + +struct io_overflow_cqe { + struct list_head list; + struct io_uring_cqe cqe; +}; + +#endif diff --git a/include/linux/iomap.h b/include/linux/iomap.h index e552097c67e0..25ac28175e4f 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -231,12 +231,6 @@ void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags); void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len); -#ifdef CONFIG_MIGRATION -int iomap_migrate_page(struct address_space *mapping, struct page *newpage, - struct page *page, enum migrate_mode mode); -#else -#define iomap_migrate_page NULL -#endif int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, const struct iomap_ops *ops); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, @@ -353,6 +347,12 @@ struct iomap_dio_ops { */ #define IOMAP_DIO_PARTIAL (1 << 2) +/* + * The caller will sync the write if needed; do not sync it within + * iomap_dio_rw. Overrides IOMAP_DIO_FORCE_WAIT. + */ +#define IOMAP_DIO_NOSYNC (1 << 3) + ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, unsigned int dio_flags, void *private, size_t done_before); diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 3f53bc27a19b..7578d4f6a969 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -11,7 +11,7 @@ /* * Default IO priority. */ -#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM) +#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0) /* * Check that a priority value has a valid class. @@ -46,23 +46,19 @@ static inline int task_nice_ioclass(struct task_struct *task) return IOPRIO_CLASS_BE; } -/* - * If the calling process has set an I/O priority, use that. Otherwise, return - * the default I/O priority. 
- */ -static inline int get_current_ioprio(void) +#ifdef CONFIG_BLOCK +int __get_task_ioprio(struct task_struct *p); +#else +static inline int __get_task_ioprio(struct task_struct *p) { - struct io_context *ioc = current->io_context; - - if (ioc) - return ioc->ioprio; return IOPRIO_DEFAULT; } +#endif /* CONFIG_BLOCK */ -/* - * For inheritance, return the highest of the two given priorities - */ -extern int ioprio_best(unsigned short aprio, unsigned short bprio); +static inline int get_current_ioprio(void) +{ + return __get_task_ioprio(current); +} extern int set_task_ioprio(struct task_struct *task, int ioprio); diff --git a/include/linux/irq.h b/include/linux/irq.h index 505308253d23..c3eb89606c2b 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -151,7 +151,9 @@ struct irq_common_data { #endif void *handler_data; struct msi_desc *msi_desc; +#ifdef CONFIG_SMP cpumask_var_t affinity; +#endif #ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK cpumask_var_t effective_affinity; #endif @@ -879,21 +881,34 @@ static inline int irq_data_get_node(struct irq_data *d) return irq_common_data_get_node(d->common); } -static inline struct cpumask *irq_get_affinity_mask(int irq) +static inline +const struct cpumask *irq_data_get_affinity_mask(struct irq_data *d) { - struct irq_data *d = irq_get_irq_data(irq); +#ifdef CONFIG_SMP + return d->common->affinity; +#else + return cpumask_of(0); +#endif +} - return d ? d->common->affinity : NULL; +static inline void irq_data_update_affinity(struct irq_data *d, + const struct cpumask *m) +{ +#ifdef CONFIG_SMP + cpumask_copy(d->common->affinity, m); +#endif } -static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d) +static inline const struct cpumask *irq_get_affinity_mask(int irq) { - return d->common->affinity; + struct irq_data *d = irq_get_irq_data(irq); + + return d ? 
irq_data_get_affinity_mask(d) : NULL; } #ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK static inline -struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) +const struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) { return d->common->effective_affinity; } @@ -908,13 +923,14 @@ static inline void irq_data_update_effective_affinity(struct irq_data *d, { } static inline -struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) +const struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) { - return d->common->affinity; + return irq_data_get_affinity_mask(d); } #endif -static inline struct cpumask *irq_get_effective_affinity_mask(unsigned int irq) +static inline +const struct cpumask *irq_get_effective_affinity_mask(unsigned int irq) { struct irq_data *d = irq_get_irq_data(irq); @@ -1121,6 +1137,7 @@ int irq_gc_set_wake(struct irq_data *d, unsigned int on); /* Setup functions for irq_chip_generic */ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw_irq); +void irq_unmap_generic_chip(struct irq_domain *d, unsigned int virq); struct irq_chip_generic * irq_alloc_generic_chip(const char *name, int nr_ct, unsigned int irq_base, void __iomem *reg_base, irq_flow_handler_t handler); diff --git a/include/linux/irqchip/mmp.h b/include/linux/irqchip/mmp.h index cb8455c87c8a..aa1813749a4f 100644 --- a/include/linux/irqchip/mmp.h +++ b/include/linux/irqchip/mmp.h @@ -4,4 +4,7 @@ extern struct irq_chip icu_irq_chip; +extern void icu_init_irq(void); +extern void mmp2_init_icu(void); + #endif /* __IRQCHIP_MMP_H */ diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index a77584593f7d..1cd4e36890fb 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -209,14 +209,15 @@ static inline void irq_set_handler_locked(struct irq_data *data, * Must be called with irq_desc locked and valid parameters. 
*/ static inline void -irq_set_chip_handler_name_locked(struct irq_data *data, struct irq_chip *chip, +irq_set_chip_handler_name_locked(struct irq_data *data, + const struct irq_chip *chip, irq_flow_handler_t handler, const char *name) { struct irq_desc *desc = irq_data_to_desc(data); desc->handle_irq = handler; desc->name = name; - data->chip = chip; + data->chip = (struct irq_chip *)chip; } bool irq_check_status_bit(unsigned int irq, unsigned int bitmask); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index e79d6e0b14e8..dc1724131300 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1557,7 +1557,7 @@ extern int jbd2_journal_wipe (journal_t *, int); extern int jbd2_journal_skip_recovery (journal_t *); extern void jbd2_journal_update_sb_errno(journal_t *); extern int jbd2_journal_update_sb_log_tail (journal_t *, tid_t, - unsigned long, int); + unsigned long, blk_opf_t); extern void jbd2_journal_abort (journal_t *, int); extern int jbd2_journal_errno (journal_t *); extern void jbd2_journal_ack_err (journal_t *); diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index bf1eef337a07..570831ca9951 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -220,8 +220,6 @@ extern void jump_label_lock(void); extern void jump_label_unlock(void); extern void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type); -extern void arch_jump_label_transform_static(struct jump_entry *entry, - enum jump_label_type type); extern bool arch_jump_label_transform_queue(struct jump_entry *entry, enum jump_label_type type); extern void arch_jump_label_transform_apply(void); @@ -230,12 +228,12 @@ extern void static_key_slow_inc(struct static_key *key); extern void static_key_slow_dec(struct static_key *key); extern void static_key_slow_inc_cpuslocked(struct static_key *key); extern void static_key_slow_dec_cpuslocked(struct static_key *key); -extern void jump_label_apply_nops(struct module *mod); extern int static_key_count(struct static_key *key); extern void static_key_enable(struct static_key *key); extern void static_key_disable(struct static_key *key); extern void static_key_enable_cpuslocked(struct static_key *key); extern void static_key_disable_cpuslocked(struct static_key *key); +extern enum jump_label_type jump_label_init_type(struct jump_entry *entry); /* * We should be using ATOMIC_INIT() for initializing .enabled, but @@ -303,11 +301,6 @@ static inline int jump_label_text_reserved(void *start, void *end) static inline void jump_label_lock(void) {} static inline void jump_label_unlock(void) {} -static inline int jump_label_apply_nops(struct module *mod) -{ - return 0; -} - static inline void static_key_enable(struct static_key *key) { STATIC_KEY_CHECK_USE(key); diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 69ae6b278464..ddb5a358fd82 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -28,6 +28,9 @@ enum cpu_usage_stat { CPUTIME_STEAL, CPUTIME_GUEST, CPUTIME_GUEST_NICE, +#ifdef CONFIG_SCHED_CORE + CPUTIME_FORCEIDLE, +#endif NR_STATS, }; @@ -115,4 +118,8 @@ extern void account_process_tick(struct task_struct *, int user); extern void account_idle_ticks(unsigned long ticks); +#ifdef CONFIG_SCHED_CORE +extern void __account_forceidle_time(struct task_struct *tsk, u64 delta); +#endif + #endif /* _LINUX_KERNEL_STAT_H */ diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 475683cd67f1..13e6c4b58f07 100644 --- a/include/linux/kexec.h +++ 
b/include/linux/kexec.h @@ -19,6 +19,7 @@ #include <asm/io.h> #include <uapi/linux/kexec.h> +#include <linux/verification.h> /* Location of a reserved region to hold the crash kernel. */ @@ -188,21 +189,54 @@ int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name, void *buf, unsigned int size, bool get_value); void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name); +void *kexec_image_load_default(struct kimage *image); + +#ifndef arch_kexec_kernel_image_probe +static inline int +arch_kexec_kernel_image_probe(struct kimage *image, void *buf, unsigned long buf_len) +{ + return kexec_image_probe_default(image, buf, buf_len); +} +#endif + +#ifndef arch_kimage_file_post_load_cleanup +static inline int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + return kexec_image_post_load_cleanup_default(image); +} +#endif + +#ifndef arch_kexec_kernel_image_load +static inline void *arch_kexec_kernel_image_load(struct kimage *image) +{ + return kexec_image_load_default(image); +} +#endif -/* Architectures may override the below functions */ -int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, - unsigned long buf_len); -void *arch_kexec_kernel_image_load(struct kimage *image); -int arch_kimage_file_post_load_cleanup(struct kimage *image); #ifdef CONFIG_KEXEC_SIG -int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, - unsigned long buf_len); +#ifdef CONFIG_SIGNED_PE_FILE_VERIFICATION +int kexec_kernel_verify_pe_sig(const char *kernel, unsigned long kernel_len); +#endif #endif -int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf); extern int kexec_add_buffer(struct kexec_buf *kbuf); int kexec_locate_mem_hole(struct kexec_buf *kbuf); +#ifndef arch_kexec_locate_mem_hole +/** + * arch_kexec_locate_mem_hole - Find free memory to place the segments. + * @kbuf: Parameters for the memory search. + * + * On success, kbuf->mem will have the start address of the memory region found. + * + * Return: 0 on success, negative errno on error. + */ +static inline int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf) +{ + return kexec_locate_mem_hole(kbuf); +} +#endif + /* Alignment required for elf header segment */ #define ELF_CORE_HEADER_ALIGN 4096 @@ -358,7 +392,10 @@ extern void machine_kexec_cleanup(struct kimage *image); extern int kernel_kexec(void); extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order); -int machine_kexec_post_load(struct kimage *image); + +#ifndef machine_kexec_post_load +static inline int machine_kexec_post_load(struct kimage *image) { return 0; } +#endif extern void __crash_kexec(struct pt_regs *); extern void crash_kexec(struct pt_regs *); @@ -391,10 +428,21 @@ extern bool kexec_in_progress; int crash_shrink_memory(unsigned long new_size); size_t crash_get_memory_size(void); -void crash_free_reserved_phys_range(unsigned long begin, unsigned long end); -void arch_kexec_protect_crashkres(void); -void arch_kexec_unprotect_crashkres(void); +#ifndef arch_kexec_protect_crashkres +/* + * Protection mechanism for crashkernel reserved memory after + * the kdump kernel is loaded. 
+ * + * Provide an empty default implementation here -- architecture + * code may override this + */ +static inline void arch_kexec_protect_crashkres(void) { } +#endif + +#ifndef arch_kexec_unprotect_crashkres +static inline void arch_kexec_unprotect_crashkres(void) { } +#endif #ifndef page_to_boot_pfn static inline unsigned long page_to_boot_pfn(struct page *page) @@ -424,6 +472,16 @@ static inline phys_addr_t boot_phys_to_phys(unsigned long boot_phys) } #endif +#ifndef crash_free_reserved_phys_range +static inline void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) +{ + unsigned long addr; + + for (addr = begin; addr < end; addr += PAGE_SIZE) + free_reserved_page(boot_pfn_to_page(addr >> PAGE_SHIFT)); +} +#endif + static inline unsigned long virt_to_boot_phys(void *addr) { return phys_to_boot_phys(__pa((unsigned long)addr)); diff --git a/include/linux/libata.h b/include/linux/libata.h index 0f2a59c9c735..0269ff114f5a 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -275,7 +275,7 @@ enum { PORT_DISABLED = 2, /* encoding various smaller bitmaps into a single - * unsigned long bitmap + * unsigned int bitmap */ ATA_NR_PIO_MODES = 7, ATA_NR_MWDMA_MODES = 5, @@ -426,12 +426,9 @@ enum { }; enum ata_xfer_mask { - ATA_MASK_PIO = ((1LU << ATA_NR_PIO_MODES) - 1) - << ATA_SHIFT_PIO, - ATA_MASK_MWDMA = ((1LU << ATA_NR_MWDMA_MODES) - 1) - << ATA_SHIFT_MWDMA, - ATA_MASK_UDMA = ((1LU << ATA_NR_UDMA_MODES) - 1) - << ATA_SHIFT_UDMA, + ATA_MASK_PIO = ((1U << ATA_NR_PIO_MODES) - 1) << ATA_SHIFT_PIO, + ATA_MASK_MWDMA = ((1U << ATA_NR_MWDMA_MODES) - 1) << ATA_SHIFT_MWDMA, + ATA_MASK_UDMA = ((1U << ATA_NR_UDMA_MODES) - 1) << ATA_SHIFT_UDMA, }; enum hsm_task_states { @@ -680,9 +677,9 @@ struct ata_device { unsigned int cdb_len; /* per-dev xfer mask */ - unsigned long pio_mask; - unsigned long mwdma_mask; - unsigned long udma_mask; + unsigned int pio_mask; + unsigned int mwdma_mask; + unsigned int udma_mask; /* for CHS addressing */ u16 cylinders; /* Number of cylinders */ @@ -850,7 +847,7 @@ struct ata_port { enum ata_lpm_policy target_lpm_policy; struct timer_list fastdrain_timer; - unsigned long fastdrain_cnt; + unsigned int fastdrain_cnt; async_cookie_t cookie; @@ -885,7 +882,7 @@ struct ata_port_operations { * Configuration and exception handling */ int (*cable_detect)(struct ata_port *ap); - unsigned long (*mode_filter)(struct ata_device *dev, unsigned long xfer_mask); + unsigned int (*mode_filter)(struct ata_device *dev, unsigned int xfer_mask); void (*set_piomode)(struct ata_port *ap, struct ata_device *dev); void (*set_dmamode)(struct ata_port *ap, struct ata_device *dev); int (*set_mode)(struct ata_link *link, struct ata_device **r_failed_dev); @@ -981,9 +978,9 @@ struct ata_port_operations { struct ata_port_info { unsigned long flags; unsigned long link_flags; - unsigned long pio_mask; - unsigned long mwdma_mask; - unsigned long udma_mask; + unsigned int pio_mask; + unsigned int mwdma_mask; + unsigned int udma_mask; struct ata_port_operations *port_ops; void *private_data; }; @@ -1102,16 +1099,18 @@ extern void ata_msleep(struct ata_port *ap, unsigned int msecs); extern u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask, u32 val, unsigned long interval, unsigned long timeout); extern int atapi_cmd_type(u8 opcode); -extern unsigned long ata_pack_xfermask(unsigned long pio_mask, - unsigned long mwdma_mask, unsigned long udma_mask); -extern void ata_unpack_xfermask(unsigned long xfer_mask, - unsigned long *pio_mask, unsigned long *mwdma_mask, 
- unsigned long *udma_mask); -extern u8 ata_xfer_mask2mode(unsigned long xfer_mask); -extern unsigned long ata_xfer_mode2mask(u8 xfer_mode); +extern unsigned int ata_pack_xfermask(unsigned int pio_mask, + unsigned int mwdma_mask, + unsigned int udma_mask); +extern void ata_unpack_xfermask(unsigned int xfer_mask, + unsigned int *pio_mask, + unsigned int *mwdma_mask, + unsigned int *udma_mask); +extern u8 ata_xfer_mask2mode(unsigned int xfer_mask); +extern unsigned int ata_xfer_mode2mask(u8 xfer_mode); extern int ata_xfer_mode2shift(u8 xfer_mode); -extern const char *ata_mode_string(unsigned long xfer_mask); -extern unsigned long ata_id_xfermask(const u16 *id); +extern const char *ata_mode_string(unsigned int xfer_mask); +extern unsigned int ata_id_xfermask(const u16 *id); extern int ata_std_qc_defer(struct ata_queued_cmd *qc); extern enum ata_completion_errors ata_noop_qc_prep(struct ata_queued_cmd *qc); extern void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg, @@ -1283,8 +1282,8 @@ static inline const struct ata_acpi_gtm *ata_acpi_init_gtm(struct ata_port *ap) } int ata_acpi_stm(struct ata_port *ap, const struct ata_acpi_gtm *stm); int ata_acpi_gtm(struct ata_port *ap, struct ata_acpi_gtm *stm); -unsigned long ata_acpi_gtm_xfermask(struct ata_device *dev, - const struct ata_acpi_gtm *gtm); +unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev, + const struct ata_acpi_gtm *gtm); int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm); #else static inline const struct ata_acpi_gtm *ata_acpi_init_gtm(struct ata_port *ap) diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index b6829b970093..1f1099dac3f0 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -188,7 +188,7 @@ static inline void lockdep_init_map_waits(struct lockdep_map *lock, const char *name, struct lock_class_key *key, int subclass, u8 inner, u8 outer) { - lockdep_init_map_type(lock, name, key, subclass, inner, LD_WAIT_INV, LD_LOCK_NORMAL); + lockdep_init_map_type(lock, name, key, subclass, inner, outer, LD_LOCK_NORMAL); } static inline void @@ -211,24 +211,28 @@ static inline void lockdep_init_map(struct lockdep_map *lock, const char *name, * or they are too narrow (they suffer from a false class-split): */ #define lockdep_set_class(lock, key) \ - lockdep_init_map_waits(&(lock)->dep_map, #key, key, 0, \ - (lock)->dep_map.wait_type_inner, \ - (lock)->dep_map.wait_type_outer) + lockdep_init_map_type(&(lock)->dep_map, #key, key, 0, \ + (lock)->dep_map.wait_type_inner, \ + (lock)->dep_map.wait_type_outer, \ + (lock)->dep_map.lock_type) #define lockdep_set_class_and_name(lock, key, name) \ - lockdep_init_map_waits(&(lock)->dep_map, name, key, 0, \ - (lock)->dep_map.wait_type_inner, \ - (lock)->dep_map.wait_type_outer) + lockdep_init_map_type(&(lock)->dep_map, name, key, 0, \ + (lock)->dep_map.wait_type_inner, \ + (lock)->dep_map.wait_type_outer, \ + (lock)->dep_map.lock_type) #define lockdep_set_class_and_subclass(lock, key, sub) \ - lockdep_init_map_waits(&(lock)->dep_map, #key, key, sub,\ - (lock)->dep_map.wait_type_inner, \ - (lock)->dep_map.wait_type_outer) + lockdep_init_map_type(&(lock)->dep_map, #key, key, sub, \ + (lock)->dep_map.wait_type_inner, \ + (lock)->dep_map.wait_type_outer, \ + (lock)->dep_map.lock_type) #define lockdep_set_subclass(lock, sub) \ - lockdep_init_map_waits(&(lock)->dep_map, #lock, (lock)->dep_map.key, sub,\ - (lock)->dep_map.wait_type_inner, \ - (lock)->dep_map.wait_type_outer) + lockdep_init_map_type(&(lock)->dep_map, #lock, 
(lock)->dep_map.key, sub,\ + (lock)->dep_map.wait_type_inner, \ + (lock)->dep_map.wait_type_outer, \ + (lock)->dep_map.lock_type) #define lockdep_set_novalidate_class(lock) \ lockdep_set_class_and_name(lock, &__lockdep_no_validate__, #lock) diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index eafa1d2489fd..806448173033 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -201,6 +201,7 @@ LSM_HOOK(int, 0, task_fix_setuid, struct cred *new, const struct cred *old, int flags) LSM_HOOK(int, 0, task_fix_setgid, struct cred *new, const struct cred * old, int flags) +LSM_HOOK(int, 0, task_fix_setgroups, struct cred *new, const struct cred * old) LSM_HOOK(int, 0, task_setpgid, struct task_struct *p, pid_t pgid) LSM_HOOK(int, 0, task_getpgid, struct task_struct *p) LSM_HOOK(int, 0, task_getsid, struct task_struct *p) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 91c8146649f5..84a0d7e02176 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -702,6 +702,13 @@ * @old is the set of credentials that are being replaced. * @flags contains one of the LSM_SETID_* values. * Return 0 on success. + * @task_fix_setgroups: + * Update the module's state after setting the supplementary group + * identity attributes of the current process. + * @new is the set of credentials that will be installed. Modifications + * should be made to this rather than to @current->cred. + * @old is the set of credentials that are being replaced. + * Return 0 on success. * @task_setpgid: * Check permission before setting the process group identifier of the * process @p to @pgid. diff --git a/include/linux/mfd/bcm2835-pm.h b/include/linux/mfd/bcm2835-pm.h index ed37dc40e82a..f70a810c55f7 100644 --- a/include/linux/mfd/bcm2835-pm.h +++ b/include/linux/mfd/bcm2835-pm.h @@ -9,6 +9,7 @@ struct bcm2835_pm { struct device *dev; void __iomem *base; void __iomem *asb; + void __iomem *rpivid_asb; }; #endif /* BCM2835_MFD_PM_H */ diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 069a89e847f3..ae5bb67a9ba1 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -19,27 +19,59 @@ struct migration_target_control; */ #define MIGRATEPAGE_SUCCESS 0 +/** + * struct movable_operations - Driver page migration + * @isolate_page: + * The VM calls this function to prepare the page to be moved. The page + * is locked and the driver should not unlock it. The driver should + * return ``true`` if the page is movable and ``false`` if it is not + * currently movable. After this function returns, the VM uses the + * page->lru field, so the driver must preserve any information which + * is usually stored here. + * + * @migrate_page: + * After isolation, the VM calls this function with the isolated + * @src page. The driver should copy the contents of the + * @src page to the @dst page and set up the fields of @dst page. + * Both pages are locked. + * If page migration is successful, the driver should call + * __ClearPageMovable(@src) and return MIGRATEPAGE_SUCCESS. + * If the driver cannot migrate the page at the moment, it can return + * -EAGAIN. The VM interprets this as a temporary migration failure and + * will retry it later. Any other error value is a permanent migration + * failure and migration will not be retried. + * The driver shouldn't touch the @src->lru field while in the + * migrate_page() function. It may write to @dst->lru. 
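/*
 * A minimal sketch of a driver honoring the contract above, including the
 * @putback_page callback described next (all "demo" names are hypothetical,
 * locking is simplified, and non-highmem pages are assumed so that
 * page_address() is valid):
 */
#include <linux/migrate.h>
#include <linux/mm.h>
#include <linux/spinlock.h>

static LIST_HEAD(demo_pages);			/* pages owned by the driver */
static DEFINE_SPINLOCK(demo_lock);
static const struct movable_operations demo_mops;

static bool demo_isolate_page(struct page *page, isolate_mode_t mode)
{
	/* The page arrives locked; detach it so the VM may use page->lru. */
	spin_lock(&demo_lock);
	list_del_init(&page->lru);
	spin_unlock(&demo_lock);
	return true;
}

static int demo_migrate_page(struct page *dst, struct page *src,
			     enum migrate_mode mode)
{
	memcpy(page_address(dst), page_address(src), PAGE_SIZE);

	__SetPageMovable(dst, &demo_mops);
	spin_lock(&demo_lock);
	list_add(&dst->lru, &demo_pages);	/* writing @dst->lru is fine */
	spin_unlock(&demo_lock);

	__ClearPageMovable(src);
	return MIGRATEPAGE_SUCCESS;
}

static void demo_putback_page(struct page *page)
{
	/* Migration failed: the page goes back into the driver's list. */
	spin_lock(&demo_lock);
	list_add(&page->lru, &demo_pages);
	spin_unlock(&demo_lock);
}

static const struct movable_operations demo_mops = {
	.isolate_page	= demo_isolate_page,
	.migrate_page	= demo_migrate_page,
	.putback_page	= demo_putback_page,
};
/* Pages are tagged with __SetPageMovable(page, &demo_mops) when allocated. */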
+ * + * @putback_page: + * If migration fails on the isolated page, the VM informs the driver + * that the page is no longer a candidate for migration by calling + * this function. The driver should put the isolated page back into + * its own data structure. + */ +struct movable_operations { + bool (*isolate_page)(struct page *, isolate_mode_t); + int (*migrate_page)(struct page *dst, struct page *src, + enum migrate_mode); + void (*putback_page)(struct page *); +}; + /* Defined in mm/debug.c: */ extern const char *migrate_reason_names[MR_TYPES]; #ifdef CONFIG_MIGRATION extern void putback_movable_pages(struct list_head *l); -extern int migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page, - enum migrate_mode mode); +int migrate_folio(struct address_space *mapping, struct folio *dst, + struct folio *src, enum migrate_mode mode); extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason, unsigned int *ret_succeeded); extern struct page *alloc_migration_target(struct page *page, unsigned long private); extern int isolate_movable_page(struct page *page, isolate_mode_t mode); -extern void migrate_page_states(struct page *newpage, struct page *page); -extern void migrate_page_copy(struct page *newpage, struct page *page); -extern int migrate_huge_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page); -extern int migrate_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page, int extra_count); +int migrate_huge_page_move_mapping(struct address_space *mapping, + struct folio *dst, struct folio *src); void migration_entry_wait_on_locked(swp_entry_t entry, pte_t *ptep, spinlock_t *ptl); void folio_migrate_flags(struct folio *newfolio, struct folio *folio); @@ -60,15 +92,8 @@ static inline struct page *alloc_migration_target(struct page *page, static inline int isolate_movable_page(struct page *page, isolate_mode_t mode) { return -EBUSY; } -static inline void migrate_page_states(struct page *newpage, struct page *page) -{ -} - -static inline void migrate_page_copy(struct page *newpage, - struct page *page) {} - static inline int migrate_huge_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page) + struct folio *dst, struct folio *src) { return -ENOSYS; } @@ -91,13 +116,13 @@ static inline int next_demotion_node(int node) #endif #ifdef CONFIG_COMPACTION -extern int PageMovable(struct page *page); -extern void __SetPageMovable(struct page *page, struct address_space *mapping); -extern void __ClearPageMovable(struct page *page); +bool PageMovable(struct page *page); +void __SetPageMovable(struct page *page, const struct movable_operations *ops); +void __ClearPageMovable(struct page *page); #else -static inline int PageMovable(struct page *page) { return 0; } +static inline bool PageMovable(struct page *page) { return false; } static inline void __SetPageMovable(struct page *page, - struct address_space *mapping) + const struct movable_operations *ops) { } static inline void __ClearPageMovable(struct page *page) @@ -110,6 +135,15 @@ static inline bool folio_test_movable(struct folio *folio) return PageMovable(&folio->page); } +static inline +const struct movable_operations *page_movable_ops(struct page *page) +{ + VM_BUG_ON(!__PageMovable(page)); + + return (const struct movable_operations *) + ((unsigned long)page->mapping - PAGE_MAPPING_MOVABLE); +} + #ifdef CONFIG_NUMA_BALANCING 
extern int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, int node); diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index ee5a217de2a8..f6e5369d2928 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -13,6 +13,129 @@ struct user_namespace; */ extern struct user_namespace init_user_ns; +typedef struct { + uid_t val; +} vfsuid_t; + +typedef struct { + gid_t val; +} vfsgid_t; + +static_assert(sizeof(vfsuid_t) == sizeof(kuid_t)); +static_assert(sizeof(vfsgid_t) == sizeof(kgid_t)); +static_assert(offsetof(vfsuid_t, val) == offsetof(kuid_t, val)); +static_assert(offsetof(vfsgid_t, val) == offsetof(kgid_t, val)); + +#ifdef CONFIG_MULTIUSER +static inline uid_t __vfsuid_val(vfsuid_t uid) +{ + return uid.val; +} + +static inline gid_t __vfsgid_val(vfsgid_t gid) +{ + return gid.val; +} +#else +static inline uid_t __vfsuid_val(vfsuid_t uid) +{ + return 0; +} + +static inline gid_t __vfsgid_val(vfsgid_t gid) +{ + return 0; +} +#endif + +static inline bool vfsuid_valid(vfsuid_t uid) +{ + return __vfsuid_val(uid) != (uid_t)-1; +} + +static inline bool vfsgid_valid(vfsgid_t gid) +{ + return __vfsgid_val(gid) != (gid_t)-1; +} + +static inline bool vfsuid_eq(vfsuid_t left, vfsuid_t right) +{ + return vfsuid_valid(left) && __vfsuid_val(left) == __vfsuid_val(right); +} + +static inline bool vfsgid_eq(vfsgid_t left, vfsgid_t right) +{ + return vfsgid_valid(left) && __vfsgid_val(left) == __vfsgid_val(right); +} + +/** + * vfsuid_eq_kuid - check whether kuid and vfsuid have the same value + * @vfsuid: the vfsuid to compare + * @kuid: the kuid to compare + * + * Check whether @vfsuid and @kuid have the same values. + * + * Return: true if @vfsuid and @kuid have the same value, false if not. + * Comparison between two invalid uids returns false. + */ +static inline bool vfsuid_eq_kuid(vfsuid_t vfsuid, kuid_t kuid) +{ + return vfsuid_valid(vfsuid) && __vfsuid_val(vfsuid) == __kuid_val(kuid); +} + +/** + * vfsgid_eq_kgid - check whether kgid and vfsgid have the same value + * @vfsgid: the vfsgid to compare + * @kgid: the kgid to compare + * + * Check whether @vfsgid and @kgid have the same values. + * + * Return: true if @vfsgid and @kgid have the same value, false if not. + * Comparison between two invalid gids returns false. + */ +static inline bool vfsgid_eq_kgid(vfsgid_t vfsgid, kgid_t kgid) +{ + return vfsgid_valid(vfsgid) && __vfsgid_val(vfsgid) == __kgid_val(kgid); } + +/* + * vfs{g,u}ids are created from k{g,u}ids. + * We don't allow them to be created from regular {u,g}id. + */ +#define VFSUIDT_INIT(val) (vfsuid_t){ __kuid_val(val) } +#define VFSGIDT_INIT(val) (vfsgid_t){ __kgid_val(val) } + +#define INVALID_VFSUID VFSUIDT_INIT(INVALID_UID) +#define INVALID_VFSGID VFSGIDT_INIT(INVALID_GID) + +/* + * Allow a vfs{g,u}id to be used as a k{g,u}id where we want to compare + * whether the mapped value is identical to value of a k{g,u}id. + */ +#define AS_KUIDT(val) (kuid_t){ __vfsuid_val(val) } +#define AS_KGIDT(val) (kgid_t){ __vfsgid_val(val) } + +#ifdef CONFIG_MULTIUSER +/** + * vfsgid_in_group_p() - check whether a vfsgid matches the caller's groups + * @vfsgid: the mnt gid to match + * + * This function can be used to determine whether @vfsgid matches any of the + * caller's groups. + * + * Return: 1 if vfsgid matches caller's groups, 0 if not.
+ */ +static inline int vfsgid_in_group_p(vfsgid_t vfsgid) +{ + return in_group_p(AS_KGIDT(vfsgid)); +} +#else +static inline int vfsgid_in_group_p(vfsgid_t vfsgid) +{ + return 1; +} +#endif + /** * initial_idmapping - check whether this is the initial mapping * @ns: idmapping to check @@ -48,7 +171,7 @@ static inline bool no_idmapping(const struct user_namespace *mnt_userns, } /** - * mapped_kuid_fs - map a filesystem kuid into a mnt_userns + * make_vfsuid - map a filesystem kuid into a mnt_userns * @mnt_userns: the mount's idmapping * @fs_userns: the filesystem's idmapping * @kuid : kuid to be mapped @@ -67,25 +190,33 @@ static inline bool no_idmapping(const struct user_namespace *mnt_userns, * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is * returned. */ -static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns, - struct user_namespace *fs_userns, - kuid_t kuid) + +static inline vfsuid_t make_vfsuid(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + kuid_t kuid) { uid_t uid; if (no_idmapping(mnt_userns, fs_userns)) - return kuid; + return VFSUIDT_INIT(kuid); if (initial_idmapping(fs_userns)) uid = __kuid_val(kuid); else uid = from_kuid(fs_userns, kuid); if (uid == (uid_t)-1) - return INVALID_UID; - return make_kuid(mnt_userns, uid); + return INVALID_VFSUID; + return VFSUIDT_INIT(make_kuid(mnt_userns, uid)); +} + +static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + kuid_t kuid) +{ + return AS_KUIDT(make_vfsuid(mnt_userns, fs_userns, kuid)); } /** - * mapped_kgid_fs - map a filesystem kgid into a mnt_userns + * make_vfsgid - map a filesystem kgid into a mnt_userns * @mnt_userns: the mount's idmapping * @fs_userns: the filesystem's idmapping * @kgid : kgid to be mapped @@ -104,21 +235,56 @@ static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns, * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is * returned. */ -static inline kgid_t mapped_kgid_fs(struct user_namespace *mnt_userns, - struct user_namespace *fs_userns, - kgid_t kgid) + +static inline vfsgid_t make_vfsgid(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + kgid_t kgid) { gid_t gid; if (no_idmapping(mnt_userns, fs_userns)) - return kgid; + return VFSGIDT_INIT(kgid); if (initial_idmapping(fs_userns)) gid = __kgid_val(kgid); else gid = from_kgid(fs_userns, kgid); if (gid == (gid_t)-1) - return INVALID_GID; - return make_kgid(mnt_userns, gid); + return INVALID_VFSGID; + return VFSGIDT_INIT(make_kgid(mnt_userns, gid)); +} + +static inline kgid_t mapped_kgid_fs(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + kgid_t kgid) +{ + return AS_KGIDT(make_vfsgid(mnt_userns, fs_userns, kgid)); +} + +/** + * from_vfsuid - map a vfsuid into the filesystem idmapping + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @vfsuid : vfsuid to be mapped + * + * Map @vfsuid into the filesystem idmapping. This function has to be used in + * order to e.g. write @vfsuid to inode->i_uid. 
+ * + * Return: @vfsuid mapped into the filesystem idmapping + */ +static inline kuid_t from_vfsuid(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + vfsuid_t vfsuid) +{ + uid_t uid; + + if (no_idmapping(mnt_userns, fs_userns)) + return AS_KUIDT(vfsuid); + uid = from_kuid(mnt_userns, AS_KUIDT(vfsuid)); + if (uid == (uid_t)-1) + return INVALID_UID; + if (initial_idmapping(fs_userns)) + return KUIDT_INIT(uid); + return make_kuid(fs_userns, uid); } /** @@ -145,16 +311,66 @@ static inline kuid_t mapped_kuid_user(struct user_namespace *mnt_userns, struct user_namespace *fs_userns, kuid_t kuid) { - uid_t uid; + return from_vfsuid(mnt_userns, fs_userns, VFSUIDT_INIT(kuid)); +} + +/** + * vfsuid_has_fsmapping - check whether a vfsuid maps into the filesystem + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @vfsuid: vfsuid to be mapped + * + * Check whether @vfsuid has a mapping in the filesystem idmapping. Use this + * function to check whether the filesystem idmapping has a mapping for + * @vfsuid. + * + * Return: true if @vfsuid has a mapping in the filesystem, false if not. + */ +static inline bool vfsuid_has_fsmapping(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + vfsuid_t vfsuid) +{ + return uid_valid(from_vfsuid(mnt_userns, fs_userns, vfsuid)); +} + +/** + * vfsuid_into_kuid - convert vfsuid into kuid + * @vfsuid: the vfsuid to convert + * + * This can be used when a vfsuid is committed as a kuid. + * + * Return: a kuid with the value of @vfsuid + */ +static inline kuid_t vfsuid_into_kuid(vfsuid_t vfsuid) +{ + return AS_KUIDT(vfsuid); +} + +/** + * from_vfsgid - map a vfsgid into the filesystem idmapping + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @vfsgid : vfsgid to be mapped + * + * Map @vfsgid into the filesystem idmapping. This function has to be used in + * order to e.g. write @vfsgid to inode->i_gid. + * + * Return: @vfsgid mapped into the filesystem idmapping + */ +static inline kgid_t from_vfsgid(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + vfsgid_t vfsgid) +{ + gid_t gid; if (no_idmapping(mnt_userns, fs_userns)) - return kuid; - uid = from_kuid(mnt_userns, kuid); - if (uid == (uid_t)-1) - return INVALID_UID; + return AS_KGIDT(vfsgid); + gid = from_kgid(mnt_userns, AS_KGIDT(vfsgid)); + if (gid == (gid_t)-1) + return INVALID_GID; if (initial_idmapping(fs_userns)) - return KUIDT_INIT(uid); - return make_kuid(fs_userns, uid); + return KGIDT_INIT(gid); + return make_kgid(fs_userns, gid); } /** @@ -181,16 +397,39 @@ static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns, struct user_namespace *fs_userns, kgid_t kgid) { - gid_t gid; + return from_vfsgid(mnt_userns, fs_userns, VFSGIDT_INIT(kgid)); +} - if (no_idmapping(mnt_userns, fs_userns)) - return kgid; - gid = from_kgid(mnt_userns, kgid); - if (gid == (gid_t)-1) - return INVALID_GID; - if (initial_idmapping(fs_userns)) - return KGIDT_INIT(gid); - return make_kgid(fs_userns, gid); +/** + * vfsgid_has_fsmapping - check whether a vfsgid maps into the filesystem + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @vfsgid: vfsgid to be mapped + * + * Check whether @vfsgid has a mapping in the filesystem idmapping. Use this + * function to check whether the filesystem idmapping has a mapping for + * @vfsgid. + * + * Return: true if @vfsgid has a mapping in the filesystem, false if not. 
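/*
 * Both directions compose as below in a filesystem's getattr/setattr
 * paths. A minimal sketch with hypothetical "demo" helpers: report an
 * owner through the mount's idmapping, and commit a caller-provided
 * vfsuid only when the filesystem can actually represent it.
 */
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/mnt_idmapping.h>

static void demo_report_owner(struct user_namespace *mnt_userns,
			      struct inode *inode, struct kstat *stat)
{
	struct user_namespace *fs_userns = inode->i_sb->s_user_ns;
	vfsuid_t vfsuid = make_vfsuid(mnt_userns, fs_userns, inode->i_uid);

	/* The mapped value is committed to a kuid_t field. */
	stat->uid = vfsuid_into_kuid(vfsuid);
}

static int demo_set_owner(struct user_namespace *mnt_userns,
			  struct inode *inode, vfsuid_t vfsuid)
{
	struct user_namespace *fs_userns = inode->i_sb->s_user_ns;
	kuid_t kuid = from_vfsuid(mnt_userns, fs_userns, vfsuid);

	/* Same check that vfsuid_has_fsmapping() performs internally. */
	if (!uid_valid(kuid))
		return -EOVERFLOW;
	inode->i_uid = kuid;
	return 0;
}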
+ */ +static inline bool vfsgid_has_fsmapping(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + vfsgid_t vfsgid) +{ + return gid_valid(from_vfsgid(mnt_userns, fs_userns, vfsgid)); +} + +/** + * vfsgid_into_kgid - convert vfsgid into kgid + * @vfsgid: the vfsgid to convert + * + * This can be used when a vfsgid is committed as a kgid. + * + * Return: a kgid with the value of @vfsgid + */ +static inline kgid_t vfsgid_into_kgid(vfsgid_t vfsgid) +{ + return AS_KGIDT(vfsgid); } /** @@ -209,7 +448,8 @@ static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns, static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns, struct user_namespace *fs_userns) { - return mapped_kuid_user(mnt_userns, fs_userns, current_fsuid()); + return from_vfsuid(mnt_userns, fs_userns, + VFSUIDT_INIT(current_fsuid())); } /** @@ -228,7 +468,8 @@ static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns, static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns, struct user_namespace *fs_userns) { - return mapped_kgid_user(mnt_userns, fs_userns, current_fsgid()); + return from_vfsgid(mnt_userns, fs_userns, + VFSGIDT_INIT(current_fsgid())); } #endif /* _LINUX_MNT_IDMAPPING_H */ diff --git a/include/linux/module.h b/include/linux/module.h index abd9fa916b7d..518296ea7f73 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -505,6 +505,11 @@ struct module { int num_static_call_sites; struct static_call_site *static_call_sites; #endif +#if IS_ENABLED(CONFIG_KUNIT) + int num_kunit_suites; + struct kunit_suite **kunit_suites; +#endif + #ifdef CONFIG_LIVEPATCH bool klp; /* Is this a livepatch module? */ diff --git a/include/linux/mpage.h b/include/linux/mpage.h index 43986f7ec4dd..1bdc39daac0a 100644 --- a/include/linux/mpage.h +++ b/include/linux/mpage.h @@ -19,7 +19,5 @@ void mpage_readahead(struct readahead_control *, get_block_t get_block); int mpage_read_folio(struct folio *folio, get_block_t get_block); int mpage_writepages(struct address_space *mapping, struct writeback_control *wbc, get_block_t get_block); -int mpage_writepage(struct page *page, get_block_t *get_block, - struct writeback_control *wbc); #endif diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 1b18dfa52e48..f2402ddeafbf 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -276,19 +276,18 @@ struct netfs_cache_ops { }; struct readahead_control; -extern void netfs_readahead(struct readahead_control *); +void netfs_readahead(struct readahead_control *); int netfs_read_folio(struct file *, struct folio *); -extern int netfs_write_begin(struct netfs_inode *, - struct file *, struct address_space *, - loff_t, unsigned int, struct folio **, - void **); - -extern void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool); -extern void netfs_get_subrequest(struct netfs_io_subrequest *subreq, - enum netfs_sreq_ref_trace what); -extern void netfs_put_subrequest(struct netfs_io_subrequest *subreq, - bool was_async, enum netfs_sreq_ref_trace what); -extern void netfs_stats_show(struct seq_file *); +int netfs_write_begin(struct netfs_inode *, struct file *, + struct address_space *, loff_t pos, unsigned int len, + struct folio **, void **fsdata); + +void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool); +void netfs_get_subrequest(struct netfs_io_subrequest *subreq, + enum netfs_sreq_ref_trace what); +void netfs_put_subrequest(struct netfs_io_subrequest *subreq, + bool was_async, enum netfs_sreq_ref_trace what); +void 
netfs_stats_show(struct seq_file *); /** * netfs_inode - Get the netfs inode context from the inode diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 10bc88cc3bf6..62c54ffbeeaa 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -67,7 +67,7 @@ struct unwind_hint { * It should only be used in special cases where you're 100% sure it won't * affect the reliability of frame pointers and kernel stack traces. * - * For more information, see tools/objtool/Documentation/stack-validation.txt. + * For more information, see tools/objtool/Documentation/objtool.txt. */ #define STACK_FRAME_NON_STANDARD(func) \ static void __used __section(".discard.func_stack_frame_non_standard") \ diff --git a/include/linux/of.h b/include/linux/of.h index f0a5d6b10c5a..20a4e7cb7afe 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -441,8 +441,6 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, unsigned long initrd_load_addr, unsigned long initrd_len, const char *cmdline, size_t extra_fdt_size); -int ima_get_kexec_buffer(void **addr, size_t *size); -int ima_free_kexec_buffer(void); #else /* CONFIG_OF */ static inline void of_core_init(void) diff --git a/include/linux/once_lite.h b/include/linux/once_lite.h index 861e606b820f..b7bce4983638 100644 --- a/include/linux/once_lite.h +++ b/include/linux/once_lite.h @@ -9,15 +9,27 @@ */ #define DO_ONCE_LITE(func, ...) \ DO_ONCE_LITE_IF(true, func, ##__VA_ARGS__) -#define DO_ONCE_LITE_IF(condition, func, ...) \ + +#define __ONCE_LITE_IF(condition) \ ({ \ static bool __section(".data.once") __already_done; \ - bool __ret_do_once = !!(condition); \ + bool __ret_cond = !!(condition); \ + bool __ret_once = false; \ \ - if (unlikely(__ret_do_once && !__already_done)) { \ + if (unlikely(__ret_cond && !__already_done)) { \ __already_done = true; \ - func(__VA_ARGS__); \ + __ret_once = true; \ } \ + unlikely(__ret_once); \ + }) + +#define DO_ONCE_LITE_IF(condition, func, ...) \ + ({ \ + bool __ret_do_once = !!(condition); \ + \ + if (__ONCE_LITE_IF(__ret_do_once)) \ + func(__VA_ARGS__); \ + \ unlikely(__ret_do_once); \ }) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e66f7aa3191d..3f5490f6f038 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -639,7 +639,7 @@ __PAGEFLAG(Reported, reported, PF_NO_COMPOUND) * structure which KSM associates with that merged page. See ksm.h. * * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is used for non-lru movable - * page and then page->mapping points a struct address_space. + * page and then page->mapping points to a struct movable_operations. 
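/*
 * The once_lite.h refactor above splits the one-shot bookkeeping
 * (__ONCE_LITE_IF) out of DO_ONCE_LITE_IF so callers can gate arbitrary
 * statements rather than a single function call. A small sketch with a
 * hypothetical length check:
 */
#include <linux/once_lite.h>
#include <linux/printk.h>
#include <linux/types.h>

static void demo_check_len(size_t len, size_t max)
{
	/* Warn in detail only about the first oversized request. */
	DO_ONCE_LITE_IF(len > max, pr_warn,
			"demo: %zu bytes exceeds limit of %zu\n", len, max);
}

static void demo_check_len_open_coded(size_t len, size_t max)
{
	/* Equivalent form built on the new test-and-set helper. */
	if (__ONCE_LITE_IF(len > max))
		pr_warn("demo: %zu bytes exceeds limit of %zu\n", len, max);
}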
* * Please note that, confusingly, "page_mapping" refers to the inode * address_space which maps the page from disk; whereas "page_mapped" diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ce96866fbec4..cc9adbaddb59 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -718,9 +718,8 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index) return head + (index & (thp_nr_pages(head) - 1)); } -unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, - pgoff_t end, unsigned int nr_pages, - struct page **pages); +unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start, + pgoff_t end, struct folio_batch *fbatch); unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start, unsigned int nr_pages, struct page **pages); unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index, @@ -1079,6 +1078,12 @@ static inline int __must_check write_one_page(struct page *page) int __set_page_dirty_nobuffers(struct page *page); bool noop_dirty_folio(struct address_space *mapping, struct folio *folio); +#ifdef CONFIG_MIGRATION +int filemap_migrate_folio(struct address_space *mapping, struct folio *dst, + struct folio *src, enum migrate_mode mode); +#else +#define filemap_migrate_folio NULL +#endif void page_endio(struct page *page, bool is_write, int err); void folio_end_private_2(struct folio *folio); @@ -1098,8 +1103,6 @@ size_t fault_in_subpage_writeable(char __user *uaddr, size_t size); size_t fault_in_safe_writeable(const char __user *uaddr, size_t size); size_t fault_in_readable(const char __user *uaddr, size_t size); -int add_to_page_cache_locked(struct page *page, struct address_space *mapping, - pgoff_t index, gfp_t gfp); int add_to_page_cache_lru(struct page *page, struct address_space *mapping, pgoff_t index, gfp_t gfp); int filemap_add_folio(struct address_space *mapping, struct folio *folio, @@ -1107,10 +1110,6 @@ int filemap_add_folio(struct address_space *mapping, struct folio *folio, void filemap_remove_folio(struct folio *folio); void delete_from_page_cache(struct page *page); void __filemap_remove_folio(struct folio *folio, void *shadow); -static inline void __delete_from_page_cache(struct page *page, void *shadow) -{ - __filemap_remove_folio(page_folio(page), shadow); -} void replace_page_cache_page(struct page *old, struct page *new); void delete_from_page_cache_batch(struct address_space *mapping, struct folio_batch *fbatch); @@ -1119,22 +1118,6 @@ bool filemap_release_folio(struct folio *folio, gfp_t gfp); loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end, int whence); -/* - * Like add_to_page_cache_locked, but used to add newly allocated pages: - * the page is new, so we can just run __SetPageLocked() against it. 
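/*
 * With find_get_pages_range() gone, page-cache range walks use folio
 * batches. A minimal sketch of the replacement loop built on
 * filemap_get_folios() above (the walk body is illustrative):
 */
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/printk.h>
#include <linux/sched.h>

static void demo_scan_range(struct address_space *mapping,
			    pgoff_t start, pgoff_t end)
{
	struct folio_batch fbatch;
	unsigned int i, nr;

	folio_batch_init(&fbatch);
	/* filemap_get_folios() advances @start past the folios it returns. */
	while ((nr = filemap_get_folios(mapping, &start, end, &fbatch))) {
		for (i = 0; i < nr; i++) {
			struct folio *folio = fbatch.folios[i];

			pr_debug("index %lu spans %ld pages\n",
				 folio->index, folio_nr_pages(folio));
		}
		/* Drops the reference the lookup took on each folio. */
		folio_batch_release(&fbatch);
		cond_resched();
	}
}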
- */ -static inline int add_to_page_cache(struct page *page, - struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask) -{ - int error; - - __SetPageLocked(page); - error = add_to_page_cache_locked(page, mapping, offset, gfp_mask); - if (unlikely(error)) - __ClearPageLocked(page); - return error; -} - /* Must be non-static for BPF error injection */ int __filemap_add_folio(struct address_space *mapping, struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp); diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 67b1246f136b..6649154a2115 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -27,16 +27,6 @@ struct pagevec { void __pagevec_release(struct pagevec *pvec); void __pagevec_lru_add(struct pagevec *pvec); -unsigned pagevec_lookup_range(struct pagevec *pvec, - struct address_space *mapping, - pgoff_t *start, pgoff_t end); -static inline unsigned pagevec_lookup(struct pagevec *pvec, - struct address_space *mapping, - pgoff_t *start) -{ - return pagevec_lookup_range(pvec, mapping, start, (pgoff_t)-1); -} - unsigned pagevec_lookup_range_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, pgoff_t end, xa_mark_t tag); diff --git a/include/linux/panic.h b/include/linux/panic.h index e71161da69c4..c7759b3f2045 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -68,7 +68,8 @@ static inline void set_arch_panic_timeout(int timeout, int arch_default_timeout) #define TAINT_LIVEPATCH 15 #define TAINT_AUX 16 #define TAINT_RANDSTRUCT 17 -#define TAINT_FLAGS_COUNT 18 +#define TAINT_TEST 18 +#define TAINT_FLAGS_COUNT 19 #define TAINT_FLAGS_MAX ((1UL << TAINT_FLAGS_COUNT) - 1) struct taint_flag { diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0178823ce8c2..7fa460ccf7fa 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -556,10 +556,13 @@ #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F3 0x1493 #define PCI_DEVICE_ID_AMD_17H_M60H_DF_F3 0x144b #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F3 0x1443 +#define PCI_DEVICE_ID_AMD_17H_MA0H_DF_F3 0x1727 #define PCI_DEVICE_ID_AMD_19H_DF_F3 0x1653 #define PCI_DEVICE_ID_AMD_19H_M10H_DF_F3 0x14b0 #define PCI_DEVICE_ID_AMD_19H_M40H_DF_F3 0x167c #define PCI_DEVICE_ID_AMD_19H_M50H_DF_F3 0x166d +#define PCI_DEVICE_ID_AMD_19H_M60H_DF_F3 0x14e3 +#define PCI_DEVICE_ID_AMD_19H_M70H_DF_F3 0x14f3 #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index 46f9b6fe306e..bf66fe011fa8 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -56,9 +56,13 @@ struct riscv_pmu { struct cpu_hw_events __percpu *hw_events; struct hlist_node node; + struct notifier_block riscv_pm_nb; }; #define to_riscv_pmu(p) (container_of(p, struct riscv_pmu, pmu)) + +void riscv_pmu_start(struct perf_event *event, int flags); +void riscv_pmu_stop(struct perf_event *event, int flags); unsigned long riscv_pmu_ctr_read_csr(unsigned long csr); int riscv_pmu_event_set_period(struct perf_event *event); uint64_t riscv_pmu_ctr_get_width_mask(struct perf_event *event); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index da759560eec5..ee8b9ecdc03b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -759,6 +759,8 @@ struct perf_event { struct pid_namespace *ns; u64 id; + atomic64_t lost_samples; + u64 (*clock)(void); perf_overflow_handler_t overflow_handler; void 
*overflow_handler_context; diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index cb0fd633a610..4ea496924106 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -229,6 +229,15 @@ static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe, return buf->ops->try_steal(pipe, buf); } +static inline void pipe_discard_from(struct pipe_inode_info *pipe, + unsigned int old_head) +{ + unsigned int mask = pipe->ring_size - 1; + + while (pipe->head > old_head) + pipe_buf_release(pipe, &pipe->bufs[--pipe->head & mask]); +} + /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual memory allocation, whereas PIPE_BUF makes atomicity guarantees. */ #define PIPE_SIZE PAGE_SIZE diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 196a157456aa..77f4849e3418 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -109,7 +109,6 @@ extern struct wakeup_source *wakeup_sources_walk_next(struct wakeup_source *ws); extern int device_wakeup_enable(struct device *dev); extern int device_wakeup_disable(struct device *dev); extern void device_set_wakeup_capable(struct device *dev, bool capable); -extern int device_init_wakeup(struct device *dev, bool val); extern int device_set_wakeup_enable(struct device *dev, bool enable); extern void __pm_stay_awake(struct wakeup_source *ws); extern void pm_stay_awake(struct device *dev); @@ -167,13 +166,6 @@ static inline int device_set_wakeup_enable(struct device *dev, bool enable) return 0; } -static inline int device_init_wakeup(struct device *dev, bool val) -{ - device_set_wakeup_capable(dev, val); - device_set_wakeup_enable(dev, val); - return 0; -} - static inline bool device_may_wakeup(struct device *dev) { return dev->power.can_wakeup && dev->power.should_wakeup; @@ -217,4 +209,27 @@ static inline void pm_wakeup_hard_event(struct device *dev) return pm_wakeup_dev_event(dev, 0, true); } +/** + * device_init_wakeup - Device wakeup initialization. + * @dev: Device to handle. + * @enable: Whether or not to enable @dev as a wakeup device. + * + * By default, most devices should leave wakeup disabled. The exceptions are + * devices that everyone expects to be wakeup sources: keyboards, power buttons, + * possibly network interfaces, etc. Also, devices that don't generate their + * own wakeup requests but merely forward requests from one bus to another + * (like PCI bridges) should have wakeup enabled by default. 
+ */ +static inline int device_init_wakeup(struct device *dev, bool enable) +{ + if (enable) { + device_set_wakeup_capable(dev, true); + return device_wakeup_enable(dev); + } else { + device_wakeup_disable(dev); + device_set_wakeup_capable(dev, false); + return 0; + } +} + #endif /* _LINUX_PM_WAKEUP_H */ diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index b65c877d92b8..7d1e604c1325 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -73,6 +73,7 @@ extern int set_posix_acl(struct user_namespace *, struct inode *, int, struct posix_acl *); struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type); +struct posix_acl *posix_acl_clone(const struct posix_acl *acl, gfp_t flags); #ifdef CONFIG_FS_POSIX_ACL int posix_acl_chmod(struct user_namespace *, struct inode *, umode_t); diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h index 1766e1de6956..b6bd3eac2bcc 100644 --- a/include/linux/posix_acl_xattr.h +++ b/include/linux/posix_acl_xattr.h @@ -33,21 +33,31 @@ posix_acl_xattr_count(size_t size) } #ifdef CONFIG_FS_POSIX_ACL -void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns, - struct inode *inode, - void *value, size_t size); -void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns, - struct inode *inode, - void *value, size_t size); +void posix_acl_fix_xattr_from_user(void *value, size_t size); +void posix_acl_fix_xattr_to_user(void *value, size_t size); +void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns, + const struct inode *inode, + void *value, size_t size); +void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns, + const struct inode *inode, + void *value, size_t size); #else -static inline void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns, - struct inode *inode, - void *value, size_t size) +static inline void posix_acl_fix_xattr_from_user(void *value, size_t size) { } -static inline void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns, - struct inode *inode, - void *value, size_t size) +static inline void posix_acl_fix_xattr_to_user(void *value, size_t size) +{ +} +static inline void +posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns, + const struct inode *inode, void *value, + size_t size) +{ +} +static inline void +posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns, + const struct inode *inode, void *value, + size_t size) { } #endif diff --git a/include/linux/pstore.h b/include/linux/pstore.h index e97a8188f0fd..638507a3c8ff 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -57,6 +57,9 @@ struct pstore_info; * @size: size of @buf * @ecc_notice_size: * ECC information for @buf + * @priv: pointer for backend specific use, will be + * kfree()d by the pstore core if non-NULL + * when the record is freed. 
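/*
 * A sketch of how a backend might use the new @priv field in its ->read()
 * callback (the "demo" cursor is hypothetical); no matching free is
 * needed because the core kfree()s @priv along with the record:
 */
#include <linux/pstore.h>
#include <linux/slab.h>

struct demo_cursor {
	loff_t next_off;		/* backend position for this record */
};

static ssize_t demo_pstore_read(struct pstore_record *record)
{
	struct demo_cursor *c = kzalloc(sizeof(*c), GFP_KERNEL);

	if (!c)
		return -ENOMEM;
	record->priv = c;		/* freed by the pstore core */

	/* ...read from backing storage into record->buf and set
	 * record->size, advancing c->next_off... */
	return record->size;
}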
* * Valid for PSTORE_TYPE_DMESG @type: * @@ -74,6 +77,7 @@ struct pstore_record { char *buf; ssize_t size; ssize_t ecc_notice_size; + void *priv; int count; enum kmsg_dump_reason reason; diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 9771a0761a40..9429930c5566 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -6,9 +6,6 @@ #include <linux/mutex.h> #include <linux/of.h> -struct pwm_capture; -struct seq_file; - struct pwm_chip; /** @@ -252,6 +249,16 @@ pwm_set_relative_duty_cycle(struct pwm_state *state, unsigned int duty_cycle, } /** + * struct pwm_capture - PWM capture data + * @period: period of the PWM signal (in nanoseconds) + * @duty_cycle: duty cycle of the PWM signal (in nanoseconds) + */ +struct pwm_capture { + unsigned int period; + unsigned int duty_cycle; +}; + +/** * struct pwm_ops - PWM controller operations * @request: optional hook for requesting a PWM * @free: optional hook for freeing a PWM @@ -261,10 +268,6 @@ pwm_set_relative_duty_cycle(struct pwm_state *state, unsigned int duty_cycle, * called once per PWM device when the PWM chip is * registered. * @owner: helps prevent removal of modules exporting active PWMs - * @config: configure duty cycles and period length for this PWM - * @set_polarity: configure the polarity of this PWM - * @enable: enable PWM output toggling - * @disable: disable PWM output toggling */ struct pwm_ops { int (*request)(struct pwm_chip *chip, struct pwm_device *pwm); @@ -276,14 +279,6 @@ struct pwm_ops { void (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state); struct module *owner; - - /* Only used by legacy drivers */ - int (*config)(struct pwm_chip *chip, struct pwm_device *pwm, - int duty_ns, int period_ns); - int (*set_polarity)(struct pwm_chip *chip, struct pwm_device *pwm, - enum pwm_polarity polarity); - int (*enable)(struct pwm_chip *chip, struct pwm_device *pwm); - void (*disable)(struct pwm_chip *chip, struct pwm_device *pwm); }; /** @@ -312,16 +307,6 @@ struct pwm_chip { struct pwm_device *pwms; }; -/** - * struct pwm_capture - PWM capture data - * @period: period of the PWM signal (in nanoseconds) - * @duty_cycle: duty cycle of the PWM signal (in nanoseconds) - */ -struct pwm_capture { - unsigned int period; - unsigned int duty_cycle; -}; - #if IS_ENABLED(CONFIG_PWM) /* PWM user APIs */ struct pwm_device *pwm_request(int pwm_id, const char *label); diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index a0f6668924d3..0d8625d71733 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -20,11 +20,12 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb) } /* i_mutex must being held */ -static inline bool is_quota_modification(struct inode *inode, struct iattr *ia) +static inline bool is_quota_modification(struct user_namespace *mnt_userns, + struct inode *inode, struct iattr *ia) { - return (ia->ia_valid & ATTR_SIZE) || - (ia->ia_valid & ATTR_UID && !uid_eq(ia->ia_uid, inode->i_uid)) || - (ia->ia_valid & ATTR_GID && !gid_eq(ia->ia_gid, inode->i_gid)); + return ((ia->ia_valid & ATTR_SIZE) || + i_uid_needs_update(mnt_userns, ia, inode) || + i_gid_needs_update(mnt_userns, ia, inode)); } #if defined(CONFIG_QUOTA) @@ -115,7 +116,8 @@ int dquot_set_dqblk(struct super_block *sb, struct kqid id, struct qc_dqblk *di); int __dquot_transfer(struct inode *inode, struct dquot **transfer_to); -int dquot_transfer(struct inode *inode, struct iattr *iattr); +int dquot_transfer(struct user_namespace *mnt_userns, struct inode *inode, + struct iattr 
*iattr); static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type) { @@ -234,7 +236,8 @@ static inline void dquot_free_inode(struct inode *inode) { } -static inline int dquot_transfer(struct inode *inode, struct iattr *iattr) +static inline int dquot_transfer(struct user_namespace *mnt_userns, + struct inode *inode, struct iattr *iattr) { return 0; } diff --git a/include/linux/random.h b/include/linux/random.h index 20e389a14e5c..3fec206487f6 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -106,32 +106,25 @@ declare_get_random_var_wait(long, unsigned long) */ #include <linux/prandom.h> -#ifdef CONFIG_ARCH_RANDOM -# include <asm/archrandom.h> -#else -static inline bool __must_check arch_get_random_long(unsigned long *v) { return false; } -static inline bool __must_check arch_get_random_int(unsigned int *v) { return false; } -static inline bool __must_check arch_get_random_seed_long(unsigned long *v) { return false; } -static inline bool __must_check arch_get_random_seed_int(unsigned int *v) { return false; } -#endif +#include <asm/archrandom.h> /* * Called from the boot CPU during startup; not valid to call once * secondary CPUs are up and preemption is possible. */ -#ifndef arch_get_random_seed_long_early -static inline bool __init arch_get_random_seed_long_early(unsigned long *v) +#ifndef arch_get_random_seed_longs_early +static inline size_t __init arch_get_random_seed_longs_early(unsigned long *v, size_t max_longs) { WARN_ON(system_state != SYSTEM_BOOTING); - return arch_get_random_seed_long(v); + return arch_get_random_seed_longs(v, max_longs); } #endif -#ifndef arch_get_random_long_early -static inline bool __init arch_get_random_long_early(unsigned long *v) +#ifndef arch_get_random_longs_early +static inline size_t __init arch_get_random_longs_early(unsigned long *v, size_t max_longs) { WARN_ON(system_state != SYSTEM_BOOTING); - return arch_get_random_long(v); + return arch_get_random_longs(v, max_longs); } #endif diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 1a32036c918c..f527f27e6438 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -29,6 +29,7 @@ #include <linux/lockdep.h> #include <asm/processor.h> #include <linux/cpumask.h> +#include <linux/context_tracking_irq.h> #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) @@ -41,6 +42,7 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func); void rcu_barrier_tasks(void); void rcu_barrier_tasks_rude(void); void synchronize_rcu(void); +unsigned long get_completed_synchronize_rcu(void); #ifdef CONFIG_PREEMPT_RCU @@ -103,13 +105,11 @@ static inline void rcu_sysrq_start(void) { } static inline void rcu_sysrq_end(void) { } #endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */ -#ifdef CONFIG_NO_HZ_FULL -void rcu_user_enter(void); -void rcu_user_exit(void); +#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) +void rcu_irq_work_resched(void); #else -static inline void rcu_user_enter(void) { } -static inline void rcu_user_exit(void) { } -#endif /* CONFIG_NO_HZ_FULL */ +static inline void rcu_irq_work_resched(void) { } +#endif #ifdef CONFIG_RCU_NOCB_CPU void rcu_init_nohz(void); @@ -128,7 +128,7 @@ static inline void rcu_nocb_flush_deferred_wakeup(void) { } * @a: Code that RCU needs to pay attention to.
* * RCU read-side critical sections are forbidden in the inner idle loop, - * that is, between the rcu_idle_enter() and the rcu_idle_exit() -- RCU + * that is, between the ct_idle_enter() and the ct_idle_exit() -- RCU * will happily ignore any such read-side critical sections. However, * things like powertop need tracepoints in the inner idle loop. * @@ -143,9 +143,9 @@ static inline void rcu_nocb_flush_deferred_wakeup(void) { } */ #define RCU_NONIDLE(a) \ do { \ - rcu_irq_enter_irqson(); \ + ct_irq_enter_irqson(); \ do { a; } while (0); \ - rcu_irq_exit_irqson(); \ + ct_irq_exit_irqson(); \ } while (0) /* @@ -169,13 +169,24 @@ void synchronize_rcu_tasks(void); # endif # ifdef CONFIG_TASKS_TRACE_RCU -# define rcu_tasks_trace_qs(t) \ - do { \ - if (!likely(READ_ONCE((t)->trc_reader_checked)) && \ - !unlikely(READ_ONCE((t)->trc_reader_nesting))) { \ - smp_store_release(&(t)->trc_reader_checked, true); \ - smp_mb(); /* Readers partitioned by store. */ \ - } \ +// Bits for ->trc_reader_special.b.need_qs field. +#define TRC_NEED_QS 0x1 // Task needs a quiescent state. +#define TRC_NEED_QS_CHECKED 0x2 // Task has been checked for needing quiescent state. + +u8 rcu_trc_cmpxchg_need_qs(struct task_struct *t, u8 old, u8 new); +void rcu_tasks_trace_qs_blkd(struct task_struct *t); + +# define rcu_tasks_trace_qs(t) \ + do { \ + int ___rttq_nesting = READ_ONCE((t)->trc_reader_nesting); \ + \ + if (likely(!READ_ONCE((t)->trc_reader_special.b.need_qs)) && \ + likely(!___rttq_nesting)) { \ + rcu_trc_cmpxchg_need_qs((t), 0, TRC_NEED_QS_CHECKED); \ + } else if (___rttq_nesting && ___rttq_nesting != INT_MIN && \ + !READ_ONCE((t)->trc_reader_special.b.blocked)) { \ + rcu_tasks_trace_qs_blkd(t); \ + } \ } while (0) # else # define rcu_tasks_trace_qs(t) do { } while (0) @@ -184,7 +195,7 @@ void synchronize_rcu_tasks(void); #define rcu_tasks_qs(t, preempt) \ do { \ rcu_tasks_classic_qs((t), (preempt)); \ - rcu_tasks_trace_qs((t)); \ + rcu_tasks_trace_qs(t); \ } while (0) # ifdef CONFIG_TASKS_RUDE_RCU diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h index 6f9c35817398..9bc8cbb33340 100644 --- a/include/linux/rcupdate_trace.h +++ b/include/linux/rcupdate_trace.h @@ -75,7 +75,7 @@ static inline void rcu_read_unlock_trace(void) nesting = READ_ONCE(t->trc_reader_nesting) - 1; barrier(); // Critical section before disabling. // Disable IPI-based setting of .need_qs. - WRITE_ONCE(t->trc_reader_nesting, INT_MIN); + WRITE_ONCE(t->trc_reader_nesting, INT_MIN + nesting); if (likely(!READ_ONCE(t->trc_reader_special.s)) || nesting) { WRITE_ONCE(t->trc_reader_nesting, nesting); return; // We assume shallow reader nesting. 
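/*
 * Reader-side usage of the tasks-trace primitives touched above; a
 * minimal sketch (struct demo_obj and its updater are hypothetical):
 */
#include <linux/rcupdate_trace.h>

struct demo_obj {
	int val;
};

static int demo_peek(struct demo_obj __rcu **slot)
{
	struct demo_obj *obj;
	int val = -1;

	rcu_read_lock_trace();
	obj = rcu_dereference_check(*slot, rcu_read_lock_trace_held());
	if (obj)
		val = obj->val;
	/* May report a deferred quiescent state via ->trc_reader_special. */
	rcu_read_unlock_trace();
	return val;
}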
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 5fed476f977f..62815c0a2dce 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -23,6 +23,16 @@ static inline void cond_synchronize_rcu(unsigned long oldstate) might_sleep(); } +static inline unsigned long start_poll_synchronize_rcu_expedited(void) +{ + return start_poll_synchronize_rcu(); +} + +static inline void cond_synchronize_rcu_expedited(unsigned long oldstate) +{ + cond_synchronize_rcu(oldstate); +} + extern void rcu_barrier(void); static inline void synchronize_rcu_expedited(void) @@ -38,7 +48,7 @@ static inline void synchronize_rcu_expedited(void) */ extern void kvfree(const void *addr); -static inline void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) +static inline void __kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) { if (head) { call_rcu(head, func); @@ -51,6 +61,15 @@ static inline void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) kvfree((void *) func); } +#ifdef CONFIG_KASAN_GENERIC +void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func); +#else +static inline void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) +{ + __kvfree_call_rcu(head, func); +} +#endif + void rcu_qs(void); static inline void rcu_softirq_qs(void) @@ -76,12 +95,6 @@ static inline int rcu_needs_cpu(void) static inline void rcu_virt_note_context_switch(int cpu) { } static inline void rcu_cpu_stall_reset(void) { } static inline int rcu_jiffies_till_stall_check(void) { return 21 * HZ; } -static inline void rcu_idle_enter(void) { } -static inline void rcu_idle_exit(void) { } -static inline void rcu_irq_enter(void) { } -static inline void rcu_irq_exit_irqson(void) { } -static inline void rcu_irq_enter_irqson(void) { } -static inline void rcu_irq_exit(void) { } static inline void rcu_irq_exit_check_preempt(void) { } #define rcu_is_idle_cpu(cpu) \ (is_idle_task(current) && !in_nmi() && !in_hardirq() && !in_serving_softirq()) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 9c6cfb742504..47eaa4cb0df7 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -40,17 +40,13 @@ bool rcu_eqs_special_set(int cpu); void rcu_momentary_dyntick_idle(void); void kfree_rcu_scheduler_running(void); bool rcu_gp_might_be_stalled(void); +unsigned long start_poll_synchronize_rcu_expedited(void); +void cond_synchronize_rcu_expedited(unsigned long oldstate); unsigned long get_state_synchronize_rcu(void); unsigned long start_poll_synchronize_rcu(void); bool poll_state_synchronize_rcu(unsigned long oldstate); void cond_synchronize_rcu(unsigned long oldstate); -void rcu_idle_enter(void); -void rcu_idle_exit(void); -void rcu_irq_enter(void); -void rcu_irq_exit(void); -void rcu_irq_enter_irqson(void); -void rcu_irq_exit_irqson(void); bool rcu_is_idle_cpu(int cpu); #ifdef CONFIG_PROVE_RCU @@ -59,6 +55,9 @@ void rcu_irq_exit_check_preempt(void); static inline void rcu_irq_exit_check_preempt(void) { } #endif +struct task_struct; +void rcu_preempt_deferred_qs(struct task_struct *t); + void exit_rcu(void); void rcu_scheduler_starting(void); diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 8952fa3d0d59..7cf2157134ac 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1336,6 +1336,22 @@ static inline int regmap_field_update_bits(struct regmap_field *field, NULL, false, false); } +static inline int regmap_field_set_bits(struct regmap_field *field, + unsigned int bits) +{ + return regmap_field_update_bits_base(field, bits, bits, 
NULL, false, + false); +} + +static inline int regmap_field_clear_bits(struct regmap_field *field, + unsigned int bits) +{ + return regmap_field_update_bits_base(field, bits, 0, NULL, false, + false); +} + +int regmap_field_test_bits(struct regmap_field *field, unsigned int bits); + static inline int regmap_field_force_update_bits(struct regmap_field *field, unsigned int mask, unsigned int val) @@ -1424,6 +1440,8 @@ struct regmap_irq_sub_irq_map { unsigned int *offset; }; +struct regmap_irq_chip_data; + /** * struct regmap_irq_chip - Description of a generic regmap irq_chip. * @@ -1451,32 +1469,50 @@ struct regmap_irq_sub_irq_map { * main_status set. * * @status_base: Base status register address. - * @mask_base: Base mask register address. - * @mask_writeonly: Base mask register is write only. - * @unmask_base: Base unmask register address. for chips who have - * separate mask and unmask registers + * @mask_base: Base mask register address. Mask bits are set to 1 when an + * interrupt is masked, 0 when unmasked. + * @unmask_base: Base unmask register address. Unmask bits are set to 1 when + * an interrupt is unmasked and 0 when masked. * @ack_base: Base ack address. If zero then the chip is clear on read. * Using zero value is possible with @use_ack bit. * @wake_base: Base address for wake enables. If zero unsupported. - * @type_base: Base address for irq type. If zero unsupported. - * @virt_reg_base: Base addresses for extra config regs. + * @type_base: Base address for irq type. If zero unsupported. Deprecated, + * use @config_base instead. + * @virt_reg_base: Base addresses for extra config regs. Deprecated, use + * @config_base instead. + * @config_base: Base address for IRQ type config regs. If null unsupported. * @irq_reg_stride: Stride to use for chips where registers are not contiguous. * @init_ack_masked: Ack all masked interrupts once during initalization. * @mask_invert: Inverted mask register: cleared bits are masked out. + * Deprecated; prefer describing an inverted mask register as + * an unmask register. + * @mask_unmask_non_inverted: Controls mask bit inversion for chips that set + * both @mask_base and @unmask_base. If false, mask and unmask bits are + * inverted (which is deprecated behavior); if true, bits will not be + * inverted and the registers keep their normal behavior. Note that if + * you use only one of @mask_base or @unmask_base, this flag has no + * effect and is unnecessary. Any new drivers that set both @mask_base + * and @unmask_base should set this to true to avoid relying on the + * deprecated behavior. * @use_ack: Use @ack register even if it is zero. * @ack_invert: Inverted ack register: cleared bits for ack. * @clear_ack: Use this to set 1 and 0 or vice-versa to clear interrupts. * @wake_invert: Inverted wake register: cleared bits are wake enabled. - * @type_invert: Invert the type flags. - * @type_in_mask: Use the mask registers for controlling irq type. For - * interrupts defining type_rising/falling_mask use mask_base - * for edge configuration and never update bits in type_base. + * @type_invert: Invert the type flags. Deprecated, use config registers + * instead. + * @type_in_mask: Use the mask registers for controlling irq type. Use this if + * the hardware provides separate bits for rising/falling edge + * or low/high level interrupts and they should be combined into + * a single logical interrupt. Use &struct regmap_irq_type data + * to define the mask bit for each irq type. 
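/*
 * Consumer view of the new regmap_field bit helpers above. A sketch that
 * pulses a hypothetical self-clearing reset bit and verifies it:
 */
#include <linux/bits.h>
#include <linux/regmap.h>

#define DEMO_RST	BIT(0)

static int demo_reset(struct regmap_field *ctrl)
{
	int ret;

	ret = regmap_field_set_bits(ctrl, DEMO_RST);
	if (ret)
		return ret;

	/* Returns 1 if all bits are set, 0 if not, negative errno on error. */
	ret = regmap_field_test_bits(ctrl, DEMO_RST);
	if (ret < 0)
		return ret;
	if (ret == 0)
		return 0;	/* bit already self-cleared */

	return regmap_field_clear_bits(ctrl, DEMO_RST);
}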
* @clear_on_unmask: For chips with interrupts cleared on read: read the status * registers before unmasking interrupts to clear any bits * set when they were masked. * @not_fixed_stride: Used when chip peripherals are not laid out with fixed - * stride. Must be used with sub_reg_offsets containing the - * offsets to each peripheral. + * stride. Must be used with sub_reg_offsets containing the + * offsets to each peripheral. Deprecated; the same thing + * can be accomplished with a @get_irq_reg callback, without + * the need for a @sub_reg_offsets table. * @status_invert: Inverted status register: cleared bits are active interrupts. * @runtime_pm: Hold a runtime PM lock on the device when accessing it. * * @@ -1484,17 +1520,28 @@ struct regmap_irq_sub_irq_map { * @irqs: Descriptors for individual IRQs. Interrupt numbers are * assigned based on the index in the array of the interrupt. * @num_irqs: Number of descriptors. - * @num_type_reg: Number of type registers. + * @num_type_reg: Number of type registers. Deprecated, use config registers + * instead. * @num_virt_regs: Number of non-standard irq configuration registers. - * If zero unsupported. - * @type_reg_stride: Stride to use for chips where type registers are not - * contiguous. + * If zero unsupported. Deprecated, use config registers + * instead. + * @num_config_bases: Number of config base registers. + * @num_config_regs: Number of config registers for each config base register. * @handle_pre_irq: Driver specific callback to handle interrupt from device * before regmap_irq_handler processes the interrupts. * @handle_post_irq: Driver specific callback to handle interrupt from device * after handling the interrupts in regmap_irq_handler(). * @set_type_virt: Driver specific callback to extend regmap_irq_set_type() - * and configure virt regs. + * and configure virt regs. Deprecated, use @set_type_config + * callback and config registers instead. + * @set_type_config: Callback used for configuring irq types. + * @get_irq_reg: Callback for mapping (base register, index) pairs to register + * addresses. The base register will be one of @status_base, + * @mask_base, etc., @main_status, or any of @config_base. + * The index will be in the range [0, num_main_regs[ for the + * main status base, [0, num_type_settings[ for any config + * register base, and [0, num_regs[ for any other base. + * If unspecified then regmap_irq_get_irq_reg_linear() is used. * @irq_drv_data: Driver specific IRQ data which is passed as parameter when * driver specific pre/post interrupt handler is called. 
* @@ -1517,20 +1564,21 @@ struct regmap_irq_chip { unsigned int wake_base; unsigned int type_base; unsigned int *virt_reg_base; + const unsigned int *config_base; unsigned int irq_reg_stride; - bool mask_writeonly:1; - bool init_ack_masked:1; - bool mask_invert:1; - bool use_ack:1; - bool ack_invert:1; - bool clear_ack:1; - bool wake_invert:1; - bool runtime_pm:1; - bool type_invert:1; - bool type_in_mask:1; - bool clear_on_unmask:1; - bool not_fixed_stride:1; - bool status_invert:1; + unsigned int init_ack_masked:1; + unsigned int mask_invert:1; + unsigned int mask_unmask_non_inverted:1; + unsigned int use_ack:1; + unsigned int ack_invert:1; + unsigned int clear_ack:1; + unsigned int wake_invert:1; + unsigned int runtime_pm:1; + unsigned int type_invert:1; + unsigned int type_in_mask:1; + unsigned int clear_on_unmask:1; + unsigned int not_fixed_stride:1; + unsigned int status_invert:1; int num_regs; @@ -1539,16 +1587,24 @@ struct regmap_irq_chip { int num_type_reg; int num_virt_regs; - unsigned int type_reg_stride; + int num_config_bases; + int num_config_regs; int (*handle_pre_irq)(void *irq_drv_data); int (*handle_post_irq)(void *irq_drv_data); int (*set_type_virt)(unsigned int **buf, unsigned int type, unsigned long hwirq, int reg); + int (*set_type_config)(unsigned int **buf, unsigned int type, + const struct regmap_irq *irq_data, int idx); + unsigned int (*get_irq_reg)(struct regmap_irq_chip_data *data, + unsigned int base, int index); void *irq_drv_data; }; -struct regmap_irq_chip_data; +unsigned int regmap_irq_get_irq_reg_linear(struct regmap_irq_chip_data *data, + unsigned int base, int index); +int regmap_irq_set_type_config_simple(unsigned int **buf, unsigned int type, + const struct regmap_irq *irq_data, int idx); int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags, int irq_base, const struct regmap_irq_chip *chip, @@ -1769,6 +1825,27 @@ regmap_field_force_update_bits(struct regmap_field *field, return -EINVAL; } +static inline int regmap_field_set_bits(struct regmap_field *field, + unsigned int bits) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + +static inline int regmap_field_clear_bits(struct regmap_field *field, + unsigned int bits) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + +static inline int regmap_field_test_bits(struct regmap_field *field, + unsigned int bits) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + static inline int regmap_fields_write(struct regmap_field *field, unsigned int id, unsigned int val) { diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index bbf6590a6dec..bc6cda706d1f 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -171,10 +171,13 @@ struct regulator; /** * struct regulator_bulk_data - Data used for bulk regulator operations. * - * @supply: The name of the supply. Initialised by the user before - * using the bulk regulator APIs. - * @consumer: The regulator consumer for the supply. This will be managed - * by the bulk API. + * @supply: The name of the supply. Initialised by the user before + * using the bulk regulator APIs. + * @init_load_uA: After getting the regulator, regulator_set_load() will be + * called with this load. Initialised by the user before + * using the bulk regulator APIs. + * @consumer: The regulator consumer for the supply. This will be managed + * by the bulk API. 
* * The regulator APIs provide a series of regulator_bulk_() API calls as * a convenience to consumers which require multiple supplies. This @@ -182,6 +185,7 @@ struct regulator; */ struct regulator_bulk_data { const char *supply; + int init_load_uA; struct regulator *consumer; /* private: Internal use */ @@ -240,6 +244,10 @@ int __must_check regulator_bulk_get(struct device *dev, int num_consumers, struct regulator_bulk_data *consumers); int __must_check devm_regulator_bulk_get(struct device *dev, int num_consumers, struct regulator_bulk_data *consumers); +int __must_check devm_regulator_bulk_get_const( + struct device *dev, int num_consumers, + const struct regulator_bulk_data *in_consumers, + struct regulator_bulk_data **out_consumers); int __must_check regulator_bulk_enable(int num_consumers, struct regulator_bulk_data *consumers); int regulator_bulk_disable(int num_consumers, diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 0228caaa6741..f9a7461e72b8 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -348,6 +348,7 @@ enum regulator_type { * @ramp_delay_table: Table for mapping the regulator ramp-rate values. Values * should be given in units of V/S (uV/uS). See the * regulator_set_ramp_delay_regmap(). + * @n_ramp_values: number of elements at @ramp_delay_table. * * @enable_time: Time taken for initial enable of regulator (in uS). * @off_on_delay: guard time (in uS), before re-enabling a regulator diff --git a/include/linux/sched.h b/include/linux/sched.h index c46f3a63b758..d6b0866c71ed 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -843,8 +843,9 @@ struct task_struct { int trc_reader_nesting; int trc_ipi_to_cpu; union rcu_special trc_reader_special; - bool trc_reader_checked; struct list_head trc_holdout_list; + struct list_head trc_blkd_node; + int trc_blkd_cpu; #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ struct sched_info sched_info; @@ -2223,6 +2224,7 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) extern bool sched_task_on_rq(struct task_struct *p); extern unsigned long get_wchan(struct task_struct *p); +extern struct task_struct *cpu_curr_snapshot(int cpu); /* * In order to reduce various lock holder preemption latencies provide an @@ -2257,7 +2259,7 @@ static inline bool owner_on_cpu(struct task_struct *owner) } /* Returns effective CPU energy utilization, as seen by the scheduler */ -unsigned long sched_cpu_util(int cpu, unsigned long max); +unsigned long sched_cpu_util(int cpu); #endif /* CONFIG_SMP */ #ifdef CONFIG_RSEQ diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index e5af028c08b4..994c25640e15 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -39,20 +39,12 @@ static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p) } extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task); extern void rt_mutex_adjust_pi(struct task_struct *p); -static inline bool tsk_is_pi_blocked(struct task_struct *tsk) -{ - return tsk->pi_blocked_on != NULL; -} #else static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task) { return NULL; } # define rt_mutex_adjust_pi(p) do { } while (0) -static inline bool tsk_is_pi_blocked(struct task_struct *tsk) -{ - return false; -} #endif extern void normalize_rt_tasks(void); diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 56cffe42abbc..816df6cc444e 100644 --- a/include/linux/sched/topology.h +++ 
b/include/linux/sched/topology.h @@ -81,6 +81,7 @@ struct sched_domain_shared { atomic_t ref; atomic_t nr_busy_cpus; int has_idle_cores; + int nr_idle_scan; }; struct sched_domain { diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 704111f63993..a193884ecf2b 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -60,6 +60,12 @@ struct scmi_clock_info { }; }; +enum scmi_power_scale { + SCMI_POWER_BOGOWATTS, + SCMI_POWER_MILLIWATTS, + SCMI_POWER_MICROWATTS +}; + struct scmi_handle; struct scmi_device; struct scmi_protocol_handle; @@ -135,7 +141,7 @@ struct scmi_perf_proto_ops { unsigned long *rate, unsigned long *power); bool (*fast_switch_possible)(const struct scmi_protocol_handle *ph, struct device *dev); - bool (*power_scale_mw_get)(const struct scmi_protocol_handle *ph); + enum scmi_power_scale (*power_scale_get)(const struct scmi_protocol_handle *ph); }; /** @@ -561,6 +567,116 @@ struct scmi_voltage_proto_ops { }; /** + * struct scmi_powercap_info - Describe one available Powercap domain + * + * @id: Domain ID as advertised by the platform. + * @notify_powercap_cap_change: CAP change notification support. + * @notify_powercap_measurement_change: MEASUREMENTS change notifications + * support. + * @async_powercap_cap_set: Asynchronous CAP set support. + * @powercap_cap_config: CAP configuration support. + * @powercap_monitoring: Monitoring (measurements) support. + * @powercap_pai_config: PAI configuration support. + * @powercap_scale_mw: Domain reports power data in milliwatt units. + * @powercap_scale_uw: Domain reports power data in microwatt units. + * Note that, when both @powercap_scale_mw and + * @powercap_scale_uw are set to false, the domain + * reports power data on an abstract linear scale. + * @name: name assigned to the Powercap Domain by platform. + * @min_pai: Minimum configurable PAI. + * @max_pai: Maximum configurable PAI. + * @pai_step: Step size between two consecutive PAI values. + * @min_power_cap: Minimum configurable CAP. + * @max_power_cap: Maximum configurable CAP. + * @power_cap_step: Step size between two consecutive CAP values. + * @sustainable_power: Maximum sustainable power consumption for this domain + * under normal conditions. + * @accuracy: The accuracy with which the power is measured and reported in + * integral multiples of 0.001 percent. + * @parent_id: Identifier of the containing parent power capping domain, or the + * value 0xFFFFFFFF if this powercap domain is a root domain not + * contained in any other domain. + */ +struct scmi_powercap_info { + unsigned int id; + bool notify_powercap_cap_change; + bool notify_powercap_measurement_change; + bool async_powercap_cap_set; + bool powercap_cap_config; + bool powercap_monitoring; + bool powercap_pai_config; + bool powercap_scale_mw; + bool powercap_scale_uw; + bool fastchannels; + char name[SCMI_MAX_STR_SIZE]; + unsigned int min_pai; + unsigned int max_pai; + unsigned int pai_step; + unsigned int min_power_cap; + unsigned int max_power_cap; + unsigned int power_cap_step; + unsigned int sustainable_power; + unsigned int accuracy; +#define SCMI_POWERCAP_ROOT_ZONE_ID 0xFFFFFFFFUL + unsigned int parent_id; + struct scmi_fc_info *fc_info; +}; + +/** + * struct scmi_powercap_proto_ops - represents the various operations provided + * by SCMI Powercap Protocol + * + * @num_domains_get: get the count of powercap domains provided by SCMI. + * @info_get: get the information for the specified domain. 
+ * @cap_get: get the current CAP value for the specified domain. + * @cap_set: set the CAP value for the specified domain to the provided value; + * if the domain supports setting the CAP with an asynchronous command + * this request will finally trigger an asynchronous transfer, but, if + * @ignore_dresp here is set to true, this call will still return + * immediately without waiting for the related delayed response. + * @pai_get: get the current PAI value for the specified domain. + * @pai_set: set the PAI value for the specified domain to the provided value. + * @measurements_get: retrieve the current average power measurements for the + * specified domain and the related PAI upon which it is + * calculated. + * @measurements_threshold_set: set the desired low and high power thresholds + * to be used when registering for notification + * of type POWERCAP_MEASUREMENTS_NOTIFY with this + * powercap domain. + * Note that this must be called at least once + * before registering any callback with the usual + * @scmi_notify_ops; moreover, if this method + * is called with measurement notifications already + * enabled it will also trigger, transparently, a + * proper update of the power thresholds configured + * in the SCMI backend server. + * @measurements_threshold_get: get the currently configured low and high power + * thresholds used when registering callbacks for + * notification POWERCAP_MEASUREMENTS_NOTIFY. + */ +struct scmi_powercap_proto_ops { + int (*num_domains_get)(const struct scmi_protocol_handle *ph); + const struct scmi_powercap_info __must_check *(*info_get) + (const struct scmi_protocol_handle *ph, u32 domain_id); + int (*cap_get)(const struct scmi_protocol_handle *ph, u32 domain_id, + u32 *power_cap); + int (*cap_set)(const struct scmi_protocol_handle *ph, u32 domain_id, + u32 power_cap, bool ignore_dresp); + int (*pai_get)(const struct scmi_protocol_handle *ph, u32 domain_id, + u32 *pai); + int (*pai_set)(const struct scmi_protocol_handle *ph, u32 domain_id, + u32 pai); + int (*measurements_get)(const struct scmi_protocol_handle *ph, + u32 domain_id, u32 *average_power, u32 *pai); + int (*measurements_threshold_set)(const struct scmi_protocol_handle *ph, + u32 domain_id, u32 power_thresh_low, + u32 power_thresh_high); + int (*measurements_threshold_get)(const struct scmi_protocol_handle *ph, + u32 domain_id, u32 *power_thresh_low, + u32 *power_thresh_high); +}; + +/** + * struct scmi_notify_ops - represents notifications' operations provided by * SCMI core * @devm_event_notifier_register: Managed registration of a notifier_block for @@ -624,6 +740,9 @@ struct scmi_notify_ops { * * @dev: pointer to the SCMI device * @version: pointer to the structure containing SCMI version information + * @devm_protocol_acquire: devres managed method to get hold of a protocol, + * causing its initialization and related resource + * accounting * @devm_protocol_get: devres managed method to acquire a protocol and get specific * operations and a dedicated protocol handler * @devm_protocol_put: devres managed method to release a protocol @@ -642,6 +761,8 @@ struct scmi_handle { struct device *dev; struct scmi_revision_info *version; + int __must_check (*devm_protocol_acquire)(struct scmi_device *sdev, + u8 proto); const void __must_check * (*devm_protocol_get)(struct scmi_device *sdev, u8 proto, struct scmi_protocol_handle **ph); @@ -661,6 +782,7 @@ enum scmi_std_protocol { SCMI_PROTOCOL_SENSOR = 0x15, SCMI_PROTOCOL_RESET = 0x16, SCMI_PROTOCOL_VOLTAGE = 0x17, + SCMI_PROTOCOL_POWERCAP = 0x18, 
}; enum scmi_system_events { @@ -762,6 +884,8 @@ enum scmi_notification_events { SCMI_EVENT_RESET_ISSUED = 0x0, SCMI_EVENT_BASE_ERROR_EVENT = 0x0, SCMI_EVENT_SYSTEM_POWER_STATE_NOTIFIER = 0x0, + SCMI_EVENT_POWERCAP_CAP_CHANGED = 0x0, + SCMI_EVENT_POWERCAP_MEASUREMENTS_CHANGED = 0x1, }; struct scmi_power_state_changed_report { @@ -781,8 +905,10 @@ struct scmi_clock_rate_notif_report { struct scmi_system_power_state_notifier_report { ktime_t timestamp; unsigned int agent_id; +#define SCMI_SYSPOWER_IS_REQUEST_GRACEFUL(flags) ((flags) & BIT(0)) unsigned int flags; unsigned int system_state; + unsigned int timeout; }; struct scmi_perf_limits_report { @@ -830,4 +956,18 @@ struct scmi_base_error_report { unsigned long long reports[]; }; +struct scmi_powercap_cap_changed_report { + ktime_t timestamp; + unsigned int agent_id; + unsigned int domain_id; + unsigned int power_cap; + unsigned int pai; +}; + +struct scmi_powercap_meas_changed_report { + ktime_t timestamp; + unsigned int agent_id; + unsigned int domain_id; + unsigned int power; +}; #endif /* _LINUX_SCMI_PROTOCOL_H */ diff --git a/include/linux/security.h b/include/linux/security.h index 7fc4e9f49f54..1bc362cb413f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -353,7 +353,8 @@ int security_inode_readlink(struct dentry *dentry); int security_inode_follow_link(struct dentry *dentry, struct inode *inode, bool rcu); int security_inode_permission(struct inode *inode, int mask); -int security_inode_setattr(struct dentry *dentry, struct iattr *attr); +int security_inode_setattr(struct user_namespace *mnt_userns, + struct dentry *dentry, struct iattr *attr); int security_inode_getattr(const struct path *path); int security_inode_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, const char *name, @@ -415,6 +416,7 @@ int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags); int security_task_fix_setgid(struct cred *new, const struct cred *old, int flags); +int security_task_fix_setgroups(struct cred *new, const struct cred *old); int security_task_setpgid(struct task_struct *p, pid_t pgid); int security_task_getpgid(struct task_struct *p); int security_task_getsid(struct task_struct *p); @@ -848,8 +850,9 @@ static inline int security_inode_permission(struct inode *inode, int mask) return 0; } -static inline int security_inode_setattr(struct dentry *dentry, - struct iattr *attr) +static inline int security_inode_setattr(struct user_namespace *mnt_userns, + struct dentry *dentry, + struct iattr *attr) { return 0; } @@ -1098,6 +1101,12 @@ static inline int security_task_fix_setgid(struct cred *new, return 0; } +static inline int security_task_fix_setgroups(struct cred *new, + const struct cred *old) +{ + return 0; +} + static inline int security_task_setpgid(struct task_struct *p, pid_t pgid) { return 0; diff --git a/include/linux/soc/mediatek/mtk-mutex.h b/include/linux/soc/mediatek/mtk-mutex.h index 6fe4ffbde290..a0f4f51a3b45 100644 --- a/include/linux/soc/mediatek/mtk-mutex.h +++ b/include/linux/soc/mediatek/mtk-mutex.h @@ -10,11 +10,33 @@ struct regmap; struct device; struct mtk_mutex; +enum mtk_mutex_mod_index { + /* MDP table index */ + MUTEX_MOD_IDX_MDP_RDMA0, + MUTEX_MOD_IDX_MDP_RSZ0, + MUTEX_MOD_IDX_MDP_RSZ1, + MUTEX_MOD_IDX_MDP_TDSHP0, + MUTEX_MOD_IDX_MDP_WROT0, + MUTEX_MOD_IDX_MDP_WDMA, + MUTEX_MOD_IDX_MDP_AAL0, + MUTEX_MOD_IDX_MDP_CCORR0, + + MUTEX_MOD_IDX_MAX /* ALWAYS keep at the end */ +}; + +enum mtk_mutex_sof_index { + MUTEX_SOF_IDX_SINGLE_MODE, + + MUTEX_SOF_IDX_MAX /* 
ALWAYS keep at the end */ +}; + struct mtk_mutex *mtk_mutex_get(struct device *dev); int mtk_mutex_prepare(struct mtk_mutex *mutex); void mtk_mutex_add_comp(struct mtk_mutex *mutex, enum mtk_ddp_comp_id id); void mtk_mutex_enable(struct mtk_mutex *mutex); +int mtk_mutex_enable_by_cmdq(struct mtk_mutex *mutex, + void *pkt); void mtk_mutex_disable(struct mtk_mutex *mutex); void mtk_mutex_remove_comp(struct mtk_mutex *mutex, enum mtk_ddp_comp_id id); @@ -22,5 +44,10 @@ void mtk_mutex_unprepare(struct mtk_mutex *mutex); void mtk_mutex_put(struct mtk_mutex *mutex); void mtk_mutex_acquire(struct mtk_mutex *mutex); void mtk_mutex_release(struct mtk_mutex *mutex); +int mtk_mutex_write_mod(struct mtk_mutex *mutex, + enum mtk_mutex_mod_index idx, + bool clear); +int mtk_mutex_write_sof(struct mtk_mutex *mutex, + enum mtk_mutex_sof_index idx); #endif /* MTK_MUTEX_H */ diff --git a/include/linux/socket.h b/include/linux/socket.h index d2f5c4d5a906..de3701a2a212 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -421,10 +421,9 @@ extern int recvmsg_copy_msghdr(struct msghdr *msg, struct user_msghdr __user *umsg, unsigned flags, struct sockaddr __user **uaddr, struct iovec **iov); -extern int __copy_msghdr_from_user(struct msghdr *kmsg, - struct user_msghdr __user *umsg, - struct sockaddr __user **save_addr, - struct iovec __user **uiov, size_t *nsegs); +extern int __copy_msghdr(struct msghdr *kmsg, + struct user_msghdr *umsg, + struct sockaddr __user **save_addr); /* helpers which do the actual work for syscalls */ extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size, diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index d361ba26203b..e6c73d5ff1a8 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -17,6 +17,7 @@ #include <uapi/linux/spi/spi.h> #include <linux/acpi.h> +#include <linux/u64_stats_sync.h> struct dma_chan; struct software_node; @@ -34,7 +35,8 @@ extern struct bus_type spi_bus_type; /** * struct spi_statistics - statistics for spi transfers - * @lock: lock protecting this structure + * @syncp: seqcount to protect members in this struct for per-cpu update + * on 32-bit systems * * @messages: number of spi-messages handled * @transfers: number of spi_transfers handled @@ -59,37 +61,48 @@ extern struct bus_type spi_bus_type; * maxsize limit */ struct spi_statistics { - spinlock_t lock; /* lock for the whole structure */ + struct u64_stats_sync syncp; - unsigned long messages; - unsigned long transfers; - unsigned long errors; - unsigned long timedout; + u64_stats_t messages; + u64_stats_t transfers; + u64_stats_t errors; + u64_stats_t timedout; - unsigned long spi_sync; - unsigned long spi_sync_immediate; - unsigned long spi_async; + u64_stats_t spi_sync; + u64_stats_t spi_sync_immediate; + u64_stats_t spi_async; - unsigned long long bytes; - unsigned long long bytes_rx; - unsigned long long bytes_tx; + u64_stats_t bytes; + u64_stats_t bytes_rx; + u64_stats_t bytes_tx; #define SPI_STATISTICS_HISTO_SIZE 17 - unsigned long transfer_bytes_histo[SPI_STATISTICS_HISTO_SIZE]; + u64_stats_t transfer_bytes_histo[SPI_STATISTICS_HISTO_SIZE]; - unsigned long transfers_split_maxsize; + u64_stats_t transfers_split_maxsize; }; -#define SPI_STATISTICS_ADD_TO_FIELD(stats, field, count) \ - do { \ - unsigned long flags; \ - spin_lock_irqsave(&(stats)->lock, flags); \ - (stats)->field += count; \ - spin_unlock_irqrestore(&(stats)->lock, flags); \ +#define SPI_STATISTICS_ADD_TO_FIELD(pcpu_stats, field, count) \ + do { \ + struct spi_statistics 
*__lstats; \ + get_cpu(); \ + __lstats = this_cpu_ptr(pcpu_stats); \ + u64_stats_update_begin(&__lstats->syncp); \ + u64_stats_add(&__lstats->field, count); \ + u64_stats_update_end(&__lstats->syncp); \ + put_cpu(); \ } while (0) -#define SPI_STATISTICS_INCREMENT_FIELD(stats, field) \ - SPI_STATISTICS_ADD_TO_FIELD(stats, field, 1) +#define SPI_STATISTICS_INCREMENT_FIELD(pcpu_stats, field) \ + do { \ + struct spi_statistics *__lstats; \ + get_cpu(); \ + __lstats = this_cpu_ptr(pcpu_stats); \ + u64_stats_update_begin(&__lstats->syncp); \ + u64_stats_inc(&__lstats->field); \ + u64_stats_update_end(&__lstats->syncp); \ + put_cpu(); \ + } while (0) /** * struct spi_delay - SPI delay information @@ -149,7 +162,7 @@ extern int spi_delay_exec(struct spi_delay *_delay, struct spi_transfer *xfer); * @cs_inactive: delay to be introduced by the controller after CS is * deasserted. If @cs_change_delay is used from @spi_transfer, then the * two delays will be added up. - * @statistics: statistics for the spi_device + * @pcpu_statistics: statistics for the spi_device * * A @spi_device is used to interchange data between an SPI slave * (usually a discrete chip) and CPU memory. @@ -163,13 +176,13 @@ extern int spi_delay_exec(struct spi_delay *_delay, struct spi_transfer *xfer); struct spi_device { struct device dev; struct spi_controller *controller; - struct spi_controller *master; /* compatibility layer */ + struct spi_controller *master; /* Compatibility layer */ u32 max_speed_hz; u8 chip_select; u8 bits_per_word; bool rt; -#define SPI_NO_TX BIT(31) /* no transmit wire */ -#define SPI_NO_RX BIT(30) /* no receive wire */ +#define SPI_NO_TX BIT(31) /* No transmit wire */ +#define SPI_NO_RX BIT(30) /* No receive wire */ /* * All bits defined above should be covered by SPI_MODE_KERNEL_MASK. * The SPI_MODE_KERNEL_MASK has the SPI_MODE_USER_MASK counterpart, @@ -186,15 +199,15 @@ struct spi_device { void *controller_data; char modalias[SPI_NAME_SIZE]; const char *driver_override; - struct gpio_desc *cs_gpiod; /* chip select gpio desc */ - struct spi_delay word_delay; /* inter-word delay */ + struct gpio_desc *cs_gpiod; /* Chip select gpio desc */ + struct spi_delay word_delay; /* Inter-word delay */ /* CS delays */ struct spi_delay cs_setup; struct spi_delay cs_hold; struct spi_delay cs_inactive; - /* the statistics */ - struct spi_statistics statistics; + /* The statistics */ + struct spi_statistics __percpu *pcpu_statistics; /* * likely need more hooks for more protocol options affecting how @@ -215,7 +228,7 @@ static inline struct spi_device *to_spi_device(struct device *dev) return dev ? container_of(dev, struct spi_device, dev) : NULL; } -/* most drivers won't need to care about device refcounting */ +/* Most drivers won't need to care about device refcounting */ static inline struct spi_device *spi_dev_get(struct spi_device *spi) { return (spi && get_device(&spi->dev)) ? 
spi : NULL; @@ -238,7 +251,7 @@ static inline void spi_set_ctldata(struct spi_device *spi, void *state) spi->controller_state = state; } -/* device driver data */ +/* Device driver data */ static inline void spi_set_drvdata(struct spi_device *spi, void *data) { @@ -305,7 +318,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 chip_select); -/* use a define to avoid include chaining to get THIS_MODULE */ +/* Use a define to avoid include chaining to get THIS_MODULE */ #define spi_register_driver(driver) \ __spi_register_driver(THIS_MODULE, driver) @@ -370,10 +383,14 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @pump_messages: work struct for scheduling work to the message pump * @queue_lock: spinlock to synchronise access to message queue * @queue: message queue - * @idling: the device is entering idle state * @cur_msg: the currently in-flight message - * @cur_msg_prepared: spi_prepare_message was called for the currently - * in-flight message + * @cur_msg_completion: a completion for the current in-flight message + * @cur_msg_incomplete: Flag used internally to opportunistically skip + * the @cur_msg_completion. This flag is used to check if the driver has + * already called spi_finalize_current_message(). + * @cur_msg_need_completion: Flag used internally to opportunistically skip + * the @cur_msg_completion. This flag is used to signal the context that + * is running spi_finalize_current_message() that it needs to complete() * @cur_msg_mapped: message has been mapped for DMA * @last_cs: the last chip_select that is recorded by set_cs, -1 on non chip * selected @@ -433,7 +450,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @max_native_cs: When cs_gpiods is used, and this field is filled in, * spi_register_controller() will validate all native CS (including the * unused native CS) against this value. - * @statistics: statistics for the spi_controller + * @pcpu_statistics: statistics for the spi_controller * @dma_tx: DMA transmit channel * @dma_rx: DMA receive channel * @dummy_rx: dummy receive buffer for full-duplex devices @@ -450,6 +467,8 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @irq_flags: Interrupt enable state during PTP system timestamping * @fallback: fallback to pio if dma transfer returns failure with * SPI_TRANS_FAIL_NO_START. + * @queue_empty: signal green light for opportunistically skipping the queue + * for spi_sync transfers. * * Each SPI controller can communicate with one or more @spi_device * children. These make a small bus, sharing MOSI, MISO and SCK signals @@ -467,7 +486,7 @@ struct spi_controller { struct list_head list; - /* other than negative (== assign one dynamically), bus_num is fully + /* Other than negative (== assign one dynamically), bus_num is fully * board-specific. usually that simplifies to being SOC-specific. * example: one SOC has three SPI controllers, numbered 0..2, * and one board's schematics might show it using SPI-2. software @@ -480,7 +499,7 @@ struct spi_controller { */ u16 num_chipselect; - /* some SPI controllers pose alignment requirements on DMAable + /* Some SPI controllers pose alignment requirements on DMAable * buffers; let protocol drivers know about these requirements. 
*/ u16 dma_alignment; @@ -491,29 +510,29 @@ struct spi_controller { /* spi_device.mode flags override flags for this controller */ u32 buswidth_override_bits; - /* bitmask of supported bits_per_word for transfers */ + /* Bitmask of supported bits_per_word for transfers */ u32 bits_per_word_mask; #define SPI_BPW_MASK(bits) BIT((bits) - 1) #define SPI_BPW_RANGE_MASK(min, max) GENMASK((max) - 1, (min) - 1) - /* limits on transfer speed */ + /* Limits on transfer speed */ u32 min_speed_hz; u32 max_speed_hz; - /* other constraints relevant to this driver */ + /* Other constraints relevant to this driver */ u16 flags; -#define SPI_CONTROLLER_HALF_DUPLEX BIT(0) /* can't do full duplex */ -#define SPI_CONTROLLER_NO_RX BIT(1) /* can't do buffer read */ -#define SPI_CONTROLLER_NO_TX BIT(2) /* can't do buffer write */ -#define SPI_CONTROLLER_MUST_RX BIT(3) /* requires rx */ -#define SPI_CONTROLLER_MUST_TX BIT(4) /* requires tx */ +#define SPI_CONTROLLER_HALF_DUPLEX BIT(0) /* Can't do full duplex */ +#define SPI_CONTROLLER_NO_RX BIT(1) /* Can't do buffer read */ +#define SPI_CONTROLLER_NO_TX BIT(2) /* Can't do buffer write */ +#define SPI_CONTROLLER_MUST_RX BIT(3) /* Requires rx */ +#define SPI_CONTROLLER_MUST_TX BIT(4) /* Requires tx */ #define SPI_MASTER_GPIO_SS BIT(5) /* GPIO CS must select slave */ - /* flag indicating if the allocation of this struct is devres-managed */ + /* Flag indicating if the allocation of this struct is devres-managed */ bool devm_allocated; - /* flag indicating this is an SPI slave controller */ + /* Flag indicating this is an SPI slave controller */ bool slave; /* @@ -529,11 +548,11 @@ struct spi_controller { /* Used to avoid adding the same CS twice */ struct mutex add_lock; - /* lock and mutex for SPI bus locking */ + /* Lock and mutex for SPI bus locking */ spinlock_t bus_lock_spinlock; struct mutex bus_lock_mutex; - /* flag indicating that the SPI bus is locked for exclusive use */ + /* Flag indicating that the SPI bus is locked for exclusive use */ bool bus_lock_flag; /* Setup mode and clock, etc (spi driver may call many times). @@ -554,7 +573,7 @@ struct spi_controller { */ int (*set_cs_timing)(struct spi_device *spi); - /* bidirectional bulk transfers + /* Bidirectional bulk transfers * * + The transfer() method may not sleep; its main role is * just to add the message to the queue. 
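[Editorial note: the spi_statistics rework in the hunks above replaces the spinlock-protected counters with per-CPU copies guarded by u64_stats_sync, so update paths no longer bounce a shared lock. A reader wanting a total must sum the per-CPU copies inside the seqcount retry loop. A minimal sketch of such an aggregation, assuming the pcpu_statistics layout shown above; spi_demo_sum_messages is a hypothetical helper, not part of the patch:

#include <linux/percpu.h>
#include <linux/u64_stats_sync.h>

static u64 spi_demo_sum_messages(struct spi_statistics __percpu *pcpu_stats)
{
	u64 sum = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		const struct spi_statistics *s = per_cpu_ptr(pcpu_stats, cpu);
		unsigned int start;
		u64 val;

		/* Retry if this CPU's copy was updated concurrently. */
		do {
			start = u64_stats_fetch_begin(&s->syncp);
			val = u64_stats_read(&s->messages);
		} while (u64_stats_fetch_retry(&s->syncp, start));

		sum += val;
	}
	return sum;
}

On 64-bit kernels the u64_stats_sync machinery compiles away and this is a plain per-CPU sum; the seqcount only does work where 64-bit loads are not atomic, which is what the "on 32-bit systems" note in the @syncp kerneldoc refers to.]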
@@ -576,7 +595,7 @@ int (*transfer)(struct spi_device *spi, struct spi_message *mesg); - /* called on release() to free memory provided by spi_controller */ + /* Called on release() to free memory provided by spi_controller */ void (*cleanup)(struct spi_device *spi); /* @@ -603,12 +622,13 @@ spinlock_t queue_lock; struct list_head queue; struct spi_message *cur_msg; - bool idling; + struct completion cur_msg_completion; + bool cur_msg_incomplete; + bool cur_msg_need_completion; bool busy; bool running; bool rt; bool auto_runtime_pm; - bool cur_msg_prepared; bool cur_msg_mapped; char last_cs; bool last_cs_mode_high; @@ -646,14 +666,14 @@ s8 unused_native_cs; s8 max_native_cs; - /* statistics */ - struct spi_statistics statistics; + /* Statistics */ + struct spi_statistics __percpu *pcpu_statistics; /* DMA channels for use with core dmaengine helpers */ struct dma_chan *dma_tx; struct dma_chan *dma_rx; - /* dummy data for full duplex devices */ + /* Dummy data for full duplex devices */ void *dummy_rx; void *dummy_tx; @@ -667,6 +687,9 @@ /* Interrupt enable state during PTP system timestamping */ unsigned long irq_flags; + + /* Flag for enabling opportunistic skipping of the queue in spi_sync */ + bool queue_empty; }; static inline void *spi_controller_get_devdata(struct spi_controller *ctlr) @@ -715,7 +738,7 @@ void spi_take_timestamp_post(struct spi_controller *ctlr, struct spi_transfer *xfer, size_t progress, bool irqs_off); -/* the spi driver core manages memory for the spi_controller classdev */ +/* The spi driver core manages memory for the spi_controller classdev */ extern struct spi_controller *__spi_alloc_controller(struct device *host, unsigned int size, bool slave); @@ -785,7 +808,7 @@ typedef void (*spi_res_release_t)(struct spi_controller *ctlr, struct spi_res { struct list_head entry; spi_res_release_t release; - unsigned long long data[]; /* guarantee ull alignment */ + unsigned long long data[]; /* Guarantee ull alignment */ }; /*---------------------------------------------------------------------------*/ @@ -918,7 +941,7 @@ struct spi_res { * and its transfers, ignore them until its completion callback. */ struct spi_transfer { - /* it's ok if tx_buf == rx_buf (right?) + /* It's ok if tx_buf == rx_buf (right?) * for MicroWire, one buffer must be null * buffers must work with dma_*map_single() calls, unless * spi_message.is_dma_mapped reports a pre-existing mapping @@ -975,6 +998,7 @@ struct spi_transfer { * @queue: for use by whichever driver currently owns the message * @state: for use by whichever driver currently owns the message * @resources: for resource management when the spi message is processed + * @prepared: spi_prepare_message was called for this message * * A @spi_message is used to execute an atomic sequence of data transfers, * each represented by a struct spi_transfer. The sequence is "atomic" @@ -1008,22 +1032,25 @@ struct spi_message { * tell them about such special cases. */ - /* completion is reported through a callback */ + /* Completion is reported through a callback */ void (*complete)(void *context); void *context; unsigned frame_length; unsigned actual_length; int status; - /* for optional use by whatever driver currently owns the + /* For optional use by whatever driver currently owns the * spi_message ... between calls to spi_async and then later * complete(), that's the spi_controller controller driver. 
*/ struct list_head queue; void *state; - /* list of spi_res reources when the spi message is processed */ + /* List of spi_res resources when the spi message is processed */ struct list_head resources; + + /* spi_prepare_message() was called for this message */ + bool prepared; }; static inline void spi_message_init_no_memset(struct spi_message *m) @@ -1127,7 +1154,7 @@ spi_max_transfer_size(struct spi_device *spi) if (ctlr->max_transfer_size) tr_max = ctlr->max_transfer_size(spi); - /* transfer size limit must not be greater than messsage size limit */ + /* Transfer size limit must not be greater than message size limit */ return min(tr_max, msg_max); } @@ -1278,7 +1305,7 @@ spi_read(struct spi_device *spi, void *buf, size_t len) return spi_sync_transfer(spi, &t, 1); } -/* this copies txbuf and rxbuf data; for small transfers only! */ +/* This copies txbuf and rxbuf data; for small transfers only! */ extern int spi_write_then_read(struct spi_device *spi, const void *txbuf, unsigned n_tx, void *rxbuf, unsigned n_rx); @@ -1301,7 +1328,7 @@ static inline ssize_t spi_w8r8(struct spi_device *spi, u8 cmd) status = spi_write_then_read(spi, &cmd, 1, &result, 1); - /* return negative errno or unsigned value */ + /* Return negative errno or unsigned value */ return (status < 0) ? status : result; } @@ -1326,7 +1353,7 @@ static inline ssize_t spi_w8r16(struct spi_device *spi, u8 cmd) status = spi_write_then_read(spi, &cmd, 1, &result, 2); - /* return negative errno or unsigned value */ + /* Return negative errno or unsigned value */ return (status < 0) ? status : result; } @@ -1406,7 +1433,7 @@ static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd) * are active in some dynamic board configuration models. */ struct spi_board_info { - /* the device name and module name are coupled, like platform_bus; + /* The device name and module name are coupled, like platform_bus; * "modalias" is normally the driver name. 
* * platform_data goes to spi_device.dev.platform_data, @@ -1419,7 +1446,7 @@ struct spi_board_info { void *controller_data; int irq; - /* slower signaling on noisy or low voltage boards */ + /* Slower signaling on noisy or low voltage boards */ u32 max_speed_hz; @@ -1448,7 +1475,7 @@ extern int spi_register_board_info(struct spi_board_info const *info, unsigned n); #else -/* board init code may ignore whether SPI is configured or not */ +/* Board init code may ignore whether SPI is configured or not */ static inline int spi_register_board_info(struct spi_board_info const *info, unsigned n) { return 0; } diff --git a/include/linux/swap.h b/include/linux/swap.h index 0c0fed1b348f..8672a7123ccd 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -438,7 +438,8 @@ static inline bool node_reclaim_enabled(void) return node_reclaim_mode & (RECLAIM_ZONE|RECLAIM_WRITE|RECLAIM_UNMAP); } -extern void check_move_unevictable_pages(struct pagevec *pvec); +void check_move_unevictable_folios(struct folio_batch *fbatch); +void check_move_unevictable_pages(struct pagevec *pvec); extern void kswapd_run(int nid); extern void kswapd_stop(int nid); diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 365733b428d8..1386c713885d 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -40,8 +40,6 @@ enum thermal_trend { THERMAL_TREND_STABLE, /* temperature is stable */ THERMAL_TREND_RAISING, /* temperature is raising */ THERMAL_TREND_DROPPING, /* temperature is dropping */ - THERMAL_TREND_RAISE_FULL, /* apply highest cooling action */ - THERMAL_TREND_DROP_FULL, /* apply lowest cooling action */ }; /* Thermal notification reason */ @@ -80,6 +78,18 @@ struct thermal_zone_device_ops { void (*critical)(struct thermal_zone_device *); }; +/** + * struct thermal_trip - representation of a point in temperature domain + * @temperature: temperature value in milliCelsius + * @hysteresis: relative hysteresis in milliCelsius + * @type: trip point type + */ +struct thermal_trip { + int temperature; + int hysteresis; + enum thermal_trip_type type; +}; + struct thermal_cooling_device_ops { int (*get_max_state) (struct thermal_cooling_device *, unsigned long *); int (*get_cur_state) (struct thermal_cooling_device *, unsigned long *); @@ -113,7 +123,8 @@ struct thermal_cooling_device { * @trip_hyst_attrs: attributes for trip points for sysfs: trip hysteresis * @mode: current mode of this thermal zone * @devdata: private pointer for device private data - * @trips: number of trip points the thermal zone supports + * @trips: an array of struct thermal_trip + * @num_trips: number of trip points the thermal zone supports * @trips_disabled: bitmap for disabled trips * @passive_delay_jiffies: number of jiffies to wait between polls when * performing passive cooling. 
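[Editorial note: with trip points now carried as data in struct thermal_trip, a driver can describe its thermal behaviour in a table and hand it to the new thermal_zone_device_register_with_trips() declared a little further down. A minimal sketch under assumed names; demo_trips, demo_register_tz and the trip values are hypothetical:

static struct thermal_trip demo_trips[] = {
	{ .temperature = 75000, .hysteresis = 2000, .type = THERMAL_TRIP_PASSIVE },
	{ .temperature = 95000, .hysteresis = 0, .type = THERMAL_TRIP_CRITICAL },
};

static int demo_register_tz(void *devdata, struct thermal_zone_device_ops *ops)
{
	struct thermal_zone_device *tz;

	/* Trip mask 0 (no trips writable), no passive delay, poll each second. */
	tz = thermal_zone_device_register_with_trips("demo_zone", demo_trips,
						     ARRAY_SIZE(demo_trips), 0,
						     devdata, ops, NULL, 0, 1000);
	return PTR_ERR_OR_ZERO(tz);
}

The zone then keeps the table in tz->trips with tz->num_trips entries, matching the struct thermal_zone_device changes in the next hunk.]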
@@ -153,7 +164,8 @@ struct thermal_zone_device { struct thermal_attr *trip_hyst_attrs; enum thermal_device_mode mode; void *devdata; - int trips; + struct thermal_trip *trips; + int num_trips; unsigned long trips_disabled; /* bitmap for disabled trips */ unsigned long passive_delay_jiffies; unsigned long polling_delay_jiffies; @@ -366,8 +378,14 @@ void devm_thermal_zone_of_sensor_unregister(struct device *dev, struct thermal_zone_device *thermal_zone_device_register(const char *, int, int, void *, struct thermal_zone_device_ops *, struct thermal_zone_params *, int, int); + void thermal_zone_device_unregister(struct thermal_zone_device *); +struct thermal_zone_device * +thermal_zone_device_register_with_trips(const char *, struct thermal_trip *, int, int, + void *, struct thermal_zone_device_ops *, + struct thermal_zone_params *, int, int); + int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *, unsigned long, unsigned long, diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 28031b15f878..55717a2eda08 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -200,13 +200,13 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) */ \ if (rcuidle) { \ __idx = srcu_read_lock_notrace(&tracepoint_srcu);\ - rcu_irq_enter_irqson(); \ + ct_irq_enter_irqson(); \ } \ \ __DO_TRACE_CALL(name, TP_ARGS(args)); \ \ if (rcuidle) { \ - rcu_irq_exit_irqson(); \ + ct_irq_exit_irqson(); \ srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\ } \ \ diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 5a328cf02b75..47e5d374c7eb 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -148,7 +148,7 @@ _copy_to_user(void __user *, const void *, unsigned long); static __always_inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { - if (likely(check_copy_size(to, n, false))) + if (check_copy_size(to, n, false)) n = _copy_from_user(to, from, n); return n; } @@ -156,7 +156,7 @@ copy_from_user(void *to, const void __user *from, unsigned long n) static __always_inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { - if (likely(check_copy_size(from, n, true))) + if (check_copy_size(from, n, true)) n = _copy_to_user(to, from, n); return n; } diff --git a/include/linux/uio.h b/include/linux/uio.h index 739285fe5a2f..9a2dc496d535 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -156,19 +156,17 @@ static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset, static __always_inline __must_check size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { - if (unlikely(!check_copy_size(addr, bytes, true))) - return 0; - else + if (check_copy_size(addr, bytes, true)) return _copy_to_iter(addr, bytes, i); + return 0; } static __always_inline __must_check size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { - if (unlikely(!check_copy_size(addr, bytes, false))) - return 0; - else + if (check_copy_size(addr, bytes, false)) return _copy_from_iter(addr, bytes, i); + return 0; } static __always_inline __must_check @@ -184,10 +182,9 @@ bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) static __always_inline __must_check size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) { - if (unlikely(!check_copy_size(addr, bytes, false))) - return 0; - else + if (check_copy_size(addr, bytes, false)) return 
_copy_from_iter_nocache(addr, bytes, i); + return 0; } static __always_inline __must_check @@ -219,6 +216,8 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i); #endif size_t iov_iter_zero(size_t bytes, struct iov_iter *); +bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask, + unsigned len_mask); unsigned long iov_iter_alignment(const struct iov_iter *i); unsigned long iov_iter_gap_alignment(const struct iov_iter *i); void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov, diff --git a/include/linux/wait.h b/include/linux/wait.h index 851e07da2583..58cfbf81447c 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -544,10 +544,11 @@ do { \ \ hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC, \ HRTIMER_MODE_REL); \ - if ((timeout) != KTIME_MAX) \ - hrtimer_start_range_ns(&__t.timer, timeout, \ - current->timer_slack_ns, \ - HRTIMER_MODE_REL); \ + if ((timeout) != KTIME_MAX) { \ + hrtimer_set_expires_range_ns(&__t.timer, timeout, \ + current->timer_slack_ns); \ + hrtimer_sleeper_start_expires(&__t, HRTIMER_MODE_REL); \ + } \ \ __ret = ___wait_event(wq_head, condition, state, 0, 0, \ if (!__t.task) { \ diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h index 3b9a40ae8bdb..fc6bba20273b 100644 --- a/include/linux/watch_queue.h +++ b/include/linux/watch_queue.h @@ -4,7 +4,7 @@ * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * - * See Documentation/watch_queue.rst + * See Documentation/core-api/watch_queue.rst */ #ifndef _LINUX_WATCH_QUEUE_H diff --git a/include/linux/writeback.h b/include/linux/writeback.h index da21d63f70e2..3f045f6d6c4f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -101,9 +101,9 @@ struct writeback_control { #endif }; -static inline int wbc_to_write_flags(struct writeback_control *wbc) +static inline blk_opf_t wbc_to_write_flags(struct writeback_control *wbc) { - int flags = 0; + blk_opf_t flags = 0; if (wbc->punt_to_cgroup) flags = REQ_CGROUP_PUNT; @@ -364,7 +364,14 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); void wb_update_bandwidth(struct bdi_writeback *wb); + +/* Invoke balance dirty pages in async mode. 
*/ +#define BDP_ASYNC 0x0001 + void balance_dirty_pages_ratelimited(struct address_space *mapping); +int balance_dirty_pages_ratelimited_flags(struct address_space *mapping, + unsigned int flags); + bool wb_over_bg_thresh(struct bdi_writeback *wb); typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, diff --git a/include/linux/xarray.h b/include/linux/xarray.h index c29e11b2c073..44dd6d6e01bc 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -16,6 +16,7 @@ #include <linux/kconfig.h> #include <linux/kernel.h> #include <linux/rcupdate.h> +#include <linux/sched/mm.h> #include <linux/spinlock.h> #include <linux/types.h> @@ -586,6 +587,7 @@ static inline void *xa_store_bh(struct xarray *xa, unsigned long index, { void *curr; + might_alloc(gfp); xa_lock_bh(xa); curr = __xa_store(xa, index, entry, gfp); xa_unlock_bh(xa); @@ -612,6 +614,7 @@ static inline void *xa_store_irq(struct xarray *xa, unsigned long index, { void *curr; + might_alloc(gfp); xa_lock_irq(xa); curr = __xa_store(xa, index, entry, gfp); xa_unlock_irq(xa); @@ -687,6 +690,7 @@ static inline void *xa_cmpxchg(struct xarray *xa, unsigned long index, { void *curr; + might_alloc(gfp); xa_lock(xa); curr = __xa_cmpxchg(xa, index, old, entry, gfp); xa_unlock(xa); @@ -714,6 +718,7 @@ static inline void *xa_cmpxchg_bh(struct xarray *xa, unsigned long index, { void *curr; + might_alloc(gfp); xa_lock_bh(xa); curr = __xa_cmpxchg(xa, index, old, entry, gfp); xa_unlock_bh(xa); @@ -741,6 +746,7 @@ static inline void *xa_cmpxchg_irq(struct xarray *xa, unsigned long index, { void *curr; + might_alloc(gfp); xa_lock_irq(xa); curr = __xa_cmpxchg(xa, index, old, entry, gfp); xa_unlock_irq(xa); @@ -770,6 +776,7 @@ static inline int __must_check xa_insert(struct xarray *xa, { int err; + might_alloc(gfp); xa_lock(xa); err = __xa_insert(xa, index, entry, gfp); xa_unlock(xa); @@ -799,6 +806,7 @@ static inline int __must_check xa_insert_bh(struct xarray *xa, { int err; + might_alloc(gfp); xa_lock_bh(xa); err = __xa_insert(xa, index, entry, gfp); xa_unlock_bh(xa); @@ -828,6 +836,7 @@ static inline int __must_check xa_insert_irq(struct xarray *xa, { int err; + might_alloc(gfp); xa_lock_irq(xa); err = __xa_insert(xa, index, entry, gfp); xa_unlock_irq(xa); @@ -857,6 +866,7 @@ static inline __must_check int xa_alloc(struct xarray *xa, u32 *id, { int err; + might_alloc(gfp); xa_lock(xa); err = __xa_alloc(xa, id, entry, limit, gfp); xa_unlock(xa); @@ -886,6 +896,7 @@ static inline int __must_check xa_alloc_bh(struct xarray *xa, u32 *id, { int err; + might_alloc(gfp); xa_lock_bh(xa); err = __xa_alloc(xa, id, entry, limit, gfp); xa_unlock_bh(xa); @@ -915,6 +926,7 @@ static inline int __must_check xa_alloc_irq(struct xarray *xa, u32 *id, { int err; + might_alloc(gfp); xa_lock_irq(xa); err = __xa_alloc(xa, id, entry, limit, gfp); xa_unlock_irq(xa); @@ -948,6 +960,7 @@ static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, { int err; + might_alloc(gfp); xa_lock(xa); err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); xa_unlock(xa); @@ -981,6 +994,7 @@ static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry, { int err; + might_alloc(gfp); xa_lock_bh(xa); err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); xa_unlock_bh(xa); @@ -1014,6 +1028,7 @@ static inline int xa_alloc_cyclic_irq(struct xarray *xa, u32 *id, void *entry, { int err; + might_alloc(gfp); xa_lock_irq(xa); err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); xa_unlock_irq(xa); diff --git a/include/linux/xattr.h 
b/include/linux/xattr.h index 4c379d23ec6e..979a9d3e5bfb 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -61,7 +61,7 @@ int __vfs_setxattr_locked(struct user_namespace *, struct dentry *, const char *, const void *, size_t, int, struct inode **); int vfs_setxattr(struct user_namespace *, struct dentry *, const char *, - const void *, size_t, int); + void *, size_t, int); int __vfs_removexattr(struct user_namespace *, struct dentry *, const char *); int __vfs_removexattr_locked(struct user_namespace *, struct dentry *, const char *, struct inode **);
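[Editorial note: on the xarray changes above, might_alloc(gfp) (from the newly included <linux/sched/mm.h>) is roughly might_sleep_if(gfpflags_allow_blocking(gfp)) plus lockdep's fs_reclaim tracking, executed before the array lock is taken. That makes a blocking GFP mask passed from atomic context warn deterministically on debug kernels, even when the store happens to reuse an existing slot and never allocates. A deliberately buggy sketch of what it catches; names are hypothetical:

static void demo_store_from_atomic(struct xarray *xa, void *entry,
				   spinlock_t *lock)
{
	spin_lock(lock);
	/*
	 * GFP_KERNEL may block, so the new might_alloc() annotation
	 * warns here on debug kernels, allocation or not.
	 */
	xa_store_bh(xa, 0, entry, GFP_KERNEL);
	spin_unlock(lock);
}

Passing GFP_ATOMIC from such a context, or moving the store out from under the lock, keeps the annotation quiet.]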