diff options
Diffstat (limited to 'include/linux')
195 files changed, 5009 insertions, 2155 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 729cff1ee3f8..3015235d65e3 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -498,7 +498,7 @@ bool acpi_dev_resource_address_space(struct acpi_resource *ares, struct resource_win *win); bool acpi_dev_resource_ext_address_space(struct acpi_resource *ares, struct resource_win *win); -unsigned long acpi_dev_irq_flags(u8 triggering, u8 polarity, u8 shareable); +unsigned long acpi_dev_irq_flags(u8 triggering, u8 polarity, u8 shareable, u8 wake_capable); unsigned int acpi_dev_get_irq_type(int triggering, int polarity); bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index, struct resource *res); @@ -1211,7 +1211,8 @@ bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio); bool acpi_gpio_get_io_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio); -int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, int index); +int acpi_dev_gpio_irq_wake_get_by(struct acpi_device *adev, const char *name, int index, + bool *wake_capable); #else static inline bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio) @@ -1223,16 +1224,28 @@ static inline bool acpi_gpio_get_io_resource(struct acpi_resource *ares, { return false; } -static inline int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, - const char *name, int index) +static inline int acpi_dev_gpio_irq_wake_get_by(struct acpi_device *adev, const char *name, + int index, bool *wake_capable) { return -ENXIO; } #endif +static inline int acpi_dev_gpio_irq_wake_get(struct acpi_device *adev, int index, + bool *wake_capable) +{ + return acpi_dev_gpio_irq_wake_get_by(adev, NULL, index, wake_capable); +} + +static inline int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, + int index) +{ + return acpi_dev_gpio_irq_wake_get_by(adev, name, index, NULL); +} + static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) { - return acpi_dev_gpio_irq_get_by(adev, NULL, index); + return acpi_dev_gpio_irq_wake_get_by(adev, NULL, index, NULL); } /* Device properties */ diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h index 49e5383d4222..17fa26215292 100644 --- a/include/linux/ahci_platform.h +++ b/include/linux/ahci_platform.h @@ -13,6 +13,7 @@ #include <linux/compiler.h> +struct clk; struct device; struct ata_port_info; struct ahci_host_priv; @@ -21,8 +22,12 @@ struct scsi_host_template; int ahci_platform_enable_phys(struct ahci_host_priv *hpriv); void ahci_platform_disable_phys(struct ahci_host_priv *hpriv); +struct clk *ahci_platform_find_clk(struct ahci_host_priv *hpriv, + const char *con_id); int ahci_platform_enable_clks(struct ahci_host_priv *hpriv); void ahci_platform_disable_clks(struct ahci_host_priv *hpriv); +int ahci_platform_deassert_rsts(struct ahci_host_priv *hpriv); +int ahci_platform_assert_rsts(struct ahci_host_priv *hpriv); int ahci_platform_enable_regulators(struct ahci_host_priv *hpriv); void ahci_platform_disable_regulators(struct ahci_host_priv *hpriv); int ahci_platform_enable_resources(struct ahci_host_priv *hpriv); @@ -41,6 +46,7 @@ int ahci_platform_resume_host(struct device *dev); int ahci_platform_suspend(struct device *dev); int ahci_platform_resume(struct device *dev); -#define AHCI_PLATFORM_GET_RESETS 0x01 +#define AHCI_PLATFORM_GET_RESETS BIT(0) +#define AHCI_PLATFORM_RST_TRIGGER BIT(1) #endif /* _AHCI_PLATFORM_H */ diff --git a/include/linux/ata.h b/include/linux/ata.h index 21292b5bbb55..e3050e153a71 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -566,6 +566,18 @@ struct ata_bmdma_prd { ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ ((id)[ATA_ID_FEATURE_SUPP] & (1 << 2))) +#define ata_id_has_devslp(id) \ + ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ + ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ + ((id)[ATA_ID_FEATURE_SUPP] & (1 << 8))) +#define ata_id_has_ncq_autosense(id) \ + ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ + ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ + ((id)[ATA_ID_FEATURE_SUPP] & (1 << 7))) +#define ata_id_has_dipm(id) \ + ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ + ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ + ((id)[ATA_ID_FEATURE_SUPP] & (1 << 3))) #define ata_id_iordy_disable(id) ((id)[ATA_ID_CAPABILITY] & (1 << 10)) #define ata_id_has_iordy(id) ((id)[ATA_ID_CAPABILITY] & (1 << 11)) #define ata_id_u32(id,n) \ @@ -578,9 +590,6 @@ struct ata_bmdma_prd { #define ata_id_cdb_intr(id) (((id)[ATA_ID_CONFIG] & 0x60) == 0x20) #define ata_id_has_da(id) ((id)[ATA_ID_SATA_CAPABILITY_2] & (1 << 4)) -#define ata_id_has_devslp(id) ((id)[ATA_ID_FEATURE_SUPP] & (1 << 8)) -#define ata_id_has_ncq_autosense(id) \ - ((id)[ATA_ID_FEATURE_SUPP] & (1 << 7)) static inline bool ata_id_has_hipm(const u16 *id) { @@ -592,17 +601,6 @@ static inline bool ata_id_has_hipm(const u16 *id) return val & (1 << 9); } -static inline bool ata_id_has_dipm(const u16 *id) -{ - u16 val = id[ATA_ID_FEATURE_SUPP]; - - if (val == 0 || val == 0xffff) - return false; - - return val & (1 << 3); -} - - static inline bool ata_id_has_fua(const u16 *id) { if ((id[ATA_ID_CFSSE] & 0xC000) != 0x4000) @@ -771,16 +769,21 @@ static inline bool ata_id_has_read_log_dma_ext(const u16 *id) static inline bool ata_id_has_sense_reporting(const u16 *id) { - if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) + if (!(id[ATA_ID_CFS_ENABLE_2] & BIT(15))) + return false; + if ((id[ATA_ID_COMMAND_SET_3] & (BIT(15) | BIT(14))) != BIT(14)) return false; - return id[ATA_ID_COMMAND_SET_3] & (1 << 6); + return id[ATA_ID_COMMAND_SET_3] & BIT(6); } static inline bool ata_id_sense_reporting_enabled(const u16 *id) { - if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) + if (!ata_id_has_sense_reporting(id)) + return false; + /* ata_id_has_sense_reporting() == true, word 86 must have bit 15 set */ + if ((id[ATA_ID_COMMAND_SET_4] & (BIT(15) | BIT(14))) != BIT(14)) return false; - return id[ATA_ID_COMMAND_SET_4] & (1 << 6); + return id[ATA_ID_COMMAND_SET_4] & BIT(6); } /** diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index f65410a49fda..7d6d73b78147 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -51,6 +51,7 @@ struct device; * bitmap_empty(src, nbits) Are all bits zero in *src? * bitmap_full(src, nbits) Are all bits set in *src? * bitmap_weight(src, nbits) Hamming Weight: number set bits + * bitmap_weight_and(src1, src2, nbits) Hamming Weight of and'ed bitmap * bitmap_set(dst, pos, nbits) Set specified bit area * bitmap_clear(dst, pos, nbits) Clear specified bit area * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area @@ -164,6 +165,8 @@ bool __bitmap_intersects(const unsigned long *bitmap1, bool __bitmap_subset(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); unsigned int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); +unsigned int __bitmap_weight_and(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); void __bitmap_set(unsigned long *map, unsigned int start, int len); void __bitmap_clear(unsigned long *map, unsigned int start, int len); @@ -222,7 +225,6 @@ void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int n #else #define bitmap_copy_le bitmap_copy #endif -unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int ord, unsigned int nbits); int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits); @@ -439,6 +441,15 @@ unsigned int bitmap_weight(const unsigned long *src, unsigned int nbits) return __bitmap_weight(src, nbits); } +static __always_inline +unsigned long bitmap_weight_and(const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) +{ + if (small_const_nbits(nbits)) + return hweight_long(*src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)); + return __bitmap_weight_and(src1, src2, nbits); +} + static __always_inline void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits) { diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 3b89c64bcfd8..2ba557e067fe 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -248,6 +248,25 @@ static inline unsigned long __ffs64(u64 word) } /** + * fns - find N'th set bit in a word + * @word: The word to search + * @n: Bit to find + */ +static inline unsigned long fns(unsigned long word, unsigned int n) +{ + unsigned int bit; + + while (word) { + bit = __ffs(word); + if (n-- == 0) + return bit; + __clear_bit(bit, &word); + } + + return BITS_PER_LONG; +} + +/** * assign_bit - Assign value to a bit in memory * @nr: the bit to set * @addr: the address to start counting from @@ -328,10 +347,10 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr, const typeof(*(ptr)) mask__ = (mask), bits__ = (bits); \ typeof(*(ptr)) old__, new__; \ \ + old__ = READ_ONCE(*(ptr)); \ do { \ - old__ = READ_ONCE(*(ptr)); \ new__ = (old__ & ~mask__) | bits__; \ - } while (cmpxchg(ptr, old__, new__) != old__); \ + } while (!try_cmpxchg(ptr, &old__, new__)); \ \ old__; \ }) @@ -343,11 +362,12 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr, const typeof(*(ptr)) clear__ = (clear), test__ = (test);\ typeof(*(ptr)) old__, new__; \ \ + old__ = READ_ONCE(*(ptr)); \ do { \ - old__ = READ_ONCE(*(ptr)); \ + if (old__ & test__) \ + break; \ new__ = old__ & ~clear__; \ - } while (!(old__ & test__) && \ - cmpxchg(ptr, old__, new__) != old__); \ + } while (!try_cmpxchg(ptr, &old__, new__)); \ \ !(old__ & test__); \ }) diff --git a/include/linux/bma150.h b/include/linux/bma150.h index 31c9e323a391..4d4a62d49341 100644 --- a/include/linux/bma150.h +++ b/include/linux/bma150.h @@ -33,8 +33,8 @@ struct bma150_cfg { unsigned char lg_hyst; /* Low-G hysterisis */ unsigned char lg_dur; /* Low-G duration */ unsigned char lg_thres; /* Low-G threshold */ - unsigned char range; /* one of BMA0150_RANGE_xxx */ - unsigned char bandwidth; /* one of BMA0150_BW_xxx */ + unsigned char range; /* one of BMA150_RANGE_xxx */ + unsigned char bandwidth; /* one of BMA150_BW_xxx */ }; struct bma150_platform_data { diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 06089390d81d..33fa5e94aa80 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -225,8 +225,6 @@ struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block, void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); void __breadahead(struct block_device *, sector_t block, unsigned int size); -void __breadahead_gfp(struct block_device *, sector_t block, unsigned int size, - gfp_t gfp); struct buffer_head *__bread_gfp(struct block_device *, sector_t block, unsigned size, gfp_t gfp); void invalidate_bh_lrus(void); @@ -236,7 +234,6 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); void unlock_buffer(struct buffer_head *bh); void __lock_buffer(struct buffer_head *bh); -void ll_rw_block(blk_opf_t, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); @@ -244,7 +241,9 @@ void submit_bh(blk_opf_t, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); int bh_uptodate_or_lock(struct buffer_head *bh); -int bh_submit_read(struct buffer_head *bh); +int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait); +void __bh_read_batch(int nr, struct buffer_head *bhs[], + blk_opf_t op_flags, bool force_lock); extern int buffer_heads_over_limit; @@ -351,12 +350,6 @@ sb_breadahead(struct super_block *sb, sector_t block) __breadahead(sb->s_bdev, block, sb->s_blocksize); } -static inline void -sb_breadahead_unmovable(struct super_block *sb, sector_t block) -{ - __breadahead_gfp(sb->s_bdev, block, sb->s_blocksize, 0); -} - static inline struct buffer_head * sb_getblk(struct super_block *sb, sector_t block) { @@ -418,6 +411,41 @@ static inline struct buffer_head *__getblk(struct block_device *bdev, return __getblk_gfp(bdev, block, size, __GFP_MOVABLE); } +static inline void bh_readahead(struct buffer_head *bh, blk_opf_t op_flags) +{ + if (!buffer_uptodate(bh) && trylock_buffer(bh)) { + if (!buffer_uptodate(bh)) + __bh_read(bh, op_flags, false); + else + unlock_buffer(bh); + } +} + +static inline void bh_read_nowait(struct buffer_head *bh, blk_opf_t op_flags) +{ + if (!bh_uptodate_or_lock(bh)) + __bh_read(bh, op_flags, false); +} + +/* Returns 1 if buffer uptodated, 0 on success, and -EIO on error. */ +static inline int bh_read(struct buffer_head *bh, blk_opf_t op_flags) +{ + if (bh_uptodate_or_lock(bh)) + return 1; + return __bh_read(bh, op_flags, true); +} + +static inline void bh_read_batch(int nr, struct buffer_head *bhs[]) +{ + __bh_read_batch(nr, bhs, 0, true); +} + +static inline void bh_readahead_batch(int nr, struct buffer_head *bhs[], + blk_opf_t op_flags) +{ + __bh_read_batch(nr, bhs, op_flags, false); +} + /** * __bread() - reads a specified block and returns the bh * @bdev: the block_device to read from diff --git a/include/linux/cache.h b/include/linux/cache.h index d742c57eaee5..5da1bbd96154 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -85,4 +85,17 @@ #define cache_line_size() L1_CACHE_BYTES #endif +/* + * Helper to add padding within a struct to ensure data fall into separate + * cachelines. + */ +#if defined(CONFIG_SMP) +struct cacheline_padding { + char x[0]; +} ____cacheline_internodealigned_in_smp; +#define CACHELINE_PADDING(name) struct cacheline_padding name +#else +#define CACHELINE_PADDING(name) +#endif + #endif /* __LINUX_CACHE_H */ diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index e7f2fb2fc207..99c1726be6ee 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -207,7 +207,6 @@ struct ceph_msg_data_cursor { struct ceph_msg_data *data; /* current data item */ size_t resid; /* bytes not yet consumed */ - bool last_piece; /* current is last piece */ bool need_crc; /* crc update needed */ union { #ifdef CONFIG_BLOCK @@ -498,8 +497,7 @@ void ceph_con_discard_requeued(struct ceph_connection *con, u64 reconnect_seq); void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor, struct ceph_msg *msg, size_t length); struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor, - size_t *page_offset, size_t *length, - bool *last_piece); + size_t *page_offset, size_t *length); void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes); u32 ceph_crc32c_page(u32 crc, struct page *page, unsigned int page_offset, diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 4bcf56b3491c..8f481d1b159a 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -126,11 +126,11 @@ enum { CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */ CFTYPE_DEBUG = (1 << 5), /* create when cgroup_debug */ - CFTYPE_PRESSURE = (1 << 6), /* only if pressure feature is enabled */ /* internal flags, do not use outside cgroup core proper */ __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ __CFTYPE_NOT_ON_DFL = (1 << 17), /* not on default hierarchy */ + __CFTYPE_ADDED = (1 << 18), }; /* @@ -384,7 +384,7 @@ struct cgroup { /* * The depth this cgroup is at. The root is at depth zero and each * step down the hierarchy increments the level. This along with - * ancestor_ids[] can determine whether a given cgroup is a + * ancestors[] can determine whether a given cgroup is a * descendant of another without traversing the hierarchy. */ int level; @@ -504,8 +504,8 @@ struct cgroup { /* Used to store internal freezer state */ struct cgroup_freezer_state freezer; - /* ids of the ancestors at each level including self */ - u64 ancestor_ids[]; + /* All ancestors including self */ + struct cgroup *ancestors[]; }; /* @@ -522,11 +522,15 @@ struct cgroup_root { /* Unique id for this hierarchy. */ int hierarchy_id; - /* The root cgroup. Root is destroyed on its release. */ + /* + * The root cgroup. The containing cgroup_root will be destroyed on its + * release. cgrp->ancestors[0] will be used overflowing into the + * following field. cgrp_ancestor_storage must immediately follow. + */ struct cgroup cgrp; - /* for cgrp->ancestor_ids[0] */ - u64 cgrp_ancestor_id_storage; + /* must follow cgrp for cgrp->ancestors[0], see above */ + struct cgroup *cgrp_ancestor_storage; /* Number of cgroups in the hierarchy, used only for /proc/cgroups */ atomic_t nr_cgrps; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ac5d0515680e..23b102b4349e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -114,6 +114,7 @@ int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_rm_cftypes(struct cftype *cfts); void cgroup_file_notify(struct cgroup_file *cfile); +void cgroup_file_show(struct cgroup_file *cfile, bool show); int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry); @@ -432,6 +433,18 @@ static inline void cgroup_put(struct cgroup *cgrp) css_put(&cgrp->self); } +extern struct mutex cgroup_mutex; + +static inline void cgroup_lock(void) +{ + mutex_lock(&cgroup_mutex); +} + +static inline void cgroup_unlock(void) +{ + mutex_unlock(&cgroup_mutex); +} + /** * task_css_set_check - obtain a task's css_set with extra access conditions * @task: the task to obtain css_set for @@ -446,7 +459,6 @@ static inline void cgroup_put(struct cgroup *cgrp) * as locks used during the cgroup_subsys::attach() methods. */ #ifdef CONFIG_PROVE_RCU -extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; #define task_css_set_check(task, __c) \ rcu_dereference_check((task)->cgroups, \ @@ -574,7 +586,7 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp, { if (cgrp->root != ancestor->root || cgrp->level < ancestor->level) return false; - return cgrp->ancestor_ids[ancestor->level] == cgroup_id(ancestor); + return cgrp->ancestors[ancestor->level] == ancestor; } /** @@ -591,11 +603,9 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp, static inline struct cgroup *cgroup_ancestor(struct cgroup *cgrp, int ancestor_level) { - if (cgrp->level < ancestor_level) + if (ancestor_level < 0 || ancestor_level > cgrp->level) return NULL; - while (cgrp && cgrp->level > ancestor_level) - cgrp = cgroup_parent(cgrp); - return cgrp; + return cgrp->ancestors[ancestor_level]; } /** @@ -708,6 +718,8 @@ struct cgroup; static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; } static inline void css_get(struct cgroup_subsys_state *css) {} static inline void css_put(struct cgroup_subsys_state *css) {} +static inline void cgroup_lock(void) {} +static inline void cgroup_unlock(void) {} static inline int cgroup_attach_task_all(struct task_struct *from, struct task_struct *t) { return 0; } static inline int cgroupstats_build(struct cgroupstats *stats, @@ -747,11 +759,6 @@ static inline bool task_under_cgroup_hierarchy(struct task_struct *task, static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen) {} - -static inline struct cgroup *cgroup_get_from_id(u64 id) -{ - return NULL; -} #endif /* !CONFIG_CGROUPS */ #ifdef CONFIG_CGROUPS diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 1615010aa0ec..2108b5695327 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -350,7 +350,7 @@ struct clk_hw *__clk_hw_register_fixed_rate(struct device *dev, const char *parent_name, const struct clk_hw *parent_hw, const struct clk_parent_data *parent_data, unsigned long flags, unsigned long fixed_rate, unsigned long fixed_accuracy, - unsigned long clk_fixed_flags); + unsigned long clk_fixed_flags, bool devm); struct clk *clk_register_fixed_rate(struct device *dev, const char *name, const char *parent_name, unsigned long flags, unsigned long fixed_rate); @@ -365,7 +365,20 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, */ #define clk_hw_register_fixed_rate(dev, name, parent_name, flags, fixed_rate) \ __clk_hw_register_fixed_rate((dev), NULL, (name), (parent_name), NULL, \ - NULL, (flags), (fixed_rate), 0, 0) + NULL, (flags), (fixed_rate), 0, 0, false) + +/** + * devm_clk_hw_register_fixed_rate - register fixed-rate clock with the clock + * framework + * @dev: device that is registering this clock + * @name: name of this clock + * @parent_name: name of clock's parent + * @flags: framework-specific flags + * @fixed_rate: non-adjustable clock rate + */ +#define devm_clk_hw_register_fixed_rate(dev, name, parent_name, flags, fixed_rate) \ + __clk_hw_register_fixed_rate((dev), NULL, (name), (parent_name), NULL, \ + NULL, (flags), (fixed_rate), 0, 0, true) /** * clk_hw_register_fixed_rate_parent_hw - register fixed-rate clock with * the clock framework @@ -378,7 +391,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, #define clk_hw_register_fixed_rate_parent_hw(dev, name, parent_hw, flags, \ fixed_rate) \ __clk_hw_register_fixed_rate((dev), NULL, (name), NULL, (parent_hw), \ - NULL, (flags), (fixed_rate), 0, 0) + NULL, (flags), (fixed_rate), 0, 0, false) /** * clk_hw_register_fixed_rate_parent_data - register fixed-rate clock with * the clock framework @@ -392,7 +405,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, fixed_rate) \ __clk_hw_register_fixed_rate((dev), NULL, (name), NULL, NULL, \ (parent_data), (flags), (fixed_rate), 0, \ - 0) + 0, false) /** * clk_hw_register_fixed_rate_with_accuracy - register fixed-rate clock with * the clock framework @@ -408,7 +421,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, fixed_accuracy) \ __clk_hw_register_fixed_rate((dev), NULL, (name), (parent_name), \ NULL, NULL, (flags), (fixed_rate), \ - (fixed_accuracy), 0) + (fixed_accuracy), 0, false) /** * clk_hw_register_fixed_rate_with_accuracy_parent_hw - register fixed-rate * clock with the clock framework @@ -423,7 +436,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, parent_hw, flags, fixed_rate, fixed_accuracy) \ __clk_hw_register_fixed_rate((dev), NULL, (name), NULL, (parent_hw) \ NULL, NULL, (flags), (fixed_rate), \ - (fixed_accuracy), 0) + (fixed_accuracy), 0, false) /** * clk_hw_register_fixed_rate_with_accuracy_parent_data - register fixed-rate * clock with the clock framework @@ -438,7 +451,21 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, parent_data, flags, fixed_rate, fixed_accuracy) \ __clk_hw_register_fixed_rate((dev), NULL, (name), NULL, NULL, \ (parent_data), NULL, (flags), \ - (fixed_rate), (fixed_accuracy), 0) + (fixed_rate), (fixed_accuracy), 0, false) +/** + * clk_hw_register_fixed_rate_parent_accuracy - register fixed-rate clock with + * the clock framework + * @dev: device that is registering this clock + * @name: name of this clock + * @parent_name: name of clock's parent + * @flags: framework-specific flags + * @fixed_rate: non-adjustable clock rate + */ +#define clk_hw_register_fixed_rate_parent_accuracy(dev, name, parent_data, \ + flags, fixed_rate) \ + __clk_hw_register_fixed_rate((dev), NULL, (name), NULL, NULL, \ + (parent_data), (flags), (fixed_rate), 0, \ + CLK_FIXED_RATE_PARENT_ACCURACY, false) void clk_unregister_fixed_rate(struct clk *clk); void clk_hw_unregister_fixed_rate(struct clk_hw *hw); @@ -957,6 +984,13 @@ struct clk *clk_register_mux_table(struct device *dev, const char *name, (parent_names), NULL, NULL, (flags), (reg), \ (shift), (mask), (clk_mux_flags), (table), \ (lock)) +#define clk_hw_register_mux_table_parent_data(dev, name, parent_data, \ + num_parents, flags, reg, shift, mask, \ + clk_mux_flags, table, lock) \ + __clk_hw_register_mux((dev), NULL, (name), (num_parents), \ + NULL, NULL, (parent_data), (flags), (reg), \ + (shift), (mask), (clk_mux_flags), (table), \ + (lock)) #define clk_hw_register_mux(dev, name, parent_names, num_parents, flags, reg, \ shift, width, clk_mux_flags, lock) \ __clk_hw_register_mux((dev), NULL, (name), (num_parents), \ @@ -974,6 +1008,13 @@ struct clk *clk_register_mux_table(struct device *dev, const char *name, __clk_hw_register_mux((dev), NULL, (name), (num_parents), NULL, NULL, \ (parent_data), (flags), (reg), (shift), \ BIT((width)) - 1, (clk_mux_flags), NULL, (lock)) +#define clk_hw_register_mux_parent_data_table(dev, name, parent_data, \ + num_parents, flags, reg, shift, \ + width, clk_mux_flags, table, \ + lock) \ + __clk_hw_register_mux((dev), NULL, (name), (num_parents), NULL, NULL, \ + (parent_data), (flags), (reg), (shift), \ + BIT((width)) - 1, (clk_mux_flags), table, (lock)) #define devm_clk_hw_register_mux(dev, name, parent_names, num_parents, flags, reg, \ shift, width, clk_mux_flags, lock) \ __devm_clk_hw_register_mux((dev), NULL, (name), (num_parents), \ @@ -987,6 +1028,13 @@ struct clk *clk_register_mux_table(struct device *dev, const char *name, (parent_hws), NULL, (flags), (reg), \ (shift), BIT((width)) - 1, \ (clk_mux_flags), NULL, (lock)) +#define devm_clk_hw_register_mux_parent_data_table(dev, name, parent_data, \ + num_parents, flags, reg, shift, \ + width, clk_mux_flags, table, \ + lock) \ + __devm_clk_hw_register_mux((dev), NULL, (name), (num_parents), NULL, \ + NULL, (parent_data), (flags), (reg), (shift), \ + BIT((width)) - 1, (clk_mux_flags), table, (lock)) int clk_mux_val_to_index(struct clk_hw *hw, const u32 *table, unsigned int flags, unsigned int val); @@ -1454,7 +1502,7 @@ int devm_of_clk_add_hw_provider(struct device *dev, void *data), void *data); void of_clk_del_provider(struct device_node *np); -void devm_of_clk_del_provider(struct device *dev); + struct clk *of_clk_src_simple_get(struct of_phandle_args *clkspec, void *data); struct clk_hw *of_clk_hw_simple_get(struct of_phandle_args *clkspec, @@ -1491,7 +1539,7 @@ static inline int devm_of_clk_add_hw_provider(struct device *dev, return 0; } static inline void of_clk_del_provider(struct device_node *np) {} -static inline void devm_of_clk_del_provider(struct device *dev) {} + static inline struct clk *of_clk_src_simple_get( struct of_phandle_args *clkspec, void *data) { diff --git a/include/linux/clk/davinci.h b/include/linux/clk/davinci.h index 8a7b5cd7eac0..f6ebab6228c2 100644 --- a/include/linux/clk/davinci.h +++ b/include/linux/clk/davinci.h @@ -28,13 +28,5 @@ int dm365_pll1_init(struct device *dev, void __iomem *base, struct regmap *cfgch int dm365_pll2_init(struct device *dev, void __iomem *base, struct regmap *cfgchip); int dm365_psc_init(struct device *dev, void __iomem *base); #endif -#ifdef CONFIG_ARCH_DAVINCI_DM644x -int dm644x_pll1_init(struct device *dev, void __iomem *base, struct regmap *cfgchip); -int dm644x_psc_init(struct device *dev, void __iomem *base); -#endif -#ifdef CONFIG_ARCH_DAVINCI_DM646x -int dm646x_pll1_init(struct device *dev, void __iomem *base, struct regmap *cfgchip); -int dm646x_psc_init(struct device *dev, void __iomem *base); -#endif #endif /* __LINUX_CLK_DAVINCI_PLL_H___ */ diff --git a/include/linux/clkdev.h b/include/linux/clkdev.h index 8a8423eb8e9a..45570bc21a43 100644 --- a/include/linux/clkdev.h +++ b/include/linux/clkdev.h @@ -46,6 +46,4 @@ int clk_hw_register_clkdev(struct clk_hw *, const char *, const char *); int devm_clk_hw_register_clkdev(struct device *dev, struct clk_hw *hw, const char *con_id, const char *dev_id); -void devm_clk_release_clkdev(struct device *dev, const char *con_id, - const char *dev_id); #endif diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 42e55579d649..6cfd6902bd5b 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -51,6 +51,29 @@ #define __no_sanitize_undefined #endif +#if __has_feature(memory_sanitizer) +#define __SANITIZE_MEMORY__ +/* + * Unlike other sanitizers, KMSAN still inserts code into functions marked with + * no_sanitize("kernel-memory"). Using disable_sanitizer_instrumentation + * provides the behavior consistent with other __no_sanitize_ attributes, + * guaranteeing that __no_sanitize_memory functions remain uninstrumented. + */ +#define __no_sanitize_memory __disable_sanitizer_instrumentation + +/* + * The __no_kmsan_checks attribute ensures that a function does not produce + * false positive reports by: + * - initializing all local variables and memory stores in this function; + * - skipping all shadow checks; + * - passing initialized arguments to this function's callees. + */ +#define __no_kmsan_checks __attribute__((no_sanitize("kernel-memory"))) +#else +#define __no_sanitize_memory +#define __no_kmsan_checks +#endif + /* * Support for __has_feature(coverage_sanitizer) was added in Clang 13 together * with no_sanitize("coverage"). Prior versions of Clang support coverage diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 9b157b71036f..f55a37efdb97 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -115,6 +115,12 @@ #endif /* + * GCC does not support KMSAN. + */ +#define __no_sanitize_memory +#define __no_kmsan_checks + +/* * Turn individual warnings and errors on and off locally, depending * on version. */ diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index fc93c9488c76..898b3458b24a 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -35,7 +35,8 @@ /* * Note: do not use this directly. Instead, use __alloc_size() since it is conditionally - * available and includes other attributes. + * available and includes other attributes. For GCC < 9.1, __alloc_size__ gets undefined + * in compiler-gcc.h, due to misbehaviors. * * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alloc_005fsize-function-attribute * clang: https://clang.llvm.org/docs/AttributeReference.html#alloc-size diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index d9d98e8a9a3b..eb0466236661 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -233,7 +233,8 @@ struct ftrace_likely_data { /* Section for code which can't be instrumented at all */ #define noinstr \ noinline notrace __attribute((__section__(".noinstr.text"))) \ - __no_kcsan __no_sanitize_address __no_profile __no_sanitize_coverage + __no_kcsan __no_sanitize_address __no_profile __no_sanitize_coverage \ + __no_sanitize_memory #endif /* __KERNEL__ */ @@ -271,14 +272,16 @@ struct ftrace_likely_data { /* * Any place that could be marked with the "alloc_size" attribute is also - * a place to be marked with the "malloc" attribute. Do this as part of the - * __alloc_size macro to avoid redundant attributes and to avoid missing a - * __malloc marking. + * a place to be marked with the "malloc" attribute, except those that may + * be performing a _reallocation_, as that may alias the existing pointer. + * For these, use __realloc_size(). */ #ifdef __alloc_size__ # define __alloc_size(x, ...) __alloc_size__(x, ## __VA_ARGS__) __malloc +# define __realloc_size(x, ...) __alloc_size__(x, ## __VA_ARGS__) #else # define __alloc_size(x, ...) __malloc +# define __realloc_size(x, ...) #endif #ifndef asm_volatile_goto diff --git a/include/linux/completion.h b/include/linux/completion.h index 51d9ab079629..62b32b19e0a8 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -103,6 +103,7 @@ extern void wait_for_completion(struct completion *); extern void wait_for_completion_io(struct completion *); extern int wait_for_completion_interruptible(struct completion *x); extern int wait_for_completion_killable(struct completion *x); +extern int wait_for_completion_state(struct completion *x, unsigned int state); extern unsigned long wait_for_completion_timeout(struct completion *x, unsigned long timeout); extern unsigned long wait_for_completion_io_timeout(struct completion *x, diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 9f445f09fcfe..1554021231f9 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -372,6 +372,29 @@ static inline u32 csdev_access_relaxed_read32(struct csdev_access *csa, return csa->read(offset, true, false); } +static inline u64 csdev_access_relaxed_read_pair(struct csdev_access *csa, + u32 lo_offset, u32 hi_offset) +{ + if (likely(csa->io_mem)) { + return readl_relaxed(csa->base + lo_offset) | + ((u64)readl_relaxed(csa->base + hi_offset) << 32); + } + + return csa->read(lo_offset, true, false) | (csa->read(hi_offset, true, false) << 32); +} + +static inline void csdev_access_relaxed_write_pair(struct csdev_access *csa, u64 val, + u32 lo_offset, u32 hi_offset) +{ + if (likely(csa->io_mem)) { + writel_relaxed((u32)val, csa->base + lo_offset); + writel_relaxed((u32)(val >> 32), csa->base + hi_offset); + } else { + csa->write((u32)val, lo_offset, true, false); + csa->write((u32)(val >> 32), hi_offset, true, false); + } +} + static inline u32 csdev_access_read32(struct csdev_access *csa, u32 offset) { if (likely(csa->io_mem)) diff --git a/include/linux/counter.h b/include/linux/counter.h index 1fe17f5adb09..c41fa602ed28 100644 --- a/include/linux/counter.h +++ b/include/linux/counter.h @@ -31,6 +31,8 @@ enum counter_comp_type { COUNTER_COMP_ENUM, COUNTER_COMP_COUNT_DIRECTION, COUNTER_COMP_COUNT_MODE, + COUNTER_COMP_SIGNAL_POLARITY, + COUNTER_COMP_ARRAY, }; /** @@ -38,66 +40,114 @@ enum counter_comp_type { * @type: Counter component data type * @name: device-specific component name * @priv: component-relevant data - * @action_read: Synapse action mode read callback. The read value of the + * @action_read: Synapse action mode read callback. The read value of the * respective Synapse action mode should be passed back via * the action parameter. - * @device_u8_read: Device u8 component read callback. The read value of the + * @device_u8_read: Device u8 component read callback. The read value of the * respective Device u8 component should be passed back via * the val parameter. - * @count_u8_read: Count u8 component read callback. The read value of the + * @count_u8_read: Count u8 component read callback. The read value of the * respective Count u8 component should be passed back via * the val parameter. - * @signal_u8_read: Signal u8 component read callback. The read value of the + * @signal_u8_read: Signal u8 component read callback. The read value of the * respective Signal u8 component should be passed back via * the val parameter. - * @device_u32_read: Device u32 component read callback. The read value of + * @device_u32_read: Device u32 component read callback. The read value of * the respective Device u32 component should be passed * back via the val parameter. - * @count_u32_read: Count u32 component read callback. The read value of the + * @count_u32_read: Count u32 component read callback. The read value of the * respective Count u32 component should be passed back via * the val parameter. - * @signal_u32_read: Signal u32 component read callback. The read value of + * @signal_u32_read: Signal u32 component read callback. The read value of * the respective Signal u32 component should be passed * back via the val parameter. - * @device_u64_read: Device u64 component read callback. The read value of + * @device_u64_read: Device u64 component read callback. The read value of * the respective Device u64 component should be passed * back via the val parameter. - * @count_u64_read: Count u64 component read callback. The read value of the + * @count_u64_read: Count u64 component read callback. The read value of the * respective Count u64 component should be passed back via * the val parameter. - * @signal_u64_read: Signal u64 component read callback. The read value of + * @signal_u64_read: Signal u64 component read callback. The read value of * the respective Signal u64 component should be passed * back via the val parameter. - * @action_write: Synapse action mode write callback. The write value of + * @signal_array_u32_read: Signal u32 array component read callback. The + * index of the respective Count u32 array + * component element is passed via the idx + * parameter. The read value of the respective + * Count u32 array component element should be + * passed back via the val parameter. + * @device_array_u64_read: Device u64 array component read callback. The + * index of the respective Device u64 array + * component element is passed via the idx + * parameter. The read value of the respective + * Device u64 array component element should be + * passed back via the val parameter. + * @count_array_u64_read: Count u64 array component read callback. The + * index of the respective Count u64 array + * component element is passed via the idx + * parameter. The read value of the respective + * Count u64 array component element should be + * passed back via the val parameter. + * @signal_array_u64_read: Signal u64 array component read callback. The + * index of the respective Count u64 array + * component element is passed via the idx + * parameter. The read value of the respective + * Count u64 array component element should be + * passed back via the val parameter. + * @action_write: Synapse action mode write callback. The write value of * the respective Synapse action mode is passed via the * action parameter. - * @device_u8_write: Device u8 component write callback. The write value of + * @device_u8_write: Device u8 component write callback. The write value of * the respective Device u8 component is passed via the val * parameter. - * @count_u8_write: Count u8 component write callback. The write value of + * @count_u8_write: Count u8 component write callback. The write value of * the respective Count u8 component is passed via the val * parameter. - * @signal_u8_write: Signal u8 component write callback. The write value of + * @signal_u8_write: Signal u8 component write callback. The write value of * the respective Signal u8 component is passed via the val * parameter. - * @device_u32_write: Device u32 component write callback. The write value of + * @device_u32_write: Device u32 component write callback. The write value of * the respective Device u32 component is passed via the * val parameter. - * @count_u32_write: Count u32 component write callback. The write value of + * @count_u32_write: Count u32 component write callback. The write value of * the respective Count u32 component is passed via the val * parameter. - * @signal_u32_write: Signal u32 component write callback. The write value of + * @signal_u32_write: Signal u32 component write callback. The write value of * the respective Signal u32 component is passed via the * val parameter. - * @device_u64_write: Device u64 component write callback. The write value of + * @device_u64_write: Device u64 component write callback. The write value of * the respective Device u64 component is passed via the * val parameter. - * @count_u64_write: Count u64 component write callback. The write value of + * @count_u64_write: Count u64 component write callback. The write value of * the respective Count u64 component is passed via the val * parameter. - * @signal_u64_write: Signal u64 component write callback. The write value of + * @signal_u64_write: Signal u64 component write callback. The write value of * the respective Signal u64 component is passed via the * val parameter. + * @signal_array_u32_write: Signal u32 array component write callback. The + * index of the respective Signal u32 array + * component element is passed via the idx + * parameter. The write value of the respective + * Signal u32 array component element is passed via + * the val parameter. + * @device_array_u64_write: Device u64 array component write callback. The + * index of the respective Device u64 array + * component element is passed via the idx + * parameter. The write value of the respective + * Device u64 array component element is passed via + * the val parameter. + * @count_array_u64_write: Count u64 array component write callback. The + * index of the respective Count u64 array + * component element is passed via the idx + * parameter. The write value of the respective + * Count u64 array component element is passed via + * the val parameter. + * @signal_array_u64_write: Signal u64 array component write callback. The + * index of the respective Signal u64 array + * component element is passed via the idx + * parameter. The write value of the respective + * Signal u64 array component element is passed via + * the val parameter. */ struct counter_comp { enum counter_comp_type type; @@ -125,6 +175,17 @@ struct counter_comp { struct counter_count *count, u64 *val); int (*signal_u64_read)(struct counter_device *counter, struct counter_signal *signal, u64 *val); + int (*signal_array_u32_read)(struct counter_device *counter, + struct counter_signal *signal, + size_t idx, u32 *val); + int (*device_array_u64_read)(struct counter_device *counter, + size_t idx, u64 *val); + int (*count_array_u64_read)(struct counter_device *counter, + struct counter_count *count, + size_t idx, u64 *val); + int (*signal_array_u64_read)(struct counter_device *counter, + struct counter_signal *signal, + size_t idx, u64 *val); }; union { int (*action_write)(struct counter_device *counter, @@ -148,6 +209,17 @@ struct counter_comp { struct counter_count *count, u64 val); int (*signal_u64_write)(struct counter_device *counter, struct counter_signal *signal, u64 val); + int (*signal_array_u32_write)(struct counter_device *counter, + struct counter_signal *signal, + size_t idx, u32 val); + int (*device_array_u64_write)(struct counter_device *counter, + size_t idx, u64 val); + int (*count_array_u64_write)(struct counter_device *counter, + struct counter_count *count, + size_t idx, u64 val); + int (*signal_array_u64_write)(struct counter_device *counter, + struct counter_signal *signal, + size_t idx, u64 val); }; }; @@ -452,6 +524,60 @@ struct counter_available { .priv = &(_available), \ } +struct counter_array { + enum counter_comp_type type; + const struct counter_available *avail; + union { + size_t length; + size_t idx; + }; +}; + +#define DEFINE_COUNTER_ARRAY_U64(_name, _length) \ + struct counter_array _name = { \ + .type = COUNTER_COMP_U64, \ + .length = (_length), \ + } + +#define DEFINE_COUNTER_ARRAY_CAPTURE(_name, _length) \ + DEFINE_COUNTER_ARRAY_U64(_name, _length) + +#define DEFINE_COUNTER_ARRAY_POLARITY(_name, _enums, _length) \ + DEFINE_COUNTER_AVAILABLE(_name##_available, _enums); \ + struct counter_array _name = { \ + .type = COUNTER_COMP_SIGNAL_POLARITY, \ + .avail = &(_name##_available), \ + .length = (_length), \ + } + +#define COUNTER_COMP_DEVICE_ARRAY_U64(_name, _read, _write, _array) \ +{ \ + .type = COUNTER_COMP_ARRAY, \ + .name = (_name), \ + .device_array_u64_read = (_read), \ + .device_array_u64_write = (_write), \ + .priv = &(_array), \ +} +#define COUNTER_COMP_COUNT_ARRAY_U64(_name, _read, _write, _array) \ +{ \ + .type = COUNTER_COMP_ARRAY, \ + .name = (_name), \ + .count_array_u64_read = (_read), \ + .count_array_u64_write = (_write), \ + .priv = &(_array), \ +} +#define COUNTER_COMP_SIGNAL_ARRAY_U64(_name, _read, _write, _array) \ +{ \ + .type = COUNTER_COMP_ARRAY, \ + .name = (_name), \ + .signal_array_u64_read = (_read), \ + .signal_array_u64_write = (_write), \ + .priv = &(_array), \ +} + +#define COUNTER_COMP_CAPTURE(_read, _write) \ + COUNTER_COMP_COUNT_U64("capture", _read, _write) + #define COUNTER_COMP_CEILING(_read, _write) \ COUNTER_COMP_COUNT_U64("ceiling", _read, _write) @@ -477,10 +603,31 @@ struct counter_available { #define COUNTER_COMP_FLOOR(_read, _write) \ COUNTER_COMP_COUNT_U64("floor", _read, _write) +#define COUNTER_COMP_POLARITY(_read, _write, _available) \ +{ \ + .type = COUNTER_COMP_SIGNAL_POLARITY, \ + .name = "polarity", \ + .signal_u32_read = (_read), \ + .signal_u32_write = (_write), \ + .priv = &(_available), \ +} + #define COUNTER_COMP_PRESET(_read, _write) \ COUNTER_COMP_COUNT_U64("preset", _read, _write) #define COUNTER_COMP_PRESET_ENABLE(_read, _write) \ COUNTER_COMP_COUNT_BOOL("preset_enable", _read, _write) +#define COUNTER_COMP_ARRAY_CAPTURE(_read, _write, _array) \ + COUNTER_COMP_COUNT_ARRAY_U64("capture", _read, _write, _array) + +#define COUNTER_COMP_ARRAY_POLARITY(_read, _write, _array) \ +{ \ + .type = COUNTER_COMP_ARRAY, \ + .name = "polarity", \ + .signal_array_u32_read = (_read), \ + .signal_array_u32_write = (_write), \ + .priv = &(_array), \ +} + #endif /* _COUNTER_H_ */ diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index e8ad12b5b9d2..2f065ad97541 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -35,19 +35,23 @@ typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; */ #define cpumask_pr_args(maskp) nr_cpu_ids, cpumask_bits(maskp) -#if NR_CPUS == 1 -#define nr_cpu_ids 1U +#if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS) +#define nr_cpu_ids ((unsigned int)NR_CPUS) #else extern unsigned int nr_cpu_ids; #endif -#ifdef CONFIG_CPUMASK_OFFSTACK -/* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also, - * not all bits may be allocated. */ -#define nr_cpumask_bits nr_cpu_ids +static inline void set_nr_cpu_ids(unsigned int nr) +{ +#if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS) + WARN_ON(nr != nr_cpu_ids); #else -#define nr_cpumask_bits ((unsigned int)NR_CPUS) + nr_cpu_ids = nr; #endif +} + +/* Deprecated. Always use nr_cpu_ids. */ +#define nr_cpumask_bits nr_cpu_ids /* * The following particular system cpumasks and operations manage @@ -67,10 +71,6 @@ extern unsigned int nr_cpu_ids; * cpu_online_mask is the dynamic subset of cpu_present_mask, * indicating those CPUs available for scheduling. * - * If HOTPLUG is enabled, then cpu_possible_mask is forced to have - * all NR_CPUS bits set, otherwise it is just the set of CPUs that - * ACPI reports present at boot. - * * If HOTPLUG is enabled, then cpu_present_mask varies dynamically, * depending on what ACPI reports as currently plugged in, otherwise * cpu_present_mask is just a copy of cpu_possible_mask. @@ -174,9 +174,8 @@ static inline unsigned int cpumask_last(const struct cpumask *srcp) static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) { - /* -1 is a legal arg here. */ - if (n != -1) - cpumask_check(n); + /* n is a prior cpu */ + cpumask_check(n + 1); return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n + 1); } @@ -189,9 +188,8 @@ unsigned int cpumask_next(int n, const struct cpumask *srcp) */ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) { - /* -1 is a legal arg here. */ - if (n != -1) - cpumask_check(n); + /* n is a prior cpu */ + cpumask_check(n + 1); return find_next_zero_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1); } @@ -231,9 +229,8 @@ static inline unsigned int cpumask_next_and(int n, const struct cpumask *src1p, const struct cpumask *src2p) { - /* -1 is a legal arg here. */ - if (n != -1) - cpumask_check(n); + /* n is a prior cpu */ + cpumask_check(n + 1); return find_next_and_bit(cpumask_bits(src1p), cpumask_bits(src2p), nr_cpumask_bits, n + 1); } @@ -246,9 +243,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p, * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu(cpu, mask) \ - for ((cpu) = -1; \ - (cpu) = cpumask_next((cpu), (mask)), \ - (cpu) < nr_cpu_ids;) + for_each_set_bit(cpu, cpumask_bits(mask), nr_cpumask_bits) /** * for_each_cpu_not - iterate over every cpu in a complemented mask @@ -258,17 +253,15 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p, * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu_not(cpu, mask) \ - for ((cpu) = -1; \ - (cpu) = cpumask_next_zero((cpu), (mask)), \ - (cpu) < nr_cpu_ids;) + for_each_clear_bit(cpu, cpumask_bits(mask), nr_cpumask_bits) #if NR_CPUS == 1 static inline unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap) { cpumask_check(start); - if (n != -1) - cpumask_check(n); + /* n is a prior cpu */ + cpumask_check(n + 1); /* * Return the first available CPU when wrapping, or when starting before cpu0, @@ -293,10 +286,8 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta * * After the loop, cpu is >= nr_cpu_ids. */ -#define for_each_cpu_wrap(cpu, mask, start) \ - for ((cpu) = cpumask_next_wrap((start)-1, (mask), (start), false); \ - (cpu) < nr_cpumask_bits; \ - (cpu) = cpumask_next_wrap((cpu), (mask), (start), true)) +#define for_each_cpu_wrap(cpu, mask, start) \ + for_each_set_bit_wrap(cpu, cpumask_bits(mask), nr_cpumask_bits, start) /** * for_each_cpu_and - iterate over every cpu in both masks @@ -313,9 +304,25 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu_and(cpu, mask1, mask2) \ - for ((cpu) = -1; \ - (cpu) = cpumask_next_and((cpu), (mask1), (mask2)), \ - (cpu) < nr_cpu_ids;) + for_each_and_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), nr_cpumask_bits) + +/** + * for_each_cpu_andnot - iterate over every cpu present in one mask, excluding + * those present in another. + * @cpu: the (optionally unsigned) integer iterator + * @mask1: the first cpumask pointer + * @mask2: the second cpumask pointer + * + * This saves a temporary CPU mask in many places. It is equivalent to: + * struct cpumask tmp; + * cpumask_andnot(&tmp, &mask1, &mask2); + * for_each_cpu(cpu, &tmp) + * ... + * + * After the loop, cpu is >= nr_cpu_ids. + */ +#define for_each_cpu_andnot(cpu, mask1, mask2) \ + for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), nr_cpumask_bits) /** * cpumask_any_but - return a "random" in a cpumask, but not this one. @@ -337,6 +344,50 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) return i; } +/** + * cpumask_nth - get the first cpu in a cpumask + * @srcp: the cpumask pointer + * @cpu: the N'th cpu to find, starting from 0 + * + * Returns >= nr_cpu_ids if such cpu doesn't exist. + */ +static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp) +{ + return find_nth_bit(cpumask_bits(srcp), nr_cpumask_bits, cpumask_check(cpu)); +} + +/** + * cpumask_nth_and - get the first cpu in 2 cpumasks + * @srcp1: the cpumask pointer + * @srcp2: the cpumask pointer + * @cpu: the N'th cpu to find, starting from 0 + * + * Returns >= nr_cpu_ids if such cpu doesn't exist. + */ +static inline +unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, + const struct cpumask *srcp2) +{ + return find_nth_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), + nr_cpumask_bits, cpumask_check(cpu)); +} + +/** + * cpumask_nth_andnot - get the first cpu set in 1st cpumask, and clear in 2nd. + * @srcp1: the cpumask pointer + * @srcp2: the cpumask pointer + * @cpu: the N'th cpu to find, starting from 0 + * + * Returns >= nr_cpu_ids if such cpu doesn't exist. + */ +static inline +unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, + const struct cpumask *srcp2) +{ + return find_nth_andnot_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), + nr_cpumask_bits, cpumask_check(cpu)); +} + #define CPU_BITS_NONE \ { \ [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ @@ -587,6 +638,17 @@ static inline unsigned int cpumask_weight(const struct cpumask *srcp) } /** + * cpumask_weight_and - Count of bits in (*srcp1 & *srcp2) + * @srcp1: the cpumask to count bits (< nr_cpu_ids) in. + * @srcp2: the cpumask to count bits (< nr_cpu_ids) in. + */ +static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1, + const struct cpumask *srcp2) +{ + return bitmap_weight_and(cpumask_bits(srcp1), cpumask_bits(srcp2), nr_cpumask_bits); +} + +/** * cpumask_shift_right - *dstp = *srcp >> n * @dstp: the cpumask result * @srcp: the input to shift diff --git a/include/linux/damon.h b/include/linux/damon.h index 7b1f4a488230..ed5470f50bab 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -216,13 +216,26 @@ struct damos_stat { }; /** - * struct damos - Represents a Data Access Monitoring-based Operation Scheme. + * struct damos_access_pattern - Target access pattern of the given scheme. * @min_sz_region: Minimum size of target regions. * @max_sz_region: Maximum size of target regions. * @min_nr_accesses: Minimum ``->nr_accesses`` of target regions. * @max_nr_accesses: Maximum ``->nr_accesses`` of target regions. * @min_age_region: Minimum age of target regions. * @max_age_region: Maximum age of target regions. + */ +struct damos_access_pattern { + unsigned long min_sz_region; + unsigned long max_sz_region; + unsigned int min_nr_accesses; + unsigned int max_nr_accesses; + unsigned int min_age_region; + unsigned int max_age_region; +}; + +/** + * struct damos - Represents a Data Access Monitoring-based Operation Scheme. + * @pattern: Access pattern of target regions. * @action: &damo_action to be applied to the target regions. * @quota: Control the aggressiveness of this scheme. * @wmarks: Watermarks for automated (in)activation of this scheme. @@ -230,10 +243,8 @@ struct damos_stat { * @list: List head for siblings. * * For each aggregation interval, DAMON finds regions which fit in the - * condition (&min_sz_region, &max_sz_region, &min_nr_accesses, - * &max_nr_accesses, &min_age_region, &max_age_region) and applies &action to - * those. To avoid consuming too much CPU time or IO resources for the - * &action, "a is used. + * &pattern and applies &action to those. To avoid consuming too much + * CPU time or IO resources for the &action, "a is used. * * To do the work only when needed, schemes can be activated for specific * system situations using &wmarks. If all schemes that registered to the @@ -248,12 +259,7 @@ struct damos_stat { * &action is applied. */ struct damos { - unsigned long min_sz_region; - unsigned long max_sz_region; - unsigned int min_nr_accesses; - unsigned int max_nr_accesses; - unsigned int min_age_region; - unsigned int max_age_region; + struct damos_access_pattern pattern; enum damos_action action; struct damos_quota quota; struct damos_watermarks wmarks; @@ -340,7 +346,7 @@ struct damon_operations { unsigned long (*apply_scheme)(struct damon_ctx *context, struct damon_target *t, struct damon_region *r, struct damos *scheme); - bool (*target_valid)(void *target); + bool (*target_valid)(struct damon_target *t); void (*cleanup)(struct damon_ctx *context); }; @@ -383,13 +389,15 @@ struct damon_callback { }; /** - * struct damon_ctx - Represents a context for each monitoring. This is the - * main interface that allows users to set the attributes and get the results - * of the monitoring. + * struct damon_attrs - Monitoring attributes for accuracy/overhead control. * * @sample_interval: The time between access samplings. * @aggr_interval: The time between monitor results aggregations. * @ops_update_interval: The time between monitoring operations updates. + * @min_nr_regions: The minimum number of adaptive monitoring + * regions. + * @max_nr_regions: The maximum number of adaptive monitoring + * regions. * * For each @sample_interval, DAMON checks whether each region is accessed or * not. It aggregates and keeps the access information (number of accesses to @@ -399,7 +407,21 @@ struct damon_callback { * @ops_update_interval. All time intervals are in micro-seconds. * Please refer to &struct damon_operations and &struct damon_callback for more * detail. + */ +struct damon_attrs { + unsigned long sample_interval; + unsigned long aggr_interval; + unsigned long ops_update_interval; + unsigned long min_nr_regions; + unsigned long max_nr_regions; +}; + +/** + * struct damon_ctx - Represents a context for each monitoring. This is the + * main interface that allows users to set the attributes and get the results + * of the monitoring. * + * @attrs: Monitoring attributes for accuracy/overhead control. * @kdamond: Kernel thread who does the monitoring. * @kdamond_lock: Mutex for the synchronizations with @kdamond. * @@ -421,15 +443,11 @@ struct damon_callback { * @ops: Set of monitoring operations for given use cases. * @callback: Set of callbacks for monitoring events notifications. * - * @min_nr_regions: The minimum number of adaptive monitoring regions. - * @max_nr_regions: The maximum number of adaptive monitoring regions. * @adaptive_targets: Head of monitoring targets (&damon_target) list. * @schemes: Head of schemes (&damos) list. */ struct damon_ctx { - unsigned long sample_interval; - unsigned long aggr_interval; - unsigned long ops_update_interval; + struct damon_attrs attrs; /* private: internal use only */ struct timespec64 last_aggregation; @@ -442,8 +460,6 @@ struct damon_ctx { struct damon_operations ops; struct damon_callback callback; - unsigned long min_nr_regions; - unsigned long max_nr_regions; struct list_head adaptive_targets; struct list_head schemes; }; @@ -463,9 +479,17 @@ static inline struct damon_region *damon_last_region(struct damon_target *t) return list_last_entry(&t->regions_list, struct damon_region, list); } +static inline struct damon_region *damon_first_region(struct damon_target *t) +{ + return list_first_entry(&t->regions_list, struct damon_region, list); +} + #define damon_for_each_region(r, t) \ list_for_each_entry(r, &t->regions_list, list) +#define damon_for_each_region_from(r, t) \ + list_for_each_entry_from(r, &t->regions_list, list) + #define damon_for_each_region_safe(r, next, t) \ list_for_each_entry_safe(r, next, &t->regions_list, list) @@ -501,12 +525,9 @@ void damon_destroy_region(struct damon_region *r, struct damon_target *t); int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, unsigned int nr_ranges); -struct damos *damon_new_scheme( - unsigned long min_sz_region, unsigned long max_sz_region, - unsigned int min_nr_accesses, unsigned int max_nr_accesses, - unsigned int min_age_region, unsigned int max_age_region, - enum damos_action action, struct damos_quota *quota, - struct damos_watermarks *wmarks); +struct damos *damon_new_scheme(struct damos_access_pattern *pattern, + enum damos_action action, struct damos_quota *quota, + struct damos_watermarks *wmarks); void damon_add_scheme(struct damon_ctx *ctx, struct damos *s); void damon_destroy_scheme(struct damos *s); @@ -519,10 +540,8 @@ unsigned int damon_nr_regions(struct damon_target *t); struct damon_ctx *damon_new_ctx(void); void damon_destroy_ctx(struct damon_ctx *ctx); -int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int, - unsigned long aggr_int, unsigned long ops_upd_int, - unsigned long min_nr_reg, unsigned long max_nr_reg); -int damon_set_schemes(struct damon_ctx *ctx, +int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs); +void damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes, ssize_t nr_schemes); int damon_nr_running_ctxs(void); bool damon_is_registered_ops(enum damon_ops_id id); @@ -538,6 +557,9 @@ static inline bool damon_target_has_pid(const struct damon_ctx *ctx) int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive); int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); +int damon_set_region_biggest_system_ram_default(struct damon_target *t, + unsigned long *start, unsigned long *end); + #endif /* CONFIG_DAMON */ #endif /* _DAMON_H */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 54d46518c481..6b351e009f59 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -16,6 +16,7 @@ #include <linux/wait.h> struct path; +struct file; struct vfsmount; /* @@ -250,7 +251,7 @@ extern struct dentry * d_make_root(struct inode *); /* <clickety>-<click> the ramfs-type tree */ extern void d_genocide(struct dentry *); -extern void d_tmpfile(struct dentry *, struct inode *); +extern void d_tmpfile(struct file *, struct inode *); extern struct dentry *d_find_alias(struct inode *); extern void d_prune_aliases(struct inode *); diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 58aea2d7385c..0da97dba9ef8 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -73,8 +73,8 @@ extern int delayacct_add_tsk(struct taskstats *, struct task_struct *); extern __u64 __delayacct_blkio_ticks(struct task_struct *); extern void __delayacct_freepages_start(void); extern void __delayacct_freepages_end(void); -extern void __delayacct_thrashing_start(void); -extern void __delayacct_thrashing_end(void); +extern void __delayacct_thrashing_start(bool *in_thrashing); +extern void __delayacct_thrashing_end(bool *in_thrashing); extern void __delayacct_swapin_start(void); extern void __delayacct_swapin_end(void); extern void __delayacct_compact_start(void); @@ -143,22 +143,22 @@ static inline void delayacct_freepages_end(void) __delayacct_freepages_end(); } -static inline void delayacct_thrashing_start(void) +static inline void delayacct_thrashing_start(bool *in_thrashing) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) - __delayacct_thrashing_start(); + __delayacct_thrashing_start(in_thrashing); } -static inline void delayacct_thrashing_end(void) +static inline void delayacct_thrashing_end(bool *in_thrashing) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) - __delayacct_thrashing_end(); + __delayacct_thrashing_end(in_thrashing); } static inline void delayacct_swapin_start(void) @@ -237,9 +237,9 @@ static inline void delayacct_freepages_start(void) {} static inline void delayacct_freepages_end(void) {} -static inline void delayacct_thrashing_start(void) +static inline void delayacct_thrashing_start(bool *in_thrashing) {} -static inline void delayacct_thrashing_end(void) +static inline void delayacct_thrashing_end(bool *in_thrashing) {} static inline void delayacct_swapin_start(void) {} diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h deleted file mode 100644 index 24607dc3c2ac..000000000000 --- a/include/linux/dma-iommu.h +++ /dev/null @@ -1,93 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2014-2015 ARM Ltd. - */ -#ifndef __DMA_IOMMU_H -#define __DMA_IOMMU_H - -#include <linux/errno.h> -#include <linux/types.h> - -#ifdef CONFIG_IOMMU_DMA -#include <linux/dma-mapping.h> -#include <linux/iommu.h> -#include <linux/msi.h> - -/* Domain management interface for IOMMU drivers */ -int iommu_get_dma_cookie(struct iommu_domain *domain); -int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base); -void iommu_put_dma_cookie(struct iommu_domain *domain); - -/* Setup call for arch DMA mapping code */ -void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit); -int iommu_dma_init_fq(struct iommu_domain *domain); - -/* The DMA API isn't _quite_ the whole story, though... */ -/* - * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU device - * - * The MSI page will be stored in @desc. - * - * Return: 0 on success otherwise an error describing the failure. - */ -int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr); - -/* Update the MSI message if required. */ -void iommu_dma_compose_msi_msg(struct msi_desc *desc, - struct msi_msg *msg); - -void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); - -void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, - struct iommu_domain *domain); - -extern bool iommu_dma_forcedac; - -#else /* CONFIG_IOMMU_DMA */ - -struct iommu_domain; -struct msi_desc; -struct msi_msg; -struct device; - -static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base, - u64 dma_limit) -{ -} - -static inline int iommu_dma_init_fq(struct iommu_domain *domain) -{ - return -EINVAL; -} - -static inline int iommu_get_dma_cookie(struct iommu_domain *domain) -{ - return -ENODEV; -} - -static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) -{ - return -ENODEV; -} - -static inline void iommu_put_dma_cookie(struct iommu_domain *domain) -{ -} - -static inline int iommu_dma_prepare_msi(struct msi_desc *desc, - phys_addr_t msi_addr) -{ - return 0; -} - -static inline void iommu_dma_compose_msi_msg(struct msi_desc *desc, - struct msi_msg *msg) -{ -} - -static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) -{ -} - -#endif /* CONFIG_IOMMU_DMA */ -#endif /* __DMA_IOMMU_H */ diff --git a/include/linux/dma/hsu.h b/include/linux/dma/hsu.h index a6b7bc707356..77ea602c287c 100644 --- a/include/linux/dma/hsu.h +++ b/include/linux/dma/hsu.h @@ -8,11 +8,13 @@ #ifndef _DMA_HSU_H #define _DMA_HSU_H -#include <linux/device.h> -#include <linux/interrupt.h> +#include <linux/errno.h> +#include <linux/kconfig.h> +#include <linux/types.h> #include <linux/platform_data/dma-hsu.h> +struct device; struct hsu_dma; /** diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index dce631e678dd..41682278d2e8 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -6,6 +6,8 @@ #include <linux/jump_label.h> #endif +#include <linux/build_bug.h> + /* * An instance of this structure is created in a special * ELF section at every dynamic debug callsite. At runtime, @@ -21,6 +23,9 @@ struct _ddebug { const char *filename; const char *format; unsigned int lineno:18; +#define CLS_BITS 6 + unsigned int class_id:CLS_BITS; +#define _DPRINTK_CLASS_DFLT ((1 << CLS_BITS) - 1) /* * The flags field controls the behaviour at the callsite. * The bits here are changed dynamically when the user @@ -51,15 +56,82 @@ struct _ddebug { #endif } __attribute__((aligned(8))); - +enum class_map_type { + DD_CLASS_TYPE_DISJOINT_BITS, + /** + * DD_CLASS_TYPE_DISJOINT_BITS: classes are independent, one per bit. + * expecting hex input. Built for drm.debug, basis for other types. + */ + DD_CLASS_TYPE_LEVEL_NUM, + /** + * DD_CLASS_TYPE_LEVEL_NUM: input is numeric level, 0-N. + * N turns on just bits N-1 .. 0, so N=0 turns all bits off. + */ + DD_CLASS_TYPE_DISJOINT_NAMES, + /** + * DD_CLASS_TYPE_DISJOINT_NAMES: input is a CSV of [+-]CLASS_NAMES, + * classes are independent, like _DISJOINT_BITS. + */ + DD_CLASS_TYPE_LEVEL_NAMES, + /** + * DD_CLASS_TYPE_LEVEL_NAMES: input is a CSV of [+-]CLASS_NAMES, + * intended for names like: INFO,DEBUG,TRACE, with a module prefix + * avoid EMERG,ALERT,CRIT,ERR,WARNING: they're not debug + */ +}; + +struct ddebug_class_map { + struct list_head link; + struct module *mod; + const char *mod_name; /* needed for builtins */ + const char **class_names; + const int length; + const int base; /* index of 1st .class_id, allows split/shared space */ + enum class_map_type map_type; +}; + +/** + * DECLARE_DYNDBG_CLASSMAP - declare classnames known by a module + * @_var: a struct ddebug_class_map, passed to module_param_cb + * @_type: enum class_map_type, chooses bits/verbose, numeric/symbolic + * @_base: offset of 1st class-name. splits .class_id space + * @classes: class-names used to control class'd prdbgs + */ +#define DECLARE_DYNDBG_CLASSMAP(_var, _maptype, _base, ...) \ + static const char *_var##_classnames[] = { __VA_ARGS__ }; \ + static struct ddebug_class_map __aligned(8) __used \ + __section("__dyndbg_classes") _var = { \ + .mod = THIS_MODULE, \ + .mod_name = KBUILD_MODNAME, \ + .base = _base, \ + .map_type = _maptype, \ + .length = NUM_TYPE_ARGS(char*, __VA_ARGS__), \ + .class_names = _var##_classnames, \ + } +#define NUM_TYPE_ARGS(eltype, ...) \ + (sizeof((eltype[]){__VA_ARGS__}) / sizeof(eltype)) + +/* encapsulate linker provided built-in (or module) dyndbg data */ +struct _ddebug_info { + struct _ddebug *descs; + struct ddebug_class_map *classes; + unsigned int num_descs; + unsigned int num_classes; +}; + +struct ddebug_class_param { + union { + unsigned long *bits; + unsigned int *lvl; + }; + char flags[8]; + const struct ddebug_class_map *map; +}; #if defined(CONFIG_DYNAMIC_DEBUG_CORE) -/* exported for module authors to exercise >control */ -int dynamic_debug_exec_queries(const char *query, const char *modname); +int ddebug_add_module(struct _ddebug_info *dyndbg, const char *modname); -int ddebug_add_module(struct _ddebug *tab, unsigned int n, - const char *modname); extern int ddebug_remove_module(const char *mod_name); extern __printf(2, 3) void __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...); @@ -87,7 +159,7 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor, const struct ib_device *ibdev, const char *fmt, ...); -#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) \ +#define DEFINE_DYNAMIC_DEBUG_METADATA_CLS(name, cls, fmt) \ static struct _ddebug __aligned(8) \ __section("__dyndbg") name = { \ .modname = KBUILD_MODNAME, \ @@ -96,8 +168,14 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor, .format = (fmt), \ .lineno = __LINE__, \ .flags = _DPRINTK_FLAGS_DEFAULT, \ + .class_id = cls, \ _DPRINTK_KEY_INIT \ - } + }; \ + BUILD_BUG_ON_MSG(cls > _DPRINTK_CLASS_DFLT, \ + "classid value overflow") + +#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) \ + DEFINE_DYNAMIC_DEBUG_METADATA_CLS(name, _DPRINTK_CLASS_DFLT, fmt) #ifdef CONFIG_JUMP_LABEL @@ -128,17 +206,34 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor, #endif /* CONFIG_JUMP_LABEL */ -#define __dynamic_func_call(id, fmt, func, ...) do { \ - DEFINE_DYNAMIC_DEBUG_METADATA(id, fmt); \ - if (DYNAMIC_DEBUG_BRANCH(id)) \ - func(&id, ##__VA_ARGS__); \ -} while (0) - -#define __dynamic_func_call_no_desc(id, fmt, func, ...) do { \ - DEFINE_DYNAMIC_DEBUG_METADATA(id, fmt); \ +/* + * Factory macros: ($prefix)dynamic_func_call($suffix) + * + * Lower layer (with __ prefix) gets the callsite metadata, and wraps + * the func inside a debug-branch/static-key construct. Upper layer + * (with _ prefix) does the UNIQUE_ID once, so that lower can ref the + * name/label multiple times, and tie the elements together. + * Multiple flavors: + * (|_cls): adds in _DPRINT_CLASS_DFLT as needed + * (|_no_desc): former gets callsite descriptor as 1st arg (for prdbgs) + */ +#define __dynamic_func_call_cls(id, cls, fmt, func, ...) do { \ + DEFINE_DYNAMIC_DEBUG_METADATA_CLS(id, cls, fmt); \ if (DYNAMIC_DEBUG_BRANCH(id)) \ - func(__VA_ARGS__); \ + func(&id, ##__VA_ARGS__); \ +} while (0) +#define __dynamic_func_call(id, fmt, func, ...) \ + __dynamic_func_call_cls(id, _DPRINTK_CLASS_DFLT, fmt, \ + func, ##__VA_ARGS__) + +#define __dynamic_func_call_cls_no_desc(id, cls, fmt, func, ...) do { \ + DEFINE_DYNAMIC_DEBUG_METADATA_CLS(id, cls, fmt); \ + if (DYNAMIC_DEBUG_BRANCH(id)) \ + func(__VA_ARGS__); \ } while (0) +#define __dynamic_func_call_no_desc(id, fmt, func, ...) \ + __dynamic_func_call_cls_no_desc(id, _DPRINTK_CLASS_DFLT, \ + fmt, func, ##__VA_ARGS__) /* * "Factory macro" for generating a call to func, guarded by a @@ -148,22 +243,33 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor, * the varargs. Note that fmt is repeated in invocations of this * macro. */ +#define _dynamic_func_call_cls(cls, fmt, func, ...) \ + __dynamic_func_call_cls(__UNIQUE_ID(ddebug), cls, fmt, func, ##__VA_ARGS__) #define _dynamic_func_call(fmt, func, ...) \ - __dynamic_func_call(__UNIQUE_ID(ddebug), fmt, func, ##__VA_ARGS__) + _dynamic_func_call_cls(_DPRINTK_CLASS_DFLT, fmt, func, ##__VA_ARGS__) + /* * A variant that does the same, except that the descriptor is not * passed as the first argument to the function; it is only called * with precisely the macro's varargs. */ -#define _dynamic_func_call_no_desc(fmt, func, ...) \ - __dynamic_func_call_no_desc(__UNIQUE_ID(ddebug), fmt, func, ##__VA_ARGS__) +#define _dynamic_func_call_cls_no_desc(cls, fmt, func, ...) \ + __dynamic_func_call_cls_no_desc(__UNIQUE_ID(ddebug), cls, fmt, \ + func, ##__VA_ARGS__) +#define _dynamic_func_call_no_desc(fmt, func, ...) \ + _dynamic_func_call_cls_no_desc(_DPRINTK_CLASS_DFLT, fmt, \ + func, ##__VA_ARGS__) + +#define dynamic_pr_debug_cls(cls, fmt, ...) \ + _dynamic_func_call_cls(cls, fmt, __dynamic_pr_debug, \ + pr_fmt(fmt), ##__VA_ARGS__) #define dynamic_pr_debug(fmt, ...) \ - _dynamic_func_call(fmt, __dynamic_pr_debug, \ + _dynamic_func_call(fmt, __dynamic_pr_debug, \ pr_fmt(fmt), ##__VA_ARGS__) #define dynamic_dev_dbg(dev, fmt, ...) \ - _dynamic_func_call(fmt,__dynamic_dev_dbg, \ + _dynamic_func_call(fmt, __dynamic_dev_dbg, \ dev, fmt, ##__VA_ARGS__) #define dynamic_netdev_dbg(dev, fmt, ...) \ @@ -181,14 +287,24 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor, KERN_DEBUG, prefix_str, prefix_type, \ rowsize, groupsize, buf, len, ascii) +struct kernel_param; +int param_set_dyndbg_classes(const char *instr, const struct kernel_param *kp); +int param_get_dyndbg_classes(char *buffer, const struct kernel_param *kp); + +/* for test only, generally expect drm.debug style macro wrappers */ +#define __pr_debug_cls(cls, fmt, ...) do { \ + BUILD_BUG_ON_MSG(!__builtin_constant_p(cls), \ + "expecting constant class int/enum"); \ + dynamic_pr_debug_cls(cls, fmt, ##__VA_ARGS__); \ + } while (0) + #else /* !CONFIG_DYNAMIC_DEBUG_CORE */ #include <linux/string.h> #include <linux/errno.h> #include <linux/printk.h> -static inline int ddebug_add_module(struct _ddebug *tab, unsigned int n, - const char *modname) +static inline int ddebug_add_module(struct _ddebug_info *dinfo, const char *modname) { return 0; } @@ -201,7 +317,7 @@ static inline int ddebug_remove_module(const char *mod) static inline int ddebug_dyndbg_module_param_cb(char *param, char *val, const char *modname) { - if (strstr(param, "dyndbg")) { + if (!strcmp(param, "dyndbg")) { /* avoid pr_warn(), which wants pr_fmt() fully defined */ printk(KERN_WARNING "dyndbg param is supported only in " "CONFIG_DYNAMIC_DEBUG builds\n"); @@ -221,12 +337,14 @@ static inline int ddebug_dyndbg_module_param_cb(char *param, char *val, rowsize, groupsize, buf, len, ascii); \ } while (0) -static inline int dynamic_debug_exec_queries(const char *query, const char *modname) -{ - pr_warn("kernel not built with CONFIG_DYNAMIC_DEBUG_CORE\n"); - return 0; -} +struct kernel_param; +static inline int param_set_dyndbg_classes(const char *instr, const struct kernel_param *kp) +{ return 0; } +static inline int param_get_dyndbg_classes(char *buffer, const struct kernel_param *kp) +{ return 0; } #endif /* !CONFIG_DYNAMIC_DEBUG_CORE */ +extern const struct kernel_param_ops param_ops_dyndbg_classes; + #endif diff --git a/include/linux/efi.h b/include/linux/efi.h index d2b84c2fec39..da3974bf05d3 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -368,6 +368,9 @@ void efi_native_runtime_setup(void); #define UV_SYSTEM_TABLE_GUID EFI_GUID(0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93) #define LINUX_EFI_CRASH_GUID EFI_GUID(0xcfc8fc79, 0xbe2e, 0x4ddc, 0x97, 0xf0, 0x9f, 0x98, 0xbf, 0xe2, 0x98, 0xa0) #define LOADED_IMAGE_PROTOCOL_GUID EFI_GUID(0x5b1b31a1, 0x9562, 0x11d2, 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) +#define LOADED_IMAGE_DEVICE_PATH_PROTOCOL_GUID EFI_GUID(0xbc62157e, 0x3e33, 0x4fec, 0x99, 0x20, 0x2d, 0x3b, 0x36, 0xd7, 0x50, 0xdf) +#define EFI_DEVICE_PATH_PROTOCOL_GUID EFI_GUID(0x09576e91, 0x6d3f, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) +#define EFI_DEVICE_PATH_TO_TEXT_PROTOCOL_GUID EFI_GUID(0x8b843e20, 0x8132, 0x4852, 0x90, 0xcc, 0x55, 0x1a, 0x4e, 0x4a, 0x7f, 0x1c) #define EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID EFI_GUID(0x9042a9de, 0x23dc, 0x4a38, 0x96, 0xfb, 0x7a, 0xde, 0xd0, 0x80, 0x51, 0x6a) #define EFI_UGA_PROTOCOL_GUID EFI_GUID(0x982c298b, 0xf4fa, 0x41cb, 0xb8, 0x38, 0x77, 0xaa, 0x68, 0x8f, 0xb8, 0x39) #define EFI_PCI_IO_PROTOCOL_GUID EFI_GUID(0x4cf5b200, 0x68b8, 0x4ca5, 0x9e, 0xec, 0xb2, 0x3e, 0x3f, 0x50, 0x02, 0x9a) @@ -408,8 +411,10 @@ void efi_native_runtime_setup(void); #define LINUX_EFI_TPM_FINAL_LOG_GUID EFI_GUID(0x1e2ed096, 0x30e2, 0x4254, 0xbd, 0x89, 0x86, 0x3b, 0xbe, 0xf8, 0x23, 0x25) #define LINUX_EFI_MEMRESERVE_TABLE_GUID EFI_GUID(0x888eb0c6, 0x8ede, 0x4ff5, 0xa8, 0xf0, 0x9a, 0xee, 0x5c, 0xb9, 0x77, 0xc2) #define LINUX_EFI_INITRD_MEDIA_GUID EFI_GUID(0x5568e427, 0x68fc, 0x4f3d, 0xac, 0x74, 0xca, 0x55, 0x52, 0x31, 0xcc, 0x68) +#define LINUX_EFI_ZBOOT_MEDIA_GUID EFI_GUID(0xe565a30d, 0x47da, 0x4dbd, 0xb3, 0x54, 0x9b, 0xb5, 0xc8, 0x4f, 0x8b, 0xe2) #define LINUX_EFI_MOK_VARIABLE_TABLE_GUID EFI_GUID(0xc451ed2b, 0x9694, 0x45d3, 0xba, 0xba, 0xed, 0x9f, 0x89, 0x88, 0xa3, 0x89) #define LINUX_EFI_COCO_SECRET_AREA_GUID EFI_GUID(0xadf956ad, 0xe98c, 0x484c, 0xae, 0x11, 0xb5, 0x1c, 0x7d, 0x33, 0x64, 0x47) +#define LINUX_EFI_BOOT_MEMMAP_GUID EFI_GUID(0x800f683f, 0xd08b, 0x423a, 0xa2, 0x93, 0x96, 0x5c, 0x3c, 0x6f, 0xe2, 0xb4) #define RISCV_EFI_BOOT_PROTOCOL_GUID EFI_GUID(0xccd15fec, 0x6f73, 0x4eec, 0x83, 0x95, 0x3e, 0x69, 0xe4, 0xb9, 0x40, 0xbf) @@ -518,6 +523,15 @@ typedef union { efi_system_table_32_t mixed_mode; } efi_system_table_t; +struct efi_boot_memmap { + unsigned long map_size; + unsigned long desc_size; + u32 desc_ver; + unsigned long map_key; + unsigned long buff_size; + efi_memory_desc_t map[]; +}; + /* * Architecture independent structure for describing a memory map for the * benefit of efi_memmap_init_early(), and for passing context between @@ -952,6 +966,7 @@ extern int efi_status_to_err(efi_status_t status); #define EFI_DEV_MEDIA_VENDOR 3 #define EFI_DEV_MEDIA_FILE 4 #define EFI_DEV_MEDIA_PROTOCOL 5 +#define EFI_DEV_MEDIA_REL_OFFSET 8 #define EFI_DEV_BIOS_BOOT 0x05 #define EFI_DEV_END_PATH 0x7F #define EFI_DEV_END_PATH2 0xFF @@ -982,12 +997,27 @@ struct efi_vendor_dev_path { u8 vendordata[]; } __packed; +struct efi_rel_offset_dev_path { + struct efi_generic_dev_path header; + u32 reserved; + u64 starting_offset; + u64 ending_offset; +} __packed; + +struct efi_mem_mapped_dev_path { + struct efi_generic_dev_path header; + u32 memory_type; + u64 starting_addr; + u64 ending_addr; +} __packed; + struct efi_dev_path { union { struct efi_generic_dev_path header; struct efi_acpi_dev_path acpi; struct efi_pci_dev_path pci; struct efi_vendor_dev_path vendor; + struct efi_rel_offset_dev_path rel_offset; }; } __packed; @@ -1321,6 +1351,11 @@ struct linux_efi_coco_secret_area { u64 size; }; +struct linux_efi_initrd { + unsigned long base; + unsigned long size; +}; + /* Header of a populated EFI secret area */ #define EFI_SECRET_TABLE_HEADER_GUID EFI_GUID(0x1e74f542, 0x71dd, 0x4d66, 0x96, 0x3e, 0xef, 0x42, 0x87, 0xff, 0x17, 0x3b) diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 84a466b176cf..d95ab85f96ba 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -253,7 +253,6 @@ static __always_inline void arch_exit_to_user_mode(void) { } /** * arch_do_signal_or_restart - Architecture specific signal delivery function * @regs: Pointer to currents pt_regs - * @has_signal: actual signal to handle * * Invoked from exit_to_user_mode_loop(). */ diff --git a/include/linux/export-internal.h b/include/linux/export-internal.h index c2b1d4fd5987..fe7e6ba918f1 100644 --- a/include/linux/export-internal.h +++ b/include/linux/export-internal.h @@ -10,8 +10,10 @@ #include <linux/compiler.h> #include <linux/types.h> -/* __used is needed to keep __crc_* for LTO */ #define SYMBOL_CRC(sym, crc, sec) \ - u32 __section("___kcrctab" sec "+" #sym) __used __crc_##sym = crc + asm(".section \"___kcrctab" sec "+" #sym "\",\"a\"" "\n" \ + "__crc_" #sym ":" "\n" \ + ".long " #crc "\n" \ + ".previous" "\n") #endif /* __LINUX_EXPORT_INTERNAL_H__ */ diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index d445150c5350..ee0d75d9a302 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -73,6 +73,42 @@ struct f2fs_device { __le32 total_segments; } __packed; +/* reason of stop_checkpoint */ +enum stop_cp_reason { + STOP_CP_REASON_SHUTDOWN, + STOP_CP_REASON_FAULT_INJECT, + STOP_CP_REASON_META_PAGE, + STOP_CP_REASON_WRITE_FAIL, + STOP_CP_REASON_CORRUPTED_SUMMARY, + STOP_CP_REASON_UPDATE_INODE, + STOP_CP_REASON_FLUSH_FAIL, + STOP_CP_REASON_MAX, +}; + +#define MAX_STOP_REASON 32 + +/* detail reason for EFSCORRUPTED */ +enum f2fs_error { + ERROR_CORRUPTED_CLUSTER, + ERROR_FAIL_DECOMPRESSION, + ERROR_INVALID_BLKADDR, + ERROR_CORRUPTED_DIRENT, + ERROR_CORRUPTED_INODE, + ERROR_INCONSISTENT_SUMMARY, + ERROR_INCONSISTENT_FOOTER, + ERROR_INCONSISTENT_SUM_TYPE, + ERROR_CORRUPTED_JOURNAL, + ERROR_INCONSISTENT_NODE_COUNT, + ERROR_INCONSISTENT_BLOCK_COUNT, + ERROR_INVALID_CURSEG, + ERROR_INCONSISTENT_SIT, + ERROR_CORRUPTED_VERITY_XATTR, + ERROR_CORRUPTED_XATTR, + ERROR_MAX, +}; + +#define MAX_F2FS_ERRORS 16 + struct f2fs_super_block { __le32 magic; /* Magic Number */ __le16 major_ver; /* Major Version */ @@ -116,7 +152,9 @@ struct f2fs_super_block { __u8 hot_ext_count; /* # of hot file extension */ __le16 s_encoding; /* Filename charset encoding */ __le16 s_encoding_flags; /* Filename charset encoding flags */ - __u8 reserved[306]; /* valid reserved region */ + __u8 s_stop_reason[MAX_STOP_REASON]; /* stop checkpoint reason */ + __u8 s_errors[MAX_F2FS_ERRORS]; /* reason of image corrupts */ + __u8 reserved[258]; /* valid reserved region */ __le32 crc; /* checksum of superblock */ } __packed; diff --git a/include/linux/find.h b/include/linux/find.h index 424ef67d4a42..ccaf61a0f5fd 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -8,15 +8,33 @@ #include <linux/bitops.h> -extern unsigned long _find_next_bit(const unsigned long *addr1, - const unsigned long *addr2, unsigned long nbits, - unsigned long start, unsigned long invert, unsigned long le); +unsigned long _find_next_bit(const unsigned long *addr1, unsigned long nbits, + unsigned long start); +unsigned long _find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long nbits, unsigned long start); +unsigned long _find_next_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long nbits, unsigned long start); +unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits, + unsigned long start); extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size); +unsigned long __find_nth_bit(const unsigned long *addr, unsigned long size, unsigned long n); +unsigned long __find_nth_and_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long size, unsigned long n); +unsigned long __find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long size, unsigned long n); extern unsigned long _find_first_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size); extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size); extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size); +#ifdef __BIG_ENDIAN +unsigned long _find_first_zero_bit_le(const unsigned long *addr, unsigned long size); +unsigned long _find_next_zero_bit_le(const unsigned long *addr, unsigned + long size, unsigned long offset); +unsigned long _find_next_bit_le(const unsigned long *addr, unsigned + long size, unsigned long offset); +#endif + #ifndef find_next_bit /** * find_next_bit - find the next set bit in a memory region @@ -41,7 +59,7 @@ unsigned long find_next_bit(const unsigned long *addr, unsigned long size, return val ? __ffs(val) : size; } - return _find_next_bit(addr, NULL, size, offset, 0UL, 0); + return _find_next_bit(addr, size, offset); } #endif @@ -71,7 +89,38 @@ unsigned long find_next_and_bit(const unsigned long *addr1, return val ? __ffs(val) : size; } - return _find_next_bit(addr1, addr2, size, offset, 0UL, 0); + return _find_next_and_bit(addr1, addr2, size, offset); +} +#endif + +#ifndef find_next_andnot_bit +/** + * find_next_andnot_bit - find the next set bit in *addr1 excluding all the bits + * in *addr2 + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @size: The bitmap size in bits + * @offset: The bitnumber to start searching at + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +static inline +unsigned long find_next_andnot_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val; + + if (unlikely(offset >= size)) + return size; + + val = *addr1 & ~*addr2 & GENMASK(size - 1, offset); + return val ? __ffs(val) : size; + } + + return _find_next_andnot_bit(addr1, addr2, size, offset); } #endif @@ -99,7 +148,7 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, return val == ~0UL ? size : ffz(val); } - return _find_next_bit(addr, NULL, size, offset, ~0UL, 0); + return _find_next_zero_bit(addr, size, offset); } #endif @@ -125,6 +174,87 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size) } #endif +/** + * find_nth_bit - find N'th set bit in a memory region + * @addr: The address to start the search at + * @size: The maximum number of bits to search + * @n: The number of set bit, which position is needed, counting from 0 + * + * The following is semantically equivalent: + * idx = find_nth_bit(addr, size, 0); + * idx = find_first_bit(addr, size); + * + * Returns the bit number of the N'th set bit. + * If no such, returns @size. + */ +static inline +unsigned long find_nth_bit(const unsigned long *addr, unsigned long size, unsigned long n) +{ + if (n >= size) + return size; + + if (small_const_nbits(size)) { + unsigned long val = *addr & GENMASK(size - 1, 0); + + return val ? fns(val, n) : size; + } + + return __find_nth_bit(addr, size, n); +} + +/** + * find_nth_and_bit - find N'th set bit in 2 memory regions + * @addr1: The 1st address to start the search at + * @addr2: The 2nd address to start the search at + * @size: The maximum number of bits to search + * @n: The number of set bit, which position is needed, counting from 0 + * + * Returns the bit number of the N'th set bit. + * If no such, returns @size. + */ +static inline +unsigned long find_nth_and_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long size, unsigned long n) +{ + if (n >= size) + return size; + + if (small_const_nbits(size)) { + unsigned long val = *addr1 & *addr2 & GENMASK(size - 1, 0); + + return val ? fns(val, n) : size; + } + + return __find_nth_and_bit(addr1, addr2, size, n); +} + +/** + * find_nth_andnot_bit - find N'th set bit in 2 memory regions, + * flipping bits in 2nd region + * @addr1: The 1st address to start the search at + * @addr2: The 2nd address to start the search at + * @size: The maximum number of bits to search + * @n: The number of set bit, which position is needed, counting from 0 + * + * Returns the bit number of the N'th set bit. + * If no such, returns @size. + */ +static inline +unsigned long find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long size, unsigned long n) +{ + if (n >= size) + return size; + + if (small_const_nbits(size)) { + unsigned long val = *addr1 & (~*addr2) & GENMASK(size - 1, 0); + + return val ? fns(val, n) : size; + } + + return __find_nth_andnot_bit(addr1, addr2, size, n); +} + #ifndef find_first_and_bit /** * find_first_and_bit - find the first set bit in both memory regions @@ -194,6 +324,78 @@ unsigned long find_last_bit(const unsigned long *addr, unsigned long size) #endif /** + * find_next_and_bit_wrap - find the next set bit in both memory regions + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @size: The bitmap size in bits + * @offset: The bitnumber to start searching at + * + * Returns the bit number for the next set bit, or first set bit up to @offset + * If no bits are set, returns @size. + */ +static inline +unsigned long find_next_and_bit_wrap(const unsigned long *addr1, + const unsigned long *addr2, + unsigned long size, unsigned long offset) +{ + unsigned long bit = find_next_and_bit(addr1, addr2, size, offset); + + if (bit < size) + return bit; + + bit = find_first_and_bit(addr1, addr2, offset); + return bit < offset ? bit : size; +} + +/** + * find_next_bit_wrap - find the next set bit in both memory regions + * @addr: The first address to base the search on + * @size: The bitmap size in bits + * @offset: The bitnumber to start searching at + * + * Returns the bit number for the next set bit, or first set bit up to @offset + * If no bits are set, returns @size. + */ +static inline +unsigned long find_next_bit_wrap(const unsigned long *addr, + unsigned long size, unsigned long offset) +{ + unsigned long bit = find_next_bit(addr, size, offset); + + if (bit < size) + return bit; + + bit = find_first_bit(addr, offset); + return bit < offset ? bit : size; +} + +/* + * Helper for for_each_set_bit_wrap(). Make sure you're doing right thing + * before using it alone. + */ +static inline +unsigned long __for_each_wrap(const unsigned long *bitmap, unsigned long size, + unsigned long start, unsigned long n) +{ + unsigned long bit; + + /* If not wrapped around */ + if (n > start) { + /* and have a bit, just return it. */ + bit = find_next_bit(bitmap, size, n); + if (bit < size) + return bit; + + /* Otherwise, wrap around and ... */ + n = 0; + } + + /* Search the other part. */ + bit = find_next_bit(bitmap, start, n); + return bit < start ? bit : size; +} + +/** * find_next_clump8 - find next 8-bit clump with set bits in a memory region * @clump: location to store copy of found clump * @addr: address to base the search on @@ -247,7 +449,21 @@ unsigned long find_next_zero_bit_le(const void *addr, unsigned return val == ~0UL ? size : ffz(val); } - return _find_next_bit(addr, NULL, size, offset, ~0UL, 1); + return _find_next_zero_bit_le(addr, size, offset); +} +#endif + +#ifndef find_first_zero_bit_le +static inline +unsigned long find_first_zero_bit_le(const void *addr, unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = swab(*(const unsigned long *)addr) | ~GENMASK(size - 1, 0); + + return val == ~0UL ? size : ffz(val); + } + + return _find_first_zero_bit_le(addr, size); } #endif @@ -266,40 +482,39 @@ unsigned long find_next_bit_le(const void *addr, unsigned return val ? __ffs(val) : size; } - return _find_next_bit(addr, NULL, size, offset, 0UL, 1); + return _find_next_bit_le(addr, size, offset); } #endif -#ifndef find_first_zero_bit_le -#define find_first_zero_bit_le(addr, size) \ - find_next_zero_bit_le((addr), (size), 0) -#endif - #else #error "Please fix <asm/byteorder.h>" #endif #define for_each_set_bit(bit, addr, size) \ - for ((bit) = find_next_bit((addr), (size), 0); \ - (bit) < (size); \ - (bit) = find_next_bit((addr), (size), (bit) + 1)) + for ((bit) = 0; (bit) = find_next_bit((addr), (size), (bit)), (bit) < (size); (bit)++) + +#define for_each_and_bit(bit, addr1, addr2, size) \ + for ((bit) = 0; \ + (bit) = find_next_and_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\ + (bit)++) + +#define for_each_andnot_bit(bit, addr1, addr2, size) \ + for ((bit) = 0; \ + (bit) = find_next_andnot_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\ + (bit)++) /* same as for_each_set_bit() but use bit as value to start with */ #define for_each_set_bit_from(bit, addr, size) \ - for ((bit) = find_next_bit((addr), (size), (bit)); \ - (bit) < (size); \ - (bit) = find_next_bit((addr), (size), (bit) + 1)) + for (; (bit) = find_next_bit((addr), (size), (bit)), (bit) < (size); (bit)++) #define for_each_clear_bit(bit, addr, size) \ - for ((bit) = find_next_zero_bit((addr), (size), 0); \ - (bit) < (size); \ - (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) + for ((bit) = 0; \ + (bit) = find_next_zero_bit((addr), (size), (bit)), (bit) < (size); \ + (bit)++) /* same as for_each_clear_bit() but use bit as value to start with */ #define for_each_clear_bit_from(bit, addr, size) \ - for ((bit) = find_next_zero_bit((addr), (size), (bit)); \ - (bit) < (size); \ - (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) + for (; (bit) = find_next_zero_bit((addr), (size), (bit)), (bit) < (size); (bit)++) /** * for_each_set_bitrange - iterate over all set bit ranges [b; e) @@ -309,11 +524,11 @@ unsigned long find_next_bit_le(const void *addr, unsigned * @size: bitmap size in number of bits */ #define for_each_set_bitrange(b, e, addr, size) \ - for ((b) = find_next_bit((addr), (size), 0), \ - (e) = find_next_zero_bit((addr), (size), (b) + 1); \ + for ((b) = 0; \ + (b) = find_next_bit((addr), (size), b), \ + (e) = find_next_zero_bit((addr), (size), (b) + 1), \ (b) < (size); \ - (b) = find_next_bit((addr), (size), (e) + 1), \ - (e) = find_next_zero_bit((addr), (size), (b) + 1)) + (b) = (e) + 1) /** * for_each_set_bitrange_from - iterate over all set bit ranges [b; e) @@ -323,11 +538,11 @@ unsigned long find_next_bit_le(const void *addr, unsigned * @size: bitmap size in number of bits */ #define for_each_set_bitrange_from(b, e, addr, size) \ - for ((b) = find_next_bit((addr), (size), (b)), \ - (e) = find_next_zero_bit((addr), (size), (b) + 1); \ + for (; \ + (b) = find_next_bit((addr), (size), (b)), \ + (e) = find_next_zero_bit((addr), (size), (b) + 1), \ (b) < (size); \ - (b) = find_next_bit((addr), (size), (e) + 1), \ - (e) = find_next_zero_bit((addr), (size), (b) + 1)) + (b) = (e) + 1) /** * for_each_clear_bitrange - iterate over all unset bit ranges [b; e) @@ -337,11 +552,11 @@ unsigned long find_next_bit_le(const void *addr, unsigned * @size: bitmap size in number of bits */ #define for_each_clear_bitrange(b, e, addr, size) \ - for ((b) = find_next_zero_bit((addr), (size), 0), \ - (e) = find_next_bit((addr), (size), (b) + 1); \ + for ((b) = 0; \ + (b) = find_next_zero_bit((addr), (size), (b)), \ + (e) = find_next_bit((addr), (size), (b) + 1), \ (b) < (size); \ - (b) = find_next_zero_bit((addr), (size), (e) + 1), \ - (e) = find_next_bit((addr), (size), (b) + 1)) + (b) = (e) + 1) /** * for_each_clear_bitrange_from - iterate over all unset bit ranges [b; e) @@ -351,11 +566,24 @@ unsigned long find_next_bit_le(const void *addr, unsigned * @size: bitmap size in number of bits */ #define for_each_clear_bitrange_from(b, e, addr, size) \ - for ((b) = find_next_zero_bit((addr), (size), (b)), \ - (e) = find_next_bit((addr), (size), (b) + 1); \ + for (; \ + (b) = find_next_zero_bit((addr), (size), (b)), \ + (e) = find_next_bit((addr), (size), (b) + 1), \ (b) < (size); \ - (b) = find_next_zero_bit((addr), (size), (e) + 1), \ - (e) = find_next_bit((addr), (size), (b) + 1)) + (b) = (e) + 1) + +/** + * for_each_set_bit_wrap - iterate over all set bits starting from @start, and + * wrapping around the end of bitmap. + * @bit: offset for current iteration + * @addr: bitmap address to base the search on + * @size: bitmap size in number of bits + * @start: Starting bit for bitmap traversing, wrapping around the bitmap end + */ +#define for_each_set_bit_wrap(bit, addr, size, start) \ + for ((bit) = find_next_bit_wrap((addr), (size), (start)); \ + (bit) < (size); \ + (bit) = __for_each_wrap((addr), (size), (start), (bit) + 1)) /** * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index b62c90cfafaf..4029fe368a4f 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -328,8 +328,10 @@ __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size, * __struct_size() vs __member_size() must be captured here to avoid * evaluating argument side-effects further into the macro layers. */ +#ifndef CONFIG_KMSAN #define memset(p, c, s) __fortify_memset_chk(p, c, s, \ __struct_size(p), __member_size(p)) +#endif /* * To make sure the compiler can enforce protection against buffer overflows, diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 0621c5f86c39..b303472255be 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -8,9 +8,11 @@ #include <linux/sched.h> #include <linux/wait.h> #include <linux/atomic.h> +#include <linux/jump_label.h> #ifdef CONFIG_FREEZER -extern atomic_t system_freezing_cnt; /* nr of freezing conds in effect */ +DECLARE_STATIC_KEY_FALSE(freezer_active); + extern bool pm_freezing; /* PM freezing in effect */ extern bool pm_nosig_freezing; /* PM nosig freezing in effect */ @@ -22,10 +24,7 @@ extern unsigned int freeze_timeout_msecs; /* * Check if a process has been frozen */ -static inline bool frozen(struct task_struct *p) -{ - return p->flags & PF_FROZEN; -} +extern bool frozen(struct task_struct *p); extern bool freezing_slow_path(struct task_struct *p); @@ -34,9 +33,10 @@ extern bool freezing_slow_path(struct task_struct *p); */ static inline bool freezing(struct task_struct *p) { - if (likely(!atomic_read(&system_freezing_cnt))) - return false; - return freezing_slow_path(p); + if (static_branch_unlikely(&freezer_active)) + return freezing_slow_path(p); + + return false; } /* Takes and releases task alloc lock using task_lock() */ @@ -48,23 +48,14 @@ extern int freeze_kernel_threads(void); extern void thaw_processes(void); extern void thaw_kernel_threads(void); -/* - * DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION - * If try_to_freeze causes a lockdep warning it means the caller may deadlock - */ -static inline bool try_to_freeze_unsafe(void) +static inline bool try_to_freeze(void) { might_sleep(); if (likely(!freezing(current))) return false; - return __refrigerator(false); -} - -static inline bool try_to_freeze(void) -{ if (!(current->flags & PF_NOFREEZE)) debug_check_no_locks_held(); - return try_to_freeze_unsafe(); + return __refrigerator(false); } extern bool freeze_task(struct task_struct *p); @@ -79,195 +70,6 @@ static inline bool cgroup_freezing(struct task_struct *task) } #endif /* !CONFIG_CGROUP_FREEZER */ -/* - * The PF_FREEZER_SKIP flag should be set by a vfork parent right before it - * calls wait_for_completion(&vfork) and reset right after it returns from this - * function. Next, the parent should call try_to_freeze() to freeze itself - * appropriately in case the child has exited before the freezing of tasks is - * complete. However, we don't want kernel threads to be frozen in unexpected - * places, so we allow them to block freeze_processes() instead or to set - * PF_NOFREEZE if needed. Fortunately, in the ____call_usermodehelper() case the - * parent won't really block freeze_processes(), since ____call_usermodehelper() - * (the child) does a little before exec/exit and it can't be frozen before - * waking up the parent. - */ - - -/** - * freezer_do_not_count - tell freezer to ignore %current - * - * Tell freezers to ignore the current task when determining whether the - * target frozen state is reached. IOW, the current task will be - * considered frozen enough by freezers. - * - * The caller shouldn't do anything which isn't allowed for a frozen task - * until freezer_cont() is called. Usually, freezer[_do_not]_count() pair - * wrap a scheduling operation and nothing much else. - */ -static inline void freezer_do_not_count(void) -{ - current->flags |= PF_FREEZER_SKIP; -} - -/** - * freezer_count - tell freezer to stop ignoring %current - * - * Undo freezer_do_not_count(). It tells freezers that %current should be - * considered again and tries to freeze if freezing condition is already in - * effect. - */ -static inline void freezer_count(void) -{ - current->flags &= ~PF_FREEZER_SKIP; - /* - * If freezing is in progress, the following paired with smp_mb() - * in freezer_should_skip() ensures that either we see %true - * freezing() or freezer_should_skip() sees !PF_FREEZER_SKIP. - */ - smp_mb(); - try_to_freeze(); -} - -/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ -static inline void freezer_count_unsafe(void) -{ - current->flags &= ~PF_FREEZER_SKIP; - smp_mb(); - try_to_freeze_unsafe(); -} - -/** - * freezer_should_skip - whether to skip a task when determining frozen - * state is reached - * @p: task in quesion - * - * This function is used by freezers after establishing %true freezing() to - * test whether a task should be skipped when determining the target frozen - * state is reached. IOW, if this function returns %true, @p is considered - * frozen enough. - */ -static inline bool freezer_should_skip(struct task_struct *p) -{ - /* - * The following smp_mb() paired with the one in freezer_count() - * ensures that either freezer_count() sees %true freezing() or we - * see cleared %PF_FREEZER_SKIP and return %false. This makes it - * impossible for a task to slip frozen state testing after - * clearing %PF_FREEZER_SKIP. - */ - smp_mb(); - return p->flags & PF_FREEZER_SKIP; -} - -/* - * These functions are intended to be used whenever you want allow a sleeping - * task to be frozen. Note that neither return any clear indication of - * whether a freeze event happened while in this function. - */ - -/* Like schedule(), but should not block the freezer. */ -static inline void freezable_schedule(void) -{ - freezer_do_not_count(); - schedule(); - freezer_count(); -} - -/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ -static inline void freezable_schedule_unsafe(void) -{ - freezer_do_not_count(); - schedule(); - freezer_count_unsafe(); -} - -/* - * Like schedule_timeout(), but should not block the freezer. Do not - * call this with locks held. - */ -static inline long freezable_schedule_timeout(long timeout) -{ - long __retval; - freezer_do_not_count(); - __retval = schedule_timeout(timeout); - freezer_count(); - return __retval; -} - -/* - * Like schedule_timeout_interruptible(), but should not block the freezer. Do not - * call this with locks held. - */ -static inline long freezable_schedule_timeout_interruptible(long timeout) -{ - long __retval; - freezer_do_not_count(); - __retval = schedule_timeout_interruptible(timeout); - freezer_count(); - return __retval; -} - -/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ -static inline long freezable_schedule_timeout_interruptible_unsafe(long timeout) -{ - long __retval; - - freezer_do_not_count(); - __retval = schedule_timeout_interruptible(timeout); - freezer_count_unsafe(); - return __retval; -} - -/* Like schedule_timeout_killable(), but should not block the freezer. */ -static inline long freezable_schedule_timeout_killable(long timeout) -{ - long __retval; - freezer_do_not_count(); - __retval = schedule_timeout_killable(timeout); - freezer_count(); - return __retval; -} - -/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ -static inline long freezable_schedule_timeout_killable_unsafe(long timeout) -{ - long __retval; - freezer_do_not_count(); - __retval = schedule_timeout_killable(timeout); - freezer_count_unsafe(); - return __retval; -} - -/* - * Like schedule_hrtimeout_range(), but should not block the freezer. Do not - * call this with locks held. - */ -static inline int freezable_schedule_hrtimeout_range(ktime_t *expires, - u64 delta, const enum hrtimer_mode mode) -{ - int __retval; - freezer_do_not_count(); - __retval = schedule_hrtimeout_range(expires, delta, mode); - freezer_count(); - return __retval; -} - -/* - * Freezer-friendly wrappers around wait_event_interruptible(), - * wait_event_killable() and wait_event_interruptible_timeout(), originally - * defined in <linux/wait.h> - */ - -/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ -#define wait_event_freezekillable_unsafe(wq, condition) \ -({ \ - int __retval; \ - freezer_do_not_count(); \ - __retval = wait_event_killable(wq, (condition)); \ - freezer_count_unsafe(); \ - __retval; \ -}) - #else /* !CONFIG_FREEZER */ static inline bool frozen(struct task_struct *p) { return false; } static inline bool freezing(struct task_struct *p) { return false; } @@ -281,35 +83,8 @@ static inline void thaw_kernel_threads(void) {} static inline bool try_to_freeze(void) { return false; } -static inline void freezer_do_not_count(void) {} -static inline void freezer_count(void) {} -static inline int freezer_should_skip(struct task_struct *p) { return 0; } static inline void set_freezable(void) {} -#define freezable_schedule() schedule() - -#define freezable_schedule_unsafe() schedule() - -#define freezable_schedule_timeout(timeout) schedule_timeout(timeout) - -#define freezable_schedule_timeout_interruptible(timeout) \ - schedule_timeout_interruptible(timeout) - -#define freezable_schedule_timeout_interruptible_unsafe(timeout) \ - schedule_timeout_interruptible(timeout) - -#define freezable_schedule_timeout_killable(timeout) \ - schedule_timeout_killable(timeout) - -#define freezable_schedule_timeout_killable_unsafe(timeout) \ - schedule_timeout_killable(timeout) - -#define freezable_schedule_hrtimeout_range(expires, delta, mode) \ - schedule_hrtimeout_range(expires, delta, mode) - -#define wait_event_freezekillable_unsafe(wq, condition) \ - wait_event_killable(wq, condition) - #endif /* !CONFIG_FREEZER */ #endif /* FREEZER_H_INCLUDED */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 619d683eb5fd..e654435f1651 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2004,8 +2004,9 @@ static inline int vfs_whiteout(struct user_namespace *mnt_userns, WHITEOUT_DEV); } -struct dentry *vfs_tmpfile(struct user_namespace *mnt_userns, - struct dentry *dentry, umode_t mode, int open_flag); +struct file *vfs_tmpfile_open(struct user_namespace *mnt_userns, + const struct path *parentpath, + umode_t mode, int open_flag, const struct cred *cred); int vfs_mkobj(struct dentry *, umode_t, int (*f)(struct dentry *, umode_t, void *), @@ -2170,7 +2171,7 @@ struct inode_operations { struct file *, unsigned open_flag, umode_t create_mode); int (*tmpfile) (struct user_namespace *, struct inode *, - struct dentry *, umode_t); + struct file *, umode_t); int (*set_acl)(struct user_namespace *, struct inode *, struct posix_acl *, int); int (*fileattr_set)(struct user_namespace *mnt_userns, @@ -2783,6 +2784,15 @@ extern int finish_open(struct file *file, struct dentry *dentry, int (*open)(struct inode *, struct file *)); extern int finish_no_open(struct file *file, struct dentry *dentry); +/* Helper for the simple case when original dentry is used */ +static inline int finish_open_simple(struct file *file, int error) +{ + if (error) + return error; + + return finish_open(file, file->f_path.dentry, NULL); +} + /* fs/dcache.c */ extern void __init vfs_caches_init_early(void); extern void __init vfs_caches_init(void); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0b61371e287b..62557d4bffc2 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1122,47 +1122,6 @@ static inline void unpause_graph_tracing(void) { } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ #ifdef CONFIG_TRACING - -/* flags for current->trace */ -enum { - TSK_TRACE_FL_TRACE_BIT = 0, - TSK_TRACE_FL_GRAPH_BIT = 1, -}; -enum { - TSK_TRACE_FL_TRACE = 1 << TSK_TRACE_FL_TRACE_BIT, - TSK_TRACE_FL_GRAPH = 1 << TSK_TRACE_FL_GRAPH_BIT, -}; - -static inline void set_tsk_trace_trace(struct task_struct *tsk) -{ - set_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace); -} - -static inline void clear_tsk_trace_trace(struct task_struct *tsk) -{ - clear_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace); -} - -static inline int test_tsk_trace_trace(struct task_struct *tsk) -{ - return tsk->trace & TSK_TRACE_FL_TRACE; -} - -static inline void set_tsk_trace_graph(struct task_struct *tsk) -{ - set_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace); -} - -static inline void clear_tsk_trace_graph(struct task_struct *tsk) -{ - clear_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace); -} - -static inline int test_tsk_trace_graph(struct task_struct *tsk) -{ - return tsk->trace & TSK_TRACE_FL_GRAPH; -} - enum ftrace_dump_mode; extern enum ftrace_dump_mode ftrace_dump_on_oops; diff --git a/include/linux/gameport.h b/include/linux/gameport.h index 69081d899492..8c2f00018e89 100644 --- a/include/linux/gameport.h +++ b/include/linux/gameport.h @@ -110,7 +110,7 @@ static inline void gameport_free_port(struct gameport *gameport) static inline void gameport_set_name(struct gameport *gameport, const char *name) { - strlcpy(gameport->name, name, sizeof(gameport->name)); + strscpy(gameport->name, name, sizeof(gameport->name)); } /* diff --git a/include/linux/gfp.h b/include/linux/gfp.h index f314be58fa77..ef4aea3b356e 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -18,6 +18,9 @@ static inline int gfp_migratetype(const gfp_t gfp_flags) VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE); BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE); + BUILD_BUG_ON((___GFP_RECLAIMABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_RECLAIMABLE); + BUILD_BUG_ON(((___GFP_MOVABLE | ___GFP_RECLAIMABLE) >> + GFP_MOVABLE_SHIFT) != MIGRATE_HIGHATOMIC); if (unlikely(page_group_by_mobility_disabled)) return MIGRATE_UNMOVABLE; @@ -33,29 +36,6 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) return !!(gfp_flags & __GFP_DIRECT_RECLAIM); } -/** - * gfpflags_normal_context - is gfp_flags a normal sleepable context? - * @gfp_flags: gfp_flags to test - * - * Test whether @gfp_flags indicates that the allocation is from the - * %current context and allowed to sleep. - * - * An allocation being allowed to block doesn't mean it owns the %current - * context. When direct reclaim path tries to allocate memory, the - * allocation context is nested inside whatever %current was doing at the - * time of the original allocation. The nested allocation may be allowed - * to block but modifying anything %current owns can corrupt the outer - * context's expectations. - * - * %true result from this function indicates that the allocation context - * can sleep and use anything that's associated with %current. - */ -static inline bool gfpflags_normal_context(const gfp_t gfp_flags) -{ - return (gfp_flags & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC)) == - __GFP_DIRECT_RECLAIM; -} - #ifdef CONFIG_HIGHMEM #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM #else diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index fe0f460d9a3b..36460ced060b 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -174,10 +174,6 @@ int desc_to_gpio(const struct gpio_desc *desc); /* Child properties interface */ struct fwnode_handle; -struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, - const char *propname, int index, - enum gpiod_flags dflags, - const char *label); struct gpio_desc *fwnode_gpiod_get_index(struct fwnode_handle *fwnode, const char *con_id, int index, enum gpiod_flags flags, @@ -554,15 +550,6 @@ static inline int desc_to_gpio(const struct gpio_desc *desc) struct fwnode_handle; static inline -struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, - const char *propname, int index, - enum gpiod_flags dflags, - const char *label) -{ - return ERR_PTR(-ENOSYS); -} - -static inline struct gpio_desc *fwnode_gpiod_get_index(struct fwnode_handle *fwnode, const char *con_id, int index, enum gpiod_flags flags, diff --git a/include/linux/hid.h b/include/linux/hid.h index 4363a63b9775..8677ae38599e 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -314,15 +314,6 @@ struct hid_item { #define HID_BAT_ABSOLUTESTATEOFCHARGE 0x00850065 #define HID_VD_ASUS_CUSTOM_MEDIA_KEYS 0xff310076 -/* - * HID report types --- Ouch! HID spec says 1 2 3! - */ - -#define HID_INPUT_REPORT 0 -#define HID_OUTPUT_REPORT 1 -#define HID_FEATURE_REPORT 2 - -#define HID_REPORT_TYPES 3 /* * HID connect requests @@ -509,7 +500,7 @@ struct hid_report { struct list_head hidinput_list; struct list_head field_entry_list; /* ordered list of input fields */ unsigned int id; /* id of this report */ - unsigned int type; /* report type */ + enum hid_report_type type; /* report type */ unsigned int application; /* application usage for this report */ struct hid_field *field[HID_MAX_FIELDS]; /* fields of the report */ struct hid_field_entry *field_entries; /* allocated memory of input field_entry */ @@ -658,6 +649,8 @@ struct hid_device { /* device report descriptor */ struct list_head debug_list; spinlock_t debug_list_lock; wait_queue_head_t debug_wait; + + unsigned int id; /* system unique id */ }; #define to_hid_device(pdev) \ @@ -924,20 +917,21 @@ extern int hidinput_connect(struct hid_device *hid, unsigned int force); extern void hidinput_disconnect(struct hid_device *); int hid_set_field(struct hid_field *, unsigned, __s32); -int hid_input_report(struct hid_device *, int type, u8 *, u32, int); +int hid_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, + int interrupt); struct hid_field *hidinput_get_led_field(struct hid_device *hid); unsigned int hidinput_count_leds(struct hid_device *hid); __s32 hidinput_calc_abs_res(const struct hid_field *field, __u16 code); void hid_output_report(struct hid_report *report, __u8 *data); -int __hid_request(struct hid_device *hid, struct hid_report *rep, int reqtype); +int __hid_request(struct hid_device *hid, struct hid_report *rep, enum hid_class_request reqtype); u8 *hid_alloc_report_buf(struct hid_report *report, gfp_t flags); struct hid_device *hid_allocate_device(void); struct hid_report *hid_register_report(struct hid_device *device, - unsigned int type, unsigned int id, + enum hid_report_type type, unsigned int id, unsigned int application); int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size); struct hid_report *hid_validate_values(struct hid_device *hid, - unsigned int type, unsigned int id, + enum hid_report_type type, unsigned int id, unsigned int field_index, unsigned int report_counts); @@ -1106,10 +1100,11 @@ void hid_hw_stop(struct hid_device *hdev); int __must_check hid_hw_open(struct hid_device *hdev); void hid_hw_close(struct hid_device *hdev); void hid_hw_request(struct hid_device *hdev, - struct hid_report *report, int reqtype); + struct hid_report *report, enum hid_class_request reqtype); int hid_hw_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, - size_t len, unsigned char rtype, int reqtype); + size_t len, enum hid_report_type rtype, + enum hid_class_request reqtype); int hid_hw_output_report(struct hid_device *hdev, __u8 *buf, size_t len); /** @@ -1137,7 +1132,7 @@ static inline int hid_hw_power(struct hid_device *hdev, int level) * @reqtype: hid request type */ static inline int hid_hw_idle(struct hid_device *hdev, int report, int idle, - int reqtype) + enum hid_class_request reqtype) { if (hdev->ll_driver->idle) return hdev->ll_driver->idle(hdev, report, idle, reqtype); @@ -1182,8 +1177,8 @@ static inline u32 hid_report_len(struct hid_report *report) return DIV_ROUND_UP(report->size, 8) + (report->id > 0); } -int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size, - int interrupt); +int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, + int interrupt); /* HID quirks API */ unsigned long hid_lookup_quirk(const struct hid_device *hdev); diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 25679035ca28..e9912da5441b 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -6,6 +6,7 @@ #include <linux/kernel.h> #include <linux/bug.h> #include <linux/cacheflush.h> +#include <linux/kmsan.h> #include <linux/mm.h> #include <linux/uaccess.h> #include <linux/hardirq.h> @@ -311,6 +312,7 @@ static inline void copy_user_highpage(struct page *to, struct page *from, vfrom = kmap_local_page(from); vto = kmap_local_page(to); copy_user_page(vto, vfrom, vaddr, to); + kmsan_unpoison_memory(page_address(to), PAGE_SIZE); kunmap_local(vto); kunmap_local(vfrom); } @@ -326,6 +328,7 @@ static inline void copy_highpage(struct page *to, struct page *from) vfrom = kmap_local_page(from); vto = kmap_local_page(to); copy_page(vto, vfrom); + kmsan_copy_page_meta(to, from); kunmap_local(vto); kunmap_local(vfrom); } diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 116e8bd68c99..e230c7c46110 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -87,29 +87,6 @@ #define PEH_AXUSER_CFG 0x401001 #define PEH_AXUSER_CFG_ENABLE 0xffffffff -#define QM_AXI_RRESP BIT(0) -#define QM_AXI_BRESP BIT(1) -#define QM_ECC_MBIT BIT(2) -#define QM_ECC_1BIT BIT(3) -#define QM_ACC_GET_TASK_TIMEOUT BIT(4) -#define QM_ACC_DO_TASK_TIMEOUT BIT(5) -#define QM_ACC_WB_NOT_READY_TIMEOUT BIT(6) -#define QM_SQ_CQ_VF_INVALID BIT(7) -#define QM_CQ_VF_INVALID BIT(8) -#define QM_SQ_VF_INVALID BIT(9) -#define QM_DB_TIMEOUT BIT(10) -#define QM_OF_FIFO_OF BIT(11) -#define QM_DB_RANDOM_INVALID BIT(12) -#define QM_MAILBOX_TIMEOUT BIT(13) -#define QM_FLR_TIMEOUT BIT(14) - -#define QM_BASE_NFE (QM_AXI_RRESP | QM_AXI_BRESP | QM_ECC_MBIT | \ - QM_ACC_GET_TASK_TIMEOUT | QM_DB_TIMEOUT | \ - QM_OF_FIFO_OF | QM_DB_RANDOM_INVALID | \ - QM_MAILBOX_TIMEOUT | QM_FLR_TIMEOUT) -#define QM_BASE_CE QM_ECC_1BIT - -#define QM_Q_DEPTH 1024 #define QM_MIN_QNUM 2 #define HISI_ACC_SGL_SGE_NR_MAX 255 #define QM_SHAPER_CFG 0x100164 @@ -168,6 +145,15 @@ enum qm_vf_state { QM_NOT_READY, }; +enum qm_cap_bits { + QM_SUPPORT_DB_ISOLATION = 0x0, + QM_SUPPORT_FUNC_QOS, + QM_SUPPORT_STOP_QP, + QM_SUPPORT_MB_COMMAND, + QM_SUPPORT_SVA_PREFETCH, + QM_SUPPORT_RPM, +}; + struct dfx_diff_registers { u32 *regs; u32 reg_offset; @@ -232,7 +218,10 @@ struct hisi_qm_err_info { char *acpi_rst; u32 msi_wr_port; u32 ecc_2bits_mask; - u32 dev_ce_mask; + u32 qm_shutdown_mask; + u32 dev_shutdown_mask; + u32 qm_reset_mask; + u32 dev_reset_mask; u32 ce; u32 nfe; u32 fe; @@ -258,6 +247,18 @@ struct hisi_qm_err_ini { void (*err_info_init)(struct hisi_qm *qm); }; +struct hisi_qm_cap_info { + u32 type; + /* Register offset */ + u32 offset; + /* Bit offset in register */ + u32 shift; + u32 mask; + u32 v1_val; + u32 v2_val; + u32 v3_val; +}; + struct hisi_qm_list { struct mutex lock; struct list_head list; @@ -278,6 +279,9 @@ struct hisi_qm { struct pci_dev *pdev; void __iomem *io_base; void __iomem *db_io_base; + + /* Capbility version, 0: not supports */ + u32 cap_ver; u32 sqe_size; u32 qp_base; u32 qp_num; @@ -286,6 +290,8 @@ struct hisi_qm { u32 max_qp_num; u32 vfs_num; u32 db_interval; + u16 eq_depth; + u16 aeq_depth; struct list_head list; struct hisi_qm_list *qm_list; @@ -304,6 +310,8 @@ struct hisi_qm { struct hisi_qm_err_info err_info; struct hisi_qm_err_status err_status; unsigned long misc_ctl; /* driver removing and reset sched */ + /* Device capability bit */ + unsigned long caps; struct rw_semaphore qps_lock; struct idr qp_idr; @@ -326,8 +334,6 @@ struct hisi_qm { bool use_sva; bool is_frozen; - /* doorbell isolation enable */ - bool use_db_isolation; resource_size_t phys_base; resource_size_t db_phys_base; struct uacce_device *uacce; @@ -351,6 +357,8 @@ struct hisi_qp_ops { struct hisi_qp { u32 qp_id; + u16 sq_depth; + u16 cq_depth; u8 alg_type; u8 req_type; @@ -501,6 +509,9 @@ void hisi_qm_pm_init(struct hisi_qm *qm); int hisi_qm_get_dfx_access(struct hisi_qm *qm); void hisi_qm_put_dfx_access(struct hisi_qm *qm); void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset); +u32 hisi_qm_get_hw_info(struct hisi_qm *qm, + const struct hisi_qm_cap_info *info_table, + u32 index, bool is_read); /* Used by VFIO ACC live migration driver */ struct pci_driver *hisi_sec_get_pf_driver(void); diff --git a/include/linux/htcpld.h b/include/linux/htcpld.h index 842fce69ac06..5f8ac9b1d724 100644 --- a/include/linux/htcpld.h +++ b/include/linux/htcpld.h @@ -13,8 +13,6 @@ struct htcpld_chip_platform_data { }; struct htcpld_core_platform_data { - unsigned int int_reset_gpio_hi; - unsigned int int_reset_gpio_lo; unsigned int i2c_adapter_id; struct htcpld_chip_platform_data *chip; diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 768e5261fdae..a1341fdcf666 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -168,9 +168,8 @@ static inline bool file_thp_enabled(struct vm_area_struct *vma) !inode_is_open_for_write(inode) && S_ISREG(inode->i_mode); } -bool hugepage_vma_check(struct vm_area_struct *vma, - unsigned long vm_flags, - bool smaps, bool in_pf); +bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags, + bool smaps, bool in_pf, bool enforce_sysfs); #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ @@ -219,6 +218,9 @@ void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud, int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice); +int madvise_collapse(struct vm_area_struct *vma, + struct vm_area_struct **prev, + unsigned long start, unsigned long end); void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, unsigned long end, long adjust_next); spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma); @@ -321,8 +323,8 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, } static inline bool hugepage_vma_check(struct vm_area_struct *vma, - unsigned long vm_flags, - bool smaps, bool in_pf) + unsigned long vm_flags, bool smaps, + bool in_pf, bool enforce_sysfs) { return false; } @@ -362,9 +364,16 @@ static inline void split_huge_pmd_address(struct vm_area_struct *vma, static inline int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice) { - BUG(); - return 0; + return -EINVAL; +} + +static inline int madvise_collapse(struct vm_area_struct *vma, + struct vm_area_struct **prev, + unsigned long start, unsigned long end) +{ + return -EINVAL; } + static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, unsigned long end, @@ -435,6 +444,11 @@ static inline int split_folio_to_list(struct folio *folio, return split_huge_page_to_list(&folio->page, list); } +static inline int split_folio(struct folio *folio) +{ + return split_folio_to_list(folio, NULL); +} + /* * archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to * limitations in the implementation like arm64 MTE can override this to diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 3ec981a0d8b3..8b4f93e84868 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -16,8 +16,9 @@ struct ctl_table; struct user_struct; struct mmu_gather; +struct node; -#ifndef is_hugepd +#ifndef CONFIG_ARCH_HAS_HUGEPD typedef struct { unsigned long pd; } hugepd_t; #define is_hugepd(hugepd) (0) #define __hugepd(x) ((hugepd_t) { (x) }) @@ -114,6 +115,12 @@ struct file_region { #endif }; +struct hugetlb_vma_lock { + struct kref refs; + struct rw_semaphore rw_sema; + struct vm_area_struct *vma; +}; + extern struct resv_map *resv_map_alloc(void); void resv_map_release(struct kref *ref); @@ -126,7 +133,7 @@ struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages, long min_hpages); void hugepage_put_subpool(struct hugepage_subpool *spool); -void reset_vma_resv_huge_pages(struct vm_area_struct *vma); +void hugetlb_dup_vma_private(struct vm_area_struct *vma); void clear_vma_resv_huge_pages(struct vm_area_struct *vma); int hugetlb_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *); int hugetlb_overcommit_handler(struct ctl_table *, int, void *, size_t *, @@ -207,13 +214,21 @@ struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, struct page *follow_huge_pd(struct vm_area_struct *vma, unsigned long address, hugepd_t hpd, int flags, int pdshift); -struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int flags); +struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, + int flags); struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, pud_t *pud, int flags); struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int flags); +void hugetlb_vma_lock_read(struct vm_area_struct *vma); +void hugetlb_vma_unlock_read(struct vm_area_struct *vma); +void hugetlb_vma_lock_write(struct vm_area_struct *vma); +void hugetlb_vma_unlock_write(struct vm_area_struct *vma); +int hugetlb_vma_trylock_write(struct vm_area_struct *vma); +void hugetlb_vma_assert_locked(struct vm_area_struct *vma); +void hugetlb_vma_lock_release(struct kref *kref); + int pmd_huge(pmd_t pmd); int pud_huge(pud_t pud); unsigned long hugetlb_change_protection(struct vm_area_struct *vma, @@ -225,7 +240,7 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); #else /* !CONFIG_HUGETLB_PAGE */ -static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma) +static inline void hugetlb_dup_vma_private(struct vm_area_struct *vma) { } @@ -312,8 +327,8 @@ static inline struct page *follow_huge_pd(struct vm_area_struct *vma, return NULL; } -static inline struct page *follow_huge_pmd(struct mm_struct *mm, - unsigned long address, pmd_t *pmd, int flags) +static inline struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, + unsigned long address, int flags) { return NULL; } @@ -336,6 +351,31 @@ static inline int prepare_hugepage_range(struct file *file, return -EINVAL; } +static inline void hugetlb_vma_lock_read(struct vm_area_struct *vma) +{ +} + +static inline void hugetlb_vma_unlock_read(struct vm_area_struct *vma) +{ +} + +static inline void hugetlb_vma_lock_write(struct vm_area_struct *vma) +{ +} + +static inline void hugetlb_vma_unlock_write(struct vm_area_struct *vma) +{ +} + +static inline int hugetlb_vma_trylock_write(struct vm_area_struct *vma) +{ + return 1; +} + +static inline void hugetlb_vma_assert_locked(struct vm_area_struct *vma) +{ +} + static inline int pmd_huge(pmd_t pmd) { return 0; @@ -665,7 +705,7 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask); struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, unsigned long address); -int huge_add_to_page_cache(struct page *page, struct address_space *mapping, +int hugetlb_add_to_page_cache(struct page *page, struct address_space *mapping, pgoff_t idx); void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, unsigned long address, struct page *page); @@ -935,6 +975,11 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, } #endif +#ifdef CONFIG_NUMA +void hugetlb_register_node(struct node *node); +void hugetlb_unregister_node(struct node *node); +#endif + #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; @@ -1109,6 +1154,14 @@ static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { } + +static inline void hugetlb_register_node(struct node *node) +{ +} + +static inline void hugetlb_unregister_node(struct node *node) +{ +} #endif /* CONFIG_HUGETLB_PAGE */ static inline spinlock_t *huge_pte_lock(struct hstate *h, @@ -1123,14 +1176,10 @@ static inline spinlock_t *huge_pte_lock(struct hstate *h, #if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA) extern void __init hugetlb_cma_reserve(int order); -extern void __init hugetlb_cma_check(void); #else static inline __init void hugetlb_cma_reserve(int order) { } -static inline __init void hugetlb_cma_check(void) -{ -} #endif bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr); diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 379344828e78..630cd255d0cf 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -90,32 +90,31 @@ hugetlb_cgroup_from_page_rsvd(struct page *page) return __hugetlb_cgroup_from_page(page, true); } -static inline int __set_hugetlb_cgroup(struct page *page, +static inline void __set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg, bool rsvd) { VM_BUG_ON_PAGE(!PageHuge(page), page); if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) - return -1; + return; if (rsvd) set_page_private(page + SUBPAGE_INDEX_CGROUP_RSVD, (unsigned long)h_cg); else set_page_private(page + SUBPAGE_INDEX_CGROUP, (unsigned long)h_cg); - return 0; } -static inline int set_hugetlb_cgroup(struct page *page, +static inline void set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg) { - return __set_hugetlb_cgroup(page, h_cg, false); + __set_hugetlb_cgroup(page, h_cg, false); } -static inline int set_hugetlb_cgroup_rsvd(struct page *page, +static inline void set_hugetlb_cgroup_rsvd(struct page *page, struct hugetlb_cgroup *h_cg) { - return __set_hugetlb_cgroup(page, h_cg, true); + __set_hugetlb_cgroup(page, h_cg, true); } static inline bool hugetlb_cgroup_disabled(void) @@ -199,16 +198,14 @@ hugetlb_cgroup_from_page_rsvd(struct page *page) return NULL; } -static inline int set_hugetlb_cgroup(struct page *page, +static inline void set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg) { - return 0; } -static inline int set_hugetlb_cgroup_rsvd(struct page *page, +static inline void set_hugetlb_cgroup_rsvd(struct page *page, struct hugetlb_cgroup *h_cg) { - return 0; } static inline bool hugetlb_cgroup_disabled(void) diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 78dd7035d1e5..f319bd26b030 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -74,12 +74,12 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, extern int register_perf_hw_breakpoint(struct perf_event *bp); extern void unregister_hw_breakpoint(struct perf_event *bp); extern void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events); +extern bool hw_breakpoint_is_used(void); extern int dbg_reserve_bp_slot(struct perf_event *bp); extern int dbg_release_bp_slot(struct perf_event *bp); extern int reserve_bp_slot(struct perf_event *bp); extern void release_bp_slot(struct perf_event *bp); -int hw_breakpoint_weight(struct perf_event *bp); int arch_reserve_bp_slot(struct perf_event *bp); void arch_release_bp_slot(struct perf_event *bp); void arch_unregister_hw_breakpoint(struct perf_event *bp); @@ -121,6 +121,8 @@ register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } static inline void unregister_hw_breakpoint(struct perf_event *bp) { } static inline void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events) { } +static inline bool hw_breakpoint_is_used(void) { return false; } + static inline int reserve_bp_slot(struct perf_event *bp) {return -ENOSYS; } static inline void release_bp_slot(struct perf_event *bp) { } diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index aa1d4da03538..77c2885c4c13 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -50,6 +50,7 @@ struct hwrng { struct list_head list; struct kref ref; struct completion cleanup_done; + struct completion dying; }; struct device; @@ -61,4 +62,6 @@ extern int devm_hwrng_register(struct device *dev, struct hwrng *rng); extern void hwrng_unregister(struct hwrng *rng); extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng); +extern long hwrng_msleep(struct hwrng *rng, unsigned int msecs); + #endif /* LINUX_HWRANDOM_H_ */ diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h index 5fa5957586cf..6802596b017c 100644 --- a/include/linux/iio/consumer.h +++ b/include/linux/iio/consumer.h @@ -13,7 +13,7 @@ struct iio_dev; struct iio_chan_spec; struct device; -struct device_node; +struct fwnode_handle; /** * struct iio_channel - everything needed for a consumer to use a channel @@ -99,26 +99,20 @@ void iio_channel_release_all(struct iio_channel *chan); struct iio_channel *devm_iio_channel_get_all(struct device *dev); /** - * of_iio_channel_get_by_name() - get description of all that is needed to access channel. - * @np: Pointer to consumer device tree node + * fwnode_iio_channel_get_by_name() - get description of all that is needed to access channel. + * @fwnode: Pointer to consumer Firmware node * @consumer_channel: Unique name to identify the channel on the consumer * side. This typically describes the channels use within * the consumer. E.g. 'battery_voltage' */ -#ifdef CONFIG_OF -struct iio_channel *of_iio_channel_get_by_name(struct device_node *np, const char *name); -#else -static inline struct iio_channel * -of_iio_channel_get_by_name(struct device_node *np, const char *name) -{ - return NULL; -} -#endif +struct iio_channel *fwnode_iio_channel_get_by_name(struct fwnode_handle *fwnode, + const char *name); /** - * devm_of_iio_channel_get_by_name() - Resource managed version of of_iio_channel_get_by_name(). + * devm_fwnode_iio_channel_get_by_name() - Resource managed version of + * fwnode_iio_channel_get_by_name(). * @dev: Pointer to consumer device. - * @np: Pointer to consumer device tree node + * @fwnode: Pointer to consumer Firmware node * @consumer_channel: Unique name to identify the channel on the consumer * side. This typically describes the channels use within * the consumer. E.g. 'battery_voltage' @@ -129,9 +123,9 @@ of_iio_channel_get_by_name(struct device_node *np, const char *name) * The allocated iio channel is automatically released when the device is * unbound. */ -struct iio_channel *devm_of_iio_channel_get_by_name(struct device *dev, - struct device_node *np, - const char *consumer_channel); +struct iio_channel *devm_fwnode_iio_channel_get_by_name(struct device *dev, + struct fwnode_handle *fwnode, + const char *consumer_channel); struct iio_cb_buffer; /** diff --git a/include/linux/iio/iio-opaque.h b/include/linux/iio/iio-opaque.h index 6b3586b3f952..d1f8b30a7c8b 100644 --- a/include/linux/iio/iio-opaque.h +++ b/include/linux/iio/iio-opaque.h @@ -11,6 +11,7 @@ * checked by device drivers but should be considered * read-only as this is a core internal bit * @driver_module: used to make it harder to undercut users + * @mlock_key: lockdep class for iio_dev lock * @info_exist_lock: lock to prevent use during removal * @trig_readonly: mark the current trigger immutable * @event_interface: event chrdevs associated with interrupt lines @@ -42,6 +43,7 @@ struct iio_dev_opaque { int currentmode; int id; struct module *driver_module; + struct lock_class_key mlock_key; struct mutex info_exist_lock; bool trig_readonly; struct iio_event_interface *event_interface; diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 5dfbfc991c69..f0ec8a5e5a7a 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -17,7 +17,7 @@ * Currently assumes nano seconds. */ -struct of_phandle_args; +struct fwnode_reference_args; enum iio_shared_by { IIO_SEPARATE, @@ -429,6 +429,8 @@ struct iio_trigger; /* forward declaration */ * provide a custom of_xlate function that reads the * *args* and returns the appropriate index in registered * IIO channels array. + * @fwnode_xlate: fwnode based function pointer to obtain channel specifier index. + * Functionally the same as @of_xlate. * @hwfifo_set_watermark: function pointer to set the current hardware * fifo watermark level; see hwfifo_* entries in * Documentation/ABI/testing/sysfs-bus-iio for details on @@ -508,8 +510,8 @@ struct iio_info { int (*debugfs_reg_access)(struct iio_dev *indio_dev, unsigned reg, unsigned writeval, unsigned *readval); - int (*of_xlate)(struct iio_dev *indio_dev, - const struct of_phandle_args *iiospec); + int (*fwnode_xlate)(struct iio_dev *indio_dev, + const struct fwnode_reference_args *iiospec); int (*hwfifo_set_watermark)(struct iio_dev *indio_dev, unsigned val); int (*hwfifo_flush_to_buffer)(struct iio_dev *indio_dev, unsigned count); diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h index a7aa91f3a8dc..82faa98c719a 100644 --- a/include/linux/iio/types.h +++ b/include/linux/iio/types.h @@ -17,6 +17,8 @@ enum iio_event_info { IIO_EV_INFO_HIGH_PASS_FILTER_3DB, IIO_EV_INFO_LOW_PASS_FILTER_3DB, IIO_EV_INFO_TIMEOUT, + IIO_EV_INFO_RESET_TIMEOUT, + IIO_EV_INFO_TAP2_MIN_DELAY, }; #define IIO_VAL_INT 1 @@ -63,6 +65,7 @@ enum iio_chan_info_enum { IIO_CHAN_INFO_OVERSAMPLING_RATIO, IIO_CHAN_INFO_THERMOCOUPLE_TYPE, IIO_CHAN_INFO_CALIBAMBIENT, + IIO_CHAN_INFO_ZEROPOINT, }; #endif /* _IIO_TYPES_H_ */ diff --git a/include/linux/init.h b/include/linux/init.h index a0a90cd73ebe..077d7f93b402 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -134,7 +134,7 @@ static inline initcall_t initcall_from_entry(initcall_entry_t *entry) extern initcall_entry_t __con_initcall_start[], __con_initcall_end[]; -/* Used for contructor calls. */ +/* Used for constructor calls. */ typedef void (*ctor_fn_t)(void); struct file_system_type; diff --git a/include/linux/input/auo-pixcir-ts.h b/include/linux/input/auo-pixcir-ts.h deleted file mode 100644 index ed0776997a7a..000000000000 --- a/include/linux/input/auo-pixcir-ts.h +++ /dev/null @@ -1,44 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Driver for AUO in-cell touchscreens - * - * Copyright (c) 2011 Heiko Stuebner <heiko@sntech.de> - * - * based on auo_touch.h from Dell Streak kernel - * - * Copyright (c) 2008 QUALCOMM Incorporated. - * Copyright (c) 2008 QUALCOMM USA, INC. - */ - -#ifndef __AUO_PIXCIR_TS_H__ -#define __AUO_PIXCIR_TS_H__ - -/* - * Interrupt modes: - * periodical: interrupt is asserted periodicaly - * compare coordinates: interrupt is asserted when coordinates change - * indicate touch: interrupt is asserted during touch - */ -#define AUO_PIXCIR_INT_PERIODICAL 0x00 -#define AUO_PIXCIR_INT_COMP_COORD 0x01 -#define AUO_PIXCIR_INT_TOUCH_IND 0x02 - -/* - * @gpio_int interrupt gpio - * @int_setting one of AUO_PIXCIR_INT_* - * @init_hw hardwarespecific init - * @exit_hw hardwarespecific shutdown - * @x_max x-resolution - * @y_max y-resolution - */ -struct auo_pixcir_ts_platdata { - int gpio_int; - int gpio_rst; - - int int_setting; - - unsigned int x_max; - unsigned int y_max; -}; - -#endif diff --git a/include/linux/instrumented.h b/include/linux/instrumented.h index 42faebbaa202..501fa8486749 100644 --- a/include/linux/instrumented.h +++ b/include/linux/instrumented.h @@ -2,7 +2,7 @@ /* * This header provides generic wrappers for memory access instrumentation that - * the compiler cannot emit for: KASAN, KCSAN. + * the compiler cannot emit for: KASAN, KCSAN, KMSAN. */ #ifndef _LINUX_INSTRUMENTED_H #define _LINUX_INSTRUMENTED_H @@ -10,6 +10,7 @@ #include <linux/compiler.h> #include <linux/kasan-checks.h> #include <linux/kcsan-checks.h> +#include <linux/kmsan-checks.h> #include <linux/types.h> /** @@ -117,10 +118,11 @@ instrument_copy_to_user(void __user *to, const void *from, unsigned long n) { kasan_check_read(from, n); kcsan_check_read(from, n); + kmsan_copy_to_user(to, from, n, 0); } /** - * instrument_copy_from_user - instrument writes of copy_from_user + * instrument_copy_from_user_before - add instrumentation before copy_from_user * * Instrument writes to kernel memory, that are due to copy_from_user (and * variants). The instrumentation should be inserted before the accesses. @@ -130,10 +132,61 @@ instrument_copy_to_user(void __user *to, const void *from, unsigned long n) * @n number of bytes to copy */ static __always_inline void -instrument_copy_from_user(const void *to, const void __user *from, unsigned long n) +instrument_copy_from_user_before(const void *to, const void __user *from, unsigned long n) { kasan_check_write(to, n); kcsan_check_write(to, n); } +/** + * instrument_copy_from_user_after - add instrumentation after copy_from_user + * + * Instrument writes to kernel memory, that are due to copy_from_user (and + * variants). The instrumentation should be inserted after the accesses. + * + * @to destination address + * @from source address + * @n number of bytes to copy + * @left number of bytes not copied (as returned by copy_from_user) + */ +static __always_inline void +instrument_copy_from_user_after(const void *to, const void __user *from, + unsigned long n, unsigned long left) +{ + kmsan_unpoison_memory(to, n - left); +} + +/** + * instrument_get_user() - add instrumentation to get_user()-like macros + * + * get_user() and friends are fragile, so it may depend on the implementation + * whether the instrumentation happens before or after the data is copied from + * the userspace. + * + * @to destination variable, may not be address-taken + */ +#define instrument_get_user(to) \ +({ \ + u64 __tmp = (u64)(to); \ + kmsan_unpoison_memory(&__tmp, sizeof(__tmp)); \ + to = __tmp; \ +}) + + +/** + * instrument_put_user() - add instrumentation to put_user()-like macros + * + * put_user() and friends are fragile, so it may depend on the implementation + * whether the instrumentation happens before or after the data is copied from + * the userspace. + * + * @from source address + * @ptr userspace pointer to copy to + * @size number of bytes to copy + */ +#define instrument_put_user(from, ptr, size) \ +({ \ + kmsan_copy_to_user(ptr, &from, sizeof(from), 0); \ +}) + #endif /* _LINUX_INSTRUMENTED_H */ diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h index 6bd01f7159c6..cd5c5a27557f 100644 --- a/include/linux/interconnect-provider.h +++ b/include/linux/interconnect-provider.h @@ -123,7 +123,7 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider); void icc_node_del(struct icc_node *node); int icc_nodes_remove(struct icc_provider *provider); int icc_provider_add(struct icc_provider *provider); -int icc_provider_del(struct icc_provider *provider); +void icc_provider_del(struct icc_provider *provider); struct icc_node_data *of_icc_get_from_provider(struct of_phandle_args *spec); void icc_sync_state(struct device *dev); @@ -172,9 +172,8 @@ static inline int icc_provider_add(struct icc_provider *provider) return -ENOTSUPP; } -static inline int icc_provider_del(struct icc_provider *provider) +static inline void icc_provider_del(struct icc_provider *provider) { - return -ENOTSUPP; } static inline struct icc_node_data *of_icc_get_from_provider(struct of_phandle_args *spec) diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index ca98aeadcc80..1f068dfdb140 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -16,7 +16,9 @@ enum io_pgtable_fmt { ARM_V7S, ARM_MALI_LPAE, AMD_IOMMU_V1, + AMD_IOMMU_V2, APPLE_DART, + APPLE_DART2, IO_PGTABLE_NUM_FMTS, }; @@ -260,6 +262,7 @@ extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns; extern struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns; extern struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns; extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns; +extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns; extern struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns; #endif /* __IO_PGTABLE_H */ diff --git a/include/linux/io.h b/include/linux/io.h index 5fc800390fe4..308f4f0cfb93 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -59,8 +59,6 @@ void __iomem *devm_ioremap_uc(struct device *dev, resource_size_t offset, resource_size_t size); void __iomem *devm_ioremap_wc(struct device *dev, resource_size_t offset, resource_size_t size); -void __iomem *devm_ioremap_np(struct device *dev, resource_size_t offset, - resource_size_t size); void devm_iounmap(struct device *dev, void __iomem *addr); int check_signature(const volatile void __iomem *io_addr, const unsigned char *signature, int length); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ea30f00dc145..a325532aeab5 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -212,7 +212,7 @@ struct iommu_iotlb_gather { * @of_xlate: add OF master IDs to iommu grouping * @is_attach_deferred: Check if domain attach should be deferred from iommu * driver init to device driver init (default no) - * @dev_has/enable/disable_feat: per device entries to check/enable/disable + * @dev_enable/disable_feat: per device entries to enable/disable * iommu specific features. * @sva_bind: Bind process address space to device * @sva_unbind: Unbind process address space from device @@ -227,7 +227,7 @@ struct iommu_iotlb_gather { * @owner: Driver module providing these ops */ struct iommu_ops { - bool (*capable)(enum iommu_cap); + bool (*capable)(struct device *dev, enum iommu_cap); /* Domain allocation and freeing by the iommu driver */ struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); @@ -416,11 +416,9 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) return dev->iommu->iommu_dev->ops; } -extern int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops); extern int bus_iommu_probe(struct bus_type *bus); extern bool iommu_present(struct bus_type *bus); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); -extern bool iommu_capable(struct bus_type *bus, enum iommu_cap cap); extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus); extern struct iommu_group *iommu_group_get_by_id(int id); extern void iommu_domain_free(struct iommu_domain *domain); @@ -697,11 +695,6 @@ static inline bool device_iommu_capable(struct device *dev, enum iommu_cap cap) return false; } -static inline bool iommu_capable(struct bus_type *bus, enum iommu_cap cap) -{ - return false; -} - static inline struct iommu_domain *iommu_domain_alloc(struct bus_type *bus) { return NULL; @@ -1070,4 +1063,40 @@ void iommu_debugfs_setup(void); static inline void iommu_debugfs_setup(void) {} #endif +#ifdef CONFIG_IOMMU_DMA +#include <linux/msi.h> + +/* Setup call for arch DMA mapping code */ +void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit); + +int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base); + +int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr); +void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg); + +#else /* CONFIG_IOMMU_DMA */ + +struct msi_desc; +struct msi_msg; + +static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit) +{ +} + +static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) +{ + return -ENODEV; +} + +static inline int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) +{ + return 0; +} + +static inline void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg) +{ +} + +#endif /* CONFIG_IOMMU_DMA */ + #endif /* __LINUX_IOMMU_H */ diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 8a76dca9deee..27642ca15d93 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -79,7 +79,8 @@ struct resource { #define IORESOURCE_IRQ_HIGHLEVEL (1<<2) #define IORESOURCE_IRQ_LOWLEVEL (1<<3) #define IORESOURCE_IRQ_SHAREABLE (1<<4) -#define IORESOURCE_IRQ_OPTIONAL (1<<5) +#define IORESOURCE_IRQ_OPTIONAL (1<<5) +#define IORESOURCE_IRQ_WAKECAPABLE (1<<6) /* PnP DMA specific bits (IORESOURCE_BITS) */ #define IORESOURCE_DMA_TYPE_MASK (3<<0) diff --git a/include/linux/iova.h b/include/linux/iova.h index c6ba6d95d79c..83c00fac2acb 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -75,7 +75,7 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) return iova >> iova_shift(iovad); } -#if IS_ENABLED(CONFIG_IOMMU_IOVA) +#if IS_REACHABLE(CONFIG_IOMMU_IOVA) int iova_cache_get(void); void iova_cache_put(void); diff --git a/include/linux/iova_bitmap.h b/include/linux/iova_bitmap.h new file mode 100644 index 000000000000..c006cf0a25f3 --- /dev/null +++ b/include/linux/iova_bitmap.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022, Oracle and/or its affiliates. + * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved + */ +#ifndef _IOVA_BITMAP_H_ +#define _IOVA_BITMAP_H_ + +#include <linux/types.h> + +struct iova_bitmap; + +typedef int (*iova_bitmap_fn_t)(struct iova_bitmap *bitmap, + unsigned long iova, size_t length, + void *opaque); + +struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length, + unsigned long page_size, + u64 __user *data); +void iova_bitmap_free(struct iova_bitmap *bitmap); +int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque, + iova_bitmap_fn_t fn); +void iova_bitmap_set(struct iova_bitmap *bitmap, + unsigned long iova, size_t length); + +#endif diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index e3e8c8662b49..e8240cf2611a 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -11,6 +11,7 @@ #include <linux/refcount.h> #include <linux/rhashtable-types.h> #include <linux/sysctl.h> +#include <linux/percpu_counter.h> struct user_namespace; @@ -36,8 +37,8 @@ struct ipc_namespace { unsigned int msg_ctlmax; unsigned int msg_ctlmnb; unsigned int msg_ctlmni; - atomic_t msg_bytes; - atomic_t msg_hdrs; + struct percpu_counter percpu_msg_bytes; + struct percpu_counter percpu_msg_hdrs; size_t shm_ctlmax; size_t shm_ctlall; diff --git a/include/linux/irqchip.h b/include/linux/irqchip.h index 3a091d0710ae..d5e6024cb2a8 100644 --- a/include/linux/irqchip.h +++ b/include/linux/irqchip.h @@ -44,7 +44,8 @@ static const struct of_device_id drv_name##_irqchip_match_table[] = { #define IRQCHIP_MATCH(compat, fn) { .compatible = compat, \ .data = typecheck_irq_init_cb(fn), }, -#define IRQCHIP_PLATFORM_DRIVER_END(drv_name) \ + +#define IRQCHIP_PLATFORM_DRIVER_END(drv_name, ...) \ {}, \ }; \ MODULE_DEVICE_TABLE(of, drv_name##_irqchip_match_table); \ @@ -56,6 +57,7 @@ static struct platform_driver drv_name##_driver = { \ .owner = THIS_MODULE, \ .of_match_table = drv_name##_irqchip_match_table, \ .suppress_bind_attrs = true, \ + __VA_ARGS__ \ }, \ }; \ builtin_platform_driver(drv_name##_driver) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 1cd4e36890fb..844a8e30e6de 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -169,6 +169,7 @@ int generic_handle_irq_safe(unsigned int irq); * conversion failed. */ int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq); +int generic_handle_domain_irq_safe(struct irq_domain *domain, unsigned int hwirq); int generic_handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq); #endif diff --git a/include/linux/isa.h b/include/linux/isa.h index e30963190968..4fbbf5e36e08 100644 --- a/include/linux/isa.h +++ b/include/linux/isa.h @@ -38,6 +38,32 @@ static inline void isa_unregister_driver(struct isa_driver *d) } #endif +#define module_isa_driver_init(__isa_driver, __num_isa_dev) \ +static int __init __isa_driver##_init(void) \ +{ \ + return isa_register_driver(&(__isa_driver), __num_isa_dev); \ +} \ +module_init(__isa_driver##_init) + +#define module_isa_driver_with_irq_init(__isa_driver, __num_isa_dev, __num_irq) \ +static int __init __isa_driver##_init(void) \ +{ \ + if (__num_irq != __num_isa_dev) { \ + pr_err("%s: Number of irq (%u) does not match number of base (%u)\n", \ + __isa_driver.driver.name, __num_irq, __num_isa_dev); \ + return -EINVAL; \ + } \ + return isa_register_driver(&(__isa_driver), __num_isa_dev); \ +} \ +module_init(__isa_driver##_init) + +#define module_isa_driver_exit(__isa_driver) \ +static void __exit __isa_driver##_exit(void) \ +{ \ + isa_unregister_driver(&(__isa_driver)); \ +} \ +module_exit(__isa_driver##_exit) + /** * module_isa_driver() - Helper macro for registering a ISA driver * @__isa_driver: isa_driver struct @@ -48,16 +74,22 @@ static inline void isa_unregister_driver(struct isa_driver *d) * use this macro once, and calling it replaces module_init and module_exit. */ #define module_isa_driver(__isa_driver, __num_isa_dev) \ -static int __init __isa_driver##_init(void) \ -{ \ - return isa_register_driver(&(__isa_driver), __num_isa_dev); \ -} \ -module_init(__isa_driver##_init); \ -static void __exit __isa_driver##_exit(void) \ -{ \ - isa_unregister_driver(&(__isa_driver)); \ -} \ -module_exit(__isa_driver##_exit); +module_isa_driver_init(__isa_driver, __num_isa_dev); \ +module_isa_driver_exit(__isa_driver) + +/** + * module_isa_driver_with_irq() - Helper macro for registering an ISA driver with irq + * @__isa_driver: isa_driver struct + * @__num_isa_dev: number of devices to register + * @__num_irq: number of IRQ to register + * + * Helper macro for ISA drivers with irq that do not do anything special in + * module init/exit. Each module may only use this macro once, and calling it + * replaces module_init and module_exit. + */ +#define module_isa_driver_with_irq(__isa_driver, __num_isa_dev, __num_irq) \ +module_isa_driver_with_irq_init(__isa_driver, __num_isa_dev, __num_irq); \ +module_isa_driver_exit(__isa_driver) /** * max_num_isa_dev() - Maximum possible number registered of an ISA device diff --git a/include/linux/iversion.h b/include/linux/iversion.h index 3bfebde5a1a6..e27bd4f55d84 100644 --- a/include/linux/iversion.h +++ b/include/linux/iversion.h @@ -123,17 +123,12 @@ inode_peek_iversion_raw(const struct inode *inode) static inline void inode_set_max_iversion_raw(struct inode *inode, u64 val) { - u64 cur, old; + u64 cur = inode_peek_iversion_raw(inode); - cur = inode_peek_iversion_raw(inode); - for (;;) { + do { if (cur > val) break; - old = atomic64_cmpxchg(&inode->i_version, cur, val); - if (likely(old == cur)) - break; - cur = old; - } + } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, val)); } /** @@ -177,56 +172,7 @@ inode_set_iversion_queried(struct inode *inode, u64 val) I_VERSION_QUERIED); } -/** - * inode_maybe_inc_iversion - increments i_version - * @inode: inode with the i_version that should be updated - * @force: increment the counter even if it's not necessary? - * - * Every time the inode is modified, the i_version field must be seen to have - * changed by any observer. - * - * If "force" is set or the QUERIED flag is set, then ensure that we increment - * the value, and clear the queried flag. - * - * In the common case where neither is set, then we can return "false" without - * updating i_version. - * - * If this function returns false, and no other metadata has changed, then we - * can avoid logging the metadata. - */ -static inline bool -inode_maybe_inc_iversion(struct inode *inode, bool force) -{ - u64 cur, old, new; - - /* - * The i_version field is not strictly ordered with any other inode - * information, but the legacy inode_inc_iversion code used a spinlock - * to serialize increments. - * - * Here, we add full memory barriers to ensure that any de-facto - * ordering with other info is preserved. - * - * This barrier pairs with the barrier in inode_query_iversion() - */ - smp_mb(); - cur = inode_peek_iversion_raw(inode); - for (;;) { - /* If flag is clear then we needn't do anything */ - if (!force && !(cur & I_VERSION_QUERIED)) - return false; - - /* Since lowest bit is flag, add 2 to avoid it */ - new = (cur & ~I_VERSION_QUERIED) + I_VERSION_INCREMENT; - - old = atomic64_cmpxchg(&inode->i_version, cur, new); - if (likely(old == cur)) - break; - cur = old; - } - return true; -} - +bool inode_maybe_inc_iversion(struct inode *inode, bool force); /** * inode_inc_iversion - forcibly increment i_version @@ -304,10 +250,10 @@ inode_peek_iversion(const struct inode *inode) static inline u64 inode_query_iversion(struct inode *inode) { - u64 cur, old, new; + u64 cur, new; cur = inode_peek_iversion_raw(inode); - for (;;) { + do { /* If flag is already set, then no need to swap */ if (cur & I_VERSION_QUERIED) { /* @@ -320,11 +266,7 @@ inode_query_iversion(struct inode *inode) } new = cur | I_VERSION_QUERIED; - old = atomic64_cmpxchg(&inode->i_version, cur, new); - if (likely(old == cur)) - break; - cur = old; - } + } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new)); return cur >> I_VERSION_QUERIED_SHIFT; } diff --git a/include/linux/kasan.h b/include/linux/kasan.h index b092277bf48d..d811b3d7d2a1 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -98,19 +98,13 @@ static inline bool kasan_has_integrated_init(void) #ifdef CONFIG_KASAN struct kasan_cache { +#ifdef CONFIG_KASAN_GENERIC int alloc_meta_offset; int free_meta_offset; +#endif bool is_kmalloc; }; -slab_flags_t __kasan_never_merge(void); -static __always_inline slab_flags_t kasan_never_merge(void) -{ - if (kasan_enabled()) - return __kasan_never_merge(); - return 0; -} - void __kasan_unpoison_range(const void *addr, size_t size); static __always_inline void kasan_unpoison_range(const void *addr, size_t size) { @@ -134,15 +128,6 @@ static __always_inline void kasan_unpoison_pages(struct page *page, __kasan_unpoison_pages(page, order, init); } -void __kasan_cache_create(struct kmem_cache *cache, unsigned int *size, - slab_flags_t *flags); -static __always_inline void kasan_cache_create(struct kmem_cache *cache, - unsigned int *size, slab_flags_t *flags) -{ - if (kasan_enabled()) - __kasan_cache_create(cache, size, flags); -} - void __kasan_cache_create_kmalloc(struct kmem_cache *cache); static __always_inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) { @@ -150,14 +135,6 @@ static __always_inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) __kasan_cache_create_kmalloc(cache); } -size_t __kasan_metadata_size(struct kmem_cache *cache); -static __always_inline size_t kasan_metadata_size(struct kmem_cache *cache) -{ - if (kasan_enabled()) - return __kasan_metadata_size(cache); - return 0; -} - void __kasan_poison_slab(struct slab *slab); static __always_inline void kasan_poison_slab(struct slab *slab) { @@ -269,20 +246,12 @@ static __always_inline bool kasan_check_byte(const void *addr) #else /* CONFIG_KASAN */ -static inline slab_flags_t kasan_never_merge(void) -{ - return 0; -} static inline void kasan_unpoison_range(const void *address, size_t size) {} static inline void kasan_poison_pages(struct page *page, unsigned int order, bool init) {} static inline void kasan_unpoison_pages(struct page *page, unsigned int order, bool init) {} -static inline void kasan_cache_create(struct kmem_cache *cache, - unsigned int *size, - slab_flags_t *flags) {} static inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) {} -static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } static inline void kasan_poison_slab(struct slab *slab) {} static inline void kasan_unpoison_object_data(struct kmem_cache *cache, void *object) {} @@ -333,6 +302,11 @@ static inline void kasan_unpoison_task_stack(struct task_struct *task) {} #ifdef CONFIG_KASAN_GENERIC +size_t kasan_metadata_size(struct kmem_cache *cache); +slab_flags_t kasan_never_merge(void); +void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, + slab_flags_t *flags); + void kasan_cache_shrink(struct kmem_cache *cache); void kasan_cache_shutdown(struct kmem_cache *cache); void kasan_record_aux_stack(void *ptr); @@ -340,6 +314,21 @@ void kasan_record_aux_stack_noalloc(void *ptr); #else /* CONFIG_KASAN_GENERIC */ +/* Tag-based KASAN modes do not use per-object metadata. */ +static inline size_t kasan_metadata_size(struct kmem_cache *cache) +{ + return 0; +} +/* And thus nothing prevents cache merging. */ +static inline slab_flags_t kasan_never_merge(void) +{ + return 0; +} +/* And no cache-related metadata initialization is required. */ +static inline void kasan_cache_create(struct kmem_cache *cache, + unsigned int *size, + slab_flags_t *flags) {} + static inline void kasan_cache_shrink(struct kmem_cache *cache) {} static inline void kasan_cache_shutdown(struct kmem_cache *cache) {} static inline void kasan_record_aux_stack(void *ptr) {} diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 367044d7708c..73f5c120def8 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -108,10 +108,12 @@ enum kernfs_node_flag { KERNFS_HAS_SEQ_SHOW = 0x0040, KERNFS_HAS_MMAP = 0x0080, KERNFS_LOCKDEP = 0x0100, + KERNFS_HIDDEN = 0x0200, KERNFS_SUICIDAL = 0x0400, KERNFS_SUICIDED = 0x0800, KERNFS_EMPTY_DIR = 0x1000, KERNFS_HAS_RELEASE = 0x2000, + KERNFS_REMOVING = 0x4000, }; /* @flags for kernfs_create_root() */ @@ -429,6 +431,7 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, const char *name, struct kernfs_node *target); void kernfs_activate(struct kernfs_node *kn); +void kernfs_show(struct kernfs_node *kn, bool show); void kernfs_remove(struct kernfs_node *kn); void kernfs_break_active_protection(struct kernfs_node *kn); void kernfs_unbreak_active_protection(struct kernfs_node *kn); diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 13e6c4b58f07..41a686996aaa 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -427,7 +427,7 @@ extern int kexec_load_disabled; extern bool kexec_in_progress; int crash_shrink_memory(unsigned long new_size); -size_t crash_get_memory_size(void); +ssize_t crash_get_memory_size(void); #ifndef arch_kexec_protect_crashkres /* diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index 384f034ae947..70162d707caf 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -16,11 +16,13 @@ extern void khugepaged_enter_vma(struct vm_area_struct *vma, unsigned long vm_flags); extern void khugepaged_min_free_kbytes_update(void); #ifdef CONFIG_SHMEM -extern void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr); +extern int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, + bool install_pmd); #else -static inline void collapse_pte_mapped_thp(struct mm_struct *mm, - unsigned long addr) +static inline int collapse_pte_mapped_thp(struct mm_struct *mm, + unsigned long addr, bool install_pmd) { + return 0; } #endif @@ -46,9 +48,10 @@ static inline void khugepaged_enter_vma(struct vm_area_struct *vma, unsigned long vm_flags) { } -static inline void collapse_pte_mapped_thp(struct mm_struct *mm, - unsigned long addr) +static inline int collapse_pte_mapped_thp(struct mm_struct *mm, + unsigned long addr, bool install_pmd) { + return 0; } static inline void khugepaged_min_free_kbytes_update(void) diff --git a/include/linux/kmsan-checks.h b/include/linux/kmsan-checks.h new file mode 100644 index 000000000000..c4cae333deec --- /dev/null +++ b/include/linux/kmsan-checks.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * KMSAN checks to be used for one-off annotations in subsystems. + * + * Copyright (C) 2017-2022 Google LLC + * Author: Alexander Potapenko <glider@google.com> + * + */ + +#ifndef _LINUX_KMSAN_CHECKS_H +#define _LINUX_KMSAN_CHECKS_H + +#include <linux/types.h> + +#ifdef CONFIG_KMSAN + +/** + * kmsan_poison_memory() - Mark the memory range as uninitialized. + * @address: address to start with. + * @size: size of buffer to poison. + * @flags: GFP flags for allocations done by this function. + * + * Until other data is written to this range, KMSAN will treat it as + * uninitialized. Error reports for this memory will reference the call site of + * kmsan_poison_memory() as origin. + */ +void kmsan_poison_memory(const void *address, size_t size, gfp_t flags); + +/** + * kmsan_unpoison_memory() - Mark the memory range as initialized. + * @address: address to start with. + * @size: size of buffer to unpoison. + * + * Until other data is written to this range, KMSAN will treat it as + * initialized. + */ +void kmsan_unpoison_memory(const void *address, size_t size); + +/** + * kmsan_check_memory() - Check the memory range for being initialized. + * @address: address to start with. + * @size: size of buffer to check. + * + * If any piece of the given range is marked as uninitialized, KMSAN will report + * an error. + */ +void kmsan_check_memory(const void *address, size_t size); + +/** + * kmsan_copy_to_user() - Notify KMSAN about a data transfer to userspace. + * @to: destination address in the userspace. + * @from: source address in the kernel. + * @to_copy: number of bytes to copy. + * @left: number of bytes not copied. + * + * If this is a real userspace data transfer, KMSAN checks the bytes that were + * actually copied to ensure there was no information leak. If @to belongs to + * the kernel space (which is possible for compat syscalls), KMSAN just copies + * the metadata. + */ +void kmsan_copy_to_user(void __user *to, const void *from, size_t to_copy, + size_t left); + +#else + +static inline void kmsan_poison_memory(const void *address, size_t size, + gfp_t flags) +{ +} +static inline void kmsan_unpoison_memory(const void *address, size_t size) +{ +} +static inline void kmsan_check_memory(const void *address, size_t size) +{ +} +static inline void kmsan_copy_to_user(void __user *to, const void *from, + size_t to_copy, size_t left) +{ +} + +#endif + +#endif /* _LINUX_KMSAN_CHECKS_H */ diff --git a/include/linux/kmsan.h b/include/linux/kmsan.h new file mode 100644 index 000000000000..e38ae3c34618 --- /dev/null +++ b/include/linux/kmsan.h @@ -0,0 +1,330 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * KMSAN API for subsystems. + * + * Copyright (C) 2017-2022 Google LLC + * Author: Alexander Potapenko <glider@google.com> + * + */ +#ifndef _LINUX_KMSAN_H +#define _LINUX_KMSAN_H + +#include <linux/dma-direction.h> +#include <linux/gfp.h> +#include <linux/kmsan-checks.h> +#include <linux/types.h> + +struct page; +struct kmem_cache; +struct task_struct; +struct scatterlist; +struct urb; + +#ifdef CONFIG_KMSAN + +/** + * kmsan_task_create() - Initialize KMSAN state for the task. + * @task: task to initialize. + */ +void kmsan_task_create(struct task_struct *task); + +/** + * kmsan_task_exit() - Notify KMSAN that a task has exited. + * @task: task about to finish. + */ +void kmsan_task_exit(struct task_struct *task); + +/** + * kmsan_init_shadow() - Initialize KMSAN shadow at boot time. + * + * Allocate and initialize KMSAN metadata for early allocations. + */ +void __init kmsan_init_shadow(void); + +/** + * kmsan_init_runtime() - Initialize KMSAN state and enable KMSAN. + */ +void __init kmsan_init_runtime(void); + +/** + * kmsan_memblock_free_pages() - handle freeing of memblock pages. + * @page: struct page to free. + * @order: order of @page. + * + * Freed pages are either returned to buddy allocator or held back to be used + * as metadata pages. + */ +bool __init kmsan_memblock_free_pages(struct page *page, unsigned int order); + +/** + * kmsan_alloc_page() - Notify KMSAN about an alloc_pages() call. + * @page: struct page pointer returned by alloc_pages(). + * @order: order of allocated struct page. + * @flags: GFP flags used by alloc_pages() + * + * KMSAN marks 1<<@order pages starting at @page as uninitialized, unless + * @flags contain __GFP_ZERO. + */ +void kmsan_alloc_page(struct page *page, unsigned int order, gfp_t flags); + +/** + * kmsan_free_page() - Notify KMSAN about a free_pages() call. + * @page: struct page pointer passed to free_pages(). + * @order: order of deallocated struct page. + * + * KMSAN marks freed memory as uninitialized. + */ +void kmsan_free_page(struct page *page, unsigned int order); + +/** + * kmsan_copy_page_meta() - Copy KMSAN metadata between two pages. + * @dst: destination page. + * @src: source page. + * + * KMSAN copies the contents of metadata pages for @src into the metadata pages + * for @dst. If @dst has no associated metadata pages, nothing happens. + * If @src has no associated metadata pages, @dst metadata pages are unpoisoned. + */ +void kmsan_copy_page_meta(struct page *dst, struct page *src); + +/** + * kmsan_slab_alloc() - Notify KMSAN about a slab allocation. + * @s: slab cache the object belongs to. + * @object: object pointer. + * @flags: GFP flags passed to the allocator. + * + * Depending on cache flags and GFP flags, KMSAN sets up the metadata of the + * newly created object, marking it as initialized or uninitialized. + */ +void kmsan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags); + +/** + * kmsan_slab_free() - Notify KMSAN about a slab deallocation. + * @s: slab cache the object belongs to. + * @object: object pointer. + * + * KMSAN marks the freed object as uninitialized. + */ +void kmsan_slab_free(struct kmem_cache *s, void *object); + +/** + * kmsan_kmalloc_large() - Notify KMSAN about a large slab allocation. + * @ptr: object pointer. + * @size: object size. + * @flags: GFP flags passed to the allocator. + * + * Similar to kmsan_slab_alloc(), but for large allocations. + */ +void kmsan_kmalloc_large(const void *ptr, size_t size, gfp_t flags); + +/** + * kmsan_kfree_large() - Notify KMSAN about a large slab deallocation. + * @ptr: object pointer. + * + * Similar to kmsan_slab_free(), but for large allocations. + */ +void kmsan_kfree_large(const void *ptr); + +/** + * kmsan_map_kernel_range_noflush() - Notify KMSAN about a vmap. + * @start: start of vmapped range. + * @end: end of vmapped range. + * @prot: page protection flags used for vmap. + * @pages: array of pages. + * @page_shift: page_shift passed to vmap_range_noflush(). + * + * KMSAN maps shadow and origin pages of @pages into contiguous ranges in + * vmalloc metadata address range. + */ +void kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end, + pgprot_t prot, struct page **pages, + unsigned int page_shift); + +/** + * kmsan_vunmap_kernel_range_noflush() - Notify KMSAN about a vunmap. + * @start: start of vunmapped range. + * @end: end of vunmapped range. + * + * KMSAN unmaps the contiguous metadata ranges created by + * kmsan_map_kernel_range_noflush(). + */ +void kmsan_vunmap_range_noflush(unsigned long start, unsigned long end); + +/** + * kmsan_ioremap_page_range() - Notify KMSAN about a ioremap_page_range() call. + * @addr: range start. + * @end: range end. + * @phys_addr: physical range start. + * @prot: page protection flags used for ioremap_page_range(). + * @page_shift: page_shift argument passed to vmap_range_noflush(). + * + * KMSAN creates new metadata pages for the physical pages mapped into the + * virtual memory. + */ +void kmsan_ioremap_page_range(unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot, + unsigned int page_shift); + +/** + * kmsan_iounmap_page_range() - Notify KMSAN about a iounmap_page_range() call. + * @start: range start. + * @end: range end. + * + * KMSAN unmaps the metadata pages for the given range and, unlike for + * vunmap_page_range(), also deallocates them. + */ +void kmsan_iounmap_page_range(unsigned long start, unsigned long end); + +/** + * kmsan_handle_dma() - Handle a DMA data transfer. + * @page: first page of the buffer. + * @offset: offset of the buffer within the first page. + * @size: buffer size. + * @dir: one of possible dma_data_direction values. + * + * Depending on @direction, KMSAN: + * * checks the buffer, if it is copied to device; + * * initializes the buffer, if it is copied from device; + * * does both, if this is a DMA_BIDIRECTIONAL transfer. + */ +void kmsan_handle_dma(struct page *page, size_t offset, size_t size, + enum dma_data_direction dir); + +/** + * kmsan_handle_dma_sg() - Handle a DMA transfer using scatterlist. + * @sg: scatterlist holding DMA buffers. + * @nents: number of scatterlist entries. + * @dir: one of possible dma_data_direction values. + * + * Depending on @direction, KMSAN: + * * checks the buffers in the scatterlist, if they are copied to device; + * * initializes the buffers, if they are copied from device; + * * does both, if this is a DMA_BIDIRECTIONAL transfer. + */ +void kmsan_handle_dma_sg(struct scatterlist *sg, int nents, + enum dma_data_direction dir); + +/** + * kmsan_handle_urb() - Handle a USB data transfer. + * @urb: struct urb pointer. + * @is_out: data transfer direction (true means output to hardware). + * + * If @is_out is true, KMSAN checks the transfer buffer of @urb. Otherwise, + * KMSAN initializes the transfer buffer. + */ +void kmsan_handle_urb(const struct urb *urb, bool is_out); + +/** + * kmsan_unpoison_entry_regs() - Handle pt_regs in low-level entry code. + * @regs: struct pt_regs pointer received from assembly code. + * + * KMSAN unpoisons the contents of the passed pt_regs, preventing potential + * false positive reports. Unlike kmsan_unpoison_memory(), + * kmsan_unpoison_entry_regs() can be called from the regions where + * kmsan_in_runtime() returns true, which is the case in early entry code. + */ +void kmsan_unpoison_entry_regs(const struct pt_regs *regs); + +#else + +static inline void kmsan_init_shadow(void) +{ +} + +static inline void kmsan_init_runtime(void) +{ +} + +static inline bool kmsan_memblock_free_pages(struct page *page, + unsigned int order) +{ + return true; +} + +static inline void kmsan_task_create(struct task_struct *task) +{ +} + +static inline void kmsan_task_exit(struct task_struct *task) +{ +} + +static inline int kmsan_alloc_page(struct page *page, unsigned int order, + gfp_t flags) +{ + return 0; +} + +static inline void kmsan_free_page(struct page *page, unsigned int order) +{ +} + +static inline void kmsan_copy_page_meta(struct page *dst, struct page *src) +{ +} + +static inline void kmsan_slab_alloc(struct kmem_cache *s, void *object, + gfp_t flags) +{ +} + +static inline void kmsan_slab_free(struct kmem_cache *s, void *object) +{ +} + +static inline void kmsan_kmalloc_large(const void *ptr, size_t size, + gfp_t flags) +{ +} + +static inline void kmsan_kfree_large(const void *ptr) +{ +} + +static inline void kmsan_vmap_pages_range_noflush(unsigned long start, + unsigned long end, + pgprot_t prot, + struct page **pages, + unsigned int page_shift) +{ +} + +static inline void kmsan_vunmap_range_noflush(unsigned long start, + unsigned long end) +{ +} + +static inline void kmsan_ioremap_page_range(unsigned long start, + unsigned long end, + phys_addr_t phys_addr, + pgprot_t prot, + unsigned int page_shift) +{ +} + +static inline void kmsan_iounmap_page_range(unsigned long start, + unsigned long end) +{ +} + +static inline void kmsan_handle_dma(struct page *page, size_t offset, + size_t size, enum dma_data_direction dir) +{ +} + +static inline void kmsan_handle_dma_sg(struct scatterlist *sg, int nents, + enum dma_data_direction dir) +{ +} + +static inline void kmsan_handle_urb(const struct urb *urb, bool is_out) +{ +} + +static inline void kmsan_unpoison_entry_regs(const struct pt_regs *regs) +{ +} + +#endif + +#endif /* _LINUX_KMSAN_H */ diff --git a/include/linux/kmsan_types.h b/include/linux/kmsan_types.h new file mode 100644 index 000000000000..8bfa6c98176d --- /dev/null +++ b/include/linux/kmsan_types.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * A minimal header declaring types added by KMSAN to existing kernel structs. + * + * Copyright (C) 2017-2022 Google LLC + * Author: Alexander Potapenko <glider@google.com> + * + */ +#ifndef _LINUX_KMSAN_TYPES_H +#define _LINUX_KMSAN_TYPES_H + +/* These constants are defined in the MSan LLVM instrumentation pass. */ +#define KMSAN_RETVAL_SIZE 800 +#define KMSAN_PARAM_SIZE 800 + +struct kmsan_context_state { + char param_tls[KMSAN_PARAM_SIZE]; + char retval_tls[KMSAN_RETVAL_SIZE]; + char va_arg_tls[KMSAN_PARAM_SIZE]; + char va_arg_origin_tls[KMSAN_PARAM_SIZE]; + u64 va_arg_overflow_size_tls; + char param_origin_tls[KMSAN_PARAM_SIZE]; + u32 retval_origin_tls; +}; + +#undef KMSAN_PARAM_SIZE +#undef KMSAN_RETVAL_SIZE + +struct kmsan_ctx { + struct kmsan_context_state cstate; + int kmsan_in_runtime; + bool allow_reporting; +}; + +#endif /* _LINUX_KMSAN_TYPES_H */ diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 0b4f17418f64..7e232ba59b86 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -15,9 +15,6 @@ #include <linux/sched.h> #include <linux/sched/coredump.h> -struct stable_node; -struct mem_cgroup; - #ifdef CONFIG_KSM int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f4519d3689e1..32f259fa5801 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -151,12 +151,11 @@ static inline bool is_error_page(struct page *page) #define KVM_REQUEST_NO_ACTION BIT(10) /* * Architecture-independent vcpu->requests bit members - * Bits 4-7 are reserved for more arch-independent bits. + * Bits 3-7 are reserved for more arch-independent bits. */ #define KVM_REQ_TLB_FLUSH (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_UNBLOCK 2 -#define KVM_REQ_UNHALT 3 #define KVM_REQUEST_ARCH_BASE 8 /* @@ -2248,6 +2247,19 @@ static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) #endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ /* + * If more than one page is being (un)accounted, @virt must be the address of + * the first page of a block of pages what were allocated together (i.e + * accounted together). + * + * kvm_account_pgtable_pages() is thread-safe because mod_lruvec_page_state() + * is thread-safe. + */ +static inline void kvm_account_pgtable_pages(void *virt, int nr) +{ + mod_lruvec_page_state(virt_to_page(virt), NR_SECONDARY_PAGETABLE, nr); +} + +/* * This defines how many reserved entries we want to keep before we * kick the vcpu to the userspace to avoid dirty ring full. This * value can be tuned to higher if e.g. PML is enabled on the host. diff --git a/include/linux/libata.h b/include/linux/libata.h index 20765d1c5f80..fe990176e6ee 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -101,7 +101,7 @@ enum { ATA_DFLAG_UNLOCK_HPA = (1 << 18), /* unlock HPA */ ATA_DFLAG_NCQ_SEND_RECV = (1 << 19), /* device supports NCQ SEND and RECV */ ATA_DFLAG_NCQ_PRIO = (1 << 20), /* device supports NCQ priority */ - ATA_DFLAG_NCQ_PRIO_ENABLE = (1 << 21), /* Priority cmds sent to dev */ + ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 21), /* Priority cmds sent to dev */ ATA_DFLAG_INIT_MASK = (1 << 24) - 1, ATA_DFLAG_DETACH = (1 << 24), diff --git a/include/linux/linear_range.h b/include/linux/linear_range.h index fd3d0b358f22..2e4f4c3539c0 100644 --- a/include/linux/linear_range.h +++ b/include/linux/linear_range.h @@ -26,6 +26,17 @@ struct linear_range { unsigned int step; }; +#define LINEAR_RANGE(_min, _min_sel, _max_sel, _step) \ + { \ + .min = _min, \ + .min_sel = _min_sel, \ + .max_sel = _max_sel, \ + .step = _step, \ + } + +#define LINEAR_RANGE_IDX(_idx, _min, _min_sel, _max_sel, _step) \ + [_idx] = LINEAR_RANGE(_min, _min_sel, _max_sel, _step) + unsigned int linear_range_values_in_range(const struct linear_range *r); unsigned int linear_range_values_in_range_array(const struct linear_range *r, int ranges); diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h new file mode 100644 index 000000000000..2effab72add1 --- /dev/null +++ b/include/linux/maple_tree.h @@ -0,0 +1,685 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef _LINUX_MAPLE_TREE_H +#define _LINUX_MAPLE_TREE_H +/* + * Maple Tree - An RCU-safe adaptive tree for storing ranges + * Copyright (c) 2018-2022 Oracle + * Authors: Liam R. Howlett <Liam.Howlett@Oracle.com> + * Matthew Wilcox <willy@infradead.org> + */ + +#include <linux/kernel.h> +#include <linux/rcupdate.h> +#include <linux/spinlock.h> +/* #define CONFIG_MAPLE_RCU_DISABLED */ +/* #define CONFIG_DEBUG_MAPLE_TREE_VERBOSE */ + +/* + * Allocated nodes are mutable until they have been inserted into the tree, + * at which time they cannot change their type until they have been removed + * from the tree and an RCU grace period has passed. + * + * Removed nodes have their ->parent set to point to themselves. RCU readers + * check ->parent before relying on the value that they loaded from the + * slots array. This lets us reuse the slots array for the RCU head. + * + * Nodes in the tree point to their parent unless bit 0 is set. + */ +#if defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64) +/* 64bit sizes */ +#define MAPLE_NODE_SLOTS 31 /* 256 bytes including ->parent */ +#define MAPLE_RANGE64_SLOTS 16 /* 256 bytes */ +#define MAPLE_ARANGE64_SLOTS 10 /* 240 bytes */ +#define MAPLE_ARANGE64_META_MAX 15 /* Out of range for metadata */ +#define MAPLE_ALLOC_SLOTS (MAPLE_NODE_SLOTS - 1) +#else +/* 32bit sizes */ +#define MAPLE_NODE_SLOTS 63 /* 256 bytes including ->parent */ +#define MAPLE_RANGE64_SLOTS 32 /* 256 bytes */ +#define MAPLE_ARANGE64_SLOTS 21 /* 240 bytes */ +#define MAPLE_ARANGE64_META_MAX 31 /* Out of range for metadata */ +#define MAPLE_ALLOC_SLOTS (MAPLE_NODE_SLOTS - 2) +#endif /* defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64) */ + +#define MAPLE_NODE_MASK 255UL + +/* + * The node->parent of the root node has bit 0 set and the rest of the pointer + * is a pointer to the tree itself. No more bits are available in this pointer + * (on m68k, the data structure may only be 2-byte aligned). + * + * Internal non-root nodes can only have maple_range_* nodes as parents. The + * parent pointer is 256B aligned like all other tree nodes. When storing a 32 + * or 64 bit values, the offset can fit into 4 bits. The 16 bit values need an + * extra bit to store the offset. This extra bit comes from a reuse of the last + * bit in the node type. This is possible by using bit 1 to indicate if bit 2 + * is part of the type or the slot. + * + * Once the type is decided, the decision of an allocation range type or a range + * type is done by examining the immutable tree flag for the MAPLE_ALLOC_RANGE + * flag. + * + * Node types: + * 0x??1 = Root + * 0x?00 = 16 bit nodes + * 0x010 = 32 bit nodes + * 0x110 = 64 bit nodes + * + * Slot size and location in the parent pointer: + * type : slot location + * 0x??1 : Root + * 0x?00 : 16 bit values, type in 0-1, slot in 2-6 + * 0x010 : 32 bit values, type in 0-2, slot in 3-6 + * 0x110 : 64 bit values, type in 0-2, slot in 3-6 + */ + +/* + * This metadata is used to optimize the gap updating code and in reverse + * searching for gaps or any other code that needs to find the end of the data. + */ +struct maple_metadata { + unsigned char end; + unsigned char gap; +}; + +/* + * Leaf nodes do not store pointers to nodes, they store user data. Users may + * store almost any bit pattern. As noted above, the optimisation of storing an + * entry at 0 in the root pointer cannot be done for data which have the bottom + * two bits set to '10'. We also reserve values with the bottom two bits set to + * '10' which are below 4096 (ie 2, 6, 10 .. 4094) for internal use. Some APIs + * return errnos as a negative errno shifted right by two bits and the bottom + * two bits set to '10', and while choosing to store these values in the array + * is not an error, it may lead to confusion if you're testing for an error with + * mas_is_err(). + * + * Non-leaf nodes store the type of the node pointed to (enum maple_type in bits + * 3-6), bit 2 is reserved. That leaves bits 0-1 unused for now. + * + * In regular B-Tree terms, pivots are called keys. The term pivot is used to + * indicate that the tree is specifying ranges, Pivots may appear in the + * subtree with an entry attached to the value whereas keys are unique to a + * specific position of a B-tree. Pivot values are inclusive of the slot with + * the same index. + */ + +struct maple_range_64 { + struct maple_pnode *parent; + unsigned long pivot[MAPLE_RANGE64_SLOTS - 1]; + union { + void __rcu *slot[MAPLE_RANGE64_SLOTS]; + struct { + void __rcu *pad[MAPLE_RANGE64_SLOTS - 1]; + struct maple_metadata meta; + }; + }; +}; + +/* + * At tree creation time, the user can specify that they're willing to trade off + * storing fewer entries in a tree in return for storing more information in + * each node. + * + * The maple tree supports recording the largest range of NULL entries available + * in this node, also called gaps. This optimises the tree for allocating a + * range. + */ +struct maple_arange_64 { + struct maple_pnode *parent; + unsigned long pivot[MAPLE_ARANGE64_SLOTS - 1]; + void __rcu *slot[MAPLE_ARANGE64_SLOTS]; + unsigned long gap[MAPLE_ARANGE64_SLOTS]; + struct maple_metadata meta; +}; + +struct maple_alloc { + unsigned long total; + unsigned char node_count; + unsigned int request_count; + struct maple_alloc *slot[MAPLE_ALLOC_SLOTS]; +}; + +struct maple_topiary { + struct maple_pnode *parent; + struct maple_enode *next; /* Overlaps the pivot */ +}; + +enum maple_type { + maple_dense, + maple_leaf_64, + maple_range_64, + maple_arange_64, +}; + + +/** + * DOC: Maple tree flags + * + * * MT_FLAGS_ALLOC_RANGE - Track gaps in this tree + * * MT_FLAGS_USE_RCU - Operate in RCU mode + * * MT_FLAGS_HEIGHT_OFFSET - The position of the tree height in the flags + * * MT_FLAGS_HEIGHT_MASK - The mask for the maple tree height value + * * MT_FLAGS_LOCK_MASK - How the mt_lock is used + * * MT_FLAGS_LOCK_IRQ - Acquired irq-safe + * * MT_FLAGS_LOCK_BH - Acquired bh-safe + * * MT_FLAGS_LOCK_EXTERN - mt_lock is not used + * + * MAPLE_HEIGHT_MAX The largest height that can be stored + */ +#define MT_FLAGS_ALLOC_RANGE 0x01 +#define MT_FLAGS_USE_RCU 0x02 +#define MT_FLAGS_HEIGHT_OFFSET 0x02 +#define MT_FLAGS_HEIGHT_MASK 0x7C +#define MT_FLAGS_LOCK_MASK 0x300 +#define MT_FLAGS_LOCK_IRQ 0x100 +#define MT_FLAGS_LOCK_BH 0x200 +#define MT_FLAGS_LOCK_EXTERN 0x300 + +#define MAPLE_HEIGHT_MAX 31 + + +#define MAPLE_NODE_TYPE_MASK 0x0F +#define MAPLE_NODE_TYPE_SHIFT 0x03 + +#define MAPLE_RESERVED_RANGE 4096 + +#ifdef CONFIG_LOCKDEP +typedef struct lockdep_map *lockdep_map_p; +#define mt_lock_is_held(mt) lock_is_held(mt->ma_external_lock) +#define mt_set_external_lock(mt, lock) \ + (mt)->ma_external_lock = &(lock)->dep_map +#else +typedef struct { /* nothing */ } lockdep_map_p; +#define mt_lock_is_held(mt) 1 +#define mt_set_external_lock(mt, lock) do { } while (0) +#endif + +/* + * If the tree contains a single entry at index 0, it is usually stored in + * tree->ma_root. To optimise for the page cache, an entry which ends in '00', + * '01' or '11' is stored in the root, but an entry which ends in '10' will be + * stored in a node. Bits 3-6 are used to store enum maple_type. + * + * The flags are used both to store some immutable information about this tree + * (set at tree creation time) and dynamic information set under the spinlock. + * + * Another use of flags are to indicate global states of the tree. This is the + * case with the MAPLE_USE_RCU flag, which indicates the tree is currently in + * RCU mode. This mode was added to allow the tree to reuse nodes instead of + * re-allocating and RCU freeing nodes when there is a single user. + */ +struct maple_tree { + union { + spinlock_t ma_lock; + lockdep_map_p ma_external_lock; + }; + void __rcu *ma_root; + unsigned int ma_flags; +}; + +/** + * MTREE_INIT() - Initialize a maple tree + * @name: The maple tree name + * @__flags: The maple tree flags + * + */ +#define MTREE_INIT(name, __flags) { \ + .ma_lock = __SPIN_LOCK_UNLOCKED((name).ma_lock), \ + .ma_flags = __flags, \ + .ma_root = NULL, \ +} + +/** + * MTREE_INIT_EXT() - Initialize a maple tree with an external lock. + * @name: The tree name + * @__flags: The maple tree flags + * @__lock: The external lock + */ +#ifdef CONFIG_LOCKDEP +#define MTREE_INIT_EXT(name, __flags, __lock) { \ + .ma_external_lock = &(__lock).dep_map, \ + .ma_flags = (__flags), \ + .ma_root = NULL, \ +} +#else +#define MTREE_INIT_EXT(name, __flags, __lock) MTREE_INIT(name, __flags) +#endif + +#define DEFINE_MTREE(name) \ + struct maple_tree name = MTREE_INIT(name, 0) + +#define mtree_lock(mt) spin_lock((&(mt)->ma_lock)) +#define mtree_unlock(mt) spin_unlock((&(mt)->ma_lock)) + +/* + * The Maple Tree squeezes various bits in at various points which aren't + * necessarily obvious. Usually, this is done by observing that pointers are + * N-byte aligned and thus the bottom log_2(N) bits are available for use. We + * don't use the high bits of pointers to store additional information because + * we don't know what bits are unused on any given architecture. + * + * Nodes are 256 bytes in size and are also aligned to 256 bytes, giving us 8 + * low bits for our own purposes. Nodes are currently of 4 types: + * 1. Single pointer (Range is 0-0) + * 2. Non-leaf Allocation Range nodes + * 3. Non-leaf Range nodes + * 4. Leaf Range nodes All nodes consist of a number of node slots, + * pivots, and a parent pointer. + */ + +struct maple_node { + union { + struct { + struct maple_pnode *parent; + void __rcu *slot[MAPLE_NODE_SLOTS]; + }; + struct { + void *pad; + struct rcu_head rcu; + struct maple_enode *piv_parent; + unsigned char parent_slot; + enum maple_type type; + unsigned char slot_len; + unsigned int ma_flags; + }; + struct maple_range_64 mr64; + struct maple_arange_64 ma64; + struct maple_alloc alloc; + }; +}; + +/* + * More complicated stores can cause two nodes to become one or three and + * potentially alter the height of the tree. Either half of the tree may need + * to be rebalanced against the other. The ma_topiary struct is used to track + * which nodes have been 'cut' from the tree so that the change can be done + * safely at a later date. This is done to support RCU. + */ +struct ma_topiary { + struct maple_enode *head; + struct maple_enode *tail; + struct maple_tree *mtree; +}; + +void *mtree_load(struct maple_tree *mt, unsigned long index); + +int mtree_insert(struct maple_tree *mt, unsigned long index, + void *entry, gfp_t gfp); +int mtree_insert_range(struct maple_tree *mt, unsigned long first, + unsigned long last, void *entry, gfp_t gfp); +int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp, + void *entry, unsigned long size, unsigned long min, + unsigned long max, gfp_t gfp); +int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, + void *entry, unsigned long size, unsigned long min, + unsigned long max, gfp_t gfp); + +int mtree_store_range(struct maple_tree *mt, unsigned long first, + unsigned long last, void *entry, gfp_t gfp); +int mtree_store(struct maple_tree *mt, unsigned long index, + void *entry, gfp_t gfp); +void *mtree_erase(struct maple_tree *mt, unsigned long index); + +void mtree_destroy(struct maple_tree *mt); +void __mt_destroy(struct maple_tree *mt); + +/** + * mtree_empty() - Determine if a tree has any present entries. + * @mt: Maple Tree. + * + * Context: Any context. + * Return: %true if the tree contains only NULL pointers. + */ +static inline bool mtree_empty(const struct maple_tree *mt) +{ + return mt->ma_root == NULL; +} + +/* Advanced API */ + +/* + * The maple state is defined in the struct ma_state and is used to keep track + * of information during operations, and even between operations when using the + * advanced API. + * + * If state->node has bit 0 set then it references a tree location which is not + * a node (eg the root). If bit 1 is set, the rest of the bits are a negative + * errno. Bit 2 (the 'unallocated slots' bit) is clear. Bits 3-6 indicate the + * node type. + * + * state->alloc either has a request number of nodes or an allocated node. If + * stat->alloc has a requested number of nodes, the first bit will be set (0x1) + * and the remaining bits are the value. If state->alloc is a node, then the + * node will be of type maple_alloc. maple_alloc has MAPLE_NODE_SLOTS - 1 for + * storing more allocated nodes, a total number of nodes allocated, and the + * node_count in this node. node_count is the number of allocated nodes in this + * node. The scaling beyond MAPLE_NODE_SLOTS - 1 is handled by storing further + * nodes into state->alloc->slot[0]'s node. Nodes are taken from state->alloc + * by removing a node from the state->alloc node until state->alloc->node_count + * is 1, when state->alloc is returned and the state->alloc->slot[0] is promoted + * to state->alloc. Nodes are pushed onto state->alloc by putting the current + * state->alloc into the pushed node's slot[0]. + * + * The state also contains the implied min/max of the state->node, the depth of + * this search, and the offset. The implied min/max are either from the parent + * node or are 0-oo for the root node. The depth is incremented or decremented + * every time a node is walked down or up. The offset is the slot/pivot of + * interest in the node - either for reading or writing. + * + * When returning a value the maple state index and last respectively contain + * the start and end of the range for the entry. Ranges are inclusive in the + * Maple Tree. + */ +struct ma_state { + struct maple_tree *tree; /* The tree we're operating in */ + unsigned long index; /* The index we're operating on - range start */ + unsigned long last; /* The last index we're operating on - range end */ + struct maple_enode *node; /* The node containing this entry */ + unsigned long min; /* The minimum index of this node - implied pivot min */ + unsigned long max; /* The maximum index of this node - implied pivot max */ + struct maple_alloc *alloc; /* Allocated nodes for this operation */ + unsigned char depth; /* depth of tree descent during write */ + unsigned char offset; + unsigned char mas_flags; +}; + +struct ma_wr_state { + struct ma_state *mas; + struct maple_node *node; /* Decoded mas->node */ + unsigned long r_min; /* range min */ + unsigned long r_max; /* range max */ + enum maple_type type; /* mas->node type */ + unsigned char offset_end; /* The offset where the write ends */ + unsigned char node_end; /* mas->node end */ + unsigned long *pivots; /* mas->node->pivots pointer */ + unsigned long end_piv; /* The pivot at the offset end */ + void __rcu **slots; /* mas->node->slots pointer */ + void *entry; /* The entry to write */ + void *content; /* The existing entry that is being overwritten */ +}; + +#define mas_lock(mas) spin_lock(&((mas)->tree->ma_lock)) +#define mas_unlock(mas) spin_unlock(&((mas)->tree->ma_lock)) + + +/* + * Special values for ma_state.node. + * MAS_START means we have not searched the tree. + * MAS_ROOT means we have searched the tree and the entry we found lives in + * the root of the tree (ie it has index 0, length 1 and is the only entry in + * the tree). + * MAS_NONE means we have searched the tree and there is no node in the + * tree for this entry. For example, we searched for index 1 in an empty + * tree. Or we have a tree which points to a full leaf node and we + * searched for an entry which is larger than can be contained in that + * leaf node. + * MA_ERROR represents an errno. After dropping the lock and attempting + * to resolve the error, the walk would have to be restarted from the + * top of the tree as the tree may have been modified. + */ +#define MAS_START ((struct maple_enode *)1UL) +#define MAS_ROOT ((struct maple_enode *)5UL) +#define MAS_NONE ((struct maple_enode *)9UL) +#define MAS_PAUSE ((struct maple_enode *)17UL) +#define MA_ERROR(err) \ + ((struct maple_enode *)(((unsigned long)err << 2) | 2UL)) + +#define MA_STATE(name, mt, first, end) \ + struct ma_state name = { \ + .tree = mt, \ + .index = first, \ + .last = end, \ + .node = MAS_START, \ + .min = 0, \ + .max = ULONG_MAX, \ + .alloc = NULL, \ + } + +#define MA_WR_STATE(name, ma_state, wr_entry) \ + struct ma_wr_state name = { \ + .mas = ma_state, \ + .content = NULL, \ + .entry = wr_entry, \ + } + +#define MA_TOPIARY(name, tree) \ + struct ma_topiary name = { \ + .head = NULL, \ + .tail = NULL, \ + .mtree = tree, \ + } + +void *mas_walk(struct ma_state *mas); +void *mas_store(struct ma_state *mas, void *entry); +void *mas_erase(struct ma_state *mas); +int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp); +void mas_store_prealloc(struct ma_state *mas, void *entry); +void *mas_find(struct ma_state *mas, unsigned long max); +void *mas_find_rev(struct ma_state *mas, unsigned long min); +int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp); +bool mas_is_err(struct ma_state *mas); + +bool mas_nomem(struct ma_state *mas, gfp_t gfp); +void mas_pause(struct ma_state *mas); +void maple_tree_init(void); +void mas_destroy(struct ma_state *mas); +int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries); + +void *mas_prev(struct ma_state *mas, unsigned long min); +void *mas_next(struct ma_state *mas, unsigned long max); + +int mas_empty_area(struct ma_state *mas, unsigned long min, unsigned long max, + unsigned long size); + +/* Checks if a mas has not found anything */ +static inline bool mas_is_none(struct ma_state *mas) +{ + return mas->node == MAS_NONE; +} + +/* Checks if a mas has been paused */ +static inline bool mas_is_paused(struct ma_state *mas) +{ + return mas->node == MAS_PAUSE; +} + +void mas_dup_tree(struct ma_state *oldmas, struct ma_state *mas); +void mas_dup_store(struct ma_state *mas, void *entry); + +/* + * This finds an empty area from the highest address to the lowest. + * AKA "Topdown" version, + */ +int mas_empty_area_rev(struct ma_state *mas, unsigned long min, + unsigned long max, unsigned long size); +/** + * mas_reset() - Reset a Maple Tree operation state. + * @mas: Maple Tree operation state. + * + * Resets the error or walk state of the @mas so future walks of the + * array will start from the root. Use this if you have dropped the + * lock and want to reuse the ma_state. + * + * Context: Any context. + */ +static inline void mas_reset(struct ma_state *mas) +{ + mas->node = MAS_START; +} + +/** + * mas_for_each() - Iterate over a range of the maple tree. + * @__mas: Maple Tree operation state (maple_state) + * @__entry: Entry retrieved from the tree + * @__max: maximum index to retrieve from the tree + * + * When returned, mas->index and mas->last will hold the entire range for the + * entry. + * + * Note: may return the zero entry. + * + */ +#define mas_for_each(__mas, __entry, __max) \ + while (((__entry) = mas_find((__mas), (__max))) != NULL) + + +/** + * mas_set_range() - Set up Maple Tree operation state for a different index. + * @mas: Maple Tree operation state. + * @start: New start of range in the Maple Tree. + * @last: New end of range in the Maple Tree. + * + * Move the operation state to refer to a different range. This will + * have the effect of starting a walk from the top; see mas_next() + * to move to an adjacent index. + */ +static inline +void mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last) +{ + mas->index = start; + mas->last = last; + mas->node = MAS_START; +} + +/** + * mas_set() - Set up Maple Tree operation state for a different index. + * @mas: Maple Tree operation state. + * @index: New index into the Maple Tree. + * + * Move the operation state to refer to a different index. This will + * have the effect of starting a walk from the top; see mas_next() + * to move to an adjacent index. + */ +static inline void mas_set(struct ma_state *mas, unsigned long index) +{ + + mas_set_range(mas, index, index); +} + +static inline bool mt_external_lock(const struct maple_tree *mt) +{ + return (mt->ma_flags & MT_FLAGS_LOCK_MASK) == MT_FLAGS_LOCK_EXTERN; +} + +/** + * mt_init_flags() - Initialise an empty maple tree with flags. + * @mt: Maple Tree + * @flags: maple tree flags. + * + * If you need to initialise a Maple Tree with special flags (eg, an + * allocation tree), use this function. + * + * Context: Any context. + */ +static inline void mt_init_flags(struct maple_tree *mt, unsigned int flags) +{ + mt->ma_flags = flags; + if (!mt_external_lock(mt)) + spin_lock_init(&mt->ma_lock); + rcu_assign_pointer(mt->ma_root, NULL); +} + +/** + * mt_init() - Initialise an empty maple tree. + * @mt: Maple Tree + * + * An empty Maple Tree. + * + * Context: Any context. + */ +static inline void mt_init(struct maple_tree *mt) +{ + mt_init_flags(mt, 0); +} + +static inline bool mt_in_rcu(struct maple_tree *mt) +{ +#ifdef CONFIG_MAPLE_RCU_DISABLED + return false; +#endif + return mt->ma_flags & MT_FLAGS_USE_RCU; +} + +/** + * mt_clear_in_rcu() - Switch the tree to non-RCU mode. + * @mt: The Maple Tree + */ +static inline void mt_clear_in_rcu(struct maple_tree *mt) +{ + if (!mt_in_rcu(mt)) + return; + + if (mt_external_lock(mt)) { + BUG_ON(!mt_lock_is_held(mt)); + mt->ma_flags &= ~MT_FLAGS_USE_RCU; + } else { + mtree_lock(mt); + mt->ma_flags &= ~MT_FLAGS_USE_RCU; + mtree_unlock(mt); + } +} + +/** + * mt_set_in_rcu() - Switch the tree to RCU safe mode. + * @mt: The Maple Tree + */ +static inline void mt_set_in_rcu(struct maple_tree *mt) +{ + if (mt_in_rcu(mt)) + return; + + if (mt_external_lock(mt)) { + BUG_ON(!mt_lock_is_held(mt)); + mt->ma_flags |= MT_FLAGS_USE_RCU; + } else { + mtree_lock(mt); + mt->ma_flags |= MT_FLAGS_USE_RCU; + mtree_unlock(mt); + } +} + +void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max); +void *mt_find_after(struct maple_tree *mt, unsigned long *index, + unsigned long max); +void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min); +void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max); + +/** + * mt_for_each - Iterate over each entry starting at index until max. + * @__tree: The Maple Tree + * @__entry: The current entry + * @__index: The index to update to track the location in the tree + * @__max: The maximum limit for @index + * + * Note: Will not return the zero entry. + */ +#define mt_for_each(__tree, __entry, __index, __max) \ + for (__entry = mt_find(__tree, &(__index), __max); \ + __entry; __entry = mt_find_after(__tree, &(__index), __max)) + + +#ifdef CONFIG_DEBUG_MAPLE_TREE +extern atomic_t maple_tree_tests_run; +extern atomic_t maple_tree_tests_passed; + +void mt_dump(const struct maple_tree *mt); +void mt_validate(struct maple_tree *mt); +#define MT_BUG_ON(__tree, __x) do { \ + atomic_inc(&maple_tree_tests_run); \ + if (__x) { \ + pr_info("BUG at %s:%d (%u)\n", \ + __func__, __LINE__, __x); \ + mt_dump(__tree); \ + pr_info("Pass: %u Run:%u\n", \ + atomic_read(&maple_tree_tests_passed), \ + atomic_read(&maple_tree_tests_run)); \ + dump_stack(); \ + } else { \ + atomic_inc(&maple_tree_tests_passed); \ + } \ +} while (0) +#else +#define MT_BUG_ON(__tree, __x) BUG_ON(__x) +#endif /* CONFIG_DEBUG_MAPLE_TREE */ + +#endif /*_LINUX_MAPLE_TREE_H */ diff --git a/include/linux/mdev.h b/include/linux/mdev.h index 47ad3b104d9e..139d05b26f82 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -10,6 +10,9 @@ #ifndef MDEV_H #define MDEV_H +#include <linux/device.h> +#include <linux/uuid.h> + struct mdev_type; struct mdev_device { @@ -20,67 +23,67 @@ struct mdev_device { bool active; }; -static inline struct mdev_device *to_mdev_device(struct device *dev) -{ - return container_of(dev, struct mdev_device, dev); -} +struct mdev_type { + /* set by the driver before calling mdev_register parent: */ + const char *sysfs_name; + const char *pretty_name; -unsigned int mdev_get_type_group_id(struct mdev_device *mdev); -unsigned int mtype_get_type_group_id(struct mdev_type *mtype); -struct device *mtype_get_parent_dev(struct mdev_type *mtype); + /* set by the core, can be used drivers */ + struct mdev_parent *parent; -/* interface for exporting mdev supported type attributes */ -struct mdev_type_attribute { - struct attribute attr; - ssize_t (*show)(struct mdev_type *mtype, - struct mdev_type_attribute *attr, char *buf); - ssize_t (*store)(struct mdev_type *mtype, - struct mdev_type_attribute *attr, const char *buf, - size_t count); + /* internal only */ + struct kobject kobj; + struct kobject *devices_kobj; }; -#define MDEV_TYPE_ATTR(_name, _mode, _show, _store) \ -struct mdev_type_attribute mdev_type_attr_##_name = \ - __ATTR(_name, _mode, _show, _store) -#define MDEV_TYPE_ATTR_RW(_name) \ - struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_RW(_name) -#define MDEV_TYPE_ATTR_RO(_name) \ - struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_RO(_name) -#define MDEV_TYPE_ATTR_WO(_name) \ - struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_WO(_name) +/* embedded into the struct device that the mdev devices hang off */ +struct mdev_parent { + struct device *dev; + struct mdev_driver *mdev_driver; + struct kset *mdev_types_kset; + /* Synchronize device creation/removal with parent unregistration */ + struct rw_semaphore unreg_sem; + struct mdev_type **types; + unsigned int nr_types; + atomic_t available_instances; +}; + +static inline struct mdev_device *to_mdev_device(struct device *dev) +{ + return container_of(dev, struct mdev_device, dev); +} /** * struct mdev_driver - Mediated device driver + * @device_api: string to return for the device_api sysfs + * @max_instances: maximum number of instances supported (optional) * @probe: called when new device created * @remove: called when device removed - * @supported_type_groups: Attributes to define supported types. It is mandatory - * to provide supported types. + * @get_available: Return the max number of instances that can be created + * @show_description: Print a description of the mtype * @driver: device driver structure - * **/ struct mdev_driver { + const char *device_api; + unsigned int max_instances; int (*probe)(struct mdev_device *dev); void (*remove)(struct mdev_device *dev); - struct attribute_group **supported_type_groups; + unsigned int (*get_available)(struct mdev_type *mtype); + ssize_t (*show_description)(struct mdev_type *mtype, char *buf); struct device_driver driver; }; -extern struct bus_type mdev_bus_type; - -int mdev_register_device(struct device *dev, struct mdev_driver *mdev_driver); -void mdev_unregister_device(struct device *dev); +int mdev_register_parent(struct mdev_parent *parent, struct device *dev, + struct mdev_driver *mdev_driver, struct mdev_type **types, + unsigned int nr_types); +void mdev_unregister_parent(struct mdev_parent *parent); int mdev_register_driver(struct mdev_driver *drv); void mdev_unregister_driver(struct mdev_driver *drv); -struct device *mdev_parent_dev(struct mdev_device *mdev); static inline struct device *mdev_dev(struct mdev_device *mdev) { return &mdev->dev; } -static inline struct mdev_device *mdev_from_dev(struct device *dev) -{ - return dev->bus == &mdev_bus_type ? to_mdev_device(dev) : NULL; -} #endif /* MDEV_H */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 567f12323f55..e1644a24009c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -80,29 +80,8 @@ enum mem_cgroup_events_target { MEM_CGROUP_NTARGETS, }; -struct memcg_vmstats_percpu { - /* Local (CPU and cgroup) page state & events */ - long state[MEMCG_NR_STAT]; - unsigned long events[NR_VM_EVENT_ITEMS]; - - /* Delta calculation for lockless upward propagation */ - long state_prev[MEMCG_NR_STAT]; - unsigned long events_prev[NR_VM_EVENT_ITEMS]; - - /* Cgroup1: threshold notifications & softlimit tree updates */ - unsigned long nr_page_events; - unsigned long targets[MEM_CGROUP_NTARGETS]; -}; - -struct memcg_vmstats { - /* Aggregated (CPU and subtree) page state & events */ - long state[MEMCG_NR_STAT]; - unsigned long events[NR_VM_EVENT_ITEMS]; - - /* Pending child counts during tree propagation */ - long state_pending[MEMCG_NR_STAT]; - unsigned long events_pending[NR_VM_EVENT_ITEMS]; -}; +struct memcg_vmstats_percpu; +struct memcg_vmstats; struct mem_cgroup_reclaim_iter { struct mem_cgroup *position; @@ -185,15 +164,6 @@ struct mem_cgroup_thresholds { struct mem_cgroup_threshold_ary *spare; }; -#if defined(CONFIG_SMP) -struct memcg_padding { - char x[0]; -} ____cacheline_internodealigned_in_smp; -#define MEMCG_PADDING(name) struct memcg_padding name -#else -#define MEMCG_PADDING(name) -#endif - /* * Remember four most recent foreign writebacks with dirty pages in this * cgroup. Inode sharing is expected to be uncommon and, even if we miss @@ -304,10 +274,10 @@ struct mem_cgroup { spinlock_t move_lock; unsigned long move_lock_flags; - MEMCG_PADDING(_pad1_); + CACHELINE_PADDING(_pad1_); /* memory.stat */ - struct memcg_vmstats vmstats; + struct memcg_vmstats *vmstats; /* memory.events */ atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS]; @@ -326,7 +296,7 @@ struct mem_cgroup { struct list_head objcg_list; #endif - MEMCG_PADDING(_pad2_); + CACHELINE_PADDING(_pad2_); /* * set > 0 if pages under this cgroup are moving to other cgroup. @@ -350,14 +320,20 @@ struct mem_cgroup { struct deferred_split deferred_split_queue; #endif +#ifdef CONFIG_LRU_GEN + /* per-memcg mm_struct list */ + struct lru_gen_mm_list mm_list; +#endif + struct mem_cgroup_per_node *nodeinfo[]; }; /* - * size of first charge trial. "32" comes from vmscan.c's magic value. - * TODO: maybe necessary to use big numbers in big irons. + * size of first charge trial. + * TODO: maybe necessary to use big numbers in big irons or dynamic based of the + * workload. */ -#define MEMCG_CHARGE_BATCH 32U +#define MEMCG_CHARGE_BATCH 64U extern struct mem_cgroup *root_mem_cgroup; @@ -444,6 +420,7 @@ static inline struct obj_cgroup *__folio_objcg(struct folio *folio) * - LRU isolation * - lock_page_memcg() * - exclusive reference + * - mem_cgroup_trylock_pages() * * For a kmem folio a caller should hold an rcu read lock to protect memcg * associated with a kmem folio from being released. @@ -505,6 +482,7 @@ static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) * - LRU isolation * - lock_page_memcg() * - exclusive reference + * - mem_cgroup_trylock_pages() * * For a kmem page a caller should hold an rcu read lock to protect memcg * associated with a kmem page from being released. @@ -689,7 +667,7 @@ static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, return __mem_cgroup_charge(folio, mm, gfp); } -int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm, +int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry); void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry); @@ -959,6 +937,23 @@ void unlock_page_memcg(struct page *page); void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val); +/* try to stablize folio_memcg() for all the pages in a memcg */ +static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) +{ + rcu_read_lock(); + + if (mem_cgroup_disabled() || !atomic_read(&memcg->moving_account)) + return true; + + rcu_read_unlock(); + return false; +} + +static inline void mem_cgroup_unlock_pages(void) +{ + rcu_read_unlock(); +} + /* idx can be of type enum memcg_stat_item or node_stat_item */ static inline void mod_memcg_state(struct mem_cgroup *memcg, int idx, int val) @@ -985,15 +980,7 @@ static inline void mod_memcg_page_state(struct page *page, rcu_read_unlock(); } -static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) -{ - long x = READ_ONCE(memcg->vmstats.state[idx]); -#ifdef CONFIG_SMP - if (x < 0) - x = 0; -#endif - return x; -} +unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx); static inline unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx) @@ -1238,7 +1225,7 @@ static inline int mem_cgroup_charge(struct folio *folio, return 0; } -static inline int mem_cgroup_swapin_charge_page(struct page *page, +static inline int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry) { return 0; @@ -1433,6 +1420,18 @@ static inline void folio_memcg_unlock(struct folio *folio) { } +static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) +{ + /* to match folio_memcg_rcu() */ + rcu_read_lock(); + return true; +} + +static inline void mem_cgroup_unlock_pages(void) +{ + rcu_read_unlock(); +} + static inline void mem_cgroup_handle_over_high(void) { } @@ -1779,7 +1778,7 @@ static inline void count_objcg_event(struct obj_cgroup *objcg, { struct mem_cgroup *memcg; - if (mem_cgroup_kmem_disabled()) + if (!memcg_kmem_enabled()) return; rcu_read_lock(); diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h new file mode 100644 index 000000000000..965009aa01d7 --- /dev/null +++ b/include/linux/memory-tiers.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MEMORY_TIERS_H +#define _LINUX_MEMORY_TIERS_H + +#include <linux/types.h> +#include <linux/nodemask.h> +#include <linux/kref.h> +#include <linux/mmzone.h> +/* + * Each tier cover a abstrace distance chunk size of 128 + */ +#define MEMTIER_CHUNK_BITS 7 +#define MEMTIER_CHUNK_SIZE (1 << MEMTIER_CHUNK_BITS) +/* + * Smaller abstract distance values imply faster (higher) memory tiers. Offset + * the DRAM adistance so that we can accommodate devices with a slightly lower + * adistance value (slightly faster) than default DRAM adistance to be part of + * the same memory tier. + */ +#define MEMTIER_ADISTANCE_DRAM ((4 * MEMTIER_CHUNK_SIZE) + (MEMTIER_CHUNK_SIZE >> 1)) +#define MEMTIER_HOTPLUG_PRIO 100 + +struct memory_tier; +struct memory_dev_type { + /* list of memory types that are part of same tier as this type */ + struct list_head tier_sibiling; + /* abstract distance for this specific memory type */ + int adistance; + /* Nodes of same abstract distance */ + nodemask_t nodes; + struct kref kref; +}; + +#ifdef CONFIG_NUMA +extern bool numa_demotion_enabled; +struct memory_dev_type *alloc_memory_type(int adistance); +void destroy_memory_type(struct memory_dev_type *memtype); +void init_node_memory_type(int node, struct memory_dev_type *default_type); +void clear_node_memory_type(int node, struct memory_dev_type *memtype); +#ifdef CONFIG_MIGRATION +int next_demotion_node(int node); +void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets); +bool node_is_toptier(int node); +#else +static inline int next_demotion_node(int node) +{ + return NUMA_NO_NODE; +} + +static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets) +{ + *targets = NODE_MASK_NONE; +} + +static inline bool node_is_toptier(int node) +{ + return true; +} +#endif + +#else + +#define numa_demotion_enabled false +/* + * CONFIG_NUMA implementation returns non NULL error. + */ +static inline struct memory_dev_type *alloc_memory_type(int adistance) +{ + return NULL; +} + +static inline void destroy_memory_type(struct memory_dev_type *memtype) +{ + +} + +static inline void init_node_memory_type(int node, struct memory_dev_type *default_type) +{ + +} + +static inline void clear_node_memory_type(int node, struct memory_dev_type *memtype) +{ + +} + +static inline int next_demotion_node(int node) +{ + return NUMA_NO_NODE; +} + +static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets) +{ + *targets = NODE_MASK_NONE; +} + +static inline bool node_is_toptier(int node) +{ + return true; +} +#endif /* CONFIG_NUMA */ +#endif /* _LINUX_MEMORY_TIERS_H */ diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index e0b2209ab71c..9fcbf5706595 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -11,7 +11,6 @@ struct page; struct zone; struct pglist_data; struct mem_section; -struct memory_block; struct memory_group; struct resource; struct vmem_altmap; @@ -44,11 +43,6 @@ extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat); ({ \ memblock_alloc(sizeof(*pgdat), SMP_CACHE_BYTES); \ }) -/* - * This definition is just for error path in node hotadd. - * For node hotremove, we have to replace this. - */ -#define generic_free_nodedata(pgdat) kfree(pgdat) extern pg_data_t *node_data[]; static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) @@ -64,9 +58,6 @@ static inline pg_data_t *generic_alloc_nodedata(int nid) BUG(); return NULL; } -static inline void generic_free_nodedata(pg_data_t *pgdat) -{ -} static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) { } @@ -216,6 +207,22 @@ void put_online_mems(void); void mem_hotplug_begin(void); void mem_hotplug_done(void); +/* See kswapd_is_running() */ +static inline void pgdat_kswapd_lock(pg_data_t *pgdat) +{ + mutex_lock(&pgdat->kswapd_lock); +} + +static inline void pgdat_kswapd_unlock(pg_data_t *pgdat) +{ + mutex_unlock(&pgdat->kswapd_lock); +} + +static inline void pgdat_kswapd_lock_init(pg_data_t *pgdat) +{ + mutex_init(&pgdat->kswapd_lock); +} + #else /* ! CONFIG_MEMORY_HOTPLUG */ #define pfn_to_online_page(pfn) \ ({ \ @@ -252,6 +259,10 @@ static inline bool movable_node_is_enabled(void) { return false; } + +static inline void pgdat_kswapd_lock(pg_data_t *pgdat) {} +static inline void pgdat_kswapd_unlock(pg_data_t *pgdat) {} +static inline void pgdat_kswapd_lock_init(pg_data_t *pgdat) {} #endif /* ! CONFIG_MEMORY_HOTPLUG */ /* @@ -333,7 +344,6 @@ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, extern void remove_pfn_range_from_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages); -extern bool is_memblock_offlined(struct memory_block *mem); extern int sparse_add_section(int nid, unsigned long pfn, unsigned long nr_pages, struct vmem_altmap *altmap, struct dev_pagemap *pgmap); diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 668389b4b53d..d232de7cdc56 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -151,13 +151,6 @@ extern bool mempolicy_in_oom_domain(struct task_struct *tsk, const nodemask_t *mask); extern nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy); -static inline nodemask_t *policy_nodemask_current(gfp_t gfp) -{ - struct mempolicy *mpol = get_task_policy(current); - - return policy_nodemask(gfp, mpol); -} - extern unsigned int mempolicy_slab_node(void); extern enum zone_type policy_zone; @@ -189,6 +182,7 @@ static inline bool mpol_is_preferred_many(struct mempolicy *pol) return (pol->mode == MPOL_PREFERRED_MANY); } +extern bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone); #else @@ -294,11 +288,6 @@ static inline void mpol_put_task_policy(struct task_struct *task) { } -static inline nodemask_t *policy_nodemask_current(gfp_t gfp) -{ - return NULL; -} - static inline bool mpol_is_preferred_many(struct mempolicy *pol) { return false; diff --git a/include/linux/mfd/rk808.h b/include/linux/mfd/rk808.h index 58602032e642..9af1f3105f80 100644 --- a/include/linux/mfd/rk808.h +++ b/include/linux/mfd/rk808.h @@ -519,6 +519,77 @@ enum rk809_reg_id { #define MIC_DIFF_DIS (0x0 << 7) #define MIC_DIFF_EN (0x1 << 7) +/* RK817 Battery Registers */ +#define RK817_GAS_GAUGE_ADC_CONFIG0 0x50 +#define RK817_GG_EN (0x1 << 7) +#define RK817_SYS_VOL_ADC_EN (0x1 << 6) +#define RK817_TS_ADC_EN (0x1 << 5) +#define RK817_USB_VOL_ADC_EN (0x1 << 4) +#define RK817_BAT_VOL_ADC_EN (0x1 << 3) +#define RK817_BAT_CUR_ADC_EN (0x1 << 2) + +#define RK817_GAS_GAUGE_ADC_CONFIG1 0x55 + +#define RK817_VOL_CUR_CALIB_UPD BIT(7) + +#define RK817_GAS_GAUGE_GG_CON 0x56 +#define RK817_GAS_GAUGE_GG_STS 0x57 + +#define RK817_BAT_CON (0x1 << 4) +#define RK817_RELAX_VOL_UPD (0x3 << 2) +#define RK817_RELAX_STS (0x1 << 1) + +#define RK817_GAS_GAUGE_RELAX_THRE_H 0x58 +#define RK817_GAS_GAUGE_RELAX_THRE_L 0x59 +#define RK817_GAS_GAUGE_OCV_THRE_VOL 0x62 +#define RK817_GAS_GAUGE_OCV_VOL_H 0x63 +#define RK817_GAS_GAUGE_OCV_VOL_L 0x64 +#define RK817_GAS_GAUGE_PWRON_VOL_H 0x6b +#define RK817_GAS_GAUGE_PWRON_VOL_L 0x6c +#define RK817_GAS_GAUGE_PWRON_CUR_H 0x6d +#define RK817_GAS_GAUGE_PWRON_CUR_L 0x6e +#define RK817_GAS_GAUGE_OFF_CNT 0x6f +#define RK817_GAS_GAUGE_Q_INIT_H3 0x70 +#define RK817_GAS_GAUGE_Q_INIT_H2 0x71 +#define RK817_GAS_GAUGE_Q_INIT_L1 0x72 +#define RK817_GAS_GAUGE_Q_INIT_L0 0x73 +#define RK817_GAS_GAUGE_Q_PRES_H3 0x74 +#define RK817_GAS_GAUGE_Q_PRES_H2 0x75 +#define RK817_GAS_GAUGE_Q_PRES_L1 0x76 +#define RK817_GAS_GAUGE_Q_PRES_L0 0x77 +#define RK817_GAS_GAUGE_BAT_VOL_H 0x78 +#define RK817_GAS_GAUGE_BAT_VOL_L 0x79 +#define RK817_GAS_GAUGE_BAT_CUR_H 0x7a +#define RK817_GAS_GAUGE_BAT_CUR_L 0x7b +#define RK817_GAS_GAUGE_USB_VOL_H 0x7e +#define RK817_GAS_GAUGE_USB_VOL_L 0x7f +#define RK817_GAS_GAUGE_SYS_VOL_H 0x80 +#define RK817_GAS_GAUGE_SYS_VOL_L 0x81 +#define RK817_GAS_GAUGE_Q_MAX_H3 0x82 +#define RK817_GAS_GAUGE_Q_MAX_H2 0x83 +#define RK817_GAS_GAUGE_Q_MAX_L1 0x84 +#define RK817_GAS_GAUGE_Q_MAX_L0 0x85 +#define RK817_GAS_GAUGE_SLEEP_CON_SAMP_CUR_H 0x8f +#define RK817_GAS_GAUGE_SLEEP_CON_SAMP_CUR_L 0x90 +#define RK817_GAS_GAUGE_CAL_OFFSET_H 0x91 +#define RK817_GAS_GAUGE_CAL_OFFSET_L 0x92 +#define RK817_GAS_GAUGE_VCALIB0_H 0x93 +#define RK817_GAS_GAUGE_VCALIB0_L 0x94 +#define RK817_GAS_GAUGE_VCALIB1_H 0x95 +#define RK817_GAS_GAUGE_VCALIB1_L 0x96 +#define RK817_GAS_GAUGE_IOFFSET_H 0x97 +#define RK817_GAS_GAUGE_IOFFSET_L 0x98 +#define RK817_GAS_GAUGE_BAT_R1 0x9a +#define RK817_GAS_GAUGE_BAT_R2 0x9b +#define RK817_GAS_GAUGE_BAT_R3 0x9c +#define RK817_GAS_GAUGE_DATA0 0x9d +#define RK817_GAS_GAUGE_DATA1 0x9e +#define RK817_GAS_GAUGE_DATA2 0x9f +#define RK817_GAS_GAUGE_DATA3 0xa0 +#define RK817_GAS_GAUGE_DATA4 0xa1 +#define RK817_GAS_GAUGE_DATA5 0xa2 +#define RK817_GAS_GAUGE_CUR_ADC_K0 0xb0 + #define RK817_POWER_EN_REG(i) (0xb1 + (i)) #define RK817_POWER_SLP_EN_REG(i) (0xb5 + (i)) @@ -544,10 +615,30 @@ enum rk809_reg_id { #define RK817_LDO_ON_VSEL_REG(idx) (0xcc + (idx) * 2) #define RK817_BOOST_OTG_CFG (0xde) +#define RK817_PMIC_CHRG_OUT 0xe4 +#define RK817_CHRG_VOL_SEL (0x07 << 4) +#define RK817_CHRG_CUR_SEL (0x07 << 0) + +#define RK817_PMIC_CHRG_IN 0xe5 +#define RK817_USB_VLIM_EN (0x01 << 7) +#define RK817_USB_VLIM_SEL (0x07 << 4) +#define RK817_USB_ILIM_EN (0x01 << 3) +#define RK817_USB_ILIM_SEL (0x07 << 0) +#define RK817_PMIC_CHRG_TERM 0xe6 +#define RK817_CHRG_TERM_ANA_DIG (0x01 << 2) +#define RK817_CHRG_TERM_ANA_SEL (0x03 << 0) +#define RK817_CHRG_EN (0x01 << 6) + +#define RK817_PMIC_CHRG_STS 0xeb +#define RK817_BAT_EXS BIT(7) +#define RK817_CHG_STS (0x07 << 4) + #define RK817_ID_MSB 0xed #define RK817_ID_LSB 0xee #define RK817_SYS_STS 0xf0 +#define RK817_PLUG_IN_STS (0x1 << 6) + #define RK817_SYS_CFG(i) (0xf1 + (i)) #define RK817_ON_SOURCE_REG 0xf5 diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 22c0a0cf5e0c..704a04f5a074 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -100,21 +100,6 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, #endif /* CONFIG_MIGRATION */ -#if defined(CONFIG_MIGRATION) && defined(CONFIG_NUMA) -extern void set_migration_target_nodes(void); -extern void migrate_on_reclaim_init(void); -extern bool numa_demotion_enabled; -extern int next_demotion_node(int node); -#else -static inline void set_migration_target_nodes(void) {} -static inline void migrate_on_reclaim_init(void) {} -static inline int next_demotion_node(int node) -{ - return NUMA_NO_NODE; -} -#define numa_demotion_enabled false -#endif - #ifdef CONFIG_COMPACTION bool PageMovable(struct page *page); void __SetPageMovable(struct page *page, const struct movable_operations *ops); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 06574d430ff5..5a4e914e2a6f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1757,7 +1757,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 steering_format_version[0x4]; u8 create_qp_start_hint[0x18]; - u8 reserved_at_460[0x3]; + u8 reserved_at_460[0x1]; + u8 ats[0x1]; + u8 reserved_at_462[0x1]; u8 log_max_uctx[0x5]; u8 reserved_at_468[0x2]; u8 ipsec_offload[0x1]; @@ -3939,7 +3941,9 @@ struct mlx5_ifc_mkc_bits { u8 lw[0x1]; u8 lr[0x1]; u8 access_mode_1_0[0x2]; - u8 reserved_at_18[0x8]; + u8 reserved_at_18[0x2]; + u8 ma_translation_mode[0x2]; + u8 reserved_at_1c[0x4]; u8 qpn[0x18]; u8 mkey_7_0[0x8]; @@ -11214,7 +11218,8 @@ struct mlx5_ifc_dealloc_memic_out_bits { struct mlx5_ifc_umem_bits { u8 reserved_at_0[0x80]; - u8 reserved_at_80[0x1b]; + u8 ats[0x1]; + u8 reserved_at_81[0x1a]; u8 log_page_size[0x5]; u8 page_offset[0x20]; diff --git a/include/linux/mm.h b/include/linux/mm.h index 21f8b27bd9fd..8bbcccbc5565 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -661,6 +661,38 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma) return vma->vm_flags & VM_ACCESS_FLAGS; } +static inline +struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max) +{ + return mas_find(&vmi->mas, max); +} + +static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi) +{ + /* + * Uses vma_find() to get the first VMA when the iterator starts. + * Calling mas_next() could skip the first entry. + */ + return vma_find(vmi, ULONG_MAX); +} + +static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi) +{ + return mas_prev(&vmi->mas, 0); +} + +static inline unsigned long vma_iter_addr(struct vma_iterator *vmi) +{ + return vmi->mas.index; +} + +#define for_each_vma(__vmi, __vma) \ + while (((__vma) = vma_next(&(__vmi))) != NULL) + +/* The MM code likes to work with exclusive end addresses */ +#define for_each_vma_range(__vmi, __vma, __end) \ + while (((__vma) = vma_find(&(__vmi), (__end) - 1)) != NULL) + #ifdef CONFIG_SHMEM /* * The vma_is_shmem is not inline because it is used only by slow @@ -697,7 +729,9 @@ static inline unsigned int compound_order(struct page *page) */ static inline unsigned int folio_order(struct folio *folio) { - return compound_order(&folio->page); + if (!folio_test_large(folio)) + return 0; + return folio->_folio_order; } #include <linux/huge_mm.h> @@ -1255,6 +1289,18 @@ static inline int folio_nid(const struct folio *folio) } #ifdef CONFIG_NUMA_BALANCING +/* page access time bits needs to hold at least 4 seconds */ +#define PAGE_ACCESS_TIME_MIN_BITS 12 +#if LAST_CPUPID_SHIFT < PAGE_ACCESS_TIME_MIN_BITS +#define PAGE_ACCESS_TIME_BUCKETS \ + (PAGE_ACCESS_TIME_MIN_BITS - LAST_CPUPID_SHIFT) +#else +#define PAGE_ACCESS_TIME_BUCKETS 0 +#endif + +#define PAGE_ACCESS_TIME_MASK \ + (LAST_CPUPID_MASK << PAGE_ACCESS_TIME_BUCKETS) + static inline int cpu_pid_to_cpupid(int cpu, int pid) { return ((cpu & LAST__CPU_MASK) << LAST__PID_SHIFT) | (pid & LAST__PID_MASK); @@ -1318,12 +1364,25 @@ static inline void page_cpupid_reset_last(struct page *page) page->flags |= LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT; } #endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */ + +static inline int xchg_page_access_time(struct page *page, int time) +{ + int last_time; + + last_time = page_cpupid_xchg_last(page, time >> PAGE_ACCESS_TIME_BUCKETS); + return last_time << PAGE_ACCESS_TIME_BUCKETS; +} #else /* !CONFIG_NUMA_BALANCING */ static inline int page_cpupid_xchg_last(struct page *page, int cpupid) { return page_to_nid(page); /* XXX */ } +static inline int xchg_page_access_time(struct page *page, int time) +{ + return 0; +} + static inline int page_cpupid_last(struct page *page) { return page_to_nid(page); /* XXX */ @@ -1465,6 +1524,11 @@ static inline unsigned long folio_pfn(struct folio *folio) return page_to_pfn(&folio->page); } +static inline struct folio *pfn_folio(unsigned long pfn) +{ + return page_folio(pfn_to_page(pfn)); +} + static inline atomic_t *folio_pincount_ptr(struct folio *folio) { return &folio_page(folio, 1)->compound_pincount; @@ -1597,7 +1661,13 @@ static inline void set_page_links(struct page *page, enum zone_type zone, */ static inline long folio_nr_pages(struct folio *folio) { - return compound_nr(&folio->page); + if (!folio_test_large(folio)) + return 1; +#ifdef CONFIG_64BIT + return folio->_folio_nr_pages; +#else + return 1L << folio->_folio_order; +#endif } /** @@ -1776,7 +1846,11 @@ extern void pagefault_out_of_memory(void); */ #define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */ -extern void show_free_areas(unsigned int flags, nodemask_t *nodemask); +extern void __show_free_areas(unsigned int flags, nodemask_t *nodemask, int max_zone_idx); +static void __maybe_unused show_free_areas(unsigned int flags, nodemask_t *nodemask) +{ + __show_free_areas(flags, nodemask, MAX_NR_ZONES - 1); +} #ifdef CONFIG_MMU extern bool can_do_mlock(void); @@ -1795,8 +1869,9 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); void zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size); -void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, - unsigned long start, unsigned long end); +void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, + struct vm_area_struct *start_vma, unsigned long start, + unsigned long end); struct mmu_notifier_range; @@ -2495,7 +2570,6 @@ extern unsigned long absent_pages_in_range(unsigned long start_pfn, unsigned long end_pfn); extern void get_pfn_range_for_nid(unsigned int nid, unsigned long *start_pfn, unsigned long *end_pfn); -extern unsigned long find_min_pfn_with_active_regions(void); #ifndef CONFIG_NUMA static inline int early_pfn_to_nid(unsigned long pfn) @@ -2516,7 +2590,12 @@ extern void calculate_min_free_kbytes(void); extern int __meminit init_per_zone_wmark_min(void); extern void mem_init(void); extern void __init mmap_init(void); -extern void show_mem(unsigned int flags, nodemask_t *nodemask); + +extern void __show_mem(unsigned int flags, nodemask_t *nodemask, int max_zone_idx); +static inline void show_mem(unsigned int flags, nodemask_t *nodemask) +{ + __show_mem(flags, nodemask, MAX_NR_ZONES - 1); +} extern long si_mem_available(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); @@ -2593,14 +2672,15 @@ extern int __split_vma(struct mm_struct *, struct vm_area_struct *, extern int split_vma(struct mm_struct *, struct vm_area_struct *, unsigned long addr, int new_below); extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); -extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *, - struct rb_node **, struct rb_node *); extern void unlink_file_vma(struct vm_area_struct *); extern struct vm_area_struct *copy_vma(struct vm_area_struct **, unsigned long addr, unsigned long len, pgoff_t pgoff, bool *need_rmap_locks); extern void exit_mmap(struct mm_struct *); +void vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas); +void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas); + static inline int check_data_rlimit(unsigned long rlim, unsigned long new, unsigned long start, @@ -2648,8 +2728,9 @@ extern unsigned long mmap_region(struct file *file, unsigned long addr, extern unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate, struct list_head *uf); -extern int __do_munmap(struct mm_struct *, unsigned long, size_t, - struct list_head *uf, bool downgrade); +extern int do_mas_munmap(struct ma_state *mas, struct mm_struct *mm, + unsigned long start, size_t len, struct list_head *uf, + bool downgrade); extern int do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf); extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior); @@ -2716,26 +2797,12 @@ extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long add extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr, struct vm_area_struct **pprev); -/** - * find_vma_intersection() - Look up the first VMA which intersects the interval - * @mm: The process address space. - * @start_addr: The inclusive start user address. - * @end_addr: The exclusive end user address. - * - * Returns: The first VMA within the provided range, %NULL otherwise. Assumes - * start_addr < end_addr. +/* + * Look up the first VMA which intersects the interval [start_addr, end_addr) + * NULL if none. Assume start_addr < end_addr. */ -static inline struct vm_area_struct *find_vma_intersection(struct mm_struct *mm, - unsigned long start_addr, - unsigned long end_addr) -{ - struct vm_area_struct *vma = find_vma(mm, start_addr); - - if (vma && end_addr <= vma->vm_start) - vma = NULL; - return vma; -} + unsigned long start_addr, unsigned long end_addr); /** * vma_lookup() - Find a VMA at a specific address @@ -2747,12 +2814,7 @@ struct vm_area_struct *find_vma_intersection(struct mm_struct *mm, static inline struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr) { - struct vm_area_struct *vma = find_vma(mm, addr); - - if (vma && addr < vma->vm_start) - vma = NULL; - - return vma; + return mtree_load(&mm->mm_mt, addr); } static inline unsigned long vm_start_gap(struct vm_area_struct *vma) @@ -2788,7 +2850,7 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma) static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm, unsigned long vm_start, unsigned long vm_end) { - struct vm_area_struct *vma = find_vma(mm, vm_start); + struct vm_area_struct *vma = vma_lookup(mm, vm_start); if (vma && (vma->vm_start != vm_start || vma->vm_end != vm_end)) vma = NULL; @@ -2888,7 +2950,6 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, * and return without waiting upon it */ #define FOLL_NOFAULT 0x80 /* do not fault in pages */ #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ -#define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ #define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ @@ -2975,8 +3036,8 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) * PageAnonExclusive() has to protect against concurrent GUP: * * Ordinary GUP: Using the PT lock * * GUP-fast and fork(): mm->write_protect_seq - * * GUP-fast and KSM or temporary unmapping (swap, migration): - * clear/invalidate+flush of the page table entry + * * GUP-fast and KSM or temporary unmapping (swap, migration): see + * page_try_share_anon_rmap() * * Must be called with the (sub)page that's actually referenced via the * page table entry, which might not necessarily be the head page for a @@ -2997,6 +3058,11 @@ static inline bool gup_must_unshare(unsigned int flags, struct page *page) */ if (!PageAnon(page)) return false; + + /* Paired with a memory barrier in page_try_share_anon_rmap(). */ + if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) + smp_rmb(); + /* * Note that PageKsm() pages cannot be exclusive, and consequently, * cannot get pinned. @@ -3004,6 +3070,21 @@ static inline bool gup_must_unshare(unsigned int flags, struct page *page) return !PageAnonExclusive(page); } +/* + * Indicates whether GUP can follow a PROT_NONE mapped page, or whether + * a (NUMA hinting) fault is required. + */ +static inline bool gup_can_follow_protnone(unsigned int flags) +{ + /* + * FOLL_FORCE has to be able to make progress even if the VMA is + * inaccessible. Further, FOLL_FORCE access usually does not represent + * application behaviour and we should avoid triggering NUMA hinting + * faults. + */ + return flags & FOLL_FORCE; +} + typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data); extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); @@ -3011,7 +3092,7 @@ extern int apply_to_existing_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); -extern void init_mem_debugging_and_hardening(void); +extern void __init init_mem_debugging_and_hardening(void); #ifdef CONFIG_PAGE_POISONING extern void __kernel_poison_pages(struct page *page, int numpages); extern void __kernel_unpoison_pages(struct page *page, int numpages); diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 7b25b53c474a..e8ed225d8f7c 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -34,15 +34,25 @@ static inline int page_is_file_lru(struct page *page) return folio_is_file_lru(page_folio(page)); } -static __always_inline void update_lru_size(struct lruvec *lruvec, +static __always_inline void __update_lru_size(struct lruvec *lruvec, enum lru_list lru, enum zone_type zid, long nr_pages) { struct pglist_data *pgdat = lruvec_pgdat(lruvec); + lockdep_assert_held(&lruvec->lru_lock); + WARN_ON_ONCE(nr_pages != (int)nr_pages); + __mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages); __mod_zone_page_state(&pgdat->node_zones[zid], NR_ZONE_LRU_BASE + lru, nr_pages); +} + +static __always_inline void update_lru_size(struct lruvec *lruvec, + enum lru_list lru, enum zone_type zid, + long nr_pages) +{ + __update_lru_size(lruvec, lru, zid, nr_pages); #ifdef CONFIG_MEMCG mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages); #endif @@ -66,11 +76,6 @@ static __always_inline void __folio_clear_lru_flags(struct folio *folio) __folio_clear_unevictable(folio); } -static __always_inline void __clear_page_lru_flags(struct page *page) -{ - __folio_clear_lru_flags(page_folio(page)); -} - /** * folio_lru_list - Which LRU list should a folio be on? * @folio: The folio to test. @@ -94,11 +99,224 @@ static __always_inline enum lru_list folio_lru_list(struct folio *folio) return lru; } +#ifdef CONFIG_LRU_GEN + +#ifdef CONFIG_LRU_GEN_ENABLED +static inline bool lru_gen_enabled(void) +{ + DECLARE_STATIC_KEY_TRUE(lru_gen_caps[NR_LRU_GEN_CAPS]); + + return static_branch_likely(&lru_gen_caps[LRU_GEN_CORE]); +} +#else +static inline bool lru_gen_enabled(void) +{ + DECLARE_STATIC_KEY_FALSE(lru_gen_caps[NR_LRU_GEN_CAPS]); + + return static_branch_unlikely(&lru_gen_caps[LRU_GEN_CORE]); +} +#endif + +static inline bool lru_gen_in_fault(void) +{ + return current->in_lru_fault; +} + +static inline int lru_gen_from_seq(unsigned long seq) +{ + return seq % MAX_NR_GENS; +} + +static inline int lru_hist_from_seq(unsigned long seq) +{ + return seq % NR_HIST_GENS; +} + +static inline int lru_tier_from_refs(int refs) +{ + VM_WARN_ON_ONCE(refs > BIT(LRU_REFS_WIDTH)); + + /* see the comment in folio_lru_refs() */ + return order_base_2(refs + 1); +} + +static inline int folio_lru_refs(struct folio *folio) +{ + unsigned long flags = READ_ONCE(folio->flags); + bool workingset = flags & BIT(PG_workingset); + + /* + * Return the number of accesses beyond PG_referenced, i.e., N-1 if the + * total number of accesses is N>1, since N=0,1 both map to the first + * tier. lru_tier_from_refs() will account for this off-by-one. Also see + * the comment on MAX_NR_TIERS. + */ + return ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + workingset; +} + +static inline int folio_lru_gen(struct folio *folio) +{ + unsigned long flags = READ_ONCE(folio->flags); + + return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; +} + +static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen) +{ + unsigned long max_seq = lruvec->lrugen.max_seq; + + VM_WARN_ON_ONCE(gen >= MAX_NR_GENS); + + /* see the comment on MIN_NR_GENS */ + return gen == lru_gen_from_seq(max_seq) || gen == lru_gen_from_seq(max_seq - 1); +} + +static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *folio, + int old_gen, int new_gen) +{ + int type = folio_is_file_lru(folio); + int zone = folio_zonenum(folio); + int delta = folio_nr_pages(folio); + enum lru_list lru = type * LRU_INACTIVE_FILE; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + VM_WARN_ON_ONCE(old_gen != -1 && old_gen >= MAX_NR_GENS); + VM_WARN_ON_ONCE(new_gen != -1 && new_gen >= MAX_NR_GENS); + VM_WARN_ON_ONCE(old_gen == -1 && new_gen == -1); + + if (old_gen >= 0) + WRITE_ONCE(lrugen->nr_pages[old_gen][type][zone], + lrugen->nr_pages[old_gen][type][zone] - delta); + if (new_gen >= 0) + WRITE_ONCE(lrugen->nr_pages[new_gen][type][zone], + lrugen->nr_pages[new_gen][type][zone] + delta); + + /* addition */ + if (old_gen < 0) { + if (lru_gen_is_active(lruvec, new_gen)) + lru += LRU_ACTIVE; + __update_lru_size(lruvec, lru, zone, delta); + return; + } + + /* deletion */ + if (new_gen < 0) { + if (lru_gen_is_active(lruvec, old_gen)) + lru += LRU_ACTIVE; + __update_lru_size(lruvec, lru, zone, -delta); + return; + } + + /* promotion */ + if (!lru_gen_is_active(lruvec, old_gen) && lru_gen_is_active(lruvec, new_gen)) { + __update_lru_size(lruvec, lru, zone, -delta); + __update_lru_size(lruvec, lru + LRU_ACTIVE, zone, delta); + } + + /* demotion requires isolation, e.g., lru_deactivate_fn() */ + VM_WARN_ON_ONCE(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen)); +} + +static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) +{ + unsigned long seq; + unsigned long flags; + int gen = folio_lru_gen(folio); + int type = folio_is_file_lru(folio); + int zone = folio_zonenum(folio); + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + VM_WARN_ON_ONCE_FOLIO(gen != -1, folio); + + if (folio_test_unevictable(folio) || !lrugen->enabled) + return false; + /* + * There are three common cases for this page: + * 1. If it's hot, e.g., freshly faulted in or previously hot and + * migrated, add it to the youngest generation. + * 2. If it's cold but can't be evicted immediately, i.e., an anon page + * not in swapcache or a dirty page pending writeback, add it to the + * second oldest generation. + * 3. Everything else (clean, cold) is added to the oldest generation. + */ + if (folio_test_active(folio)) + seq = lrugen->max_seq; + else if ((type == LRU_GEN_ANON && !folio_test_swapcache(folio)) || + (folio_test_reclaim(folio) && + (folio_test_dirty(folio) || folio_test_writeback(folio)))) + seq = lrugen->min_seq[type] + 1; + else + seq = lrugen->min_seq[type]; + + gen = lru_gen_from_seq(seq); + flags = (gen + 1UL) << LRU_GEN_PGOFF; + /* see the comment on MIN_NR_GENS about PG_active */ + set_mask_bits(&folio->flags, LRU_GEN_MASK | BIT(PG_active), flags); + + lru_gen_update_size(lruvec, folio, -1, gen); + /* for folio_rotate_reclaimable() */ + if (reclaiming) + list_add_tail(&folio->lru, &lrugen->lists[gen][type][zone]); + else + list_add(&folio->lru, &lrugen->lists[gen][type][zone]); + + return true; +} + +static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) +{ + unsigned long flags; + int gen = folio_lru_gen(folio); + + if (gen < 0) + return false; + + VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio); + VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio); + + /* for folio_migrate_flags() */ + flags = !reclaiming && lru_gen_is_active(lruvec, gen) ? BIT(PG_active) : 0; + flags = set_mask_bits(&folio->flags, LRU_GEN_MASK, flags); + gen = ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; + + lru_gen_update_size(lruvec, folio, gen, -1); + list_del(&folio->lru); + + return true; +} + +#else /* !CONFIG_LRU_GEN */ + +static inline bool lru_gen_enabled(void) +{ + return false; +} + +static inline bool lru_gen_in_fault(void) +{ + return false; +} + +static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) +{ + return false; +} + +static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) +{ + return false; +} + +#endif /* CONFIG_LRU_GEN */ + static __always_inline void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio) { enum lru_list lru = folio_lru_list(folio); + if (lru_gen_add_folio(lruvec, folio, false)) + return; + update_lru_size(lruvec, lru, folio_zonenum(folio), folio_nr_pages(folio)); if (lru != LRU_UNEVICTABLE) @@ -116,23 +334,23 @@ void lruvec_add_folio_tail(struct lruvec *lruvec, struct folio *folio) { enum lru_list lru = folio_lru_list(folio); + if (lru_gen_add_folio(lruvec, folio, true)) + return; + update_lru_size(lruvec, lru, folio_zonenum(folio), folio_nr_pages(folio)); /* This is not expected to be used on LRU_UNEVICTABLE */ list_add_tail(&folio->lru, &lruvec->lists[lru]); } -static __always_inline void add_page_to_lru_list_tail(struct page *page, - struct lruvec *lruvec) -{ - lruvec_add_folio_tail(lruvec, page_folio(page)); -} - static __always_inline void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio) { enum lru_list lru = folio_lru_list(folio); + if (lru_gen_del_folio(lruvec, folio, false)) + return; + if (lru != LRU_UNEVICTABLE) list_del(&folio->lru); update_lru_size(lruvec, lru, folio_zonenum(folio), diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index cf97f3884fda..500e536796ca 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -9,6 +9,7 @@ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/rbtree.h> +#include <linux/maple_tree.h> #include <linux/rwsem.h> #include <linux/completion.h> #include <linux/cpumask.h> @@ -223,6 +224,18 @@ struct page { not kmapped, ie. highmem) */ #endif /* WANT_PAGE_VIRTUAL */ +#ifdef CONFIG_KMSAN + /* + * KMSAN metadata for this page: + * - shadow page: every bit indicates whether the corresponding + * bit of the original page is initialized (0) or not (1); + * - origin page: every 4 bytes contain an id of the stack trace + * where the uninitialized value was created. + */ + struct page *kmsan_shadow; + struct page *kmsan_origin; +#endif + #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS int _last_cpupid; #endif @@ -244,6 +257,13 @@ struct page { * @_refcount: Do not access this member directly. Use folio_ref_count() * to find how many references there are to this folio. * @memcg_data: Memory Control Group data. + * @_flags_1: For large folios, additional page flags. + * @__head: Points to the folio. Do not use. + * @_folio_dtor: Which destructor to use for this folio. + * @_folio_order: Do not use directly, call folio_order(). + * @_total_mapcount: Do not use directly, call folio_entire_mapcount(). + * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). + * @_folio_nr_pages: Do not use directly, call folio_nr_pages(). * * A folio is a physically, virtually and logically contiguous set * of bytes. It is a power-of-two in size, and it is aligned to that @@ -282,9 +302,17 @@ struct folio { }; struct page page; }; + unsigned long _flags_1; + unsigned long __head; + unsigned char _folio_dtor; + unsigned char _folio_order; + atomic_t _total_mapcount; + atomic_t _pincount; +#ifdef CONFIG_64BIT + unsigned int _folio_nr_pages; +#endif }; -static_assert(sizeof(struct page) == sizeof(struct folio)); #define FOLIO_MATCH(pg, fl) \ static_assert(offsetof(struct page, pg) == offsetof(struct folio, fl)) FOLIO_MATCH(flags, flags); @@ -299,6 +327,19 @@ FOLIO_MATCH(_refcount, _refcount); FOLIO_MATCH(memcg_data, memcg_data); #endif #undef FOLIO_MATCH +#define FOLIO_MATCH(pg, fl) \ + static_assert(offsetof(struct folio, fl) == \ + offsetof(struct page, pg) + sizeof(struct page)) +FOLIO_MATCH(flags, _flags_1); +FOLIO_MATCH(compound_head, __head); +FOLIO_MATCH(compound_dtor, _folio_dtor); +FOLIO_MATCH(compound_order, _folio_order); +FOLIO_MATCH(compound_mapcount, _total_mapcount); +FOLIO_MATCH(compound_pincount, _pincount); +#ifdef CONFIG_64BIT +FOLIO_MATCH(compound_nr, _folio_nr_pages); +#endif +#undef FOLIO_MATCH static inline atomic_t *folio_mapcount_ptr(struct folio *folio) { @@ -407,21 +448,6 @@ struct vm_area_struct { unsigned long vm_end; /* The first byte after our end address within vm_mm. */ - /* linked list of VM areas per task, sorted by address */ - struct vm_area_struct *vm_next, *vm_prev; - - struct rb_node vm_rb; - - /* - * Largest free memory gap in bytes to the left of this VMA. - * Either between this VMA and vma->vm_prev, or between one of the - * VMAs below us in the VMA rbtree and its ->vm_prev. This helps - * get_unmapped_area find a free area of the right size. - */ - unsigned long rb_subtree_gap; - - /* Second cache line starts here. */ - struct mm_struct *vm_mm; /* The address space we belong to. */ /* @@ -485,9 +511,7 @@ struct vm_area_struct { struct kioctx_table; struct mm_struct { struct { - struct vm_area_struct *mmap; /* list of VMAs */ - struct rb_root mm_rb; - u64 vmacache_seqnum; /* per-thread vmacache */ + struct maple_tree mm_mt; #ifdef CONFIG_MMU unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, @@ -501,7 +525,6 @@ struct mm_struct { unsigned long mmap_compat_legacy_base; #endif unsigned long task_size; /* size of task vm space */ - unsigned long highest_vm_end; /* highest vma end address */ pgd_t * pgd; #ifdef CONFIG_MEMBARRIER @@ -631,22 +654,22 @@ struct mm_struct { #endif #ifdef CONFIG_NUMA_BALANCING /* - * numa_next_scan is the next time that the PTEs will be marked - * pte_numa. NUMA hinting faults will gather statistics and - * migrate pages to new nodes if necessary. + * numa_next_scan is the next time that PTEs will be remapped + * PROT_NONE to trigger NUMA hinting faults; such faults gather + * statistics and migrate pages to new nodes if necessary. */ unsigned long numa_next_scan; - /* Restart point for scanning and setting pte_numa */ + /* Restart point for scanning and remapping PTEs. */ unsigned long numa_scan_offset; - /* numa_scan_seq prevents two threads setting pte_numa */ + /* numa_scan_seq prevents two threads remapping PTEs. */ int numa_scan_seq; #endif /* * An operation with batched TLB flushing is going on. Anything * that can move process memory needs to flush the TLB when - * moving a PROT_NONE or PROT_NUMA mapped page. + * moving a PROT_NONE mapped page. */ atomic_t tlb_flush_pending; #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH @@ -671,7 +694,28 @@ struct mm_struct { * merging. */ unsigned long ksm_merging_pages; + /* + * Represent how many pages are checked for ksm merging + * including merged and not merged. + */ + unsigned long ksm_rmap_items; +#endif +#ifdef CONFIG_LRU_GEN + struct { + /* this mm_struct is on lru_gen_mm_list */ + struct list_head list; + /* + * Set when switching to this mm_struct, as a hint of + * whether it has been used since the last time per-node + * page table walkers cleared the corresponding bits. + */ + unsigned long bitmap; +#ifdef CONFIG_MEMCG + /* points to the memcg of "owner" above */ + struct mem_cgroup *memcg; #endif + } lru_gen; +#endif /* CONFIG_LRU_GEN */ } __randomize_layout; /* @@ -681,6 +725,7 @@ struct mm_struct { unsigned long cpu_bitmap[]; }; +#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN) extern struct mm_struct init_mm; /* Pointer magic because the dynamic array size confuses some compilers. */ @@ -698,6 +743,87 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) return (struct cpumask *)&mm->cpu_bitmap; } +#ifdef CONFIG_LRU_GEN + +struct lru_gen_mm_list { + /* mm_struct list for page table walkers */ + struct list_head fifo; + /* protects the list above */ + spinlock_t lock; +}; + +void lru_gen_add_mm(struct mm_struct *mm); +void lru_gen_del_mm(struct mm_struct *mm); +#ifdef CONFIG_MEMCG +void lru_gen_migrate_mm(struct mm_struct *mm); +#endif + +static inline void lru_gen_init_mm(struct mm_struct *mm) +{ + INIT_LIST_HEAD(&mm->lru_gen.list); + mm->lru_gen.bitmap = 0; +#ifdef CONFIG_MEMCG + mm->lru_gen.memcg = NULL; +#endif +} + +static inline void lru_gen_use_mm(struct mm_struct *mm) +{ + /* + * When the bitmap is set, page reclaim knows this mm_struct has been + * used since the last time it cleared the bitmap. So it might be worth + * walking the page tables of this mm_struct to clear the accessed bit. + */ + WRITE_ONCE(mm->lru_gen.bitmap, -1); +} + +#else /* !CONFIG_LRU_GEN */ + +static inline void lru_gen_add_mm(struct mm_struct *mm) +{ +} + +static inline void lru_gen_del_mm(struct mm_struct *mm) +{ +} + +#ifdef CONFIG_MEMCG +static inline void lru_gen_migrate_mm(struct mm_struct *mm) +{ +} +#endif + +static inline void lru_gen_init_mm(struct mm_struct *mm) +{ +} + +static inline void lru_gen_use_mm(struct mm_struct *mm) +{ +} + +#endif /* CONFIG_LRU_GEN */ + +struct vma_iterator { + struct ma_state mas; +}; + +#define VMA_ITERATOR(name, __mm, __addr) \ + struct vma_iterator name = { \ + .mas = { \ + .tree = &(__mm)->mm_mt, \ + .index = __addr, \ + .node = MAS_START, \ + }, \ + } + +static inline void vma_iter_init(struct vma_iterator *vmi, + struct mm_struct *mm, unsigned long addr) +{ + vmi->mas.tree = &mm->mm_mt; + vmi->mas.index = addr; + vmi->mas.node = MAS_START; +} + struct mmu_gather; extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm); extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm); diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h index c1bc6731125c..0bb4b6da9993 100644 --- a/include/linux/mm_types_task.h +++ b/include/linux/mm_types_task.h @@ -25,18 +25,6 @@ #define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) /* - * The per task VMA cache array: - */ -#define VMACACHE_BITS 2 -#define VMACACHE_SIZE (1U << VMACACHE_BITS) -#define VMACACHE_MASK (VMACACHE_SIZE - 1) - -struct vmacache { - u64 seqnum; - struct vm_area_struct *vmas[VMACACHE_SIZE]; -}; - -/* * When updating this, please also update struct resident_page_types[] in * kernel/fork.c */ diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 8a30de08e913..c726ea781255 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -293,6 +293,7 @@ struct mmc_card { #define MMC_QUIRK_BROKEN_IRQ_POLLING (1<<11) /* Polling SDIO_CCCR_INTx could create a fake interrupt */ #define MMC_QUIRK_TRIM_BROKEN (1<<12) /* Skip trim */ #define MMC_QUIRK_BROKEN_HPI (1<<13) /* Disable broken HPI support */ +#define MMC_QUIRK_BROKEN_SD_DISCARD (1<<14) /* Disable broken SD discard support */ bool reenable_cmdq; /* Re-enable Command Queue */ diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 15ae78cd2853..b8728d11c949 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -94,6 +94,12 @@ void dump_mm(const struct mm_struct *mm); #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond) #endif +#ifdef CONFIG_DEBUG_VM_IRQSOFF +#define VM_WARN_ON_IRQS_ENABLED() WARN_ON_ONCE(!irqs_disabled()) +#else +#define VM_WARN_ON_IRQS_ENABLED() do { } while (0) +#endif + #ifdef CONFIG_DEBUG_VIRTUAL #define VIRTUAL_BUG_ON(cond) BUG_ON(cond) #else diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e24b40c52468..5f74891556f3 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -24,10 +24,10 @@ #include <asm/page.h> /* Free memory management - zoned buddy allocator. */ -#ifndef CONFIG_FORCE_MAX_ZONEORDER +#ifndef CONFIG_ARCH_FORCE_MAX_ORDER #define MAX_ORDER 11 #else -#define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER +#define MAX_ORDER CONFIG_ARCH_FORCE_MAX_ORDER #endif #define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1)) @@ -121,20 +121,6 @@ static inline bool free_area_empty(struct free_area *area, int migratetype) struct pglist_data; -/* - * Add a wild amount of padding here to ensure data fall into separate - * cachelines. There are very few zone structures in the machine, so space - * consumption is not a concern here. - */ -#if defined(CONFIG_SMP) -struct zone_padding { - char x[0]; -} ____cacheline_internodealigned_in_smp; -#define ZONE_PADDING(name) struct zone_padding name; -#else -#define ZONE_PADDING(name) -#endif - #ifdef CONFIG_NUMA enum numa_stat_item { NUMA_HIT, /* allocated in intended node */ @@ -216,11 +202,13 @@ enum node_stat_item { NR_KERNEL_SCS_KB, /* measured in KiB */ #endif NR_PAGETABLE, /* used for pagetables */ + NR_SECONDARY_PAGETABLE, /* secondary pagetables, e.g. KVM pagetables */ #ifdef CONFIG_SWAP NR_SWAPCACHE, #endif #ifdef CONFIG_NUMA_BALANCING PGPROMOTE_SUCCESS, /* promote successfully */ + PGPROMOTE_CANDIDATE, /* candidate pages to promote */ #endif NR_VM_NODE_STAT_ITEMS }; @@ -306,6 +294,8 @@ static inline bool is_active_lru(enum lru_list lru) return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE); } +#define WORKINGSET_ANON 0 +#define WORKINGSET_FILE 1 #define ANON_AND_FILE 2 enum lruvec_flags { @@ -314,6 +304,207 @@ enum lruvec_flags { */ }; +#endif /* !__GENERATING_BOUNDS_H */ + +/* + * Evictable pages are divided into multiple generations. The youngest and the + * oldest generation numbers, max_seq and min_seq, are monotonically increasing. + * They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An + * offset within MAX_NR_GENS, i.e., gen, indexes the LRU list of the + * corresponding generation. The gen counter in folio->flags stores gen+1 while + * a page is on one of lrugen->lists[]. Otherwise it stores 0. + * + * A page is added to the youngest generation on faulting. The aging needs to + * check the accessed bit at least twice before handing this page over to the + * eviction. The first check takes care of the accessed bit set on the initial + * fault; the second check makes sure this page hasn't been used since then. + * This process, AKA second chance, requires a minimum of two generations, + * hence MIN_NR_GENS. And to maintain ABI compatibility with the active/inactive + * LRU, e.g., /proc/vmstat, these two generations are considered active; the + * rest of generations, if they exist, are considered inactive. See + * lru_gen_is_active(). + * + * PG_active is always cleared while a page is on one of lrugen->lists[] so that + * the aging needs not to worry about it. And it's set again when a page + * considered active is isolated for non-reclaiming purposes, e.g., migration. + * See lru_gen_add_folio() and lru_gen_del_folio(). + * + * MAX_NR_GENS is set to 4 so that the multi-gen LRU can support twice the + * number of categories of the active/inactive LRU when keeping track of + * accesses through page tables. This requires order_base_2(MAX_NR_GENS+1) bits + * in folio->flags. + */ +#define MIN_NR_GENS 2U +#define MAX_NR_GENS 4U + +/* + * Each generation is divided into multiple tiers. A page accessed N times + * through file descriptors is in tier order_base_2(N). A page in the first tier + * (N=0,1) is marked by PG_referenced unless it was faulted in through page + * tables or read ahead. A page in any other tier (N>1) is marked by + * PG_referenced and PG_workingset. This implies a minimum of two tiers is + * supported without using additional bits in folio->flags. + * + * In contrast to moving across generations which requires the LRU lock, moving + * across tiers only involves atomic operations on folio->flags and therefore + * has a negligible cost in the buffered access path. In the eviction path, + * comparisons of refaulted/(evicted+protected) from the first tier and the + * rest infer whether pages accessed multiple times through file descriptors + * are statistically hot and thus worth protecting. + * + * MAX_NR_TIERS is set to 4 so that the multi-gen LRU can support twice the + * number of categories of the active/inactive LRU when keeping track of + * accesses through file descriptors. This uses MAX_NR_TIERS-2 spare bits in + * folio->flags. + */ +#define MAX_NR_TIERS 4U + +#ifndef __GENERATING_BOUNDS_H + +struct lruvec; +struct page_vma_mapped_walk; + +#define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF) +#define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF) + +#ifdef CONFIG_LRU_GEN + +enum { + LRU_GEN_ANON, + LRU_GEN_FILE, +}; + +enum { + LRU_GEN_CORE, + LRU_GEN_MM_WALK, + LRU_GEN_NONLEAF_YOUNG, + NR_LRU_GEN_CAPS +}; + +#define MIN_LRU_BATCH BITS_PER_LONG +#define MAX_LRU_BATCH (MIN_LRU_BATCH * 64) + +/* whether to keep historical stats from evicted generations */ +#ifdef CONFIG_LRU_GEN_STATS +#define NR_HIST_GENS MAX_NR_GENS +#else +#define NR_HIST_GENS 1U +#endif + +/* + * The youngest generation number is stored in max_seq for both anon and file + * types as they are aged on an equal footing. The oldest generation numbers are + * stored in min_seq[] separately for anon and file types as clean file pages + * can be evicted regardless of swap constraints. + * + * Normally anon and file min_seq are in sync. But if swapping is constrained, + * e.g., out of swap space, file min_seq is allowed to advance and leave anon + * min_seq behind. + * + * The number of pages in each generation is eventually consistent and therefore + * can be transiently negative when reset_batch_size() is pending. + */ +struct lru_gen_struct { + /* the aging increments the youngest generation number */ + unsigned long max_seq; + /* the eviction increments the oldest generation numbers */ + unsigned long min_seq[ANON_AND_FILE]; + /* the birth time of each generation in jiffies */ + unsigned long timestamps[MAX_NR_GENS]; + /* the multi-gen LRU lists, lazily sorted on eviction */ + struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; + /* the multi-gen LRU sizes, eventually consistent */ + long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; + /* the exponential moving average of refaulted */ + unsigned long avg_refaulted[ANON_AND_FILE][MAX_NR_TIERS]; + /* the exponential moving average of evicted+protected */ + unsigned long avg_total[ANON_AND_FILE][MAX_NR_TIERS]; + /* the first tier doesn't need protection, hence the minus one */ + unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS - 1]; + /* can be modified without holding the LRU lock */ + atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; + atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; + /* whether the multi-gen LRU is enabled */ + bool enabled; +}; + +enum { + MM_LEAF_TOTAL, /* total leaf entries */ + MM_LEAF_OLD, /* old leaf entries */ + MM_LEAF_YOUNG, /* young leaf entries */ + MM_NONLEAF_TOTAL, /* total non-leaf entries */ + MM_NONLEAF_FOUND, /* non-leaf entries found in Bloom filters */ + MM_NONLEAF_ADDED, /* non-leaf entries added to Bloom filters */ + NR_MM_STATS +}; + +/* double-buffering Bloom filters */ +#define NR_BLOOM_FILTERS 2 + +struct lru_gen_mm_state { + /* set to max_seq after each iteration */ + unsigned long seq; + /* where the current iteration continues (inclusive) */ + struct list_head *head; + /* where the last iteration ended (exclusive) */ + struct list_head *tail; + /* to wait for the last page table walker to finish */ + struct wait_queue_head wait; + /* Bloom filters flip after each iteration */ + unsigned long *filters[NR_BLOOM_FILTERS]; + /* the mm stats for debugging */ + unsigned long stats[NR_HIST_GENS][NR_MM_STATS]; + /* the number of concurrent page table walkers */ + int nr_walkers; +}; + +struct lru_gen_mm_walk { + /* the lruvec under reclaim */ + struct lruvec *lruvec; + /* unstable max_seq from lru_gen_struct */ + unsigned long max_seq; + /* the next address within an mm to scan */ + unsigned long next_addr; + /* to batch promoted pages */ + int nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; + /* to batch the mm stats */ + int mm_stats[NR_MM_STATS]; + /* total batched items */ + int batched; + bool can_swap; + bool force_scan; +}; + +void lru_gen_init_lruvec(struct lruvec *lruvec); +void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); + +#ifdef CONFIG_MEMCG +void lru_gen_init_memcg(struct mem_cgroup *memcg); +void lru_gen_exit_memcg(struct mem_cgroup *memcg); +#endif + +#else /* !CONFIG_LRU_GEN */ + +static inline void lru_gen_init_lruvec(struct lruvec *lruvec) +{ +} + +static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) +{ +} + +#ifdef CONFIG_MEMCG +static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) +{ +} + +static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg) +{ +} +#endif + +#endif /* CONFIG_LRU_GEN */ + struct lruvec { struct list_head lists[NR_LRU_LISTS]; /* per lruvec lru_lock for memcg */ @@ -331,6 +522,12 @@ struct lruvec { unsigned long refaults[ANON_AND_FILE]; /* Various lruvec state flags (enum lruvec_flags) */ unsigned long flags; +#ifdef CONFIG_LRU_GEN + /* evictable pages divided into generations */ + struct lru_gen_struct lrugen; + /* to concurrently iterate lru_gen_mm_list */ + struct lru_gen_mm_state mm_state; +#endif #ifdef CONFIG_MEMCG struct pglist_data *pgdat; #endif @@ -368,13 +565,6 @@ enum zone_watermarks { #define NR_LOWORDER_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1)) #define NR_PCP_LISTS (NR_LOWORDER_PCP_LISTS + NR_PCP_THP) -/* - * Shift to encode migratetype and order in the same integer, with order - * in the least significant bits. - */ -#define NR_PCP_ORDER_WIDTH 8 -#define NR_PCP_ORDER_MASK ((1<<NR_PCP_ORDER_WIDTH) - 1) - #define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost) #define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost) #define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost) @@ -627,7 +817,7 @@ struct zone { int initialized; /* Write-intensive fields used from the page allocator */ - ZONE_PADDING(_pad1_) + CACHELINE_PADDING(_pad1_); /* free areas of different sizes */ struct free_area free_area[MAX_ORDER]; @@ -639,7 +829,7 @@ struct zone { spinlock_t lock; /* Write-intensive fields used by compaction and vmstats. */ - ZONE_PADDING(_pad2_) + CACHELINE_PADDING(_pad2_); /* * When free pages are below this point, additional steps are taken @@ -676,7 +866,7 @@ struct zone { bool contiguous; - ZONE_PADDING(_pad3_) + CACHELINE_PADDING(_pad3_); /* Zone statistics */ atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS]; @@ -746,6 +936,8 @@ static inline bool zone_is_empty(struct zone *zone) #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) #define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH) #define KASAN_TAG_PGOFF (LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH) +#define LRU_GEN_PGOFF (KASAN_TAG_PGOFF - LRU_GEN_WIDTH) +#define LRU_REFS_PGOFF (LRU_GEN_PGOFF - LRU_REFS_WIDTH) /* * Define the bit shifts to access each section. For non-existent @@ -953,8 +1145,10 @@ typedef struct pglist_data { atomic_t nr_writeback_throttled;/* nr of writeback-throttled tasks */ unsigned long nr_reclaim_start; /* nr pages written while throttled * when throttling started. */ - struct task_struct *kswapd; /* Protected by - mem_hotplug_begin/done() */ +#ifdef CONFIG_MEMORY_HOTPLUG + struct mutex kswapd_lock; +#endif + struct task_struct *kswapd; /* Protected by kswapd_lock */ int kswapd_order; enum zone_type kswapd_highest_zoneidx; @@ -982,7 +1176,7 @@ typedef struct pglist_data { #endif /* CONFIG_NUMA */ /* Write-intensive fields used by page reclaim */ - ZONE_PADDING(_pad1_) + CACHELINE_PADDING(_pad1_); #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* @@ -996,6 +1190,21 @@ typedef struct pglist_data { struct deferred_split deferred_split_queue; #endif +#ifdef CONFIG_NUMA_BALANCING + /* start time in ms of current promote rate limit period */ + unsigned int nbp_rl_start; + /* number of promote candidate pages at start time of current rate limit period */ + unsigned long nbp_rl_nr_cand; + /* promote threshold in ms */ + unsigned int nbp_threshold; + /* start time in ms of current promote threshold adjustment period */ + unsigned int nbp_th_start; + /* + * number of promote candidate pages at stat time of current promote + * threshold adjustment period + */ + unsigned long nbp_th_nr_cand; +#endif /* Fields commonly accessed by the page reclaim scanner */ /* @@ -1007,11 +1216,19 @@ typedef struct pglist_data { unsigned long flags; - ZONE_PADDING(_pad2_) +#ifdef CONFIG_LRU_GEN + /* kswap mm walk data */ + struct lru_gen_mm_walk mm_walk; +#endif + + CACHELINE_PADDING(_pad2_); /* Per-node vmstats */ struct per_cpu_nodestat __percpu *per_cpu_nodestats; atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS]; +#ifdef CONFIG_NUMA + struct memory_tier __rcu *memtier; +#endif } pg_data_t; #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) @@ -1025,11 +1242,6 @@ static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat) return pgdat->node_start_pfn + pgdat->node_spanned_pages; } -static inline bool pgdat_is_empty(pg_data_t *pgdat) -{ - return !pgdat->node_start_pfn && !pgdat->node_spanned_pages; -} - #include <linux/memory_hotplug.h> void build_all_zonelists(pg_data_t *pgdat); diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 955aee14b0f7..7c58c44662b8 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -40,6 +40,12 @@ struct mtd_erase_region_info { unsigned long *lockmap; /* If keeping bitmap of locks */ }; +struct mtd_req_stats { + unsigned int uncorrectable_errors; + unsigned int corrected_bitflips; + unsigned int max_bitflips; +}; + /** * struct mtd_oob_ops - oob operation operands * @mode: operation mode @@ -70,6 +76,7 @@ struct mtd_oob_ops { uint32_t ooboffs; uint8_t *datbuf; uint8_t *oobbuf; + struct mtd_req_stats *stats; }; /** @@ -677,6 +684,7 @@ extern int mtd_device_unregister(struct mtd_info *master); extern struct mtd_info *get_mtd_device(struct mtd_info *mtd, int num); extern int __get_mtd_device(struct mtd_info *mtd); extern void __put_mtd_device(struct mtd_info *mtd); +extern struct mtd_info *of_get_mtd_device_by_node(struct device_node *np); extern struct mtd_info *get_mtd_device_nm(const char *name); extern void put_mtd_device(struct mtd_info *mtd); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index eddf8ee270e7..a36edb0ec199 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3663,9 +3663,8 @@ static inline bool netif_attr_test_online(unsigned long j, static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp, unsigned int nr_bits) { - /* -1 is a legal arg here. */ - if (n != -1) - cpu_max_bits_warn(n, nr_bits); + /* n is a prior cpu */ + cpu_max_bits_warn(n + 1, nr_bits); if (srcp) return find_next_bit(srcp, nr_bits, n + 1); @@ -3686,9 +3685,8 @@ static inline int netif_attrmask_next_and(int n, const unsigned long *src1p, const unsigned long *src2p, unsigned int nr_bits) { - /* -1 is a legal arg here. */ - if (n != -1) - cpu_max_bits_warn(n, nr_bits); + /* n is a prior cpu */ + cpu_max_bits_warn(n + 1, nr_bits); if (src1p && src2p) return find_next_and_bit(src1p, src2p, nr_bits, n + 1); diff --git a/include/linux/node.h b/include/linux/node.h index 40d641a8bfb0..427a5975cf40 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -2,15 +2,15 @@ /* * include/linux/node.h - generic node definition * - * This is mainly for topological representation. We define the - * basic 'struct node' here, which can be embedded in per-arch + * This is mainly for topological representation. We define the + * basic 'struct node' here, which can be embedded in per-arch * definitions of processors. * * Basic handling of the devices is done in drivers/base/node.c - * and system devices are handled in drivers/base/sys.c. + * and system devices are handled in drivers/base/sys.c. * * Nodes are exported via driverfs in the class/node/devices/ - * directory. + * directory. */ #ifndef _LINUX_NODE_H_ #define _LINUX_NODE_H_ @@ -18,7 +18,6 @@ #include <linux/device.h> #include <linux/cpumask.h> #include <linux/list.h> -#include <linux/workqueue.h> /** * struct node_hmem_attrs - heterogeneous memory performance attributes @@ -84,10 +83,6 @@ static inline void node_set_perf_attrs(unsigned int nid, struct node { struct device dev; struct list_head access_list; - -#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_HUGETLBFS) - struct work_struct node_work; -#endif #ifdef CONFIG_HMEM_REPORTING struct list_head cache_attrs; struct device *cache_dev; @@ -96,7 +91,6 @@ struct node { struct memory_block; extern struct node *node_devices[]; -typedef void (*node_registration_func_t)(struct node *); #if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_NUMA) void register_memory_blocks_under_node(int nid, unsigned long start_pfn, @@ -144,11 +138,6 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk); extern int register_memory_node_under_compute_node(unsigned int mem_nid, unsigned int cpu_nid, unsigned access); - -#ifdef CONFIG_HUGETLBFS -extern void register_hugetlbfs_with_node(node_registration_func_t doregister, - node_registration_func_t unregister); -#endif #else static inline void node_dev_init(void) { @@ -176,18 +165,8 @@ static inline int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) static inline void unregister_memory_block_under_nodes(struct memory_block *mem_blk) { } - -static inline void register_hugetlbfs_with_node(node_registration_func_t reg, - node_registration_func_t unreg) -{ -} #endif #define to_node(device) container_of(device, struct node, dev) -static inline bool node_is_toptier(int node) -{ - return node_state(node, N_CPU); -} - #endif /* _LINUX_NODE_H_ */ diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 4b71a96190a8..378956c93c94 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -493,6 +493,7 @@ static inline int num_node_state(enum node_states state) #define first_online_node 0 #define first_memory_node 0 #define next_online_node(nid) (MAX_NUMNODES) +#define next_memory_node(nid) (MAX_NUMNODES) #define nr_node_ids 1U #define nr_online_nodes 1U @@ -504,12 +505,20 @@ static inline int num_node_state(enum node_states state) static inline int node_random(const nodemask_t *maskp) { #if defined(CONFIG_NUMA) && (MAX_NUMNODES > 1) - int w, bit = NUMA_NO_NODE; + int w, bit; w = nodes_weight(*maskp); - if (w) - bit = bitmap_ord_to_pos(maskp->bits, - get_random_int() % w, MAX_NUMNODES); + switch (w) { + case 0: + bit = NUMA_NO_NODE; + break; + case 1: + bit = first_node(*maskp); + break; + default: + bit = find_nth_bit(maskp->bits, MAX_NUMNODES, get_random_int() % w); + break; + } return bit; #else return 0; diff --git a/include/linux/of.h b/include/linux/of.h index 766d002bddb9..6b79ef9a6541 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -342,7 +342,7 @@ extern int of_property_read_string_helper(const struct device_node *np, const char **out_strs, size_t sz, int index); extern int of_device_is_compatible(const struct device_node *device, const char *); -extern int of_device_compatible_match(struct device_node *device, +extern int of_device_compatible_match(const struct device_node *device, const char *const *compat); extern bool of_device_is_available(const struct device_node *device); extern bool of_device_is_big_endian(const struct device_node *device); @@ -562,7 +562,7 @@ static inline int of_device_is_compatible(const struct device_node *device, return 0; } -static inline int of_device_compatible_match(struct device_node *device, +static inline int of_device_compatible_match(const struct device_node *device, const char *const *compat) { return 0; diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 83fccd0c9bba..d6d3eae2f145 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -37,9 +37,8 @@ extern unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data); extern int of_irq_to_resource(struct device_node *dev, int index, struct resource *r); -extern void of_irq_init(const struct of_device_id *matches); - #ifdef CONFIG_OF_IRQ +extern void of_irq_init(const struct of_device_id *matches); extern int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_args *out_irq); extern int of_irq_count(struct device_node *dev); @@ -57,6 +56,9 @@ extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev, extern void of_msi_configure(struct device *dev, struct device_node *np); u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in); #else +static inline void of_irq_init(const struct of_device_id *matches) +{ +} static inline int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_args *out_irq) { diff --git a/include/linux/oom.h b/include/linux/oom.h index 02d1e7bbd8cd..7d0c9c48a0c5 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -78,15 +78,6 @@ static inline bool tsk_is_oom_victim(struct task_struct * tsk) } /* - * Use this helper if tsk->mm != mm and the victim mm needs a special - * handling. This is guaranteed to stay true after once set. - */ -static inline bool mm_is_oom_victim(struct mm_struct *mm) -{ - return test_bit(MMF_OOM_VICTIM, &mm->flags); -} - -/* * Checks whether a page fault on the given mm is still reliable. * This is no longer true if the oom reaper started to reap the * address space which is reflected by MMF_UNSTABLE flag set in @@ -106,8 +97,6 @@ static inline vm_fault_t check_stable_address_space(struct mm_struct *mm) return 0; } -bool __oom_reap_task_mm(struct mm_struct *mm); - long oom_badness(struct task_struct *p, unsigned long totalpages); diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index ef1e3e736e14..7d79818dc065 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -55,7 +55,8 @@ #define SECTIONS_WIDTH 0 #endif -#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS +#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_SHIFT \ + <= BITS_PER_LONG - NR_PAGEFLAGS #define NODES_WIDTH NODES_SHIFT #elif defined(CONFIG_SPARSEMEM_VMEMMAP) #error "Vmemmap: No space for nodes field in page flags" @@ -89,8 +90,8 @@ #define LAST_CPUPID_SHIFT 0 #endif -#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT \ - <= BITS_PER_LONG - NR_PAGEFLAGS +#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \ + KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT #else #define LAST_CPUPID_WIDTH 0 @@ -100,10 +101,15 @@ #define LAST_CPUPID_NOT_IN_PAGE_FLAGS #endif -#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH \ - > BITS_PER_LONG - NR_PAGEFLAGS +#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \ + KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS #error "Not enough bits in page flags" #endif +/* see the comment on MAX_NR_TIERS */ +#define LRU_REFS_WIDTH min(__LRU_REFS_WIDTH, BITS_PER_LONG - NR_PAGEFLAGS - \ + ZONES_WIDTH - LRU_GEN_WIDTH - SECTIONS_WIDTH - \ + NODES_WIDTH - KASAN_TAG_WIDTH - LAST_CPUPID_WIDTH) + #endif #endif /* _LINUX_PAGE_FLAGS_LAYOUT */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 465ff35a8c00..0b0ae5084e60 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -1058,7 +1058,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) 1UL << PG_private | 1UL << PG_private_2 | \ 1UL << PG_writeback | 1UL << PG_reserved | \ 1UL << PG_slab | 1UL << PG_active | \ - 1UL << PG_unevictable | __PG_MLOCKED) + 1UL << PG_unevictable | __PG_MLOCKED | LRU_GEN_MASK) /* * Flags checked when a page is prepped for return by the page allocator. @@ -1069,7 +1069,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) * alloc-free cycle to prevent from reusing the page. */ #define PAGE_FLAGS_CHECK_AT_PREP \ - (PAGEFLAGS_MASK & ~__PG_HWPOISON) + ((PAGEFLAGS_MASK & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_REFS_MASK) #define PAGE_FLAGS_PRIVATE \ (1UL << PG_private | 1UL << PG_private_2) diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 679591301994..c141ea9a95ef 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -3,15 +3,17 @@ #define _LINUX_PAGE_COUNTER_H #include <linux/atomic.h> +#include <linux/cache.h> #include <linux/kernel.h> #include <asm/page.h> struct page_counter { + /* + * Make sure 'usage' does not share cacheline with any other field. The + * memcg->memory.usage is a hot member of struct mem_cgroup. + */ atomic_long_t usage; - unsigned long min; - unsigned long low; - unsigned long high; - unsigned long max; + CACHELINE_PADDING(_pad1_); /* effective memory.min and memory.min usage tracking */ unsigned long emin; @@ -23,18 +25,18 @@ struct page_counter { atomic_long_t low_usage; atomic_long_t children_low_usage; - /* legacy */ unsigned long watermark; unsigned long failcnt; - /* - * 'parent' is placed here to be far from 'usage' to reduce - * cache false sharing, as 'usage' is written mostly while - * parent is frequently read for cgroup's hierarchical - * counting nature. - */ + /* Keep all the read most fields in a separete cacheline. */ + CACHELINE_PADDING(_pad2_); + + unsigned long min; + unsigned long low; + unsigned long high; + unsigned long max; struct page_counter *parent; -}; +} ____cacheline_internodealigned_in_smp; #if BITS_PER_LONG == 32 #define PAGE_COUNTER_MAX LONG_MAX diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index fabb2e1e087f..22be4582faae 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -36,9 +36,15 @@ struct page_ext { unsigned long flags; }; +extern bool early_page_ext; extern unsigned long page_ext_size; extern void pgdat_page_ext_init(struct pglist_data *pgdat); +static inline bool early_page_ext_enabled(void) +{ + return early_page_ext; +} + #ifdef CONFIG_SPARSEMEM static inline void page_ext_init_flatmem(void) { @@ -55,7 +61,8 @@ static inline void page_ext_init(void) } #endif -struct page_ext *lookup_page_ext(const struct page *page); +extern struct page_ext *page_ext_get(struct page *page); +extern void page_ext_put(struct page_ext *page_ext); static inline struct page_ext *page_ext_next(struct page_ext *curr) { @@ -67,13 +74,13 @@ static inline struct page_ext *page_ext_next(struct page_ext *curr) #else /* !CONFIG_PAGE_EXTENSION */ struct page_ext; -static inline void pgdat_page_ext_init(struct pglist_data *pgdat) +static inline bool early_page_ext_enabled(void) { + return false; } -static inline struct page_ext *lookup_page_ext(const struct page *page) +static inline void pgdat_page_ext_init(struct pglist_data *pgdat) { - return NULL; } static inline void page_ext_init(void) @@ -87,5 +94,14 @@ static inline void page_ext_init_flatmem_late(void) static inline void page_ext_init_flatmem(void) { } + +static inline struct page_ext *page_ext_get(struct page *page) +{ + return NULL; +} + +static inline void page_ext_put(struct page_ext *page_ext) +{ +} #endif /* CONFIG_PAGE_EXTENSION */ #endif /* __LINUX_PAGE_EXT_H */ diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index 4663dfed1293..5cb7bd2078ec 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -13,65 +13,79 @@ * If there is not enough space to store Idle and Young bits in page flags, use * page ext flags instead. */ - static inline bool folio_test_young(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(&folio->page); + struct page_ext *page_ext = page_ext_get(&folio->page); + bool page_young; if (unlikely(!page_ext)) return false; - return test_bit(PAGE_EXT_YOUNG, &page_ext->flags); + page_young = test_bit(PAGE_EXT_YOUNG, &page_ext->flags); + page_ext_put(page_ext); + + return page_young; } static inline void folio_set_young(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(&folio->page); + struct page_ext *page_ext = page_ext_get(&folio->page); if (unlikely(!page_ext)) return; set_bit(PAGE_EXT_YOUNG, &page_ext->flags); + page_ext_put(page_ext); } static inline bool folio_test_clear_young(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(&folio->page); + struct page_ext *page_ext = page_ext_get(&folio->page); + bool page_young; if (unlikely(!page_ext)) return false; - return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags); + page_young = test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags); + page_ext_put(page_ext); + + return page_young; } static inline bool folio_test_idle(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(&folio->page); + struct page_ext *page_ext = page_ext_get(&folio->page); + bool page_idle; if (unlikely(!page_ext)) return false; - return test_bit(PAGE_EXT_IDLE, &page_ext->flags); + page_idle = test_bit(PAGE_EXT_IDLE, &page_ext->flags); + page_ext_put(page_ext); + + return page_idle; } static inline void folio_set_idle(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(&folio->page); + struct page_ext *page_ext = page_ext_get(&folio->page); if (unlikely(!page_ext)) return; set_bit(PAGE_EXT_IDLE, &page_ext->flags); + page_ext_put(page_ext); } static inline void folio_clear_idle(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(&folio->page); + struct page_ext *page_ext = page_ext_get(&folio->page); if (unlikely(!page_ext)) return; clear_bit(PAGE_EXT_IDLE, &page_ext->flags); + page_ext_put(page_ext); } #endif /* !CONFIG_64BIT */ diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index 83c7248053a1..5f1ae07d724b 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -53,6 +53,10 @@ extern unsigned int pageblock_order; #endif /* CONFIG_HUGETLB_PAGE */ #define pageblock_nr_pages (1UL << pageblock_order) +#define pageblock_align(pfn) ALIGN((pfn), pageblock_nr_pages) +#define pageblock_aligned(pfn) IS_ALIGNED((pfn), pageblock_nr_pages) +#define pageblock_start_pfn(pfn) ALIGN_DOWN((pfn), pageblock_nr_pages) +#define pageblock_end_pfn(pfn) ALIGN((pfn) + 1, pageblock_nr_pages) /* Forward declaration */ struct page; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 201dc7281640..bbccb4044222 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -718,8 +718,8 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index) unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch); -unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start, - unsigned int nr_pages, struct page **pages); +unsigned filemap_get_folios_contig(struct address_space *mapping, + pgoff_t *start, pgoff_t end, struct folio_batch *fbatch); unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index, pgoff_t end, xa_mark_t tag, unsigned int nr_pages, struct page **pages); @@ -989,19 +989,16 @@ static inline int lock_page_killable(struct page *page) } /* - * lock_page_or_retry - Lock the page, unless this would block and the + * folio_lock_or_retry - Lock the folio, unless this would block and the * caller indicated that it can handle a retry. * * Return value and mmap_lock implications depend on flags; see * __folio_lock_or_retry(). */ -static inline bool lock_page_or_retry(struct page *page, struct mm_struct *mm, - unsigned int flags) +static inline bool folio_lock_or_retry(struct folio *folio, + struct mm_struct *mm, unsigned int flags) { - struct folio *folio; might_sleep(); - - folio = page_folio(page); return folio_trylock(folio) || __folio_lock_or_retry(folio, mm, flags); } @@ -1042,7 +1039,6 @@ static inline int wait_on_page_locked_killable(struct page *page) return folio_wait_locked_killable(page_folio(page)); } -int folio_put_wait_locked(struct folio *folio, int state); void wait_on_page_writeback(struct page *page); void folio_wait_writeback(struct folio *folio); int folio_wait_writeback_killable(struct folio *folio); diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h index ac7b38ad5903..f3fafb731ffd 100644 --- a/include/linux/pagewalk.h +++ b/include/linux/pagewalk.h @@ -15,12 +15,12 @@ struct mm_walk; * this handler is required to be able to handle * pmd_trans_huge() pmds. They may simply choose to * split_huge_page() instead of handling it explicitly. - * @pte_entry: if set, called for each non-empty PTE (lowest-level) - * entry + * @pte_entry: if set, called for each PTE (lowest-level) entry, + * including empty ones * @pte_hole: if set, called for each hole at all levels, - * depth is -1 if not known, 0:PGD, 1:P4D, 2:PUD, 3:PMD - * 4:PTE. Any folded depths (where PTRS_PER_P?D is equal - * to 1) are skipped. + * depth is -1 if not known, 0:PGD, 1:P4D, 2:PUD, 3:PMD. + * Any folded depths (where PTRS_PER_P?D is equal to 1) + * are skipped. * @hugetlb_entry: if set, called for each hugetlb entry * @test_walk: caller specific callback function to determine whether * we walk over the current vma or not. Returning 0 means diff --git a/include/linux/pci.h b/include/linux/pci.h index 5da0846aa3c1..2bda4a4e47e8 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -475,6 +475,7 @@ struct pci_dev { unsigned int broken_cmd_compl:1; /* No compl for some cmds */ #endif #ifdef CONFIG_PCIE_PTM + u16 ptm_cap; /* PTM Capability */ unsigned int ptm_root:1; unsigned int ptm_enabled:1; u8 ptm_granularity; @@ -1677,10 +1678,12 @@ bool pci_ats_disabled(void); #ifdef CONFIG_PCIE_PTM int pci_enable_ptm(struct pci_dev *dev, u8 *granularity); +void pci_disable_ptm(struct pci_dev *dev); bool pcie_ptm_enabled(struct pci_dev *dev); #else static inline int pci_enable_ptm(struct pci_dev *dev, u8 *granularity) { return -EINVAL; } +static inline void pci_disable_ptm(struct pci_dev *dev) { } static inline bool pcie_ptm_enabled(struct pci_dev *dev) { return false; } #endif diff --git a/include/linux/pe.h b/include/linux/pe.h index daf09ffffe38..1d3836ef9d92 100644 --- a/include/linux/pe.h +++ b/include/linux/pe.h @@ -65,6 +65,8 @@ #define IMAGE_FILE_MACHINE_SH5 0x01a8 #define IMAGE_FILE_MACHINE_THUMB 0x01c2 #define IMAGE_FILE_MACHINE_WCEMIPSV2 0x0169 +#define IMAGE_FILE_MACHINE_LOONGARCH32 0x6232 +#define IMAGE_FILE_MACHINE_LOONGARCH64 0x6264 /* flags */ #define IMAGE_FILE_RELOCS_STRIPPED 0x0001 diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 5fda40f97fe9..36b942b67b7d 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -121,9 +121,15 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem) preempt_enable(); } +extern bool percpu_is_read_locked(struct percpu_rw_semaphore *); extern void percpu_down_write(struct percpu_rw_semaphore *); extern void percpu_up_write(struct percpu_rw_semaphore *); +static inline bool percpu_is_write_locked(struct percpu_rw_semaphore *sem) +{ + return atomic_read(&sem->block); +} + extern int __percpu_init_rwsem(struct percpu_rw_semaphore *, const char *, struct lock_class_key *); diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 01861eebed79..8ed5fba6d156 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -15,6 +15,9 @@ #include <linux/types.h> #include <linux/gfp.h> +/* percpu_counter batch for local add or sub */ +#define PERCPU_COUNTER_LOCAL_BATCH INT_MAX + #ifdef CONFIG_SMP struct percpu_counter { @@ -56,6 +59,22 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) percpu_counter_add_batch(fbc, amount, percpu_counter_batch); } +/* + * With percpu_counter_add_local() and percpu_counter_sub_local(), counts + * are accumulated in local per cpu counter and not in fbc->count until + * local count overflows PERCPU_COUNTER_LOCAL_BATCH. This makes counter + * write efficient. + * But percpu_counter_sum(), instead of percpu_counter_read(), needs to be + * used to add up the counts from each CPU to account for all the local + * counts. So percpu_counter_add_local() and percpu_counter_sub_local() + * should be used when a counter is updated frequently and read rarely. + */ +static inline void +percpu_counter_add_local(struct percpu_counter *fbc, s64 amount) +{ + percpu_counter_add_batch(fbc, amount, PERCPU_COUNTER_LOCAL_BATCH); +} + static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) { s64 ret = __percpu_counter_sum(fbc); @@ -138,6 +157,13 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount) preempt_enable(); } +/* non-SMP percpu_counter_add_local is the same with percpu_counter_add */ +static inline void +percpu_counter_add_local(struct percpu_counter *fbc, s64 amount) +{ + percpu_counter_add(fbc, amount); +} + static inline void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) { @@ -193,4 +219,10 @@ static inline void percpu_counter_sub(struct percpu_counter *fbc, s64 amount) percpu_counter_add(fbc, -amount); } +static inline void +percpu_counter_sub_local(struct percpu_counter *fbc, s64 amount) +{ + percpu_counter_add_local(fbc, -amount); +} + #endif /* _LINUX_PERCPU_COUNTER_H */ diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 0407a38b470a..0356cb6a215d 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -24,10 +24,11 @@ /* * ARM PMU hw_event flags */ -/* Event uses a 64bit counter */ -#define ARMPMU_EVT_64BIT 1 -/* Event uses a 47bit counter */ -#define ARMPMU_EVT_47BIT 2 +#define ARMPMU_EVT_64BIT 0x00001 /* Event uses a 64bit counter */ +#define ARMPMU_EVT_47BIT 0x00002 /* Event uses a 47bit counter */ + +static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_64BIT) == ARMPMU_EVT_64BIT); +static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_47BIT) == ARMPMU_EVT_47BIT); #define HW_OP_UNSUPPORTED 0xFFFF #define C(_x) PERF_COUNT_HW_CACHE_##_x diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index bf66fe011fa8..e17e86ad6f3a 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -45,7 +45,7 @@ struct riscv_pmu { irqreturn_t (*handle_irq)(int irq_num, void *dev); - int num_counters; + unsigned long cmask; u64 (*ctr_read)(struct perf_event *event); int (*ctr_get_idx)(struct perf_event *event); int (*ctr_get_width)(int idx); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ee8b9ecdc03b..853f64b6c8c2 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -36,6 +36,7 @@ struct perf_guest_info_callbacks { }; #ifdef CONFIG_HAVE_HW_BREAKPOINT +#include <linux/rhashtable-types.h> #include <asm/hw_breakpoint.h> #endif @@ -60,6 +61,7 @@ struct perf_guest_info_callbacks { #include <linux/refcount.h> #include <linux/security.h> #include <linux/static_call.h> +#include <linux/lockdep.h> #include <asm/local.h> struct perf_callchain_entry { @@ -137,9 +139,11 @@ struct hw_perf_event_extra { * PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific * usage. */ -#define PERF_EVENT_FLAG_ARCH 0x0000ffff +#define PERF_EVENT_FLAG_ARCH 0x000fffff #define PERF_EVENT_FLAG_USER_READ_CNT 0x80000000 +static_assert((PERF_EVENT_FLAG_USER_READ_CNT & PERF_EVENT_FLAG_ARCH) == 0); + /** * struct hw_perf_event - performance event hardware details: */ @@ -178,7 +182,7 @@ struct hw_perf_event { * creation and event initalization. */ struct arch_hw_breakpoint info; - struct list_head bp_list; + struct rhlist_head bp_list; }; #endif struct { /* amd_iommu */ @@ -631,7 +635,23 @@ struct pmu_event_list { struct list_head list; }; +/* + * event->sibling_list is modified whole holding both ctx->lock and ctx->mutex + * as such iteration must hold either lock. However, since ctx->lock is an IRQ + * safe lock, and is only held by the CPU doing the modification, having IRQs + * disabled is sufficient since it will hold-off the IPIs. + */ +#ifdef CONFIG_PROVE_LOCKING +#define lockdep_assert_event_ctx(event) \ + WARN_ON_ONCE(__lockdep_enabled && \ + (this_cpu_read(hardirqs_enabled) && \ + lockdep_is_held(&(event)->ctx->mutex) != LOCK_STATE_HELD)) +#else +#define lockdep_assert_event_ctx(event) +#endif + #define for_each_sibling_event(sibling, event) \ + lockdep_assert_event_ctx(event); \ if ((event)->group_leader == (event)) \ list_for_each_entry((sibling), &(event)->sibling_list, sibling_list) @@ -1007,18 +1027,20 @@ struct perf_sample_data { * Fields set by perf_sample_data_init(), group so as to * minimize the cachelines touched. */ - u64 addr; - struct perf_raw_record *raw; - struct perf_branch_stack *br_stack; + u64 sample_flags; u64 period; - union perf_sample_weight weight; - u64 txn; - union perf_mem_data_src data_src; /* * The other fields, optionally {set,used} by * perf_{prepare,output}_sample(). */ + struct perf_branch_stack *br_stack; + union perf_sample_weight weight; + union perf_mem_data_src data_src; + u64 txn; + u64 addr; + struct perf_raw_record *raw; + u64 type; u64 ip; struct { @@ -1056,13 +1078,13 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr, u64 period) { /* remaining struct members initialized in perf_prepare_sample() */ - data->addr = addr; - data->raw = NULL; - data->br_stack = NULL; + data->sample_flags = PERF_SAMPLE_PERIOD; data->period = period; - data->weight.full = 0; - data->data_src.val = PERF_MEM_NA; - data->txn = 0; + + if (addr) { + data->addr = addr; + data->sample_flags |= PERF_SAMPLE_ADDR; + } } /* @@ -1078,6 +1100,7 @@ static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *b br->abort = 0; br->cycles = 0; br->type = 0; + br->spec = PERF_BR_SPEC_NA; br->reserved = 0; } @@ -1684,4 +1707,30 @@ static inline void perf_lopwr_cb(bool mode) } #endif +#ifdef CONFIG_PERF_EVENTS +static inline bool branch_sample_no_flags(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS; +} + +static inline bool branch_sample_no_cycles(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES; +} + +static inline bool branch_sample_type(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE; +} + +static inline bool branch_sample_hw_index(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX; +} + +static inline bool branch_sample_priv(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE; +} +#endif /* CONFIG_PERF_EVENTS */ #endif /* _LINUX_PERF_EVENT_H */ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 014ee8f0fbaa..a108b60a6962 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -213,7 +213,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, #endif #ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) @@ -234,7 +234,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, BUILD_BUG(); return 0; } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */ #endif #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH @@ -260,6 +260,19 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif +#ifndef arch_has_hw_pte_young +/* + * Return whether the accessed bit is supported on the local CPU. + * + * This stub assumes accessing through an old PTE triggers a page fault. + * Architectures that automatically set the access bit should overwrite it. + */ +static inline bool arch_has_hw_pte_young(void) +{ + return false; +} +#endif + #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long address, @@ -1276,8 +1289,7 @@ static inline int pgd_devmap(pgd_t pgd) #endif #if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \ - (defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ - !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) + !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) static inline int pud_trans_huge(pud_t pud) { return 0; @@ -1598,11 +1610,7 @@ typedef unsigned int pgtbl_mod_mask; #endif #ifndef has_transparent_hugepage -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define has_transparent_hugepage() 1 -#else -#define has_transparent_hugepage() 0 -#endif +#define has_transparent_hugepage() IS_BUILTIN(CONFIG_TRANSPARENT_HUGEPAGE) #endif /* diff --git a/include/linux/phy/pcie.h b/include/linux/phy/pcie.h new file mode 100644 index 000000000000..e7ac81764576 --- /dev/null +++ b/include/linux/phy/pcie.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022 Rockchip Electronics Co., Ltd. + */ +#ifndef __PHY_PCIE_H +#define __PHY_PCIE_H + +#define PHY_MODE_PCIE_RC 20 +#define PHY_MODE_PCIE_EP 21 +#define PHY_MODE_PCIE_BIFURCATION 22 + +#endif diff --git a/include/linux/phy/tegra/xusb.h b/include/linux/phy/tegra/xusb.h index 3a35e74cdc61..70998e6dd6fd 100644 --- a/include/linux/phy/tegra/xusb.h +++ b/include/linux/phy/tegra/xusb.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved. */ #ifndef PHY_TEGRA_XUSB_H @@ -21,6 +21,8 @@ int tegra_xusb_padctl_usb3_set_lfps_detect(struct tegra_xusb_padctl *padctl, unsigned int port, bool enable); int tegra_xusb_padctl_set_vbus_override(struct tegra_xusb_padctl *padctl, bool val); +void tegra_phy_xusb_utmi_pad_power_on(struct phy *phy); +void tegra_phy_xusb_utmi_pad_power_down(struct phy *phy); int tegra_phy_xusb_utmi_port_reset(struct phy *phy); int tegra_xusb_padctl_get_usb3_companion(struct tegra_xusb_padctl *padctl, unsigned int port); diff --git a/include/linux/platform_data/adp5588.h b/include/linux/platform_data/adp5588.h deleted file mode 100644 index 6d3f7d911a92..000000000000 --- a/include/linux/platform_data/adp5588.h +++ /dev/null @@ -1,171 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Analog Devices ADP5588 I/O Expander and QWERTY Keypad Controller - * - * Copyright 2009-2010 Analog Devices Inc. - */ - -#ifndef _ADP5588_H -#define _ADP5588_H - -#define DEV_ID 0x00 /* Device ID */ -#define CFG 0x01 /* Configuration Register1 */ -#define INT_STAT 0x02 /* Interrupt Status Register */ -#define KEY_LCK_EC_STAT 0x03 /* Key Lock and Event Counter Register */ -#define Key_EVENTA 0x04 /* Key Event Register A */ -#define Key_EVENTB 0x05 /* Key Event Register B */ -#define Key_EVENTC 0x06 /* Key Event Register C */ -#define Key_EVENTD 0x07 /* Key Event Register D */ -#define Key_EVENTE 0x08 /* Key Event Register E */ -#define Key_EVENTF 0x09 /* Key Event Register F */ -#define Key_EVENTG 0x0A /* Key Event Register G */ -#define Key_EVENTH 0x0B /* Key Event Register H */ -#define Key_EVENTI 0x0C /* Key Event Register I */ -#define Key_EVENTJ 0x0D /* Key Event Register J */ -#define KP_LCK_TMR 0x0E /* Keypad Lock1 to Lock2 Timer */ -#define UNLOCK1 0x0F /* Unlock Key1 */ -#define UNLOCK2 0x10 /* Unlock Key2 */ -#define GPIO_INT_STAT1 0x11 /* GPIO Interrupt Status */ -#define GPIO_INT_STAT2 0x12 /* GPIO Interrupt Status */ -#define GPIO_INT_STAT3 0x13 /* GPIO Interrupt Status */ -#define GPIO_DAT_STAT1 0x14 /* GPIO Data Status, Read twice to clear */ -#define GPIO_DAT_STAT2 0x15 /* GPIO Data Status, Read twice to clear */ -#define GPIO_DAT_STAT3 0x16 /* GPIO Data Status, Read twice to clear */ -#define GPIO_DAT_OUT1 0x17 /* GPIO DATA OUT */ -#define GPIO_DAT_OUT2 0x18 /* GPIO DATA OUT */ -#define GPIO_DAT_OUT3 0x19 /* GPIO DATA OUT */ -#define GPIO_INT_EN1 0x1A /* GPIO Interrupt Enable */ -#define GPIO_INT_EN2 0x1B /* GPIO Interrupt Enable */ -#define GPIO_INT_EN3 0x1C /* GPIO Interrupt Enable */ -#define KP_GPIO1 0x1D /* Keypad or GPIO Selection */ -#define KP_GPIO2 0x1E /* Keypad or GPIO Selection */ -#define KP_GPIO3 0x1F /* Keypad or GPIO Selection */ -#define GPI_EM1 0x20 /* GPI Event Mode 1 */ -#define GPI_EM2 0x21 /* GPI Event Mode 2 */ -#define GPI_EM3 0x22 /* GPI Event Mode 3 */ -#define GPIO_DIR1 0x23 /* GPIO Data Direction */ -#define GPIO_DIR2 0x24 /* GPIO Data Direction */ -#define GPIO_DIR3 0x25 /* GPIO Data Direction */ -#define GPIO_INT_LVL1 0x26 /* GPIO Edge/Level Detect */ -#define GPIO_INT_LVL2 0x27 /* GPIO Edge/Level Detect */ -#define GPIO_INT_LVL3 0x28 /* GPIO Edge/Level Detect */ -#define Debounce_DIS1 0x29 /* Debounce Disable */ -#define Debounce_DIS2 0x2A /* Debounce Disable */ -#define Debounce_DIS3 0x2B /* Debounce Disable */ -#define GPIO_PULL1 0x2C /* GPIO Pull Disable */ -#define GPIO_PULL2 0x2D /* GPIO Pull Disable */ -#define GPIO_PULL3 0x2E /* GPIO Pull Disable */ -#define CMP_CFG_STAT 0x30 /* Comparator Configuration and Status Register */ -#define CMP_CONFG_SENS1 0x31 /* Sensor1 Comparator Configuration Register */ -#define CMP_CONFG_SENS2 0x32 /* L2 Light Sensor Reference Level, Output Falling for Sensor 1 */ -#define CMP1_LVL2_TRIP 0x33 /* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 1 */ -#define CMP1_LVL2_HYS 0x34 /* L3 Light Sensor Reference Level, Output Falling For Sensor 1 */ -#define CMP1_LVL3_TRIP 0x35 /* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 1 */ -#define CMP1_LVL3_HYS 0x36 /* Sensor 2 Comparator Configuration Register */ -#define CMP2_LVL2_TRIP 0x37 /* L2 Light Sensor Reference Level, Output Falling for Sensor 2 */ -#define CMP2_LVL2_HYS 0x38 /* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 2 */ -#define CMP2_LVL3_TRIP 0x39 /* L3 Light Sensor Reference Level, Output Falling For Sensor 2 */ -#define CMP2_LVL3_HYS 0x3A /* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 2 */ -#define CMP1_ADC_DAT_R1 0x3B /* Comparator 1 ADC data Register1 */ -#define CMP1_ADC_DAT_R2 0x3C /* Comparator 1 ADC data Register2 */ -#define CMP2_ADC_DAT_R1 0x3D /* Comparator 2 ADC data Register1 */ -#define CMP2_ADC_DAT_R2 0x3E /* Comparator 2 ADC data Register2 */ - -#define ADP5588_DEVICE_ID_MASK 0xF - - /* Configuration Register1 */ -#define ADP5588_AUTO_INC (1 << 7) -#define ADP5588_GPIEM_CFG (1 << 6) -#define ADP5588_OVR_FLOW_M (1 << 5) -#define ADP5588_INT_CFG (1 << 4) -#define ADP5588_OVR_FLOW_IEN (1 << 3) -#define ADP5588_K_LCK_IM (1 << 2) -#define ADP5588_GPI_IEN (1 << 1) -#define ADP5588_KE_IEN (1 << 0) - -/* Interrupt Status Register */ -#define ADP5588_CMP2_INT (1 << 5) -#define ADP5588_CMP1_INT (1 << 4) -#define ADP5588_OVR_FLOW_INT (1 << 3) -#define ADP5588_K_LCK_INT (1 << 2) -#define ADP5588_GPI_INT (1 << 1) -#define ADP5588_KE_INT (1 << 0) - -/* Key Lock and Event Counter Register */ -#define ADP5588_K_LCK_EN (1 << 6) -#define ADP5588_LCK21 0x30 -#define ADP5588_KEC 0xF - -#define ADP5588_MAXGPIO 18 -#define ADP5588_BANK(offs) ((offs) >> 3) -#define ADP5588_BIT(offs) (1u << ((offs) & 0x7)) - -/* Put one of these structures in i2c_board_info platform_data */ - -#define ADP5588_KEYMAPSIZE 80 - -#define GPI_PIN_ROW0 97 -#define GPI_PIN_ROW1 98 -#define GPI_PIN_ROW2 99 -#define GPI_PIN_ROW3 100 -#define GPI_PIN_ROW4 101 -#define GPI_PIN_ROW5 102 -#define GPI_PIN_ROW6 103 -#define GPI_PIN_ROW7 104 -#define GPI_PIN_COL0 105 -#define GPI_PIN_COL1 106 -#define GPI_PIN_COL2 107 -#define GPI_PIN_COL3 108 -#define GPI_PIN_COL4 109 -#define GPI_PIN_COL5 110 -#define GPI_PIN_COL6 111 -#define GPI_PIN_COL7 112 -#define GPI_PIN_COL8 113 -#define GPI_PIN_COL9 114 - -#define GPI_PIN_ROW_BASE GPI_PIN_ROW0 -#define GPI_PIN_ROW_END GPI_PIN_ROW7 -#define GPI_PIN_COL_BASE GPI_PIN_COL0 -#define GPI_PIN_COL_END GPI_PIN_COL9 - -#define GPI_PIN_BASE GPI_PIN_ROW_BASE -#define GPI_PIN_END GPI_PIN_COL_END - -#define ADP5588_GPIMAPSIZE_MAX (GPI_PIN_END - GPI_PIN_BASE + 1) - -struct adp5588_gpi_map { - unsigned short pin; - unsigned short sw_evt; -}; - -struct adp5588_kpad_platform_data { - int rows; /* Number of rows */ - int cols; /* Number of columns */ - const unsigned short *keymap; /* Pointer to keymap */ - unsigned short keymapsize; /* Keymap size */ - unsigned repeat:1; /* Enable key repeat */ - unsigned en_keylock:1; /* Enable Key Lock feature */ - unsigned short unlock_key1; /* Unlock Key 1 */ - unsigned short unlock_key2; /* Unlock Key 2 */ - const struct adp5588_gpi_map *gpimap; - unsigned short gpimapsize; - const struct adp5588_gpio_platform_data *gpio_data; -}; - -struct i2c_client; /* forward declaration */ - -struct adp5588_gpio_platform_data { - int gpio_start; /* GPIO Chip base # */ - const char *const *names; - unsigned irq_base; /* interrupt base # */ - unsigned pullup_dis_mask; /* Pull-Up Disable Mask */ - int (*setup)(struct i2c_client *client, - unsigned gpio, unsigned ngpio, - void *context); - int (*teardown)(struct i2c_client *client, - unsigned gpio, unsigned ngpio, - void *context); - void *context; -}; - -#endif diff --git a/include/linux/platform_data/dma-hsu.h b/include/linux/platform_data/dma-hsu.h index c65b412b2b33..611bae193c1c 100644 --- a/include/linux/platform_data/dma-hsu.h +++ b/include/linux/platform_data/dma-hsu.h @@ -8,7 +8,7 @@ #ifndef _PLATFORM_DATA_DMA_HSU_H #define _PLATFORM_DATA_DMA_HSU_H -#include <linux/device.h> +struct device; struct hsu_dma_slave { struct device *dma_dev; diff --git a/include/linux/platform_data/usb-s3c2410_udc.h b/include/linux/platform_data/usb-s3c2410_udc.h index 07394819d03b..c0fbe1fe3426 100644 --- a/include/linux/platform_data/usb-s3c2410_udc.h +++ b/include/linux/platform_data/usb-s3c2410_udc.h @@ -22,12 +22,6 @@ enum s3c2410_udc_cmd_e { struct s3c2410_udc_mach_info { void (*udc_command)(enum s3c2410_udc_cmd_e); void (*vbus_draw)(unsigned int ma); - - unsigned int pullup_pin; - unsigned int pullup_pin_inverted; - - unsigned int vbus_pin; - unsigned char vbus_pin_inverted; }; extern void __init s3c24xx_udc_set_platdata(struct s3c2410_udc_mach_info *); diff --git a/include/linux/pm.h b/include/linux/pm.h index 871c9c49ec9d..93cd34f00822 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -375,19 +375,20 @@ const struct dev_pm_ops name = { \ } #ifdef CONFIG_PM -#define _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \ - runtime_resume_fn, idle_fn, sec, ns) \ - _DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \ - runtime_resume_fn, idle_fn); \ - __EXPORT_SYMBOL(name, sec, ns) +#define _EXPORT_DEV_PM_OPS(name, sec, ns) \ + const struct dev_pm_ops name; \ + __EXPORT_SYMBOL(name, sec, ns); \ + const struct dev_pm_ops name #else -#define _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \ - runtime_resume_fn, idle_fn, sec, ns) \ -static __maybe_unused _DEFINE_DEV_PM_OPS(__static_##name, suspend_fn, \ - resume_fn, runtime_suspend_fn, \ - runtime_resume_fn, idle_fn) +#define _EXPORT_DEV_PM_OPS(name, sec, ns) \ + static __maybe_unused const struct dev_pm_ops __static_##name #endif +#define EXPORT_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "", "") +#define EXPORT_GPL_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "_gpl", "") +#define EXPORT_NS_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "", #ns) +#define EXPORT_NS_GPL_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "_gpl", #ns) + /* * Use this if you want to use the same suspend and resume callbacks for suspend * to RAM and hibernation. @@ -399,13 +400,21 @@ static __maybe_unused _DEFINE_DEV_PM_OPS(__static_##name, suspend_fn, \ _DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL) #define EXPORT_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ - _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "", "") + EXPORT_DEV_PM_OPS(name) = { \ + SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + } #define EXPORT_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ - _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl", "") + EXPORT_GPL_DEV_PM_OPS(name) = { \ + SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + } #define EXPORT_NS_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \ - _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "", #ns) + EXPORT_NS_DEV_PM_OPS(name, ns) = { \ + SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + } #define EXPORT_NS_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \ - _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl", #ns) + EXPORT_NS_GPL_DEV_PM_OPS(name, ns) = { \ + SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + } /* Deprecated. Use DEFINE_SIMPLE_DEV_PM_OPS() instead. */ #define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 0a41b2dcccad..9a8151a2bdea 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -40,17 +40,21 @@ resume_fn, idle_fn) #define EXPORT_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ - _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \ - suspend_fn, resume_fn, idle_fn, "", "") + EXPORT_DEV_PM_OPS(name) = { \ + RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ + } #define EXPORT_GPL_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ - _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \ - suspend_fn, resume_fn, idle_fn, "_gpl", "") + EXPORT_GPL_DEV_PM_OPS(name) = { \ + RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ + } #define EXPORT_NS_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn, ns) \ - _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \ - suspend_fn, resume_fn, idle_fn, "", #ns) + EXPORT_NS_DEV_PM_OPS(name, ns) = { \ + RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ + } #define EXPORT_NS_GPL_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn, ns) \ - _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \ - suspend_fn, resume_fn, idle_fn, "_gpl", #ns) + EXPORT_NS_GPL_DEV_PM_OPS(name, ns) = { \ + RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ + } #ifdef CONFIG_PM extern struct workqueue_struct *pm_wq; diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index cb380c1d9459..aa2c4a7c4826 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -374,9 +374,37 @@ struct power_supply_vbat_ri_table { * These timers should be chosen to align with the typical discharge curve * for the battery. * - * When the main CC/CV charging is complete the battery can optionally be - * maintenance charged at the voltages from this table: a table of settings is - * traversed using a slightly lower current and voltage than what is used for + * Ordinary CC/CV charging will stop charging when the charge current goes + * below charge_term_current_ua, and then restart it (if the device is still + * plugged into the charger) at charge_restart_voltage_uv. This happens in most + * consumer products because the power usage while connected to a charger is + * not zero, and devices are not manufactured to draw power directly from the + * charger: instead they will at all times dissipate the battery a little, like + * the power used in standby mode. This will over time give a charge graph + * such as this: + * + * Energy + * ^ ... ... ... ... ... ... ... + * | . . . . . . . . . . . . . + * | .. . .. . .. . .. . .. . .. . .. + * |. .. .. .. .. .. .. + * +-------------------------------------------------------------------> t + * + * Practically this means that the Li-ions are wandering back and forth in the + * battery and this causes degeneration of the battery anode and cathode. + * To prolong the life of the battery, maintenance charging is applied after + * reaching charge_term_current_ua to hold up the charge in the battery while + * consuming power, thus lowering the wear on the battery: + * + * Energy + * ^ ....................................... + * | . ...................... + * | .. + * |. + * +-------------------------------------------------------------------> t + * + * Maintenance charging uses the voltages from this table: a table of settings + * is traversed using a slightly lower current and voltage than what is used for * CC/CV charging. The maintenance charging will for safety reasons not go on * indefinately: we lower the current and voltage with successive maintenance * settings, then disable charging completely after we reach the last one, @@ -385,14 +413,22 @@ struct power_supply_vbat_ri_table { * ordinary CC/CV charging from there. * * As an example, a Samsung EB425161LA Lithium-Ion battery is CC/CV charged - * at 900mA to 4340mV, then maintenance charged at 600mA and 4150mV for - * 60 hours, then maintenance charged at 600mA and 4100mV for 200 hours. + * at 900mA to 4340mV, then maintenance charged at 600mA and 4150mV for up to + * 60 hours, then maintenance charged at 600mA and 4100mV for up to 200 hours. * After this the charge cycle is restarted waiting for * charge_restart_voltage_uv. * * For most mobile electronics this type of maintenance charging is enough for * the user to disconnect the device and make use of it before both maintenance - * charging cycles are complete. + * charging cycles are complete, if the current and voltage has been chosen + * appropriately. These need to be determined from battery discharge curves + * and expected standby current. + * + * If the voltage anyway drops to charge_restart_voltage_uv during maintenance + * charging, ordinary CC/CV charging is restarted. This can happen if the + * device is e.g. actively used during charging, so more current is drawn than + * the expected stand-by current. Also overvoltage protection will be applied + * as usual. */ struct power_supply_maintenance_charge_table { int charge_current_max_ua; diff --git a/include/linux/prandom.h b/include/linux/prandom.h index deace5fb4e62..78db003bc290 100644 --- a/include/linux/prandom.h +++ b/include/linux/prandom.h @@ -12,11 +12,13 @@ #include <linux/percpu.h> #include <linux/random.h> +/* Deprecated: use get_random_u32 instead. */ static inline u32 prandom_u32(void) { return get_random_u32(); } +/* Deprecated: use get_random_bytes instead. */ static inline void prandom_bytes(void *buf, size_t nbytes) { return get_random_bytes(buf, nbytes); @@ -37,17 +39,20 @@ void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state); * prandom_u32_max - returns a pseudo-random number in interval [0, ep_ro) * @ep_ro: right open interval endpoint * - * Returns a pseudo-random number that is in interval [0, ep_ro). Note - * that the result depends on PRNG being well distributed in [0, ~0U] - * u32 space. Here we use maximally equidistributed combined Tausworthe - * generator, that is, prandom_u32(). This is useful when requesting a - * random index of an array containing ep_ro elements, for example. + * Returns a pseudo-random number that is in interval [0, ep_ro). This is + * useful when requesting a random index of an array containing ep_ro elements, + * for example. The result is somewhat biased when ep_ro is not a power of 2, + * so do not use this for cryptographic purposes. * * Returns: pseudo-random number in interval [0, ep_ro) */ static inline u32 prandom_u32_max(u32 ep_ro) { - return (u32)(((u64) prandom_u32() * ep_ro) >> 32); + if (__builtin_constant_p(ep_ro <= 1U << 8) && ep_ro <= 1U << 8) + return (get_random_u8() * ep_ro) >> 8; + if (__builtin_constant_p(ep_ro <= 1U << 16) && ep_ro <= 1U << 16) + return (get_random_u16() * ep_ro) >> 16; + return ((u64)get_random_u32() * ep_ro) >> 32; } /* diff --git a/include/linux/preempt.h b/include/linux/preempt.h index b4381f255a5c..0df425bf9bd7 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -421,4 +421,46 @@ static inline void migrate_enable(void) { } #endif /* CONFIG_SMP */ +/** + * preempt_disable_nested - Disable preemption inside a normally preempt disabled section + * + * Use for code which requires preemption protection inside a critical + * section which has preemption disabled implicitly on non-PREEMPT_RT + * enabled kernels, by e.g.: + * - holding a spinlock/rwlock + * - soft interrupt context + * - regular interrupt handlers + * + * On PREEMPT_RT enabled kernels spinlock/rwlock held sections, soft + * interrupt context and regular interrupt handlers are preemptible and + * only prevent migration. preempt_disable_nested() ensures that preemption + * is disabled for cases which require CPU local serialization even on + * PREEMPT_RT. For non-PREEMPT_RT kernels this is a NOP. + * + * The use cases are code sequences which are not serialized by a + * particular lock instance, e.g.: + * - seqcount write side critical sections where the seqcount is not + * associated to a particular lock and therefore the automatic + * protection mechanism does not work. This prevents a live lock + * against a preempting high priority reader. + * - RMW per CPU variable updates like vmstat. + */ +/* Macro to avoid header recursion hell vs. lockdep */ +#define preempt_disable_nested() \ +do { \ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) \ + preempt_disable(); \ + else \ + lockdep_assert_preemption_disabled(); \ +} while (0) + +/** + * preempt_enable_nested - Undo the effect of preempt_disable_nested() + */ +static __always_inline void preempt_enable_nested(void) +{ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_enable(); +} + #endif /* __LINUX_PREEMPT_H */ diff --git a/include/linux/printk.h b/include/linux/printk.h index cf7d666ab1f8..8c81806c2e99 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -169,8 +169,6 @@ extern void __printk_safe_exit(void); #define printk_deferred_enter __printk_safe_enter #define printk_deferred_exit __printk_safe_exit -extern bool pr_flush(int timeout_ms, bool reset_on_progress); - /* * Please don't use printk_ratelimit(), because it shares ratelimiting state * with all other unrelated printk_ratelimit() callsites. Instead use @@ -221,11 +219,6 @@ static inline void printk_deferred_exit(void) { } -static inline bool pr_flush(int timeout_ms, bool reset_on_progress) -{ - return true; -} - static inline int printk_ratelimit(void) { return 0; diff --git a/include/linux/property.h b/include/linux/property.h index a5b429d623f6..117cc200c656 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -32,7 +32,7 @@ enum dev_dma_attr { DEV_DMA_COHERENT, }; -struct fwnode_handle *dev_fwnode(struct device *dev); +struct fwnode_handle *dev_fwnode(const struct device *dev); bool device_property_present(struct device *dev, const char *propname); int device_property_read_u8_array(struct device *dev, const char *propname, @@ -387,7 +387,7 @@ bool device_dma_supported(struct device *dev); enum dev_dma_attr device_get_dma_attr(struct device *dev); -const void *device_get_match_data(struct device *dev); +const void *device_get_match_data(const struct device *dev); int device_get_phy_mode(struct device *dev); int fwnode_get_phy_mode(struct fwnode_handle *fwnode); diff --git a/include/linux/random.h b/include/linux/random.h index 3fec206487f6..08322f700cdc 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -38,6 +38,8 @@ static inline int unregister_random_vmfork_notifier(struct notifier_block *nb) { #endif void get_random_bytes(void *buf, size_t len); +u8 get_random_u8(void); +u16 get_random_u16(void); u32 get_random_u32(void); u64 get_random_u64(void); static inline unsigned int get_random_int(void) @@ -72,7 +74,8 @@ static inline unsigned long get_random_canary(void) return get_random_long() & CANARY_MASK; } -int __init random_init(const char *command_line); +void __init random_init_early(const char *command_line); +void __init random_init(void); bool rng_is_initialized(void); int wait_for_random_bytes(void); @@ -93,6 +96,8 @@ static inline int get_random_bytes_wait(void *buf, size_t nbytes) *out = get_random_ ## name(); \ return 0; \ } +declare_get_random_var_wait(u8, u8) +declare_get_random_var_wait(u16, u16) declare_get_random_var_wait(u32, u32) declare_get_random_var_wait(u64, u32) declare_get_random_var_wait(int, unsigned int) diff --git a/include/linux/reboot.h b/include/linux/reboot.h index e5d9ef886179..2b6bb593be5b 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -106,6 +106,14 @@ enum sys_off_mode { SYS_OFF_MODE_POWER_OFF, /** + * @SYS_OFF_MODE_RESTART_PREPARE: + * + * Handlers prepare system to be restarted. Handlers are + * allowed to sleep. + */ + SYS_OFF_MODE_RESTART_PREPARE, + + /** * @SYS_OFF_MODE_RESTART: * * Handlers restart system. Handlers are disallowed to sleep. diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index aea79c77db0f..fe8978eb69f1 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -490,6 +490,20 @@ struct rproc_dump_segment { }; /** + * enum rproc_features - features supported + * + * @RPROC_FEAT_ATTACH_ON_RECOVERY: The remote processor does not need help + * from Linux to recover, such as firmware + * loading. Linux just needs to attach after + * recovery. + */ + +enum rproc_features { + RPROC_FEAT_ATTACH_ON_RECOVERY, + RPROC_MAX_FEATURES, +}; + +/** * struct rproc - represents a physical remote processor device * @node: list node of this rproc object * @domain: iommu domain @@ -530,6 +544,7 @@ struct rproc_dump_segment { * @elf_machine: firmware ELF machine * @cdev: character device of the rproc * @cdev_put_on_release: flag to indicate if remoteproc should be shutdown on @char_dev release + * @features: indicate remoteproc features */ struct rproc { struct list_head node; @@ -570,6 +585,7 @@ struct rproc { u16 elf_machine; struct cdev cdev; bool cdev_put_on_release; + DECLARE_BITMAP(features, RPROC_MAX_FEATURES); }; /** @@ -616,9 +632,8 @@ struct rproc_vring { /** * struct rproc_vdev - remoteproc state for a supported virtio device - * @refcount: reference counter for the vdev and vring allocations * @subdev: handle for registering the vdev as a rproc subdevice - * @dev: device struct used for reference count semantics + * @pdev: remoteproc virtio platform device * @id: virtio device id (as in virtio_ids.h) * @node: list node * @rproc: the rproc handle @@ -627,10 +642,9 @@ struct rproc_vring { * @index: vdev position versus other vdev declared in resource table */ struct rproc_vdev { - struct kref refcount; struct rproc_subdev subdev; - struct device dev; + struct platform_device *pdev; unsigned int id; struct list_head node; diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index dac53fd3afea..2504df9a0453 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -101,7 +101,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full); __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table); - +void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu); #define RING_BUFFER_ALL_CPUS -1 diff --git a/include/linux/rmap.h b/include/linux/rmap.h index b89b4b86951f..bd3504d11b15 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -166,7 +166,7 @@ static inline void anon_vma_merge(struct vm_area_struct *vma, unlink_anon_vmas(next); } -struct anon_vma *page_get_anon_vma(struct page *page); +struct anon_vma *folio_get_anon_vma(struct folio *folio); /* RMAP flags, currently only relevant for some anon rmap operations. */ typedef int __bitwise rmap_t; @@ -270,7 +270,7 @@ dup: * @page: the exclusive anonymous page to try marking possibly shared * * The caller needs to hold the PT lock and has to have the page table entry - * cleared/invalidated+flushed, to properly sync against GUP-fast. + * cleared/invalidated. * * This is similar to page_try_dup_anon_rmap(), however, not used during fork() * to duplicate a mapping, but instead to prepare for KSM or temporarily @@ -286,12 +286,68 @@ static inline int page_try_share_anon_rmap(struct page *page) { VM_BUG_ON_PAGE(!PageAnon(page) || !PageAnonExclusive(page), page); - /* See page_try_dup_anon_rmap(). */ - if (likely(!is_device_private_page(page) && - unlikely(page_maybe_dma_pinned(page)))) - return -EBUSY; + /* device private pages cannot get pinned via GUP. */ + if (unlikely(is_device_private_page(page))) { + ClearPageAnonExclusive(page); + return 0; + } + + /* + * We have to make sure that when we clear PageAnonExclusive, that + * the page is not pinned and that concurrent GUP-fast won't succeed in + * concurrently pinning the page. + * + * Conceptually, PageAnonExclusive clearing consists of: + * (A1) Clear PTE + * (A2) Check if the page is pinned; back off if so. + * (A3) Clear PageAnonExclusive + * (A4) Restore PTE (optional, but certainly not writable) + * + * When clearing PageAnonExclusive, we cannot possibly map the page + * writable again, because anon pages that may be shared must never + * be writable. So in any case, if the PTE was writable it cannot + * be writable anymore afterwards and there would be a PTE change. Only + * if the PTE wasn't writable, there might not be a PTE change. + * + * Conceptually, GUP-fast pinning of an anon page consists of: + * (B1) Read the PTE + * (B2) FOLL_WRITE: check if the PTE is not writable; back off if so. + * (B3) Pin the mapped page + * (B4) Check if the PTE changed by re-reading it; back off if so. + * (B5) If the original PTE is not writable, check if + * PageAnonExclusive is not set; back off if so. + * + * If the PTE was writable, we only have to make sure that GUP-fast + * observes a PTE change and properly backs off. + * + * If the PTE was not writable, we have to make sure that GUP-fast either + * detects a (temporary) PTE change or that PageAnonExclusive is cleared + * and properly backs off. + * + * Consequently, when clearing PageAnonExclusive(), we have to make + * sure that (A1), (A2)/(A3) and (A4) happen in the right memory + * order. In GUP-fast pinning code, we have to make sure that (B3),(B4) + * and (B5) happen in the right memory order. + * + * We assume that there might not be a memory barrier after + * clearing/invalidating the PTE (A1) and before restoring the PTE (A4), + * so we use explicit ones here. + */ + /* Paired with the memory barrier in try_grab_folio(). */ + if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) + smp_mb(); + + if (unlikely(page_maybe_dma_pinned(page))) + return -EBUSY; ClearPageAnonExclusive(page); + + /* + * This is conceptually a smp_wmb() paired with the smp_rmb() in + * gup_must_unshare(). + */ + if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) + smp_mb__after_atomic(); return 0; } @@ -405,13 +461,8 @@ struct rmap_walk_control { void rmap_walk(struct folio *folio, struct rmap_walk_control *rwc); void rmap_walk_locked(struct folio *folio, struct rmap_walk_control *rwc); - -/* - * Called by memory-failure.c to kill processes. - */ struct anon_vma *folio_lock_anon_vma_read(struct folio *folio, struct rmap_walk_control *rwc); -void page_unlock_anon_vma_read(struct anon_vma *anon_vma); #else /* !CONFIG_MMU */ diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h index 8f416c5e929e..c0ef596f340b 100644 --- a/include/linux/rwlock.h +++ b/include/linux/rwlock.h @@ -1,7 +1,7 @@ #ifndef __LINUX_RWLOCK_H #define __LINUX_RWLOCK_H -#ifndef __LINUX_SPINLOCK_H +#ifndef __LINUX_INSIDE_SPINLOCK_H # error "please don't include this file directly" #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 8d82d6d32670..77f68f8b795c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -14,6 +14,7 @@ #include <linux/pid.h> #include <linux/sem.h> #include <linux/shm.h> +#include <linux/kmsan_types.h> #include <linux/mutex.h> #include <linux/plist.h> #include <linux/hrtimer.h> @@ -81,25 +82,34 @@ struct task_group; */ /* Used in tsk->state: */ -#define TASK_RUNNING 0x0000 -#define TASK_INTERRUPTIBLE 0x0001 -#define TASK_UNINTERRUPTIBLE 0x0002 -#define __TASK_STOPPED 0x0004 -#define __TASK_TRACED 0x0008 +#define TASK_RUNNING 0x00000000 +#define TASK_INTERRUPTIBLE 0x00000001 +#define TASK_UNINTERRUPTIBLE 0x00000002 +#define __TASK_STOPPED 0x00000004 +#define __TASK_TRACED 0x00000008 /* Used in tsk->exit_state: */ -#define EXIT_DEAD 0x0010 -#define EXIT_ZOMBIE 0x0020 +#define EXIT_DEAD 0x00000010 +#define EXIT_ZOMBIE 0x00000020 #define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) /* Used in tsk->state again: */ -#define TASK_PARKED 0x0040 -#define TASK_DEAD 0x0080 -#define TASK_WAKEKILL 0x0100 -#define TASK_WAKING 0x0200 -#define TASK_NOLOAD 0x0400 -#define TASK_NEW 0x0800 -/* RT specific auxilliary flag to mark RT lock waiters */ -#define TASK_RTLOCK_WAIT 0x1000 -#define TASK_STATE_MAX 0x2000 +#define TASK_PARKED 0x00000040 +#define TASK_DEAD 0x00000080 +#define TASK_WAKEKILL 0x00000100 +#define TASK_WAKING 0x00000200 +#define TASK_NOLOAD 0x00000400 +#define TASK_NEW 0x00000800 +#define TASK_RTLOCK_WAIT 0x00001000 +#define TASK_FREEZABLE 0x00002000 +#define __TASK_FREEZABLE_UNSAFE (0x00004000 * IS_ENABLED(CONFIG_LOCKDEP)) +#define TASK_FROZEN 0x00008000 +#define TASK_STATE_MAX 0x00010000 + +#define TASK_ANY (TASK_STATE_MAX-1) + +/* + * DO NOT ADD ANY NEW USERS ! + */ +#define TASK_FREEZABLE_UNSAFE (TASK_FREEZABLE | __TASK_FREEZABLE_UNSAFE) /* Convenience macros for the sake of set_current_state: */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) @@ -861,7 +871,6 @@ struct task_struct { struct mm_struct *active_mm; /* Per-thread vma caching: */ - struct vmacache vmacache; #ifdef SPLIT_RSS_COUNTING struct task_rss_stat rss_stat; @@ -914,6 +923,10 @@ struct task_struct { #ifdef CONFIG_MEMCG unsigned in_user_fault:1; #endif +#ifdef CONFIG_LRU_GEN + /* whether the LRU algorithm may apply to this access */ + unsigned in_lru_fault:1; +#endif #ifdef CONFIG_COMPAT_BRK unsigned brk_randomized:1; #endif @@ -944,6 +957,10 @@ struct task_struct { #ifdef CONFIG_CPU_SUP_INTEL unsigned reported_split_lock:1; #endif +#ifdef CONFIG_TASK_DELAY_ACCT + /* delay due to memory thrashing */ + unsigned in_thrashing:1; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. */ @@ -1355,6 +1372,10 @@ struct task_struct { #endif #endif +#ifdef CONFIG_KMSAN + struct kmsan_ctx kmsan_ctx; +#endif + #if IS_ENABLED(CONFIG_KUNIT) struct kunit *kunit_test; #endif @@ -1381,9 +1402,6 @@ struct task_struct { #endif #ifdef CONFIG_TRACING - /* State flags for use by tracers: */ - unsigned long trace; - /* Bitmask and counter of trace recursion: */ unsigned long trace_recursion; #endif /* CONFIG_TRACING */ @@ -1713,8 +1731,9 @@ extern struct pid *cad_pid; #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ +#define PF__HOLE__00004000 0x00004000 #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ -#define PF_FROZEN 0x00010000 /* Frozen for system suspend */ +#define PF__HOLE__00010000 0x00010000 #define PF_KSWAPD 0x00020000 /* I am kswapd */ #define PF_MEMALLOC_NOFS 0x00040000 /* All allocation requests will inherit GFP_NOFS */ #define PF_MEMALLOC_NOIO 0x00080000 /* All allocation requests will inherit GFP_NOIO */ @@ -1722,10 +1741,14 @@ extern struct pid *cad_pid; * I am cleaning dirty pages from some other bdi. */ #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ +#define PF__HOLE__00800000 0x00800000 +#define PF__HOLE__01000000 0x01000000 +#define PF__HOLE__02000000 0x02000000 #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMALLOC_PIN 0x10000000 /* Allocation context constrained to zones which allow long term pinning. */ -#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ +#define PF__HOLE__20000000 0x20000000 +#define PF__HOLE__40000000 0x40000000 #define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ /* diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index 4d0a5be28b70..8270ad7ae14c 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -71,9 +71,8 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ #define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ -#define MMF_OOM_VICTIM 25 /* mm is the oom victim */ -#define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */ -#define MMF_MULTIPROCESS 27 /* mm is shared between processes */ +#define MMF_OOM_REAP_QUEUED 25 /* mm was queued for oom_reaper */ +#define MMF_MULTIPROCESS 26 /* mm is shared between processes */ /* * MMF_HAS_PINNED: Whether this mm has pinned any pages. This can be either * replaced in the future by mm.pinned_vm when it becomes stable, or grow into @@ -81,7 +80,7 @@ static inline int get_dumpable(struct mm_struct *mm) * pinned pages were unpinned later on, we'll still keep this bit set for the * lifecycle of this mm, just for simplicity. */ -#define MMF_HAS_PINNED 28 /* FOLL_PIN has run, never cleared */ +#define MMF_HAS_PINNED 27 /* FOLL_PIN has run, never cleared */ #define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index cafbe03eed01..20099268fa25 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -94,6 +94,7 @@ struct signal_struct { refcount_t sigcnt; atomic_t live; int nr_threads; + int quick_threads; struct list_head thread_head; wait_queue_head_t wait_chldexit; /* for wait4() */ diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index e650946816d0..303ee7dd0c7e 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -27,6 +27,7 @@ enum sched_tunable_scaling { #ifdef CONFIG_NUMA_BALANCING extern int sysctl_numa_balancing_mode; +extern unsigned int sysctl_numa_balancing_promote_rate_limit; #else #define sysctl_numa_balancing_mode 0 #endif diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 81cab4b01edc..d6c48163c6de 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -127,6 +127,9 @@ static inline void put_task_struct_many(struct task_struct *t, int nr) void put_task_struct_rcu_user(struct task_struct *task); +/* Free all architecture-specific resources held by a thread. */ +void release_thread(struct task_struct *dead_task); + #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT extern int arch_task_struct_size __read_mostly; #else diff --git a/include/linux/security.h b/include/linux/security.h index 87fac3af6dad..ca1b7109c0db 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -114,6 +114,7 @@ enum lockdown_reason { LOCKDOWN_IOPORT, LOCKDOWN_MSR, LOCKDOWN_ACPI_TABLES, + LOCKDOWN_DEVICE_TREE, LOCKDOWN_PCMCIA_CIS, LOCKDOWN_TIOCSSERIAL, LOCKDOWN_MODULE_PARAMETERS, @@ -122,6 +123,7 @@ enum lockdown_reason { LOCKDOWN_XMON_WR, LOCKDOWN_BPF_WRITE_USER, LOCKDOWN_DBG_WRITE_KERNEL, + LOCKDOWN_RTAS_ERROR_INJECTION, LOCKDOWN_INTEGRITY_MAX, LOCKDOWN_KCORE, LOCKDOWN_KPROBES, diff --git a/include/linux/serdev.h b/include/linux/serdev.h index 3368c261ab62..66f624fc618c 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -7,6 +7,7 @@ #include <linux/types.h> #include <linux/device.h> +#include <linux/uaccess.h> #include <linux/termios.h> #include <linux/delay.h> diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 8c7b793aa4d7..19376bee9667 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -32,7 +32,7 @@ struct plat_serial8250_port { void (*serial_out)(struct uart_port *, int, int); void (*set_termios)(struct uart_port *, struct ktermios *new, - struct ktermios *old); + const struct ktermios *old); void (*set_ldisc)(struct uart_port *, struct ktermios *); unsigned int (*get_mctrl)(struct uart_port *); @@ -74,6 +74,7 @@ struct uart_8250_port; struct uart_8250_ops { int (*setup_irq)(struct uart_8250_port *); void (*release_irq)(struct uart_8250_port *); + void (*setup_timer)(struct uart_8250_port *); }; struct uart_8250_em485 { @@ -157,7 +158,7 @@ extern int early_serial8250_setup(struct earlycon_device *device, extern void serial8250_update_uartclk(struct uart_port *port, unsigned int uartclk); extern void serial8250_do_set_termios(struct uart_port *port, - struct ktermios *termios, struct ktermios *old); + struct ktermios *termios, const struct ktermios *old); extern void serial8250_do_set_ldisc(struct uart_port *port, struct ktermios *termios); extern unsigned int serial8250_do_get_mctrl(struct uart_port *port); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 1eaea9fe44d8..d657f2a42a7b 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -387,7 +387,7 @@ struct uart_ops { void (*shutdown)(struct uart_port *); void (*flush_buffer)(struct uart_port *); void (*set_termios)(struct uart_port *, struct ktermios *new, - struct ktermios *old); + const struct ktermios *old); void (*set_ldisc)(struct uart_port *, struct ktermios *); void (*pm)(struct uart_port *, unsigned int state, unsigned int oldstate); @@ -422,7 +422,7 @@ struct uart_icount { __u32 buf_overrun; }; -typedef unsigned int __bitwise upf_t; +typedef u64 __bitwise upf_t; typedef unsigned int __bitwise upstat_t; struct uart_port { @@ -433,7 +433,7 @@ struct uart_port { void (*serial_out)(struct uart_port *, int, int); void (*set_termios)(struct uart_port *, struct ktermios *new, - struct ktermios *old); + const struct ktermios *old); void (*set_ldisc)(struct uart_port *, struct ktermios *); unsigned int (*get_mctrl)(struct uart_port *); @@ -513,23 +513,24 @@ struct uart_port { #define UPF_BUGGY_UART ((__force upf_t) ASYNC_BUGGY_UART /* 14 */ ) #define UPF_MAGIC_MULTIPLIER ((__force upf_t) ASYNC_MAGIC_MULTIPLIER /* 16 */ ) -#define UPF_NO_THRE_TEST ((__force upf_t) (1 << 19)) +#define UPF_NO_THRE_TEST ((__force upf_t) BIT_ULL(19)) /* Port has hardware-assisted h/w flow control */ -#define UPF_AUTO_CTS ((__force upf_t) (1 << 20)) -#define UPF_AUTO_RTS ((__force upf_t) (1 << 21)) +#define UPF_AUTO_CTS ((__force upf_t) BIT_ULL(20)) +#define UPF_AUTO_RTS ((__force upf_t) BIT_ULL(21)) #define UPF_HARD_FLOW ((__force upf_t) (UPF_AUTO_CTS | UPF_AUTO_RTS)) /* Port has hardware-assisted s/w flow control */ -#define UPF_SOFT_FLOW ((__force upf_t) (1 << 22)) -#define UPF_CONS_FLOW ((__force upf_t) (1 << 23)) -#define UPF_SHARE_IRQ ((__force upf_t) (1 << 24)) -#define UPF_EXAR_EFR ((__force upf_t) (1 << 25)) -#define UPF_BUG_THRE ((__force upf_t) (1 << 26)) +#define UPF_SOFT_FLOW ((__force upf_t) BIT_ULL(22)) +#define UPF_CONS_FLOW ((__force upf_t) BIT_ULL(23)) +#define UPF_SHARE_IRQ ((__force upf_t) BIT_ULL(24)) +#define UPF_EXAR_EFR ((__force upf_t) BIT_ULL(25)) +#define UPF_BUG_THRE ((__force upf_t) BIT_ULL(26)) /* The exact UART type is known and should not be probed. */ -#define UPF_FIXED_TYPE ((__force upf_t) (1 << 27)) -#define UPF_BOOT_AUTOCONF ((__force upf_t) (1 << 28)) -#define UPF_FIXED_PORT ((__force upf_t) (1 << 29)) -#define UPF_DEAD ((__force upf_t) (1 << 30)) -#define UPF_IOREMAP ((__force upf_t) (1 << 31)) +#define UPF_FIXED_TYPE ((__force upf_t) BIT_ULL(27)) +#define UPF_BOOT_AUTOCONF ((__force upf_t) BIT_ULL(28)) +#define UPF_FIXED_PORT ((__force upf_t) BIT_ULL(29)) +#define UPF_DEAD ((__force upf_t) BIT_ULL(30)) +#define UPF_IOREMAP ((__force upf_t) BIT_ULL(31)) +#define UPF_FULL_PROBE ((__force upf_t) BIT_ULL(32)) #define __UPF_CHANGE_MASK 0x17fff #define UPF_CHANGE_MASK ((__force upf_t) __UPF_CHANGE_MASK) @@ -669,7 +670,7 @@ void uart_write_wakeup(struct uart_port *port); void uart_update_timeout(struct uart_port *port, unsigned int cflag, unsigned int baud); unsigned int uart_get_baud_rate(struct uart_port *port, struct ktermios *termios, - struct ktermios *old, unsigned int min, + const struct ktermios *old, unsigned int min, unsigned int max); unsigned int uart_get_divisor(struct uart_port *port, unsigned int baud); @@ -950,5 +951,4 @@ static inline int uart_handle_break(struct uart_port *port) !((cflag) & CLOCAL)) int uart_get_rs485_mode(struct uart_port *port); -int uart_rs485_config(struct uart_port *port); #endif /* LINUX_SERIAL_CORE_H */ diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index ff0b990de83d..d500ea967dc7 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -92,17 +92,19 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); int shmem_unuse(unsigned int type); -extern bool shmem_is_huge(struct vm_area_struct *vma, - struct inode *inode, pgoff_t index); -static inline bool shmem_huge_enabled(struct vm_area_struct *vma) +extern bool shmem_is_huge(struct vm_area_struct *vma, struct inode *inode, + pgoff_t index, bool shmem_huge_force); +static inline bool shmem_huge_enabled(struct vm_area_struct *vma, + bool shmem_huge_force) { - return shmem_is_huge(vma, file_inode(vma->vm_file), vma->vm_pgoff); + return shmem_is_huge(vma, file_inode(vma->vm_file), vma->vm_pgoff, + shmem_huge_force); } extern unsigned long shmem_swap_usage(struct vm_area_struct *vma); extern unsigned long shmem_partial_swap_usage(struct address_space *mapping, pgoff_t start, pgoff_t end); -/* Flag allocation requirements to shmem_getpage */ +/* Flag allocation requirements to shmem_get_folio */ enum sgp_type { SGP_READ, /* don't exceed i_size, don't allocate page */ SGP_NOALLOC, /* similar, but fail on hole or use fallocated page */ @@ -111,8 +113,8 @@ enum sgp_type { SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */ }; -extern int shmem_getpage(struct inode *inode, pgoff_t index, - struct page **pagep, enum sgp_type sgp); +int shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop, + enum sgp_type sgp); static inline struct page *shmem_read_mapping_page( struct address_space *mapping, pgoff_t index) diff --git a/include/linux/slab.h b/include/linux/slab.h index 0fefdf528e0d..90877fcde70b 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -29,6 +29,8 @@ #define SLAB_RED_ZONE ((slab_flags_t __force)0x00000400U) /* DEBUG: Poison objects */ #define SLAB_POISON ((slab_flags_t __force)0x00000800U) +/* Indicate a kmalloc slab */ +#define SLAB_KMALLOC ((slab_flags_t __force)0x00001000U) /* Align objs on cache lines */ #define SLAB_HWCACHE_ALIGN ((slab_flags_t __force)0x00002000U) /* Use GFP_DMA memory */ @@ -106,7 +108,7 @@ # define SLAB_ACCOUNT 0 #endif -#ifdef CONFIG_KASAN +#ifdef CONFIG_KASAN_GENERIC #define SLAB_KASAN ((slab_flags_t __force)0x08000000U) #else #define SLAB_KASAN 0 @@ -119,6 +121,12 @@ */ #define SLAB_NO_USER_FLAGS ((slab_flags_t __force)0x10000000U) +#ifdef CONFIG_KFENCE +#define SLAB_SKIP_KFENCE ((slab_flags_t __force)0x20000000U) +#else +#define SLAB_SKIP_KFENCE 0 +#endif + /* The following flags affect the page allocator grouping pages by mobility */ /* Objects are reclaimable */ #define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U) @@ -184,11 +192,25 @@ int kmem_cache_shrink(struct kmem_cache *s); /* * Common kmalloc functions provided by all allocators */ -void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags) __alloc_size(2); +void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags) __realloc_size(2); void kfree(const void *objp); void kfree_sensitive(const void *objp); size_t __ksize(const void *objp); + +/** + * ksize - Report actual allocation size of associated object + * + * @objp: Pointer returned from a prior kmalloc()-family allocation. + * + * This should not be used for writing beyond the originally requested + * allocation size. Either use krealloc() or round up the allocation size + * with kmalloc_size_roundup() prior to allocation. If this is used to + * access beyond the originally requested allocation size, UBSAN_BOUNDS + * and/or FORTIFY_SOURCE may trip, since they only know about the + * originally allocated size via the __alloc_size attribute. + */ size_t ksize(const void *objp); + #ifdef CONFIG_PRINTK bool kmem_valid_obj(void *object); void kmem_dump_obj(void *object); @@ -243,27 +265,17 @@ static inline unsigned int arch_slab_minalign(void) #ifdef CONFIG_SLAB /* - * The largest kmalloc size supported by the SLAB allocators is - * 32 megabyte (2^25) or the maximum allocatable page order if that is - * less than 32 MB. - * - * WARNING: Its not easy to increase this value since the allocators have - * to do various tricks to work around compiler limitations in order to - * ensure proper constant folding. + * SLAB and SLUB directly allocates requests fitting in to an order-1 page + * (PAGE_SIZE*2). Larger requests are passed to the page allocator. */ -#define KMALLOC_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \ - (MAX_ORDER + PAGE_SHIFT - 1) : 25) -#define KMALLOC_SHIFT_MAX KMALLOC_SHIFT_HIGH +#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) +#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1) #ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 5 #endif #endif #ifdef CONFIG_SLUB -/* - * SLUB directly allocates requests fitting in to an order-1 page - * (PAGE_SIZE*2). Larger requests are passed to the page allocator. - */ #define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) #define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1) #ifndef KMALLOC_SHIFT_LOW @@ -415,10 +427,6 @@ static __always_inline unsigned int __kmalloc_index(size_t size, if (size <= 512 * 1024) return 19; if (size <= 1024 * 1024) return 20; if (size <= 2 * 1024 * 1024) return 21; - if (size <= 4 * 1024 * 1024) return 22; - if (size <= 8 * 1024 * 1024) return 23; - if (size <= 16 * 1024 * 1024) return 24; - if (size <= 32 * 1024 * 1024) return 25; if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && size_is_constant) BUILD_BUG_ON_MSG(1, "unexpected size in kmalloc_index()"); @@ -428,6 +436,7 @@ static __always_inline unsigned int __kmalloc_index(size_t size, /* Will never be reached. Needed because the compiler may complain */ return -1; } +static_assert(PAGE_SHIFT <= 20); #define kmalloc_index(s) __kmalloc_index(s, true) #endif /* !CONFIG_SLOB */ @@ -456,42 +465,22 @@ static __always_inline void kfree_bulk(size_t size, void **p) kmem_cache_free_bulk(NULL, size, p); } -#ifdef CONFIG_NUMA void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __alloc_size(1); void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment __malloc; -#else -static __always_inline __alloc_size(1) void *__kmalloc_node(size_t size, gfp_t flags, int node) -{ - return __kmalloc(size, flags); -} - -static __always_inline void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) -{ - return kmem_cache_alloc(s, flags); -} -#endif #ifdef CONFIG_TRACING -extern void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) - __assume_slab_alignment __alloc_size(3); - -#ifdef CONFIG_NUMA -extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, - int node, size_t size) __assume_slab_alignment - __alloc_size(4); -#else -static __always_inline __alloc_size(4) void *kmem_cache_alloc_node_trace(struct kmem_cache *s, - gfp_t gfpflags, int node, size_t size) -{ - return kmem_cache_alloc_trace(s, gfpflags, size); -} -#endif /* CONFIG_NUMA */ +void *kmalloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) + __assume_kmalloc_alignment __alloc_size(3); +void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, + int node, size_t size) __assume_kmalloc_alignment + __alloc_size(4); #else /* CONFIG_TRACING */ -static __always_inline __alloc_size(3) void *kmem_cache_alloc_trace(struct kmem_cache *s, - gfp_t flags, size_t size) +/* Save a function call when CONFIG_TRACING=n */ +static __always_inline __alloc_size(3) +void *kmalloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) { void *ret = kmem_cache_alloc(s, flags); @@ -499,8 +488,9 @@ static __always_inline __alloc_size(3) void *kmem_cache_alloc_trace(struct kmem_ return ret; } -static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, - int node, size_t size) +static __always_inline __alloc_size(4) +void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, + int node, size_t size) { void *ret = kmem_cache_alloc_node(s, gfpflags, node); @@ -509,25 +499,11 @@ static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s, g } #endif /* CONFIG_TRACING */ -extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment - __alloc_size(1); +void *kmalloc_large(size_t size, gfp_t flags) __assume_page_alignment + __alloc_size(1); -#ifdef CONFIG_TRACING -extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) - __assume_page_alignment __alloc_size(1); -#else -static __always_inline __alloc_size(1) void *kmalloc_order_trace(size_t size, gfp_t flags, - unsigned int order) -{ - return kmalloc_order(size, flags, order); -} -#endif - -static __always_inline __alloc_size(1) void *kmalloc_large(size_t size, gfp_t flags) -{ - unsigned int order = get_order(size); - return kmalloc_order_trace(size, flags, order); -} +void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_alignment + __alloc_size(1); /** * kmalloc - allocate memory @@ -597,7 +573,7 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags) if (!index) return ZERO_SIZE_PTR; - return kmem_cache_alloc_trace( + return kmalloc_trace( kmalloc_caches[kmalloc_type(flags)][index], flags, size); #endif @@ -605,23 +581,35 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags) return __kmalloc(size, flags); } +#ifndef CONFIG_SLOB static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node) { -#ifndef CONFIG_SLOB - if (__builtin_constant_p(size) && - size <= KMALLOC_MAX_CACHE_SIZE) { - unsigned int i = kmalloc_index(size); + if (__builtin_constant_p(size)) { + unsigned int index; - if (!i) + if (size > KMALLOC_MAX_CACHE_SIZE) + return kmalloc_large_node(size, flags, node); + + index = kmalloc_index(size); + + if (!index) return ZERO_SIZE_PTR; - return kmem_cache_alloc_node_trace( - kmalloc_caches[kmalloc_type(flags)][i], - flags, node, size); + return kmalloc_node_trace( + kmalloc_caches[kmalloc_type(flags)][index], + flags, node, size); } -#endif return __kmalloc_node(size, flags, node); } +#else +static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node) +{ + if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE) + return kmalloc_large_node(size, flags, node); + + return __kmalloc_node(size, flags, node); +} +#endif /** * kmalloc_array - allocate memory for an array. @@ -647,10 +635,10 @@ static inline __alloc_size(1, 2) void *kmalloc_array(size_t n, size_t size, gfp_ * @new_size: new size of a single member of the array * @flags: the type of memory to allocate (see kmalloc) */ -static inline __alloc_size(2, 3) void * __must_check krealloc_array(void *p, - size_t new_n, - size_t new_size, - gfp_t flags) +static inline __realloc_size(2, 3) void * __must_check krealloc_array(void *p, + size_t new_n, + size_t new_size, + gfp_t flags) { size_t bytes; @@ -671,6 +659,12 @@ static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flag return kmalloc_array(n, size, flags | __GFP_ZERO); } +void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node, + unsigned long caller) __alloc_size(1); +#define kmalloc_node_track_caller(size, flags, node) \ + __kmalloc_node_track_caller(size, flags, node, \ + _RET_IP_) + /* * kmalloc_track_caller is a special version of kmalloc that records the * calling function of the routine calling it for slab leak tracking instead @@ -679,9 +673,9 @@ static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flag * allocator where we care about the real place the memory allocation * request comes from. */ -extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller); #define kmalloc_track_caller(size, flags) \ - __kmalloc_track_caller(size, flags, _RET_IP_) + __kmalloc_node_track_caller(size, flags, \ + NUMA_NO_NODE, _RET_IP_) static inline __alloc_size(1, 2) void *kmalloc_array_node(size_t n, size_t size, gfp_t flags, int node) @@ -700,21 +694,6 @@ static inline __alloc_size(1, 2) void *kcalloc_node(size_t n, size_t size, gfp_t return kmalloc_array_node(n, size, flags | __GFP_ZERO, node); } - -#ifdef CONFIG_NUMA -extern void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node, - unsigned long caller) __alloc_size(1); -#define kmalloc_node_track_caller(size, flags, node) \ - __kmalloc_node_track_caller(size, flags, node, \ - _RET_IP_) - -#else /* CONFIG_NUMA */ - -#define kmalloc_node_track_caller(size, flags, node) \ - kmalloc_track_caller(size, flags) - -#endif /* CONFIG_NUMA */ - /* * Shortcuts */ @@ -774,11 +753,28 @@ static inline __alloc_size(1, 2) void *kvcalloc(size_t n, size_t size, gfp_t fla } extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags) - __alloc_size(3); + __realloc_size(3); extern void kvfree(const void *addr); extern void kvfree_sensitive(const void *addr, size_t len); unsigned int kmem_cache_size(struct kmem_cache *s); + +/** + * kmalloc_size_roundup - Report allocation bucket size for the given size + * + * @size: Number of bytes to round up from. + * + * This returns the number of bytes that would be available in a kmalloc() + * allocation of @size bytes. For example, a 126 byte request would be + * rounded up to the next sized kmalloc bucket, 128 bytes. (This is strictly + * for the general-purpose kmalloc()-based allocations, and is not for the + * pre-sized kmem_cache_alloc()-based allocations.) + * + * Use this to kmalloc() the full bucket size ahead of time instead of using + * ksize() to query the size after an allocation. + */ +size_t kmalloc_size_roundup(size_t size); + void __init kmem_cache_init_late(void); #if defined(CONFIG_SMP) && defined(CONFIG_SLAB) diff --git a/include/linux/soc/qcom/smd-rpm.h b/include/linux/soc/qcom/smd-rpm.h index 82c9d489833a..3ab8c07f71c0 100644 --- a/include/linux/soc/qcom/smd-rpm.h +++ b/include/linux/soc/qcom/smd-rpm.h @@ -41,6 +41,7 @@ struct qcom_smd_rpm; #define QCOM_SMD_RPM_HWKM_CLK 0x6d6b7768 #define QCOM_SMD_RPM_PKA_CLK 0x616b70 #define QCOM_SMD_RPM_MCFG_CLK 0x6766636d +#define QCOM_SMD_RPM_MMXI_CLK 0x69786d6d int qcom_rpm_smd_write(struct qcom_smd_rpm *rpm, int state, diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 822599957b35..9e4537f409c2 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -892,6 +892,9 @@ struct sdw_master_ops { * meaningful if multi_link is set. If set to 1, hardware-based * synchronization will be used even if a stream only uses a single * SoundWire segment. + * @dev_num_ida_min: if set, defines the minimum values for the IDA + * used to allocate system-unique device numbers. This value needs to be + * identical across all SoundWire bus in the system. */ struct sdw_bus { struct device *dev; @@ -916,6 +919,7 @@ struct sdw_bus { u32 bank_switch_timeout; bool multi_link; int hw_sync_min_links; + int dev_num_ida_min; }; int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index ec16ae49e6a4..2e9fd91572d4 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -15,32 +15,21 @@ #define SDW_LINK_SIZE 0x10000 /* Intel SHIM Registers Definition */ +/* LCAP */ #define SDW_SHIM_LCAP 0x0 -#define SDW_SHIM_LCTL 0x4 -#define SDW_SHIM_IPPTR 0x8 -#define SDW_SHIM_SYNC 0xC - -#define SDW_SHIM_CTLSCAP(x) (0x010 + 0x60 * (x)) -#define SDW_SHIM_CTLS0CM(x) (0x012 + 0x60 * (x)) -#define SDW_SHIM_CTLS1CM(x) (0x014 + 0x60 * (x)) -#define SDW_SHIM_CTLS2CM(x) (0x016 + 0x60 * (x)) -#define SDW_SHIM_CTLS3CM(x) (0x018 + 0x60 * (x)) -#define SDW_SHIM_PCMSCAP(x) (0x020 + 0x60 * (x)) +#define SDW_SHIM_LCAP_LCOUNT_MASK GENMASK(2, 0) -#define SDW_SHIM_PCMSYCHM(x, y) (0x022 + (0x60 * (x)) + (0x2 * (y))) -#define SDW_SHIM_PCMSYCHC(x, y) (0x042 + (0x60 * (x)) + (0x2 * (y))) -#define SDW_SHIM_PDMSCAP(x) (0x062 + 0x60 * (x)) -#define SDW_SHIM_IOCTL(x) (0x06C + 0x60 * (x)) -#define SDW_SHIM_CTMCTL(x) (0x06E + 0x60 * (x)) - -#define SDW_SHIM_WAKEEN 0x190 -#define SDW_SHIM_WAKESTS 0x192 +/* LCTL */ +#define SDW_SHIM_LCTL 0x4 #define SDW_SHIM_LCTL_SPA BIT(0) #define SDW_SHIM_LCTL_SPA_MASK GENMASK(3, 0) #define SDW_SHIM_LCTL_CPA BIT(8) #define SDW_SHIM_LCTL_CPA_MASK GENMASK(11, 8) +/* SYNC */ +#define SDW_SHIM_SYNC 0xC + #define SDW_SHIM_SYNC_SYNCPRD_VAL_24 (24000 / SDW_CADENCE_GSYNC_KHZ - 1) #define SDW_SHIM_SYNC_SYNCPRD_VAL_38_4 (38400 / SDW_CADENCE_GSYNC_KHZ - 1) #define SDW_SHIM_SYNC_SYNCPRD GENMASK(14, 0) @@ -49,19 +38,33 @@ #define SDW_SHIM_SYNC_CMDSYNC BIT(16) #define SDW_SHIM_SYNC_SYNCGO BIT(24) +/* Control stream capabililities and channel mask */ +#define SDW_SHIM_CTLSCAP(x) (0x010 + 0x60 * (x)) +#define SDW_SHIM_CTLS0CM(x) (0x012 + 0x60 * (x)) +#define SDW_SHIM_CTLS1CM(x) (0x014 + 0x60 * (x)) +#define SDW_SHIM_CTLS2CM(x) (0x016 + 0x60 * (x)) +#define SDW_SHIM_CTLS3CM(x) (0x018 + 0x60 * (x)) + +/* PCM Stream capabilities */ +#define SDW_SHIM_PCMSCAP(x) (0x020 + 0x60 * (x)) + #define SDW_SHIM_PCMSCAP_ISS GENMASK(3, 0) #define SDW_SHIM_PCMSCAP_OSS GENMASK(7, 4) #define SDW_SHIM_PCMSCAP_BSS GENMASK(12, 8) +/* PCM Stream Channel Map */ +#define SDW_SHIM_PCMSYCHM(x, y) (0x022 + (0x60 * (x)) + (0x2 * (y))) + +/* PCM Stream Channel Count */ +#define SDW_SHIM_PCMSYCHC(x, y) (0x042 + (0x60 * (x)) + (0x2 * (y))) + #define SDW_SHIM_PCMSYCM_LCHN GENMASK(3, 0) #define SDW_SHIM_PCMSYCM_HCHN GENMASK(7, 4) #define SDW_SHIM_PCMSYCM_STREAM GENMASK(13, 8) #define SDW_SHIM_PCMSYCM_DIR BIT(15) -#define SDW_SHIM_PDMSCAP_ISS GENMASK(3, 0) -#define SDW_SHIM_PDMSCAP_OSS GENMASK(7, 4) -#define SDW_SHIM_PDMSCAP_BSS GENMASK(12, 8) -#define SDW_SHIM_PDMSCAP_CPSS GENMASK(15, 13) +/* IO control */ +#define SDW_SHIM_IOCTL(x) (0x06C + 0x60 * (x)) #define SDW_SHIM_IOCTL_MIF BIT(0) #define SDW_SHIM_IOCTL_CO BIT(1) @@ -73,13 +76,23 @@ #define SDW_SHIM_IOCTL_CIBD BIT(8) #define SDW_SHIM_IOCTL_DIBD BIT(9) -#define SDW_SHIM_CTMCTL_DACTQE BIT(0) -#define SDW_SHIM_CTMCTL_DODS BIT(1) -#define SDW_SHIM_CTMCTL_DOAIS GENMASK(4, 3) +/* Wake Enable*/ +#define SDW_SHIM_WAKEEN 0x190 #define SDW_SHIM_WAKEEN_ENABLE BIT(0) + +/* Wake Status */ +#define SDW_SHIM_WAKESTS 0x192 + #define SDW_SHIM_WAKESTS_STATUS BIT(0) +/* AC Timing control */ +#define SDW_SHIM_CTMCTL(x) (0x06E + 0x60 * (x)) + +#define SDW_SHIM_CTMCTL_DACTQE BIT(0) +#define SDW_SHIM_CTMCTL_DODS BIT(1) +#define SDW_SHIM_CTMCTL_DOAIS GENMASK(4, 3) + /* Intel ALH Register definitions */ #define SDW_ALH_STRMZCFG(x) (0x000 + (0x4 * (x))) #define SDW_ALH_NUM_STREAMS 64 diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 5c0c5174155d..1341f7d62da4 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_SPINLOCK_H #define __LINUX_SPINLOCK_H +#define __LINUX_INSIDE_SPINLOCK_H /* * include/linux/spinlock.h - generic spinlock/rwlock declarations @@ -492,4 +493,5 @@ int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask, void free_bucket_spinlocks(spinlock_t *locks); +#undef __LINUX_INSIDE_SPINLOCK_H #endif /* __LINUX_SPINLOCK_H */ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 51fa0dab68c4..89eb6f4c659c 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -1,7 +1,7 @@ #ifndef __LINUX_SPINLOCK_API_SMP_H #define __LINUX_SPINLOCK_API_SMP_H -#ifndef __LINUX_SPINLOCK_H +#ifndef __LINUX_INSIDE_SPINLOCK_H # error "please don't include this file directly" #endif diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h index b8ba00ccccde..819aeba1c87e 100644 --- a/include/linux/spinlock_api_up.h +++ b/include/linux/spinlock_api_up.h @@ -1,7 +1,7 @@ #ifndef __LINUX_SPINLOCK_API_UP_H #define __LINUX_SPINLOCK_API_UP_H -#ifndef __LINUX_SPINLOCK_H +#ifndef __LINUX_INSIDE_SPINLOCK_H # error "please don't include this file directly" #endif diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h index 835aedaf68ac..61c49b16f69a 100644 --- a/include/linux/spinlock_rt.h +++ b/include/linux/spinlock_rt.h @@ -2,7 +2,7 @@ #ifndef __LINUX_SPINLOCK_RT_H #define __LINUX_SPINLOCK_RT_H -#ifndef __LINUX_SPINLOCK_H +#ifndef __LINUX_INSIDE_SPINLOCK_H #error Do not include directly. Use spinlock.h #endif diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index 16521074b6f7..c87204247592 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -1,7 +1,7 @@ #ifndef __LINUX_SPINLOCK_UP_H #define __LINUX_SPINLOCK_UP_H -#ifndef __LINUX_SPINLOCK_H +#ifndef __LINUX_INSIDE_SPINLOCK_H # error "please don't include this file directly" #endif diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index bc2797955de9..9ca7798d7a31 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -14,9 +14,15 @@ #include <linux/gfp.h> typedef u32 depot_stack_handle_t; +/* + * Number of bits in the handle that stack depot doesn't use. Users may store + * information in them. + */ +#define STACK_DEPOT_EXTRA_BITS 5 depot_stack_handle_t __stack_depot_save(unsigned long *entries, unsigned int nr_entries, + unsigned int extra_bits, gfp_t gfp_flags, bool can_alloc); /* @@ -59,6 +65,8 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries); +unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle); + int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, int spaces); diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index dc2e726fd820..8530c7328269 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -128,4 +128,9 @@ static inline const char *str_enabled_disabled(bool v) return v ? "enabled" : "disabled"; } +static inline const char *str_read_write(bool v) +{ + return v ? "read" : "write"; +} + #endif diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 75eea5ebb179..770ef2cb5775 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -246,6 +246,7 @@ void rpc_clnt_xprt_switch_remove_xprt(struct rpc_clnt *, struct rpc_xprt *); bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, const struct sockaddr *sap); void rpc_clnt_xprt_set_online(struct rpc_clnt *clnt, struct rpc_xprt *xprt); +void rpc_clnt_disconnect(struct rpc_clnt *clnt); void rpc_cleanup_clids(void); static inline int rpc_reply_expected(struct rpc_task *task) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index acc62647317c..b8ca3ecaf8d7 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -209,11 +209,17 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *); struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req); void rpc_put_task(struct rpc_task *); void rpc_put_task_async(struct rpc_task *); +bool rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status); +void rpc_task_try_cancel(struct rpc_task *task, int error); void rpc_signal_task(struct rpc_task *); void rpc_exit_task(struct rpc_task *); void rpc_exit(struct rpc_task *, int); void rpc_release_calldata(const struct rpc_call_ops *, void *); void rpc_killall_tasks(struct rpc_clnt *); +unsigned long rpc_cancel_tasks(struct rpc_clnt *clnt, int error, + bool (*fnmatch)(const struct rpc_task *, + const void *), + const void *data); void rpc_execute(struct rpc_task *); void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *); void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); @@ -252,7 +258,7 @@ int rpc_malloc(struct rpc_task *); void rpc_free(struct rpc_task *); int rpciod_up(void); void rpciod_down(void); -int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *); +int rpc_wait_for_completion_task(struct rpc_task *task); #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct net; void rpc_show_tasks(struct net *); @@ -264,11 +270,6 @@ extern struct workqueue_struct *xprtiod_workqueue; void rpc_prepare_task(struct rpc_task *task); gfp_t rpc_task_gfp_mask(void); -static inline int rpc_wait_for_completion_task(struct rpc_task *task) -{ - return __rpc_wait_for_completion_task(task, NULL); -} - #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) static inline const char * rpc_qname(const struct rpc_wait_queue *q) { diff --git a/include/linux/suspend.h b/include/linux/suspend.h index be7737262d8f..cfe19a028918 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -511,8 +511,8 @@ extern bool pm_save_wakeup_count(unsigned int count); extern void pm_wakep_autosleep_enabled(bool set); extern void pm_print_active_wakeup_sources(void); -extern void lock_system_sleep(void); -extern void unlock_system_sleep(void); +extern unsigned int lock_system_sleep(void); +extern void unlock_system_sleep(unsigned int); #else /* !CONFIG_PM_SLEEP */ @@ -535,8 +535,8 @@ static inline void pm_system_wakeup(void) {} static inline void pm_wakeup_clear(bool reset) {} static inline void pm_system_irq_wakeup(unsigned int irq_number) {} -static inline void lock_system_sleep(void) {} -static inline void unlock_system_sleep(void) {} +static inline unsigned int lock_system_sleep(void) { return 0; } +static inline void unlock_system_sleep(unsigned int flags) {} #endif /* !CONFIG_PM_SLEEP */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 43150b9bbc5c..a18cf4b7c724 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -162,6 +162,10 @@ union swap_header { */ struct reclaim_state { unsigned long reclaimed_slab; +#ifdef CONFIG_LRU_GEN + /* per-thread mm walk data */ + struct lru_gen_mm_walk *mm_walk; +#endif }; #ifdef __KERNEL__ @@ -351,6 +355,11 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio) return entry; } +static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry) +{ + folio->private = (void *)entry.val; +} + /* linux/mm/workingset.c */ void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg); @@ -375,11 +384,11 @@ extern unsigned long totalreserve_pages; /* linux/mm/swap.c */ -extern void lru_note_cost(struct lruvec *lruvec, bool file, - unsigned int nr_pages); -extern void lru_note_cost_folio(struct folio *); -extern void folio_add_lru(struct folio *); -extern void lru_cache_add(struct page *); +void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages); +void lru_note_cost_folio(struct folio *); +void folio_add_lru(struct folio *); +void folio_add_lru_vma(struct folio *, struct vm_area_struct *); +void lru_cache_add(struct page *); void mark_page_accessed(struct page *); void folio_mark_accessed(struct folio *); @@ -481,7 +490,8 @@ static inline long get_nr_swap_pages(void) extern void si_swapinfo(struct sysinfo *); swp_entry_t folio_alloc_swap(struct folio *folio); -extern void put_swap_page(struct page *page, swp_entry_t entry); +bool folio_free_swap(struct folio *folio); +void put_swap_folio(struct folio *folio, swp_entry_t entry); extern swp_entry_t get_swap_page_of_type(int); extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size); extern int add_swap_count_continuation(swp_entry_t, gfp_t); @@ -500,7 +510,6 @@ extern int __swp_swapcount(swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); extern struct swap_info_struct *page_swap_info(struct page *); extern struct swap_info_struct *swp_swap_info(swp_entry_t entry); -extern int try_to_free_swap(struct page *); struct backing_dev_info; extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); extern void exit_swap_address_space(unsigned int type); @@ -566,7 +575,7 @@ static inline void swap_free(swp_entry_t swp) { } -static inline void put_swap_page(struct page *page, swp_entry_t swp) +static inline void put_swap_folio(struct folio *folio, swp_entry_t swp) { } @@ -585,11 +594,6 @@ static inline int swp_swapcount(swp_entry_t entry) return 0; } -static inline int try_to_free_swap(struct page *page) -{ - return 0; -} - static inline swp_entry_t folio_alloc_swap(struct folio *folio) { swp_entry_t entry; @@ -597,6 +601,11 @@ static inline swp_entry_t folio_alloc_swap(struct folio *folio) return entry; } +static inline bool folio_free_swap(struct folio *folio) +{ + return false; +} + static inline int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, unsigned long nr_pages, sector_t start_block) @@ -657,7 +666,7 @@ static inline void folio_throttle_swaprate(struct folio *folio, gfp_t gfp) cgroup_throttle_swaprate(&folio->page, gfp); } -#ifdef CONFIG_MEMCG_SWAP +#if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP) void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry); int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry); static inline int mem_cgroup_try_charge_swap(struct folio *folio, @@ -677,7 +686,7 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_p } extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg); -extern bool mem_cgroup_swap_full(struct page *page); +extern bool mem_cgroup_swap_full(struct folio *folio); #else static inline void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry) { @@ -699,7 +708,7 @@ static inline long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg) return get_nr_swap_pages(); } -static inline bool mem_cgroup_swap_full(struct page *page) +static inline bool mem_cgroup_swap_full(struct folio *folio) { return vm_swap_full(); } diff --git a/include/linux/swap_cgroup.h b/include/linux/swap_cgroup.h index a12dd1c3966c..ae73a87775b3 100644 --- a/include/linux/swap_cgroup.h +++ b/include/linux/swap_cgroup.h @@ -4,7 +4,7 @@ #include <linux/swap.h> -#ifdef CONFIG_MEMCG_SWAP +#if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP) extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent, unsigned short old, unsigned short new); @@ -40,6 +40,6 @@ static inline void swap_cgroup_swapoff(int type) return; } -#endif /* CONFIG_MEMCG_SWAP */ +#endif #endif /* __LINUX_SWAP_CGROUP_H */ diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h index 54078542134c..7ed529a77c5b 100644 --- a/include/linux/swapfile.h +++ b/include/linux/swapfile.h @@ -8,6 +8,11 @@ */ extern struct swap_info_struct *swap_info[]; extern unsigned long generic_max_swapfile_size(void); -extern unsigned long max_swapfile_size(void); +unsigned long arch_max_swapfile_size(void); + +/* Maximum swapfile size supported for the arch (not inclusive). */ +extern unsigned long swapfile_maximum_size; +/* Whether swap migration entry supports storing A/D bits for the arch */ +extern bool swap_migration_ad_supported; #endif /* _LINUX_SWAPFILE_H */ diff --git a/include/linux/swapops.h b/include/linux/swapops.h index a3d435bf9f97..86b95ccb81bb 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -8,6 +8,10 @@ #ifdef CONFIG_MMU +#ifdef CONFIG_SWAP +#include <linux/swapfile.h> +#endif /* CONFIG_SWAP */ + /* * swapcache pages are stored in the swapper_space radix tree. We want to * get good packing density in that tree, so the index should be dense in @@ -23,6 +27,45 @@ #define SWP_TYPE_SHIFT (BITS_PER_XA_VALUE - MAX_SWAPFILES_SHIFT) #define SWP_OFFSET_MASK ((1UL << SWP_TYPE_SHIFT) - 1) +/* + * Definitions only for PFN swap entries (see is_pfn_swap_entry()). To + * store PFN, we only need SWP_PFN_BITS bits. Each of the pfn swap entries + * can use the extra bits to store other information besides PFN. + */ +#ifdef MAX_PHYSMEM_BITS +#define SWP_PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT) +#else /* MAX_PHYSMEM_BITS */ +#define SWP_PFN_BITS (BITS_PER_LONG - PAGE_SHIFT) +#endif /* MAX_PHYSMEM_BITS */ +#define SWP_PFN_MASK (BIT(SWP_PFN_BITS) - 1) + +/** + * Migration swap entry specific bitfield definitions. Layout: + * + * |----------+--------------------| + * | swp_type | swp_offset | + * |----------+--------+-+-+-------| + * | | resv |D|A| PFN | + * |----------+--------+-+-+-------| + * + * @SWP_MIG_YOUNG_BIT: Whether the page used to have young bit set (bit A) + * @SWP_MIG_DIRTY_BIT: Whether the page used to have dirty bit set (bit D) + * + * Note: A/D bits will be stored in migration entries iff there're enough + * free bits in arch specific swp offset. By default we'll ignore A/D bits + * when migrating a page. Please refer to migration_entry_supports_ad() + * for more information. If there're more bits besides PFN and A/D bits, + * they should be reserved and always be zeros. + */ +#define SWP_MIG_YOUNG_BIT (SWP_PFN_BITS) +#define SWP_MIG_DIRTY_BIT (SWP_PFN_BITS + 1) +#define SWP_MIG_TOTAL_BITS (SWP_PFN_BITS + 2) + +#define SWP_MIG_YOUNG BIT(SWP_MIG_YOUNG_BIT) +#define SWP_MIG_DIRTY BIT(SWP_MIG_DIRTY_BIT) + +static inline bool is_pfn_swap_entry(swp_entry_t entry); + /* Clear all flags but only keep swp_entry_t related information */ static inline pte_t pte_swp_clear_flags(pte_t pte) { @@ -64,6 +107,17 @@ static inline pgoff_t swp_offset(swp_entry_t entry) return entry.val & SWP_OFFSET_MASK; } +/* + * This should only be called upon a pfn swap entry to get the PFN stored + * in the swap entry. Please refers to is_pfn_swap_entry() for definition + * of pfn swap entry. + */ +static inline unsigned long swp_offset_pfn(swp_entry_t entry) +{ + VM_BUG_ON(!is_pfn_swap_entry(entry)); + return swp_offset(entry) & SWP_PFN_MASK; +} + /* check whether a pte points to a swap entry */ static inline int is_swap_pte(pte_t pte) { @@ -240,6 +294,52 @@ static inline swp_entry_t make_writable_migration_entry(pgoff_t offset) return swp_entry(SWP_MIGRATION_WRITE, offset); } +/* + * Returns whether the host has large enough swap offset field to support + * carrying over pgtable A/D bits for page migrations. The result is + * pretty much arch specific. + */ +static inline bool migration_entry_supports_ad(void) +{ +#ifdef CONFIG_SWAP + return swap_migration_ad_supported; +#else /* CONFIG_SWAP */ + return false; +#endif /* CONFIG_SWAP */ +} + +static inline swp_entry_t make_migration_entry_young(swp_entry_t entry) +{ + if (migration_entry_supports_ad()) + return swp_entry(swp_type(entry), + swp_offset(entry) | SWP_MIG_YOUNG); + return entry; +} + +static inline bool is_migration_entry_young(swp_entry_t entry) +{ + if (migration_entry_supports_ad()) + return swp_offset(entry) & SWP_MIG_YOUNG; + /* Keep the old behavior of aging page after migration */ + return false; +} + +static inline swp_entry_t make_migration_entry_dirty(swp_entry_t entry) +{ + if (migration_entry_supports_ad()) + return swp_entry(swp_type(entry), + swp_offset(entry) | SWP_MIG_DIRTY); + return entry; +} + +static inline bool is_migration_entry_dirty(swp_entry_t entry) +{ + if (migration_entry_supports_ad()) + return swp_offset(entry) & SWP_MIG_DIRTY; + /* Keep the old behavior of clean page after migration */ + return false; +} + extern void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, spinlock_t *ptl); extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, @@ -247,8 +347,8 @@ extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, #ifdef CONFIG_HUGETLB_PAGE extern void __migration_entry_wait_huge(pte_t *ptep, spinlock_t *ptl); extern void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte); -#endif -#else +#endif /* CONFIG_HUGETLB_PAGE */ +#else /* CONFIG_MIGRATION */ static inline swp_entry_t make_readable_migration_entry(pgoff_t offset) { return swp_entry(0, 0); @@ -276,7 +376,7 @@ static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, #ifdef CONFIG_HUGETLB_PAGE static inline void __migration_entry_wait_huge(pte_t *ptep, spinlock_t *ptl) { } static inline void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte) { } -#endif +#endif /* CONFIG_HUGETLB_PAGE */ static inline int is_writable_migration_entry(swp_entry_t entry) { return 0; @@ -286,7 +386,26 @@ static inline int is_readable_migration_entry(swp_entry_t entry) return 0; } -#endif +static inline swp_entry_t make_migration_entry_young(swp_entry_t entry) +{ + return entry; +} + +static inline bool is_migration_entry_young(swp_entry_t entry) +{ + return false; +} + +static inline swp_entry_t make_migration_entry_dirty(swp_entry_t entry) +{ + return entry; +} + +static inline bool is_migration_entry_dirty(swp_entry_t entry) +{ + return false; +} +#endif /* CONFIG_MIGRATION */ typedef unsigned long pte_marker; @@ -369,7 +488,7 @@ static inline int pte_none_mostly(pte_t pte) static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry) { - struct page *p = pfn_to_page(swp_offset(entry)); + struct page *p = pfn_to_page(swp_offset_pfn(entry)); /* * Any use of migration entries may only occur while the @@ -387,6 +506,9 @@ static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry) */ static inline bool is_pfn_swap_entry(swp_entry_t entry) { + /* Make sure the swp offset can always store the needed fields */ + BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS); + return is_migration_entry(entry) || is_device_private_entry(entry) || is_device_exclusive_entry(entry); } @@ -426,7 +548,7 @@ static inline int is_pmd_migration_entry(pmd_t pmd) { return is_swap_pmd(pmd) && is_migration_entry(pmd_to_swp_entry(pmd)); } -#else +#else /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ static inline int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, struct page *page) { @@ -455,7 +577,7 @@ static inline int is_pmd_migration_entry(pmd_t pmd) { return 0; } -#endif +#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ #ifdef CONFIG_MEMORY_FAILURE @@ -475,27 +597,17 @@ static inline int is_hwpoison_entry(swp_entry_t entry) return swp_type(entry) == SWP_HWPOISON; } -static inline unsigned long hwpoison_entry_to_pfn(swp_entry_t entry) -{ - return swp_offset(entry); -} - static inline void num_poisoned_pages_inc(void) { atomic_long_inc(&num_poisoned_pages); } -static inline void num_poisoned_pages_dec(void) -{ - atomic_long_dec(&num_poisoned_pages); -} - static inline void num_poisoned_pages_sub(long i) { atomic_long_sub(i, &num_poisoned_pages); } -#else +#else /* CONFIG_MEMORY_FAILURE */ static inline swp_entry_t make_hwpoison_entry(struct page *page) { @@ -514,7 +626,7 @@ static inline void num_poisoned_pages_inc(void) static inline void num_poisoned_pages_sub(long i) { } -#endif +#endif /* CONFIG_MEMORY_FAILURE */ static inline int non_swap_entry(swp_entry_t entry) { diff --git a/include/linux/syslog.h b/include/linux/syslog.h index 86af908e2663..955f80e34d4f 100644 --- a/include/linux/syslog.h +++ b/include/linux/syslog.h @@ -8,6 +8,8 @@ #ifndef _LINUX_SYSLOG_H #define _LINUX_SYSLOG_H +#include <linux/wait.h> + /* Close the log. Currently a NOP. */ #define SYSLOG_ACTION_CLOSE 0 /* Open the log. Currently a NOP. */ @@ -35,5 +37,6 @@ #define SYSLOG_FROM_PROC 1 int do_syslog(int type, char __user *buf, int count, int source); +extern wait_queue_head_t log_wait; #endif /* _LINUX_SYSLOG_H */ diff --git a/include/linux/termios_internal.h b/include/linux/termios_internal.h new file mode 100644 index 000000000000..d77f29e5e2b7 --- /dev/null +++ b/include/linux/termios_internal.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_TERMIOS_CONV_H +#define _LINUX_TERMIOS_CONV_H + +#include <linux/uaccess.h> +#include <asm/termios.h> + +/* intr=^C quit=^\ erase=del kill=^U + eof=^D vtime=\0 vmin=\1 sxtc=\0 + start=^Q stop=^S susp=^Z eol=\0 + reprint=^R discard=^O werase=^W lnext=^V + eol2=\0 +*/ + +#ifdef VDSUSP +#define INIT_C_CC_VDSUSP_EXTRA [VDSUSP] = 'Y'-0x40, +#else +#define INIT_C_CC_VDSUSP_EXTRA +#endif + +#define INIT_C_CC { \ + [VINTR] = 'C'-0x40, \ + [VQUIT] = '\\'-0x40, \ + [VERASE] = '\177', \ + [VKILL] = 'U'-0x40, \ + [VEOF] = 'D'-0x40, \ + [VSTART] = 'Q'-0x40, \ + [VSTOP] = 'S'-0x40, \ + [VSUSP] = 'Z'-0x40, \ + [VREPRINT] = 'R'-0x40, \ + [VDISCARD] = 'O'-0x40, \ + [VWERASE] = 'W'-0x40, \ + [VLNEXT] = 'V'-0x40, \ + INIT_C_CC_VDSUSP_EXTRA \ + [VMIN] = 1 } + +int user_termio_to_kernel_termios(struct ktermios *, struct termio __user *); +int kernel_termios_to_user_termio(struct termio __user *, struct ktermios *); +#ifdef TCGETS2 +int user_termios_to_kernel_termios(struct ktermios *, struct termios2 __user *); +int kernel_termios_to_user_termios(struct termios2 __user *, struct ktermios *); +int user_termios_to_kernel_termios_1(struct ktermios *, struct termios __user *); +int kernel_termios_to_user_termios_1(struct termios __user *, struct ktermios *); +#else /* TCGETS2 */ +int user_termios_to_kernel_termios(struct ktermios *, struct termios __user *); +int kernel_termios_to_user_termios(struct termios __user *, struct ktermios *); +#endif /* TCGETS2 */ + +#endif /* _LINUX_TERMIOS_CONV_H */ diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 6f1ec4fb7ef8..9ecc128944a1 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -308,9 +308,6 @@ void devm_thermal_of_zone_unregister(struct device *dev, struct thermal_zone_dev void thermal_of_zone_unregister(struct thermal_zone_device *tz); -int thermal_zone_of_get_sensor_id(struct device_node *tz_np, - struct device_node *sensor_np, - u32 *id); #else static inline struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data, @@ -334,13 +331,6 @@ static inline void devm_thermal_of_zone_unregister(struct device *dev, struct thermal_zone_device *tz) { } - -static inline int thermal_zone_of_get_sensor_id(struct device_node *tz_np, - struct device_node *sensor_np, - u32 *id) -{ - return -ENOENT; -} #endif #ifdef CONFIG_THERMAL diff --git a/include/linux/trace.h b/include/linux/trace.h index bf169612ffe1..b5e16e438448 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -2,8 +2,6 @@ #ifndef _LINUX_TRACE_H #define _LINUX_TRACE_H -#ifdef CONFIG_TRACING - #define TRACE_EXPORT_FUNCTION BIT(0) #define TRACE_EXPORT_EVENT BIT(1) #define TRACE_EXPORT_MARKER BIT(2) @@ -28,6 +26,8 @@ struct trace_export { int flags; }; +#ifdef CONFIG_TRACING + int register_ftrace_export(struct trace_export *export); int unregister_ftrace_export(struct trace_export *export); @@ -48,6 +48,38 @@ void osnoise_arch_unregister(void); void osnoise_trace_irq_entry(int id); void osnoise_trace_irq_exit(int id, const char *desc); +#else /* CONFIG_TRACING */ +static inline int register_ftrace_export(struct trace_export *export) +{ + return -EINVAL; +} +static inline int unregister_ftrace_export(struct trace_export *export) +{ + return 0; +} +static inline void trace_printk_init_buffers(void) +{ +} +static inline int trace_array_printk(struct trace_array *tr, unsigned long ip, + const char *fmt, ...) +{ + return 0; +} +static inline int trace_array_init_printk(struct trace_array *tr) +{ + return -EINVAL; +} +static inline void trace_array_put(struct trace_array *tr) +{ +} +static inline struct trace_array *trace_array_get_by_name(const char *name) +{ + return NULL; +} +static inline int trace_array_destroy(struct trace_array *tr) +{ + return 0; +} #endif /* CONFIG_TRACING */ #endif /* _LINUX_TRACE_H */ diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 8401dec93c15..20749bd9db71 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -92,6 +92,7 @@ struct trace_iterator { unsigned int temp_size; char *fmt; /* modified format holder */ unsigned int fmt_size; + long wait_index; /* trace_seq for __print_flags() and __print_symbolic() etc. */ struct trace_seq tmp_seq; diff --git a/include/linux/tty.h b/include/linux/tty.h index 7b0a5d478ef6..730c3301d710 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -122,8 +122,6 @@ struct tty_operations; /** * struct tty_struct - state associated with a tty while open * - * @magic: magic value set early in @alloc_tty_struct to %TTY_MAGIC, for - * debugging purposes * @kref: reference counting by tty_kref_get() and tty_kref_put(), reaching zero * frees the structure * @dev: class device or %NULL (e.g. ptys, serdev) @@ -193,7 +191,6 @@ struct tty_operations; * &struct tty_port. */ struct tty_struct { - int magic; struct kref kref; struct device *dev; struct tty_driver *driver; @@ -260,9 +257,6 @@ struct tty_file_private { struct list_head list; }; -/* tty magic number */ -#define TTY_MAGIC 0x5401 - /** * DOC: TTY Struct Flags * @@ -434,7 +428,7 @@ int tty_hung_up_p(struct file *filp); void do_SAK(struct tty_struct *tty); void __do_SAK(struct tty_struct *tty); void no_tty(void); -speed_t tty_termios_baud_rate(struct ktermios *termios); +speed_t tty_termios_baud_rate(const struct ktermios *termios); void tty_termios_encode_baud_rate(struct ktermios *termios, speed_t ibaud, speed_t obaud); void tty_encode_baud_rate(struct tty_struct *tty, speed_t ibaud, @@ -458,7 +452,7 @@ static inline speed_t tty_get_baud_rate(struct tty_struct *tty) unsigned char tty_get_char_size(unsigned int cflag); unsigned char tty_get_frame_size(unsigned int cflag); -void tty_termios_copy_hw(struct ktermios *new, struct ktermios *old); +void tty_termios_copy_hw(struct ktermios *new, const struct ktermios *old); int tty_termios_hw_change(const struct ktermios *a, const struct ktermios *b); int tty_set_termios(struct tty_struct *tty, struct ktermios *kt); diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 4841d8069c07..e00034118c7b 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -7,6 +7,7 @@ #include <linux/kref.h> #include <linux/list.h> #include <linux/cdev.h> +#include <linux/uaccess.h> #include <linux/termios.h> #include <linux/seq_file.h> @@ -141,7 +142,7 @@ struct serial_struct; * * Optional. * - * @set_termios: ``void ()(struct tty_struct *tty, struct ktermios *old)`` + * @set_termios: ``void ()(struct tty_struct *tty, const struct ktermios *old)`` * * This routine allows the @tty driver to be notified when device's * termios settings have changed. New settings are in @tty->termios. @@ -365,7 +366,7 @@ struct tty_operations { unsigned int cmd, unsigned long arg); long (*compat_ioctl)(struct tty_struct *tty, unsigned int cmd, unsigned long arg); - void (*set_termios)(struct tty_struct *tty, struct ktermios * old); + void (*set_termios)(struct tty_struct *tty, const struct ktermios *old); void (*throttle)(struct tty_struct * tty); void (*unthrottle)(struct tty_struct * tty); void (*stop)(struct tty_struct *tty); @@ -396,7 +397,6 @@ struct tty_operations { /** * struct tty_driver -- driver for TTY devices * - * @magic: set to %TTY_DRIVER_MAGIC in __tty_alloc_driver() * @kref: reference counting. Reaching zero frees all the internals and the * driver. * @cdevs: allocated/registered character /dev devices @@ -432,7 +432,6 @@ struct tty_operations { * @driver_name, @name, @type, @subtype, @init_termios, and @ops. */ struct tty_driver { - int magic; struct kref kref; struct cdev **cdevs; struct module *owner; @@ -489,9 +488,6 @@ static inline void tty_set_operations(struct tty_driver *driver, driver->ops = op; } -/* tty driver magic number */ -#define TTY_DRIVER_MAGIC 0x5402 - /** * DOC: TTY Driver Flags * diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index ede6f2157f32..dcb61ec11424 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -130,7 +130,7 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * a pointer to wordsize-sensitive structure belongs here, but most of * ldiscs will happily leave it %NULL. * - * @set_termios: [TTY] ``void ()(struct tty_struct *tty, struct ktermios *old)`` + * @set_termios: [TTY] ``void ()(struct tty_struct *tty, const struct ktermios *old)`` * * This function notifies the line discpline that a change has been made * to the termios structure. @@ -227,7 +227,7 @@ struct tty_ldisc_ops { unsigned long arg); int (*compat_ioctl)(struct tty_struct *tty, unsigned int cmd, unsigned long arg); - void (*set_termios)(struct tty_struct *tty, struct ktermios *old); + void (*set_termios)(struct tty_struct *tty, const struct ktermios *old); __poll_t (*poll)(struct tty_struct *tty, struct file *file, struct poll_table_struct *wait); void (*hangup)(struct tty_struct *tty); diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index 6ad4e9032d53..46040d66334a 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -8,7 +8,7 @@ * * Key points : * - * - Use a seqcount on 32-bit SMP, only disable preemption for 32-bit UP. + * - Use a seqcount on 32-bit * - The whole thing is a no-op on 64-bit architectures. * * Usage constraints: @@ -20,7 +20,8 @@ * writer and also spin forever. * * 3) Write side must use the _irqsave() variant if other writers, or a reader, - * can be invoked from an IRQ context. + * can be invoked from an IRQ context. On 64bit systems this variant does not + * disable interrupts. * * 4) If reader fetches several counters, there is no guarantee the whole values * are consistent w.r.t. each other (remember point #2: seqcounts are not @@ -29,11 +30,6 @@ * 5) Readers are allowed to sleep or be preempted/interrupted: they perform * pure reads. * - * 6) Readers must use both u64_stats_fetch_{begin,retry}_irq() if the stats - * might be updated from a hardirq or softirq context (remember point #1: - * seqcounts are not used for UP kernels). 32-bit UP stat readers could read - * corrupted 64-bit values otherwise. - * * Usage : * * Stats producer (writer) should use following template granted it already got @@ -66,7 +62,7 @@ #include <linux/seqlock.h> struct u64_stats_sync { -#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) +#if BITS_PER_LONG == 32 seqcount_t seq; #endif }; @@ -98,7 +94,22 @@ static inline void u64_stats_inc(u64_stats_t *p) local64_inc(&p->v); } -#else +static inline void u64_stats_init(struct u64_stats_sync *syncp) { } +static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp) { } +static inline void __u64_stats_update_end(struct u64_stats_sync *syncp) { } +static inline unsigned long __u64_stats_irqsave(void) { return 0; } +static inline void __u64_stats_irqrestore(unsigned long flags) { } +static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp) +{ + return 0; +} +static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, + unsigned int start) +{ + return false; +} + +#else /* 64 bit */ typedef struct { u64 v; @@ -123,123 +134,95 @@ static inline void u64_stats_inc(u64_stats_t *p) { p->v++; } -#endif -#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) -#define u64_stats_init(syncp) seqcount_init(&(syncp)->seq) -#else static inline void u64_stats_init(struct u64_stats_sync *syncp) { + seqcount_init(&syncp->seq); } -#endif -static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) +static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp) { -#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_disable(); + preempt_disable_nested(); write_seqcount_begin(&syncp->seq); -#endif } -static inline void u64_stats_update_end(struct u64_stats_sync *syncp) +static inline void __u64_stats_update_end(struct u64_stats_sync *syncp) { -#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) write_seqcount_end(&syncp->seq); - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_enable(); -#endif + preempt_enable_nested(); } -static inline unsigned long -u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp) +static inline unsigned long __u64_stats_irqsave(void) { - unsigned long flags = 0; + unsigned long flags; -#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_disable(); - else - local_irq_save(flags); - write_seqcount_begin(&syncp->seq); -#endif + local_irq_save(flags); return flags; } -static inline void -u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp, - unsigned long flags) +static inline void __u64_stats_irqrestore(unsigned long flags) { -#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) - write_seqcount_end(&syncp->seq); - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_enable(); - else - local_irq_restore(flags); -#endif + local_irq_restore(flags); } static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp) { -#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) return read_seqcount_begin(&syncp->seq); -#else - return 0; -#endif } -static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp) +static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, + unsigned int start) { -#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT)) - preempt_disable(); -#endif - return __u64_stats_fetch_begin(syncp); + return read_seqcount_retry(&syncp->seq, start); } +#endif /* !64 bit */ -static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, - unsigned int start) +static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) { -#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) - return read_seqcount_retry(&syncp->seq, start); -#else - return false; -#endif + __u64_stats_update_begin(syncp); +} + +static inline void u64_stats_update_end(struct u64_stats_sync *syncp) +{ + __u64_stats_update_end(syncp); +} + +static inline unsigned long u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp) +{ + unsigned long flags = __u64_stats_irqsave(); + + __u64_stats_update_begin(syncp); + return flags; +} + +static inline void u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp, + unsigned long flags) +{ + __u64_stats_update_end(syncp); + __u64_stats_irqrestore(flags); +} + +static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp) +{ + return __u64_stats_fetch_begin(syncp); } static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, unsigned int start) { -#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT)) - preempt_enable(); -#endif return __u64_stats_fetch_retry(syncp, start); } -/* - * In case irq handlers can update u64 counters, readers can use following helpers - * - SMP 32bit arches use seqcount protection, irq safe. - * - UP 32bit must disable irqs. - * - 64bit have no problem atomically reading u64 values, irq safe. - */ +/* Obsolete interfaces */ static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp) { -#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT) - preempt_disable(); -#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP) - local_irq_disable(); -#endif - return __u64_stats_fetch_begin(syncp); + return u64_stats_fetch_begin(syncp); } static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp, unsigned int start) { -#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT) - preempt_enable(); -#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP) - local_irq_enable(); -#endif - return __u64_stats_fetch_retry(syncp, start); + return u64_stats_fetch_retry(syncp, start); } #endif /* _LINUX_U64_STATS_SYNC_H */ diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 47e5d374c7eb..afb18f198843 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -58,20 +58,28 @@ static __always_inline __must_check unsigned long __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) { - instrument_copy_from_user(to, from, n); + unsigned long res; + + instrument_copy_from_user_before(to, from, n); check_object_size(to, n, false); - return raw_copy_from_user(to, from, n); + res = raw_copy_from_user(to, from, n); + instrument_copy_from_user_after(to, from, n, res); + return res; } static __always_inline __must_check unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { + unsigned long res; + might_fault(); + instrument_copy_from_user_before(to, from, n); if (should_fail_usercopy()) return n; - instrument_copy_from_user(to, from, n); check_object_size(to, n, false); - return raw_copy_from_user(to, from, n); + res = raw_copy_from_user(to, from, n); + instrument_copy_from_user_after(to, from, n, res); + return res; } /** @@ -115,8 +123,9 @@ _copy_from_user(void *to, const void __user *from, unsigned long n) unsigned long res = n; might_fault(); if (!should_fail_usercopy() && likely(access_ok(from, n))) { - instrument_copy_from_user(to, from, n); + instrument_copy_from_user_before(to, from, n); res = raw_copy_from_user(to, from, n); + instrument_copy_from_user_after(to, from, n, res); } if (unlikely(res)) memset(to + (n - res), 0, res); diff --git a/include/linux/ucb1400.h b/include/linux/ucb1400.h index 22345391350b..2516082cd3a9 100644 --- a/include/linux/ucb1400.h +++ b/include/linux/ucb1400.h @@ -23,7 +23,7 @@ #include <sound/ac97_codec.h> #include <linux/mutex.h> #include <linux/platform_device.h> -#include <linux/gpio.h> +#include <linux/gpio/driver.h> /* * UCB1400 AC-link registers diff --git a/include/linux/umh.h b/include/linux/umh.h index 244aff638220..5d1f6129b847 100644 --- a/include/linux/umh.h +++ b/include/linux/umh.h @@ -11,10 +11,11 @@ struct cred; struct file; -#define UMH_NO_WAIT 0 /* don't wait at all */ -#define UMH_WAIT_EXEC 1 /* wait for the exec, but not the process */ -#define UMH_WAIT_PROC 2 /* wait for the process to complete */ -#define UMH_KILLABLE 4 /* wait for EXEC/PROC killable */ +#define UMH_NO_WAIT 0x00 /* don't wait at all */ +#define UMH_WAIT_EXEC 0x01 /* wait for the exec, but not the process */ +#define UMH_WAIT_PROC 0x02 /* wait for the process to complete */ +#define UMH_KILLABLE 0x04 /* wait for EXEC/PROC killable */ +#define UMH_FREEZABLE 0x08 /* wait for EXEC/PROC freezable */ struct subprocess_info { struct work_struct work; diff --git a/include/linux/units.h b/include/linux/units.h index 681fc652e3d7..2793a41e73a2 100644 --- a/include/linux/units.h +++ b/include/linux/units.h @@ -20,6 +20,9 @@ #define PICO 1000000000000ULL #define FEMTO 1000000000000000ULL +#define NANOHZ_PER_HZ 1000000000UL +#define MICROHZ_PER_HZ 1000000UL +#define MILLIHZ_PER_HZ 1000UL #define HZ_PER_KHZ 1000UL #define KHZ_PER_MHZ 1000UL #define HZ_PER_MHZ 1000000UL diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h index edf3342507f1..ee38835ed77c 100644 --- a/include/linux/usb/chipidea.h +++ b/include/linux/usb/chipidea.h @@ -62,6 +62,7 @@ struct ci_hdrc_platform_data { #define CI_HDRC_REQUIRES_ALIGNED_DMA BIT(13) #define CI_HDRC_IMX_IS_HSIC BIT(14) #define CI_HDRC_PMQOS BIT(15) +#define CI_HDRC_PHY_VBUS_CONTROL BIT(16) enum usb_dr_mode dr_mode; #define CI_HDRC_CONTROLLER_RESET_EVENT 0 #define CI_HDRC_CONTROLLER_STOPPED_EVENT 1 diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 67f8713d3fa3..78cd566ee238 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -479,7 +479,6 @@ static inline int ehset_single_step_set_feature(struct usb_hcd *hcd, int port) struct pci_dev; struct pci_device_id; extern int usb_hcd_pci_probe(struct pci_dev *dev, - const struct pci_device_id *id, const struct hc_driver *driver); extern void usb_hcd_pci_remove(struct pci_dev *dev); extern void usb_hcd_pci_shutdown(struct pci_dev *dev); diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 8ea319f89e1f..f7bfedb740f5 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -276,8 +276,8 @@ struct usb_serial_driver { unsigned int cmd, unsigned long arg); void (*get_serial)(struct tty_struct *tty, struct serial_struct *ss); int (*set_serial)(struct tty_struct *tty, struct serial_struct *ss); - void (*set_termios)(struct tty_struct *tty, - struct usb_serial_port *port, struct ktermios *old); + void (*set_termios)(struct tty_struct *tty, struct usb_serial_port *port, + const struct ktermios *old); void (*break_ctl)(struct tty_struct *tty, int break_state); unsigned int (*chars_in_buffer)(struct tty_struct *tty); void (*wait_until_sent)(struct tty_struct *tty, long timeout); diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h index 20c0bedb8ec8..17657451c762 100644 --- a/include/linux/usb/tcpci.h +++ b/include/linux/usb/tcpci.h @@ -167,6 +167,11 @@ /* I2C_WRITE_BYTE_COUNT + 1 when TX_BUF_BYTE_x is only accessible I2C_WRITE_BYTE_COUNT */ #define TCPC_TRANSMIT_BUFFER_MAX_LEN 31 +#define tcpc_presenting_rd(reg, cc) \ + (!(TCPC_ROLE_CTRL_DRP & (reg)) && \ + (((reg) & (TCPC_ROLE_CTRL_## cc ##_MASK << TCPC_ROLE_CTRL_## cc ##_SHIFT)) == \ + (TCPC_ROLE_CTRL_CC_RD << TCPC_ROLE_CTRL_## cc ##_SHIFT))) + struct tcpci; /* @@ -207,4 +212,21 @@ irqreturn_t tcpci_irq(struct tcpci *tcpci); struct tcpm_port; struct tcpm_port *tcpci_get_tcpm_port(struct tcpci *tcpci); + +static inline enum typec_cc_status tcpci_to_typec_cc(unsigned int cc, bool sink) +{ + switch (cc) { + case 0x1: + return sink ? TYPEC_CC_RP_DEF : TYPEC_CC_RA; + case 0x2: + return sink ? TYPEC_CC_RP_1_5 : TYPEC_CC_RD; + case 0x3: + if (sink) + return TYPEC_CC_RP_3_0; + fallthrough; + case 0x0: + default: + return TYPEC_CC_OPEN; + } +} #endif /* __LINUX_USB_TCPCI_H */ diff --git a/include/linux/user_events.h b/include/linux/user_events.h index 736e05603463..592a3fbed98e 100644 --- a/include/linux/user_events.h +++ b/include/linux/user_events.h @@ -20,15 +20,6 @@ #define USER_EVENTS_SYSTEM "user_events" #define USER_EVENTS_PREFIX "u:" -/* Bits 0-6 are for known probe types, Bit 7 is for unknown probes */ -#define EVENT_BIT_FTRACE 0 -#define EVENT_BIT_PERF 1 -#define EVENT_BIT_OTHER 7 - -#define EVENT_STATUS_FTRACE (1 << EVENT_BIT_FTRACE) -#define EVENT_STATUS_PERF (1 << EVENT_BIT_PERF) -#define EVENT_STATUS_OTHER (1 << EVENT_BIT_OTHER) - /* Create dynamic location entry within a 32-bit value */ #define DYN_LOC(offset, size) ((size) << 16 | (offset)) @@ -45,12 +36,12 @@ struct user_reg { /* Input: Pointer to string with event name, description and flags */ __u64 name_args; - /* Output: Byte index of the event within the status page */ - __u32 status_index; + /* Output: Bitwise index of the event within the status page */ + __u32 status_bit; /* Output: Index of the event to use when writing data */ __u32 write_index; -}; +} __attribute__((__packed__)); #define DIAG_IOC_MAGIC '*' diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 33a4240e6a6f..45f09bec02c4 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -54,15 +54,17 @@ enum ucount_type { UCOUNT_FANOTIFY_GROUPS, UCOUNT_FANOTIFY_MARKS, #endif + UCOUNT_COUNTS, +}; + +enum rlimit_type { UCOUNT_RLIMIT_NPROC, UCOUNT_RLIMIT_MSGQUEUE, UCOUNT_RLIMIT_SIGPENDING, UCOUNT_RLIMIT_MEMLOCK, - UCOUNT_COUNTS, + UCOUNT_RLIMIT_COUNTS, }; -#define MAX_PER_NAMESPACE_UCOUNTS UCOUNT_RLIMIT_NPROC - struct user_namespace { struct uid_gid_map uid_map; struct uid_gid_map gid_map; @@ -99,6 +101,7 @@ struct user_namespace { #endif struct ucounts *ucounts; long ucount_max[UCOUNT_COUNTS]; + long rlimit_max[UCOUNT_RLIMIT_COUNTS]; } __randomize_layout; struct ucounts { @@ -107,6 +110,7 @@ struct ucounts { kuid_t uid; atomic_t count; atomic_long_t ucount[UCOUNT_COUNTS]; + atomic_long_t rlimit[UCOUNT_RLIMIT_COUNTS]; }; extern struct user_namespace init_user_ns; @@ -120,21 +124,26 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid); struct ucounts * __must_check get_ucounts(struct ucounts *ucounts); void put_ucounts(struct ucounts *ucounts); -static inline long get_ucounts_value(struct ucounts *ucounts, enum ucount_type type) +static inline long get_rlimit_value(struct ucounts *ucounts, enum rlimit_type type) { - return atomic_long_read(&ucounts->ucount[type]); + return atomic_long_read(&ucounts->rlimit[type]); } -long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v); -bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v); -long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type); -void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type); -bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max); +long inc_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v); +bool dec_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v); +long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type); +void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum rlimit_type type); +bool is_rlimit_overlimit(struct ucounts *ucounts, enum rlimit_type type, unsigned long max); + +static inline long get_userns_rlimit_max(struct user_namespace *ns, enum rlimit_type type) +{ + return READ_ONCE(ns->rlimit_max[type]); +} -static inline void set_rlimit_ucount_max(struct user_namespace *ns, - enum ucount_type type, unsigned long max) +static inline void set_userns_rlimit_max(struct user_namespace *ns, + enum rlimit_type type, unsigned long max) { - ns->ucount_max[type] = max <= LONG_MAX ? max : LONG_MAX; + ns->rlimit_max[type] = max <= LONG_MAX ? max : LONG_MAX; } #ifdef CONFIG_USER_NS diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index e1b8a915e9e9..f07e6998bb68 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -175,9 +175,8 @@ extern bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end); -extern int userfaultfd_unmap_prep(struct vm_area_struct *vma, - unsigned long start, unsigned long end, - struct list_head *uf); +extern int userfaultfd_unmap_prep(struct mm_struct *mm, unsigned long start, + unsigned long end, struct list_head *uf); extern void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf); @@ -258,7 +257,7 @@ static inline bool userfaultfd_remove(struct vm_area_struct *vma, return true; } -static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, +static inline int userfaultfd_unmap_prep(struct mm_struct *mm, unsigned long start, unsigned long end, struct list_head *uf) { diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index d282f464d2f1..6d0f5e4e82c2 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -104,6 +104,7 @@ struct vdpa_iova_range { }; struct vdpa_dev_set_config { + u64 device_features; struct { u8 mac[ETH_ALEN]; u16 mtu; diff --git a/include/linux/vfio.h b/include/linux/vfio.h index e05ddc6fe6a5..e7cebeb875dd 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -14,6 +14,7 @@ #include <linux/workqueue.h> #include <linux/poll.h> #include <uapi/linux/vfio.h> +#include <linux/iova_bitmap.h> struct kvm; @@ -33,10 +34,11 @@ struct vfio_device { struct device *dev; const struct vfio_device_ops *ops; /* - * mig_ops is a static property of the vfio_device which must be set - * prior to registering the vfio_device. + * mig_ops/log_ops is a static property of the vfio_device which must + * be set prior to registering the vfio_device. */ const struct vfio_migration_ops *mig_ops; + const struct vfio_log_ops *log_ops; struct vfio_group *group; struct vfio_device_set *dev_set; struct list_head dev_set_list; @@ -45,7 +47,9 @@ struct vfio_device { struct kvm *kvm; /* Members below here are private, not for driver use */ - refcount_t refcount; + unsigned int index; + struct device device; /* device.kref covers object life circle */ + refcount_t refcount; /* user count on registered device*/ unsigned int open_count; struct completion comp; struct list_head group_next; @@ -55,6 +59,8 @@ struct vfio_device { /** * struct vfio_device_ops - VFIO bus driver device callbacks * + * @init: initialize private fields in device structure + * @release: Reclaim private fields in device structure * @open_device: Called when the first file descriptor is opened for this device * @close_device: Opposite of open_device * @read: Perform read(2) on device file descriptor @@ -72,6 +78,8 @@ struct vfio_device { */ struct vfio_device_ops { char *name; + int (*init)(struct vfio_device *vdev); + void (*release)(struct vfio_device *vdev); int (*open_device)(struct vfio_device *vdev); void (*close_device)(struct vfio_device *vdev); ssize_t (*read)(struct vfio_device *vdev, char __user *buf, @@ -109,6 +117,28 @@ struct vfio_migration_ops { }; /** + * @log_start: Optional callback to ask the device start DMA logging. + * @log_stop: Optional callback to ask the device stop DMA logging. + * @log_read_and_clear: Optional callback to ask the device read + * and clear the dirty DMAs in some given range. + * + * The vfio core implementation of the DEVICE_FEATURE_DMA_LOGGING_ set + * of features does not track logging state relative to the device, + * therefore the device implementation of vfio_log_ops must handle + * arbitrary user requests. This includes rejecting subsequent calls + * to log_start without an intervening log_stop, as well as graceful + * handling of log_stop and log_read_and_clear from invalid states. + */ +struct vfio_log_ops { + int (*log_start)(struct vfio_device *device, + struct rb_root_cached *ranges, u32 nnodes, u64 *page_size); + int (*log_stop)(struct vfio_device *device); + int (*log_read_and_clear)(struct vfio_device *device, + unsigned long iova, unsigned long length, + struct iova_bitmap *dirty); +}; + +/** * vfio_check_feature - Validate user input for the VFIO_DEVICE_FEATURE ioctl * @flags: Arg from the device_feature op * @argsz: Arg from the device_feature op @@ -137,9 +167,23 @@ static inline int vfio_check_feature(u32 flags, size_t argsz, u32 supported_ops, return 1; } -void vfio_init_group_dev(struct vfio_device *device, struct device *dev, - const struct vfio_device_ops *ops); -void vfio_uninit_group_dev(struct vfio_device *device); +struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev, + const struct vfio_device_ops *ops); +#define vfio_alloc_device(dev_struct, member, dev, ops) \ + container_of(_vfio_alloc_device(sizeof(struct dev_struct) + \ + BUILD_BUG_ON_ZERO(offsetof( \ + struct dev_struct, member)), \ + dev, ops), \ + struct dev_struct, member) + +int vfio_init_device(struct vfio_device *device, struct device *dev, + const struct vfio_device_ops *ops); +void vfio_free_device(struct vfio_device *device); +static inline void vfio_put_device(struct vfio_device *device) +{ + put_device(&device->device); +} + int vfio_register_group_dev(struct vfio_device *device); int vfio_register_emulated_iommu_dev(struct vfio_device *device); void vfio_unregister_group_dev(struct vfio_device *device); @@ -155,6 +199,7 @@ int vfio_mig_get_next_state(struct vfio_device *device, * External user API */ struct iommu_group *vfio_file_iommu_group(struct file *file); +bool vfio_file_is_group(struct file *file); bool vfio_file_enforced_coherent(struct file *file); void vfio_file_set_kvm(struct file *file, struct kvm *kvm); bool vfio_file_has_dev(struct file *file, struct vfio_device *device); diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index 5579ece4347b..367fd79226a3 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -20,39 +20,10 @@ #define VFIO_PCI_CORE_H #define VFIO_PCI_OFFSET_SHIFT 40 - #define VFIO_PCI_OFFSET_TO_INDEX(off) (off >> VFIO_PCI_OFFSET_SHIFT) #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT) #define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) -/* Special capability IDs predefined access */ -#define PCI_CAP_ID_INVALID 0xFF /* default raw access */ -#define PCI_CAP_ID_INVALID_VIRT 0xFE /* default virt access */ - -/* Cap maximum number of ioeventfds per device (arbitrary) */ -#define VFIO_PCI_IOEVENTFD_MAX 1000 - -struct vfio_pci_ioeventfd { - struct list_head next; - struct vfio_pci_core_device *vdev; - struct virqfd *virqfd; - void __iomem *addr; - uint64_t data; - loff_t pos; - int bar; - int count; - bool test_mem; -}; - -struct vfio_pci_irq_ctx { - struct eventfd_ctx *trigger; - struct virqfd *unmask; - struct virqfd *mask; - char *name; - bool masked; - struct irq_bypass_producer producer; -}; - struct vfio_pci_core_device; struct vfio_pci_region; @@ -78,23 +49,6 @@ struct vfio_pci_region { u32 flags; }; -struct vfio_pci_dummy_resource { - struct resource resource; - int index; - struct list_head res_next; -}; - -struct vfio_pci_vf_token { - struct mutex lock; - uuid_t uuid; - int users; -}; - -struct vfio_pci_mmap_vma { - struct vm_area_struct *vma; - struct list_head vma_next; -}; - struct vfio_pci_core_device { struct vfio_device vdev; struct pci_dev *pdev; @@ -124,11 +78,14 @@ struct vfio_pci_core_device { bool needs_reset; bool nointx; bool needs_pm_restore; + bool pm_intx_masked; + bool pm_runtime_engaged; struct pci_saved_state *pci_saved_state; struct pci_saved_state *pm_save; int ioeventfds_nr; struct eventfd_ctx *err_trigger; struct eventfd_ctx *req_trigger; + struct eventfd_ctx *pm_wake_eventfd_ctx; struct list_head dummy_resources_list; struct mutex ioeventfds_lock; struct list_head ioeventfds_list; @@ -141,100 +98,17 @@ struct vfio_pci_core_device { struct rw_semaphore memory_lock; }; -#define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX) -#define is_msi(vdev) (vdev->irq_type == VFIO_PCI_MSI_IRQ_INDEX) -#define is_msix(vdev) (vdev->irq_type == VFIO_PCI_MSIX_IRQ_INDEX) -#define is_irq_none(vdev) (!(is_intx(vdev) || is_msi(vdev) || is_msix(vdev))) -#define irq_is(vdev, type) (vdev->irq_type == type) - -void vfio_pci_intx_mask(struct vfio_pci_core_device *vdev); -void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev); - -int vfio_pci_set_irqs_ioctl(struct vfio_pci_core_device *vdev, - uint32_t flags, unsigned index, - unsigned start, unsigned count, void *data); - -ssize_t vfio_pci_config_rw(struct vfio_pci_core_device *vdev, - char __user *buf, size_t count, - loff_t *ppos, bool iswrite); - -ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, - size_t count, loff_t *ppos, bool iswrite); - -#ifdef CONFIG_VFIO_PCI_VGA -ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf, - size_t count, loff_t *ppos, bool iswrite); -#else -static inline ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, - char __user *buf, size_t count, - loff_t *ppos, bool iswrite) -{ - return -EINVAL; -} -#endif - -long vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset, - uint64_t data, int count, int fd); - -int vfio_pci_init_perm_bits(void); -void vfio_pci_uninit_perm_bits(void); - -int vfio_config_init(struct vfio_pci_core_device *vdev); -void vfio_config_free(struct vfio_pci_core_device *vdev); - -int vfio_pci_register_dev_region(struct vfio_pci_core_device *vdev, - unsigned int type, unsigned int subtype, - const struct vfio_pci_regops *ops, - size_t size, u32 flags, void *data); - -int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, - pci_power_t state); - -bool __vfio_pci_memory_enabled(struct vfio_pci_core_device *vdev); -void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev); -u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev); -void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev, - u16 cmd); - -#ifdef CONFIG_VFIO_PCI_IGD -int vfio_pci_igd_init(struct vfio_pci_core_device *vdev); -#else -static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev) -{ - return -ENODEV; -} -#endif - -#ifdef CONFIG_VFIO_PCI_ZDEV_KVM -int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, - struct vfio_info_cap *caps); -int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev); -void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev); -#else -static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, - struct vfio_info_cap *caps) -{ - return -ENODEV; -} - -static inline int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev) -{ - return 0; -} - -static inline void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev) -{} -#endif - /* Will be exported for vfio pci drivers usage */ +int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev, + unsigned int type, unsigned int subtype, + const struct vfio_pci_regops *ops, + size_t size, u32 flags, void *data); void vfio_pci_core_set_params(bool nointxmask, bool is_disable_vga, bool is_disable_idle_d3); void vfio_pci_core_close_device(struct vfio_device *core_vdev); -void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev, - struct pci_dev *pdev, - const struct vfio_device_ops *vfio_pci_ops); +int vfio_pci_core_init_dev(struct vfio_device *core_vdev); +void vfio_pci_core_release_dev(struct vfio_device *core_vdev); int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev); -void vfio_pci_core_uninit_device(struct vfio_pci_core_device *vdev); void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev); extern const struct pci_error_handlers vfio_pci_core_err_handlers; int vfio_pci_core_sriov_configure(struct vfio_pci_core_device *vdev, @@ -256,9 +130,4 @@ void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev); pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev, pci_channel_state_t state); -static inline bool vfio_pci_is_vga(struct pci_dev *pdev) -{ - return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA; -} - #endif /* VFIO_PCI_CORE_H */ diff --git a/include/linux/virtio_pci_legacy.h b/include/linux/virtio_pci_legacy.h index e5d665faf00e..a8dc757d0367 100644 --- a/include/linux/virtio_pci_legacy.h +++ b/include/linux/virtio_pci_legacy.h @@ -32,8 +32,6 @@ void vp_legacy_set_queue_address(struct virtio_pci_legacy_device *ldev, u16 index, u32 queue_pfn); bool vp_legacy_get_queue_enable(struct virtio_pci_legacy_device *ldev, u16 idx); -void vp_legacy_set_queue_size(struct virtio_pci_legacy_device *ldev, - u16 idx, u16 size); u16 vp_legacy_get_queue_size(struct virtio_pci_legacy_device *ldev, u16 idx); int vp_legacy_probe(struct virtio_pci_legacy_device *ldev); diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index f3fc36cd2276..3518dba1e02f 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -129,10 +129,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, NR_TLB_LOCAL_FLUSH_ALL, NR_TLB_LOCAL_FLUSH_ONE, #endif /* CONFIG_DEBUG_TLBFLUSH */ -#ifdef CONFIG_DEBUG_VM_VMACACHE - VMACACHE_FIND_CALLS, - VMACACHE_FIND_HITS, -#endif #ifdef CONFIG_SWAP SWAP_RA, SWAP_RA_HIT, diff --git a/include/linux/vmacache.h b/include/linux/vmacache.h deleted file mode 100644 index 6fce268a4588..000000000000 --- a/include/linux/vmacache.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_VMACACHE_H -#define __LINUX_VMACACHE_H - -#include <linux/sched.h> -#include <linux/mm.h> - -static inline void vmacache_flush(struct task_struct *tsk) -{ - memset(tsk->vmacache.vmas, 0, sizeof(tsk->vmacache.vmas)); -} - -extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma); -extern struct vm_area_struct *vmacache_find(struct mm_struct *mm, - unsigned long addr); - -#ifndef CONFIG_MMU -extern struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm, - unsigned long start, - unsigned long end); -#endif - -static inline void vmacache_invalidate(struct mm_struct *mm) -{ - mm->vmacache_seqnum++; -} - -#endif /* __LINUX_VMACACHE_H */ diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index bfe38869498d..19cf5b6892ce 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -125,12 +125,6 @@ static inline void vm_events_fold_cpu(int cpu) #define count_vm_tlb_events(x, y) do { (void)(y); } while (0) #endif -#ifdef CONFIG_DEBUG_VM_VMACACHE -#define count_vm_vmacache_event(x) count_vm_event(x) -#else -#define count_vm_vmacache_event(x) do {} while (0) -#endif - #define __count_zid_vm_events(item, zid, delta) \ __count_vm_events(item##_NORMAL - ZONE_NORMAL + zid, delta) diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h index b5ab452fca5b..c1f5aebef170 100644 --- a/include/linux/vt_kern.h +++ b/include/linux/vt_kern.h @@ -30,7 +30,6 @@ struct vc_data *vc_deallocate(unsigned int console); void reset_palette(struct vc_data *vc); void do_blank_screen(int entering_gfx); void do_unblank_screen(int leaving_gfx); -void unblank_screen(void); void poke_blanked_console(void); int con_font_op(struct vc_data *vc, struct console_font_op *op); int con_set_cmap(unsigned char __user *cmap); diff --git a/include/linux/wait.h b/include/linux/wait.h index 58cfbf81447c..7f5a51aae0a7 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -281,7 +281,7 @@ static inline void wake_up_pollfree(struct wait_queue_head *wq_head) #define ___wait_is_interruptible(state) \ (!__builtin_constant_p(state) || \ - state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE) \ + (state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) extern void init_wait_entry(struct wait_queue_entry *wq_entry, int flags); @@ -361,8 +361,8 @@ do { \ } while (0) #define __wait_event_freezable(wq_head, condition) \ - ___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0, \ - freezable_schedule()) + ___wait_event(wq_head, condition, (TASK_INTERRUPTIBLE|TASK_FREEZABLE), \ + 0, 0, schedule()) /** * wait_event_freezable - sleep (or freeze) until a condition gets true @@ -420,8 +420,8 @@ do { \ #define __wait_event_freezable_timeout(wq_head, condition, timeout) \ ___wait_event(wq_head, ___wait_cond_timeout(condition), \ - TASK_INTERRUPTIBLE, 0, timeout, \ - __ret = freezable_schedule_timeout(__ret)) + (TASK_INTERRUPTIBLE|TASK_FREEZABLE), 0, timeout, \ + __ret = schedule_timeout(__ret)) /* * like wait_event_timeout() -- except it uses TASK_INTERRUPTIBLE to avoid @@ -642,8 +642,8 @@ do { \ #define __wait_event_freezable_exclusive(wq, condition) \ - ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0, \ - freezable_schedule()) + ___wait_event(wq, condition, (TASK_INTERRUPTIBLE|TASK_FREEZABLE), 1, 0,\ + schedule()) #define wait_event_freezable_exclusive(wq, condition) \ ({ \ @@ -932,6 +932,34 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *); __ret; \ }) +#define __wait_event_state(wq, condition, state) \ + ___wait_event(wq, condition, state, 0, 0, schedule()) + +/** + * wait_event_state - sleep until a condition gets true + * @wq_head: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @state: state to sleep in + * + * The process is put to sleep (@state) until the @condition evaluates to true + * or a signal is received (when allowed by @state). The @condition is checked + * each time the waitqueue @wq_head is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * The function will return -ERESTARTSYS if it was interrupted by a signal + * (when allowed by @state) and 0 if @condition evaluated to true. + */ +#define wait_event_state(wq_head, condition, state) \ +({ \ + int __ret = 0; \ + might_sleep(); \ + if (!(condition)) \ + __ret = __wait_event_state(wq_head, condition, state); \ + __ret; \ +}) + #define __wait_event_killable_timeout(wq_head, condition, timeout) \ ___wait_event(wq_head, ___wait_cond_timeout(condition), \ TASK_KILLABLE, 0, timeout, \ diff --git a/include/linux/wireless.h b/include/linux/wireless.h index 2d1b54556eff..e6e34d74dda0 100644 --- a/include/linux/wireless.h +++ b/include/linux/wireless.h @@ -26,7 +26,15 @@ struct compat_iw_point { struct __compat_iw_event { __u16 len; /* Real length of this stuff */ __u16 cmd; /* Wireless IOCTL */ - compat_caddr_t pointer; + + union { + compat_caddr_t pointer; + + /* we need ptr_bytes to make memcpy() run-time destination + * buffer bounds checking happy, nothing special + */ + DECLARE_FLEX_ARRAY(__u8, ptr_bytes); + }; }; #define IW_EV_COMPAT_LCP_LEN offsetof(struct __compat_iw_event, pointer) #define IW_EV_COMPAT_POINT_OFF offsetof(struct compat_iw_point, length) diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 3f045f6d6c4f..06f9291b6fd5 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -17,20 +17,12 @@ struct bio; DECLARE_PER_CPU(int, dirty_throttle_leaks); /* - * The 1/4 region under the global dirty thresh is for smooth dirty throttling: - * - * (thresh - thresh/DIRTY_FULL_SCOPE, thresh) - * - * Further beyond, all dirtier tasks will enter a loop waiting (possibly long - * time) for the dirty pages to drop, unless written enough pages. - * * The global dirty threshold is normally equal to the global dirty limit, * except when the system suddenly allocates a lot of anonymous memory and * knocks down the global dirty threshold quickly, in which case the global * dirty limit will follow down slowly to prevent livelocking all dirtier tasks. */ #define DIRTY_SCOPE 8 -#define DIRTY_FULL_SCOPE (DIRTY_SCOPE / 2) struct backing_dev_info; |