diff options
Diffstat (limited to 'drivers')
213 files changed, 5179 insertions, 1947 deletions
diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c index f0dad0c9ce33..cd24ccd20ba6 100644 --- a/drivers/accel/ivpu/ivpu_debugfs.c +++ b/drivers/accel/ivpu/ivpu_debugfs.c @@ -455,7 +455,7 @@ priority_bands_fops_write(struct file *file, const char __user *user_buf, size_t if (ret < 0) return ret; - buf[size] = '\0'; + buf[ret] = '\0'; ret = sscanf(buf, "%u %u %u %u", &band, &grace_period, &process_grace_period, &process_quantum); if (ret != 4) diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index f73ce6e13065..54676e3d82dd 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -231,16 +231,18 @@ static int acpi_pptt_leaf_node(struct acpi_table_header *table_hdr, sizeof(struct acpi_table_pptt)); proc_sz = sizeof(struct acpi_pptt_processor); - while ((unsigned long)entry + proc_sz < table_end) { + /* ignore subtable types that are smaller than a processor node */ + while ((unsigned long)entry + proc_sz <= table_end) { cpu_node = (struct acpi_pptt_processor *)entry; + if (entry->type == ACPI_PPTT_TYPE_PROCESSOR && cpu_node->parent == node_entry) return 0; if (entry->length == 0) return 0; + entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry, entry->length); - } return 1; } @@ -273,15 +275,18 @@ static struct acpi_pptt_processor *acpi_find_processor_node(struct acpi_table_he proc_sz = sizeof(struct acpi_pptt_processor); /* find the processor structure associated with this cpuid */ - while ((unsigned long)entry + proc_sz < table_end) { + while ((unsigned long)entry + proc_sz <= table_end) { cpu_node = (struct acpi_pptt_processor *)entry; if (entry->length == 0) { pr_warn("Invalid zero length subtable\n"); break; } + /* entry->length may not equal proc_sz, revalidate the processor structure length */ if (entry->type == ACPI_PPTT_TYPE_PROCESSOR && acpi_cpu_id == cpu_node->acpi_processor_id && + (unsigned long)entry + entry->length <= table_end && + entry->length == proc_sz + cpu_node->number_of_priv_resources * sizeof(u32) 
&& acpi_pptt_leaf_node(table_hdr, cpu_node)) { return (struct acpi_pptt_processor *)entry; } diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 94c6446604fc..98da8c4eea59 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -187,7 +187,7 @@ static int binderfs_binder_device_create(struct inode *ref_inode, inode_lock(d_inode(root)); /* look it up */ - dentry = lookup_one_len(name, root, name_len); + dentry = lookup_noperm(&QSTR(name), root); if (IS_ERR(dentry)) { inode_unlock(d_inode(root)); ret = PTR_ERR(dentry); @@ -487,7 +487,7 @@ static struct dentry *binderfs_create_dentry(struct dentry *parent, { struct dentry *dentry; - dentry = lookup_one_len(name, parent, strlen(name)); + dentry = lookup_noperm(&QSTR(name), parent); if (IS_ERR(dentry)) return dentry; diff --git a/drivers/base/node.c b/drivers/base/node.c index cd13ef287011..618712071a1e 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -468,7 +468,7 @@ static ssize_t node_read_meminfo(struct device *dev, nid, K(node_page_state(pgdat, NR_PAGETABLE)), nid, K(node_page_state(pgdat, NR_SECONDARY_PAGETABLE)), nid, 0UL, - nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)), + nid, 0UL, nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), nid, K(sreclaimable + node_page_state(pgdat, NR_KERNEL_MISC_RECLAIMABLE)), diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index e48b24be45ee..0f70e2374e7f 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -407,4 +407,23 @@ config BLKDEV_UBLK_LEGACY_OPCODES source "drivers/block/rnbd/Kconfig" +config BLK_DEV_ZONED_LOOP + tristate "Zoned loopback device support" + depends on BLK_DEV_ZONED + help + Saying Y here will allow you to create a zoned block device using + regular files for zones (one file per zone). This is useful to test + file systems, device mapper and applications that support zoned block + devices.
To create a zoned loop device, no user utility is needed, a + zoned loop device can be created (or re-started) using a command + like: + + echo "add id=0,zone_size_mb=256,capacity_mb=16384,conv_zones=11" > \ + /dev/zloop-control + + See Documentation/admin-guide/blockdev/zoned_loop.rst for usage + details. + + If unsure, say N. + endif # BLK_DEV diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 1105a2d4fdcb..097707aca725 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -41,5 +41,6 @@ obj-$(CONFIG_BLK_DEV_RNBD) += rnbd/ obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk/ obj-$(CONFIG_BLK_DEV_UBLK) += ublk_drv.o +obj-$(CONFIG_BLK_DEV_ZONED_LOOP) += zloop.o swim_mod-y := swim.o swim_asm.o diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 292f127cae0a..b1be6c510372 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -54,32 +54,33 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector) /* * Insert a new page for a given sector, if one does not already exist. */ -static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp) +static struct page *brd_insert_page(struct brd_device *brd, sector_t sector, + blk_opf_t opf) + __releases(rcu) + __acquires(rcu) { - pgoff_t idx = sector >> PAGE_SECTORS_SHIFT; - struct page *page; - int ret = 0; - - page = brd_lookup_page(brd, sector); - if (page) - return 0; + gfp_t gfp = (opf & REQ_NOWAIT) ? 
GFP_NOWAIT : GFP_NOIO; + struct page *page, *ret; + rcu_read_unlock(); page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM); + rcu_read_lock(); if (!page) - return -ENOMEM; + return ERR_PTR(-ENOMEM); xa_lock(&brd->brd_pages); - ret = __xa_insert(&brd->brd_pages, idx, page, gfp); - if (!ret) - brd->brd_nr_pages++; - xa_unlock(&brd->brd_pages); - - if (ret < 0) { + ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL, + page, gfp); + if (ret) { + xa_unlock(&brd->brd_pages); __free_page(page); - if (ret == -EBUSY) - ret = 0; + if (xa_is_err(ret)) + return ERR_PTR(xa_err(ret)); + return ret; } - return ret; + brd->brd_nr_pages++; + xa_unlock(&brd->brd_pages); + return page; } /* @@ -100,143 +101,77 @@ static void brd_free_pages(struct brd_device *brd) } /* - * copy_to_brd_setup must be called before copy_to_brd. It may sleep. + * Process a single segment. The segment is capped to not cross page boundaries + * in both the bio and the brd backing memory. */ -static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n, - gfp_t gfp) -{ - unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; - size_t copy; - int ret; - - copy = min_t(size_t, n, PAGE_SIZE - offset); - ret = brd_insert_page(brd, sector, gfp); - if (ret) - return ret; - if (copy < n) { - sector += copy >> SECTOR_SHIFT; - ret = brd_insert_page(brd, sector, gfp); - } - return ret; -} - -/* - * Copy n bytes from src to the brd starting at sector. Does not sleep. 
- */ -static void copy_to_brd(struct brd_device *brd, const void *src, - sector_t sector, size_t n) +static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio) { + struct bio_vec bv = bio_iter_iovec(bio, bio->bi_iter); + sector_t sector = bio->bi_iter.bi_sector; + u32 offset = (sector & (PAGE_SECTORS - 1)) << SECTOR_SHIFT; + blk_opf_t opf = bio->bi_opf; struct page *page; - void *dst; - unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; - size_t copy; + void *kaddr; - copy = min_t(size_t, n, PAGE_SIZE - offset); - page = brd_lookup_page(brd, sector); - BUG_ON(!page); - - dst = kmap_atomic(page); - memcpy(dst + offset, src, copy); - kunmap_atomic(dst); - - if (copy < n) { - src += copy; - sector += copy >> SECTOR_SHIFT; - copy = n - copy; - page = brd_lookup_page(brd, sector); - BUG_ON(!page); - - dst = kmap_atomic(page); - memcpy(dst, src, copy); - kunmap_atomic(dst); - } -} + bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset); -/* - * Copy n bytes to dst from the brd starting at sector. Does not sleep. - */ -static void copy_from_brd(void *dst, struct brd_device *brd, - sector_t sector, size_t n) -{ - struct page *page; - void *src; - unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; - size_t copy; - - copy = min_t(size_t, n, PAGE_SIZE - offset); + rcu_read_lock(); page = brd_lookup_page(brd, sector); - if (page) { - src = kmap_atomic(page); - memcpy(dst, src + offset, copy); - kunmap_atomic(src); - } else - memset(dst, 0, copy); - - if (copy < n) { - dst += copy; - sector += copy >> SECTOR_SHIFT; - copy = n - copy; - page = brd_lookup_page(brd, sector); - if (page) { - src = kmap_atomic(page); - memcpy(dst, src, copy); - kunmap_atomic(src); - } else - memset(dst, 0, copy); + if (!page && op_is_write(opf)) { + page = brd_insert_page(brd, sector, opf); + if (IS_ERR(page)) + goto out_error; } -} - -/* - * Process a single bvec of a bio. 
- */ -static int brd_do_bvec(struct brd_device *brd, struct page *page, - unsigned int len, unsigned int off, blk_opf_t opf, - sector_t sector) -{ - void *mem; - int err = 0; + kaddr = bvec_kmap_local(&bv); if (op_is_write(opf)) { - /* - * Must use NOIO because we don't want to recurse back into the - * block or filesystem layers from page reclaim. - */ - gfp_t gfp = opf & REQ_NOWAIT ? GFP_NOWAIT : GFP_NOIO; - - err = copy_to_brd_setup(brd, sector, len, gfp); - if (err) - goto out; - } - - mem = kmap_atomic(page); - if (!op_is_write(opf)) { - copy_from_brd(mem + off, brd, sector, len); - flush_dcache_page(page); + memcpy_to_page(page, offset, kaddr, bv.bv_len); } else { - flush_dcache_page(page); - copy_to_brd(brd, mem + off, sector, len); + if (page) + memcpy_from_page(kaddr, page, offset, bv.bv_len); + else + memset(kaddr, 0, bv.bv_len); } - kunmap_atomic(mem); + kunmap_local(kaddr); + rcu_read_unlock(); + + bio_advance_iter_single(bio, &bio->bi_iter, bv.bv_len); + return true; + +out_error: + rcu_read_unlock(); + if (PTR_ERR(page) == -ENOMEM && (opf & REQ_NOWAIT)) + bio_wouldblock_error(bio); + else + bio_io_error(bio); + return false; +} -out: - return err; +static void brd_free_one_page(struct rcu_head *head) +{ + struct page *page = container_of(head, struct page, rcu_head); + + __free_page(page); } static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size) { - sector_t aligned_sector = (sector + PAGE_SECTORS) & ~PAGE_SECTORS; + sector_t aligned_sector = round_up(sector, PAGE_SECTORS); + sector_t aligned_end = round_down( + sector + (size >> SECTOR_SHIFT), PAGE_SECTORS); struct page *page; - size -= (aligned_sector - sector) * SECTOR_SIZE; + if (aligned_end <= aligned_sector) + return; + xa_lock(&brd->brd_pages); - while (size >= PAGE_SIZE && aligned_sector < rd_size * 2) { + while (aligned_sector < aligned_end && aligned_sector < rd_size * 2) { page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT); if (page) { - 
__free_page(page); + call_rcu(&page->rcu_head, brd_free_one_page); brd->brd_nr_pages--; } aligned_sector += PAGE_SECTORS; - size -= PAGE_SIZE; } xa_unlock(&brd->brd_pages); } @@ -244,36 +179,18 @@ static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size) static void brd_submit_bio(struct bio *bio) { struct brd_device *brd = bio->bi_bdev->bd_disk->private_data; - sector_t sector = bio->bi_iter.bi_sector; - struct bio_vec bvec; - struct bvec_iter iter; if (unlikely(op_is_discard(bio->bi_opf))) { - brd_do_discard(brd, sector, bio->bi_iter.bi_size); + brd_do_discard(brd, bio->bi_iter.bi_sector, + bio->bi_iter.bi_size); bio_endio(bio); return; } - bio_for_each_segment(bvec, bio, iter) { - unsigned int len = bvec.bv_len; - int err; - - /* Don't support un-aligned buffer */ - WARN_ON_ONCE((bvec.bv_offset & (SECTOR_SIZE - 1)) || - (len & (SECTOR_SIZE - 1))); - - err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset, - bio->bi_opf, sector); - if (err) { - if (err == -ENOMEM && bio->bi_opf & REQ_NOWAIT) { - bio_wouldblock_error(bio); - return; - } - bio_io_error(bio); + do { + if (!brd_rw_bvec(brd, bio)) return; - } - sector += len >> SECTOR_SHIFT; - } + } while (bio->bi_iter.bi_size); bio_endio(bio); } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index b8ba7de08753..e2b1f377f585 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -979,9 +979,6 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode, if (!file) return -EBADF; - if ((mode & BLK_OPEN_WRITE) && !file->f_op->write_iter) - return -EINVAL; - error = loop_check_backing_file(file); if (error) return error; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 65b96c083b3c..d5cc7bd2875c 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -725,7 +725,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * scmd = blk_mq_rq_to_pdu(rq); if (cgc->buflen) { - ret = blk_rq_map_kern(q, rq, cgc->buffer, 
cgc->buflen, + ret = blk_rq_map_kern(rq, cgc->buffer, cgc->buflen, GFP_NOIO); if (ret) goto out; diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index 2ee6e9bd4e28..2df8941a6b14 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -147,12 +147,7 @@ static int process_rdma(struct rnbd_srv_session *srv_sess, bio = bio_alloc(file_bdev(sess_dev->bdev_file), 1, rnbd_to_bio_flags(le32_to_cpu(msg->rw)), GFP_KERNEL); - if (bio_add_page(bio, virt_to_page(data), datalen, - offset_in_page(data)) != datalen) { - rnbd_srv_err_rl(sess_dev, "Failed to map data to bio\n"); - err = -EINVAL; - goto bio_put; - } + bio_add_virt_nofail(bio, data, datalen); bio->bi_opf = rnbd_to_bio_flags(le32_to_cpu(msg->rw)); if (bio_has_data(bio) && diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index f9032076bc06..6f51072776f1 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -50,6 +50,8 @@ /* private ioctl command mirror */ #define UBLK_CMD_DEL_DEV_ASYNC _IOC_NR(UBLK_U_CMD_DEL_DEV_ASYNC) +#define UBLK_CMD_UPDATE_SIZE _IOC_NR(UBLK_U_CMD_UPDATE_SIZE) +#define UBLK_CMD_QUIESCE_DEV _IOC_NR(UBLK_U_CMD_QUIESCE_DEV) #define UBLK_IO_REGISTER_IO_BUF _IOC_NR(UBLK_U_IO_REGISTER_IO_BUF) #define UBLK_IO_UNREGISTER_IO_BUF _IOC_NR(UBLK_U_IO_UNREGISTER_IO_BUF) @@ -64,7 +66,10 @@ | UBLK_F_CMD_IOCTL_ENCODE \ | UBLK_F_USER_COPY \ | UBLK_F_ZONED \ - | UBLK_F_USER_RECOVERY_FAIL_IO) + | UBLK_F_USER_RECOVERY_FAIL_IO \ + | UBLK_F_UPDATE_SIZE \ + | UBLK_F_AUTO_BUF_REG \ + | UBLK_F_QUIESCE) #define UBLK_F_ALL_RECOVERY_FLAGS (UBLK_F_USER_RECOVERY \ | UBLK_F_USER_RECOVERY_REISSUE \ @@ -77,7 +82,11 @@ UBLK_PARAM_TYPE_DMA_ALIGN | UBLK_PARAM_TYPE_SEGMENT) struct ublk_rq_data { - struct kref ref; + refcount_t ref; + + /* for auto-unregister buffer in case of UBLK_F_AUTO_BUF_REG */ + u16 buf_index; + void *buf_ctx_handle; }; struct ublk_uring_cmd_pdu { @@ -99,6 +108,9 @@ struct ublk_uring_cmd_pdu { * setup in ublk uring_cmd handler */ 
struct ublk_queue *ubq; + + struct ublk_auto_buf_reg buf; + u16 tag; }; @@ -131,6 +143,14 @@ struct ublk_uring_cmd_pdu { */ #define UBLK_IO_FLAG_NEED_GET_DATA 0x08 +/* + * request buffer is registered automatically, so we have to unregister it + * before completing this request. + * + * io_uring will unregister buffer automatically for us during exiting. + */ +#define UBLK_IO_FLAG_AUTO_BUF_REG 0x10 + /* atomic RW with ubq->cancel_lock */ #define UBLK_IO_FLAG_CANCELED 0x80000000 @@ -140,7 +160,12 @@ struct ublk_io { unsigned int flags; int res; - struct io_uring_cmd *cmd; + union { + /* valid if UBLK_IO_FLAG_ACTIVE is set */ + struct io_uring_cmd *cmd; + /* valid if UBLK_IO_FLAG_OWNED_BY_SRV is set */ + struct request *req; + }; }; struct ublk_queue { @@ -198,13 +223,19 @@ struct ublk_params_header { __u32 types; }; +static void ublk_io_release(void *priv); static void ublk_stop_dev_unlocked(struct ublk_device *ub); static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq); static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, const struct ublk_queue *ubq, int tag, size_t offset); static inline unsigned int ublk_req_build_flags(struct request *req); -static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq, - int tag); + +static inline struct ublksrv_io_desc * +ublk_get_iod(const struct ublk_queue *ubq, unsigned tag) +{ + return &ubq->io_cmd_buf[tag]; +} + static inline bool ublk_dev_is_zoned(const struct ublk_device *ub) { return ub->dev_info.flags & UBLK_F_ZONED; @@ -356,8 +387,7 @@ static int ublk_report_zones(struct gendisk *disk, sector_t sector, if (ret) goto free_req; - ret = blk_rq_map_kern(disk->queue, req, buffer, buffer_length, - GFP_KERNEL); + ret = blk_rq_map_kern(req, buffer, buffer_length, GFP_KERNEL); if (ret) goto erase_desc; @@ -477,7 +507,6 @@ static blk_status_t ublk_setup_iod_zoned(struct ublk_queue *ubq, #endif static inline void __ublk_complete_rq(struct request *req); -static void 
ublk_complete_rq(struct kref *ref); static dev_t ublk_chr_devt; static const struct class ublk_chr_class = { @@ -609,6 +638,11 @@ static inline bool ublk_support_zero_copy(const struct ublk_queue *ubq) return ubq->flags & UBLK_F_SUPPORT_ZERO_COPY; } +static inline bool ublk_support_auto_buf_reg(const struct ublk_queue *ubq) +{ + return ubq->flags & UBLK_F_AUTO_BUF_REG; +} + static inline bool ublk_support_user_copy(const struct ublk_queue *ubq) { return ubq->flags & UBLK_F_USER_COPY; @@ -616,7 +650,8 @@ static inline bool ublk_support_user_copy(const struct ublk_queue *ubq) static inline bool ublk_need_map_io(const struct ublk_queue *ubq) { - return !ublk_support_user_copy(ubq) && !ublk_support_zero_copy(ubq); + return !ublk_support_user_copy(ubq) && !ublk_support_zero_copy(ubq) && + !ublk_support_auto_buf_reg(ubq); } static inline bool ublk_need_req_ref(const struct ublk_queue *ubq) @@ -627,8 +662,13 @@ static inline bool ublk_need_req_ref(const struct ublk_queue *ubq) * * for zero copy, request buffer need to be registered to io_uring * buffer table, so reference is needed + * + * For auto buffer register, ublk server still may issue + * UBLK_IO_COMMIT_AND_FETCH_REQ before one registered buffer is used up, + * so reference is required too. 
*/ - return ublk_support_user_copy(ubq) || ublk_support_zero_copy(ubq); + return ublk_support_user_copy(ubq) || ublk_support_zero_copy(ubq) || + ublk_support_auto_buf_reg(ubq); } static inline void ublk_init_req_ref(const struct ublk_queue *ubq, @@ -637,7 +677,7 @@ static inline void ublk_init_req_ref(const struct ublk_queue *ubq, if (ublk_need_req_ref(ubq)) { struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); - kref_init(&data->ref); + refcount_set(&data->ref, 1); } } @@ -647,7 +687,7 @@ static inline bool ublk_get_req_ref(const struct ublk_queue *ubq, if (ublk_need_req_ref(ubq)) { struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); - return kref_get_unless_zero(&data->ref); + return refcount_inc_not_zero(&data->ref); } return true; @@ -659,7 +699,8 @@ static inline void ublk_put_req_ref(const struct ublk_queue *ubq, if (ublk_need_req_ref(ubq)) { struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); - kref_put(&data->ref, ublk_complete_rq); + if (refcount_dec_and_test(&data->ref)) + __ublk_complete_rq(req); } else { __ublk_complete_rq(req); } @@ -695,12 +736,6 @@ static inline bool ublk_rq_has_data(const struct request *rq) return bio_has_data(rq->bio); } -static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq, - int tag) -{ - return &ubq->io_cmd_buf[tag]; -} - static inline struct ublksrv_io_desc * ublk_queue_cmd_buf(struct ublk_device *ub, int q_id) { @@ -1117,18 +1152,12 @@ exit: blk_mq_end_request(req, res); } -static void ublk_complete_rq(struct kref *ref) +static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req, + int res, unsigned issue_flags) { - struct ublk_rq_data *data = container_of(ref, struct ublk_rq_data, - ref); - struct request *req = blk_mq_rq_from_pdu(data); + /* read cmd first because req will overwrite it */ + struct io_uring_cmd *cmd = io->cmd; - __ublk_complete_rq(req); -} - -static void ubq_complete_io_cmd(struct ublk_io *io, int res, - unsigned issue_flags) -{ /* mark this cmd owned by ublksrv */ io->flags 
|= UBLK_IO_FLAG_OWNED_BY_SRV; @@ -1138,8 +1167,10 @@ static void ubq_complete_io_cmd(struct ublk_io *io, int res, */ io->flags &= ~UBLK_IO_FLAG_ACTIVE; + io->req = req; + /* tell ublksrv one io request is coming */ - io_uring_cmd_done(io->cmd, res, 0, issue_flags); + io_uring_cmd_done(cmd, res, 0, issue_flags); } #define UBLK_REQUEUE_DELAY_MS 3 @@ -1154,16 +1185,91 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq, blk_mq_end_request(rq, BLK_STS_IOERR); } +static void ublk_auto_buf_reg_fallback(struct request *req) +{ + const struct ublk_queue *ubq = req->mq_hctx->driver_data; + struct ublksrv_io_desc *iod = ublk_get_iod(ubq, req->tag); + struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); + + iod->op_flags |= UBLK_IO_F_NEED_REG_BUF; + refcount_set(&data->ref, 1); +} + +static bool ublk_auto_buf_reg(struct request *req, struct ublk_io *io, + unsigned int issue_flags) +{ + struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(io->cmd); + struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); + int ret; + + ret = io_buffer_register_bvec(io->cmd, req, ublk_io_release, + pdu->buf.index, issue_flags); + if (ret) { + if (pdu->buf.flags & UBLK_AUTO_BUF_REG_FALLBACK) { + ublk_auto_buf_reg_fallback(req); + return true; + } + blk_mq_end_request(req, BLK_STS_IOERR); + return false; + } + /* one extra reference is dropped by ublk_io_release */ + refcount_set(&data->ref, 2); + + data->buf_ctx_handle = io_uring_cmd_ctx_handle(io->cmd); + /* store buffer index in request payload */ + data->buf_index = pdu->buf.index; + io->flags |= UBLK_IO_FLAG_AUTO_BUF_REG; + return true; +} + +static bool ublk_prep_auto_buf_reg(struct ublk_queue *ubq, + struct request *req, struct ublk_io *io, + unsigned int issue_flags) +{ + if (ublk_support_auto_buf_reg(ubq) && ublk_rq_has_data(req)) + return ublk_auto_buf_reg(req, io, issue_flags); + + ublk_init_req_ref(ubq, req); + return true; +} + +static bool ublk_start_io(const struct ublk_queue *ubq, struct request *req, + struct ublk_io *io) 
+{ + unsigned mapped_bytes = ublk_map_io(ubq, req, io); + + /* partially mapped, update io descriptor */ + if (unlikely(mapped_bytes != blk_rq_bytes(req))) { + /* + * Nothing mapped, retry until we succeed. + * + * We may never succeed in mapping any bytes here because + * of OOM. TODO: reserve one buffer with single page pinned + * for providing forward progress guarantee. + */ + if (unlikely(!mapped_bytes)) { + blk_mq_requeue_request(req, false); + blk_mq_delay_kick_requeue_list(req->q, + UBLK_REQUEUE_DELAY_MS); + return false; + } + + ublk_get_iod(ubq, req->tag)->nr_sectors = + mapped_bytes >> 9; + } + + return true; +} + static void ublk_dispatch_req(struct ublk_queue *ubq, struct request *req, unsigned int issue_flags) { int tag = req->tag; struct ublk_io *io = &ubq->ios[tag]; - unsigned int mapped_bytes; - pr_devel("%s: complete: op %d, qid %d tag %d io_flags %x addr %llx\n", - __func__, io->cmd->cmd_op, ubq->q_id, req->tag, io->flags, + pr_devel("%s: complete: qid %d tag %d io_flags %x addr %llx\n", + __func__, ubq->q_id, req->tag, io->flags, ublk_get_iod(ubq, req->tag)->addr); /* @@ -1183,54 +1289,22 @@ static void ublk_dispatch_req(struct ublk_queue *ubq, if (ublk_need_get_data(ubq) && ublk_need_map_req(req)) { /* * We have not handled UBLK_IO_NEED_GET_DATA command yet, - * so immepdately pass UBLK_IO_RES_NEED_GET_DATA to ublksrv + * so immediately pass UBLK_IO_RES_NEED_GET_DATA to ublksrv * and notify it. */ - if (!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA)) { - io->flags |= UBLK_IO_FLAG_NEED_GET_DATA; - pr_devel("%s: need get data. op %d, qid %d tag %d io_flags %x\n", - __func__, io->cmd->cmd_op, ubq->q_id, - req->tag, io->flags); - ubq_complete_io_cmd(io, UBLK_IO_RES_NEED_GET_DATA, issue_flags); - return; - } - /* - * We have handled UBLK_IO_NEED_GET_DATA command, - * so clear UBLK_IO_FLAG_NEED_GET_DATA now and just - * do the copy work. 
- */ - io->flags &= ~UBLK_IO_FLAG_NEED_GET_DATA; - /* update iod->addr because ublksrv may have passed a new io buffer */ - ublk_get_iod(ubq, req->tag)->addr = io->addr; - pr_devel("%s: update iod->addr: op %d, qid %d tag %d io_flags %x addr %llx\n", - __func__, io->cmd->cmd_op, ubq->q_id, req->tag, io->flags, - ublk_get_iod(ubq, req->tag)->addr); + io->flags |= UBLK_IO_FLAG_NEED_GET_DATA; + pr_devel("%s: need get data. qid %d tag %d io_flags %x\n", + __func__, ubq->q_id, req->tag, io->flags); + ublk_complete_io_cmd(io, req, UBLK_IO_RES_NEED_GET_DATA, + issue_flags); + return; } - mapped_bytes = ublk_map_io(ubq, req, io); - - /* partially mapped, update io descriptor */ - if (unlikely(mapped_bytes != blk_rq_bytes(req))) { - /* - * Nothing mapped, retry until we succeed. - * - * We may never succeed in mapping any bytes here because - * of OOM. TODO: reserve one buffer with single page pinned - * for providing forward progress guarantee. - */ - if (unlikely(!mapped_bytes)) { - blk_mq_requeue_request(req, false); - blk_mq_delay_kick_requeue_list(req->q, - UBLK_REQUEUE_DELAY_MS); - return; - } - - ublk_get_iod(ubq, req->tag)->nr_sectors = - mapped_bytes >> 9; - } + if (!ublk_start_io(ubq, req, io)) + return; - ublk_init_req_ref(ubq, req); - ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags); + if (ublk_prep_auto_buf_reg(ubq, req, io, issue_flags)) + ublk_complete_io_cmd(io, req, UBLK_IO_RES_OK, issue_flags); } static void ublk_cmd_tw_cb(struct io_uring_cmd *cmd, @@ -1590,30 +1664,6 @@ static int ublk_ch_mmap(struct file *filp, struct vm_area_struct *vma) return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot); } -static void ublk_commit_completion(struct ublk_device *ub, - const struct ublksrv_io_cmd *ub_cmd) -{ - u32 qid = ub_cmd->q_id, tag = ub_cmd->tag; - struct ublk_queue *ubq = ublk_get_queue(ub, qid); - struct ublk_io *io = &ubq->ios[tag]; - struct request *req; - - /* now this cmd slot is owned by nbd driver */ - io->flags &= 
~UBLK_IO_FLAG_OWNED_BY_SRV; - io->res = ub_cmd->result; - - /* find the io request and complete */ - req = blk_mq_tag_to_rq(ub->tag_set.tags[qid], tag); - if (WARN_ON_ONCE(unlikely(!req))) - return; - - if (req_op(req) == REQ_OP_ZONE_APPEND) - req->__sector = ub_cmd->zone_append_lba; - - if (likely(!blk_should_fake_timeout(req->q))) - ublk_put_req_ref(ubq, req); -} - static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io, struct request *req) { @@ -1642,17 +1692,8 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq) for (i = 0; i < ubq->q_depth; i++) { struct ublk_io *io = &ubq->ios[i]; - if (!(io->flags & UBLK_IO_FLAG_ACTIVE)) { - struct request *rq; - - /* - * Either we fail the request or ublk_rq_task_work_cb - * will do it - */ - rq = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], i); - if (rq && blk_mq_request_started(rq)) - __ublk_fail_req(ubq, io, rq); - } + if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV) + __ublk_fail_req(ubq, io, io->req); } } @@ -1708,7 +1749,7 @@ static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag, * that ublk_dispatch_req() is always called */ req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag); - if (req && blk_mq_request_started(req)) + if (req && blk_mq_request_started(req) && req->tag == tag) return; spin_lock(&ubq->cancel_lock); @@ -1940,6 +1981,20 @@ static inline void ublk_prep_cancel(struct io_uring_cmd *cmd, io_uring_cmd_mark_cancelable(cmd, issue_flags); } +static inline int ublk_set_auto_buf_reg(struct io_uring_cmd *cmd) +{ + struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); + + pdu->buf = ublk_sqe_addr_to_auto_buf_reg(READ_ONCE(cmd->sqe->addr)); + + if (pdu->buf.reserved0 || pdu->buf.reserved1) + return -EINVAL; + + if (pdu->buf.flags & ~UBLK_AUTO_BUF_REG_F_MASK) + return -EINVAL; + return 0; +} + static void ublk_io_release(void *priv) { struct request *rq = priv; @@ -1953,16 +2008,12 @@ static int ublk_register_io_buf(struct io_uring_cmd *cmd, unsigned int 
index, unsigned int issue_flags) { struct ublk_device *ub = cmd->file->private_data; - const struct ublk_io *io = &ubq->ios[tag]; struct request *req; int ret; if (!ublk_support_zero_copy(ubq)) return -EINVAL; - if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) - return -EINVAL; - req = __ublk_check_and_get_req(ub, ubq, tag, 0); if (!req) return -EINVAL; @@ -1978,17 +2029,12 @@ static int ublk_register_io_buf(struct io_uring_cmd *cmd, } static int ublk_unregister_io_buf(struct io_uring_cmd *cmd, - const struct ublk_queue *ubq, unsigned int tag, + const struct ublk_queue *ubq, unsigned int index, unsigned int issue_flags) { - const struct ublk_io *io = &ubq->ios[tag]; - if (!ublk_support_zero_copy(ubq)) return -EINVAL; - if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) - return -EINVAL; - return io_buffer_unregister_bvec(cmd, index, issue_flags); } @@ -2031,6 +2077,12 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq, goto out; } + if (ublk_support_auto_buf_reg(ubq)) { + ret = ublk_set_auto_buf_reg(cmd); + if (ret) + goto out; + } + ublk_fill_io_cmd(io, cmd, buf_addr); ublk_mark_io_ready(ub, ubq); out: @@ -2038,6 +2090,90 @@ out: return ret; } +static int ublk_commit_and_fetch(const struct ublk_queue *ubq, + struct ublk_io *io, struct io_uring_cmd *cmd, + const struct ublksrv_io_cmd *ub_cmd, + unsigned int issue_flags) +{ + struct request *req = io->req; + + if (ublk_need_map_io(ubq)) { + /* + * COMMIT_AND_FETCH_REQ has to provide IO buffer if + * NEED GET DATA is not enabled or it is Read IO. 
+ */ + if (!ub_cmd->addr && (!ublk_need_get_data(ubq) || + req_op(req) == REQ_OP_READ)) + return -EINVAL; + } else if (req_op(req) != REQ_OP_ZONE_APPEND && ub_cmd->addr) { + /* + * User copy requires addr to be unset when command is + * not zone append + */ + return -EINVAL; + } + + if (ublk_support_auto_buf_reg(ubq)) { + int ret; + + /* + * `UBLK_F_AUTO_BUF_REG` only works iff `UBLK_IO_FETCH_REQ` + * and `UBLK_IO_COMMIT_AND_FETCH_REQ` are issued from same + * `io_ring_ctx`. + * + * If this uring_cmd's io_ring_ctx isn't same with the + * one for registering the buffer, it is ublk server's + * responsibility for unregistering the buffer, otherwise + * this ublk request gets stuck. + */ + if (io->flags & UBLK_IO_FLAG_AUTO_BUF_REG) { + struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); + + if (data->buf_ctx_handle == io_uring_cmd_ctx_handle(cmd)) + io_buffer_unregister_bvec(cmd, data->buf_index, + issue_flags); + io->flags &= ~UBLK_IO_FLAG_AUTO_BUF_REG; + } + + ret = ublk_set_auto_buf_reg(cmd); + if (ret) + return ret; + } + + ublk_fill_io_cmd(io, cmd, ub_cmd->addr); + + /* now this cmd slot is owned by ublk driver */ + io->flags &= ~UBLK_IO_FLAG_OWNED_BY_SRV; + io->res = ub_cmd->result; + + if (req_op(req) == REQ_OP_ZONE_APPEND) + req->__sector = ub_cmd->zone_append_lba; + + if (likely(!blk_should_fake_timeout(req->q))) + ublk_put_req_ref(ubq, req); + + return 0; +} + +static bool ublk_get_data(const struct ublk_queue *ubq, struct ublk_io *io) +{ + struct request *req = io->req; + + /* + * We have handled UBLK_IO_NEED_GET_DATA command, + * so clear UBLK_IO_FLAG_NEED_GET_DATA now and just + * do the copy work. 
+ */ + io->flags &= ~UBLK_IO_FLAG_NEED_GET_DATA; + /* update iod->addr because ublksrv may have passed a new io buffer */ + ublk_get_iod(ubq, req->tag)->addr = io->addr; + pr_devel("%s: update iod->addr: qid %d tag %d io_flags %x addr %llx\n", + __func__, ubq->q_id, req->tag, io->flags, + ublk_get_iod(ubq, req->tag)->addr); + + return ublk_start_io(ubq, req, io); +} + static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags, const struct ublksrv_io_cmd *ub_cmd) @@ -2048,7 +2184,6 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, u32 cmd_op = cmd->cmd_op; unsigned tag = ub_cmd->tag; int ret = -EINVAL; - struct request *req; pr_devel("%s: received: cmd op %d queue %d tag %d result %d\n", __func__, cmd->cmd_op, ub_cmd->q_id, tag, @@ -2058,9 +2193,6 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, goto out; ubq = ublk_get_queue(ub, ub_cmd->q_id); - if (!ubq || ub_cmd->q_id != ubq->q_id) - goto out; - if (ubq->ubq_daemon && ubq->ubq_daemon != current) goto out; @@ -2075,6 +2207,11 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, goto out; } + /* only UBLK_IO_FETCH_REQ is allowed if io is not OWNED_BY_SRV */ + if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV) && + _IOC_NR(cmd_op) != UBLK_IO_FETCH_REQ) + goto out; + /* * ensure that the user issues UBLK_IO_NEED_GET_DATA * iff the driver have set the UBLK_IO_FLAG_NEED_GET_DATA. 
@@ -2092,45 +2229,23 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, case UBLK_IO_REGISTER_IO_BUF: return ublk_register_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags); case UBLK_IO_UNREGISTER_IO_BUF: - return ublk_unregister_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags); + return ublk_unregister_io_buf(cmd, ubq, ub_cmd->addr, issue_flags); case UBLK_IO_FETCH_REQ: ret = ublk_fetch(cmd, ubq, io, ub_cmd->addr); if (ret) goto out; break; case UBLK_IO_COMMIT_AND_FETCH_REQ: - req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag); - - if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) - goto out; - - if (ublk_need_map_io(ubq)) { - /* - * COMMIT_AND_FETCH_REQ has to provide IO buffer if - * NEED GET DATA is not enabled or it is Read IO. - */ - if (!ub_cmd->addr && (!ublk_need_get_data(ubq) || - req_op(req) == REQ_OP_READ)) - goto out; - } else if (req_op(req) != REQ_OP_ZONE_APPEND && ub_cmd->addr) { - /* - * User copy requires addr to be unset when command is - * not zone append - */ - ret = -EINVAL; + ret = ublk_commit_and_fetch(ubq, io, cmd, ub_cmd, issue_flags); + if (ret) goto out; - } - - ublk_fill_io_cmd(io, cmd, ub_cmd->addr); - ublk_commit_completion(ub, ub_cmd); break; case UBLK_IO_NEED_GET_DATA: - if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) - goto out; - ublk_fill_io_cmd(io, cmd, ub_cmd->addr); - req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag); - ublk_dispatch_req(ubq, req, issue_flags); - return -EIOCBQUEUED; + io->addr = ub_cmd->addr; + if (!ublk_get_data(ubq, io)) + return -EIOCBQUEUED; + + return UBLK_IO_RES_OK; default: goto out; } @@ -2728,6 +2843,11 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header) return -EINVAL; } + if ((info.flags & UBLK_F_QUIESCE) && !(info.flags & UBLK_F_USER_RECOVERY)) { + pr_warn("UBLK_F_QUIESCE requires UBLK_F_USER_RECOVERY\n"); + return -EINVAL; + } + /* * unprivileged device can't be trusted, but RECOVERY and * RECOVERY_REISSUE still may hang error handling, so can't @@ -2744,8 
+2864,11 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header) * For USER_COPY, we depends on userspace to fill request * buffer by pwrite() to ublk char device, which can't be * used for unprivileged device + * + * Same with zero copy or auto buffer register. */ - if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY)) + if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY | + UBLK_F_AUTO_BUF_REG)) return -EINVAL; } @@ -2803,7 +2926,8 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header) UBLK_F_URING_CMD_COMP_IN_TASK; /* GET_DATA isn't needed any more with USER_COPY or ZERO COPY */ - if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY)) + if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY | + UBLK_F_AUTO_BUF_REG)) ub->dev_info.flags &= ~UBLK_F_NEED_GET_DATA; /* @@ -3106,6 +3230,127 @@ static int ublk_ctrl_get_features(const struct ublksrv_ctrl_cmd *header) return 0; } +static void ublk_ctrl_set_size(struct ublk_device *ub, const struct ublksrv_ctrl_cmd *header) +{ + struct ublk_param_basic *p = &ub->params.basic; + u64 new_size = header->data[0]; + + mutex_lock(&ub->mutex); + p->dev_sectors = new_size; + set_capacity_and_notify(ub->ub_disk, p->dev_sectors); + mutex_unlock(&ub->mutex); +} + +struct count_busy { + const struct ublk_queue *ubq; + unsigned int nr_busy; +}; + +static bool ublk_count_busy_req(struct request *rq, void *data) +{ + struct count_busy *idle = data; + + if (!blk_mq_request_started(rq) && rq->mq_hctx->driver_data == idle->ubq) + idle->nr_busy += 1; + return true; +} + +/* uring_cmd is guaranteed to be active if the associated request is idle */ +static bool ubq_has_idle_io(const struct ublk_queue *ubq) +{ + struct count_busy data = { + .ubq = ubq, + }; + + blk_mq_tagset_busy_iter(&ubq->dev->tag_set, ublk_count_busy_req, &data); + return data.nr_busy < ubq->q_depth; +} + +/* Wait until each hw queue has at least one idle IO */ +static int 
ublk_wait_for_idle_io(struct ublk_device *ub, + unsigned int timeout_ms) +{ + unsigned int elapsed = 0; + int ret; + + while (elapsed < timeout_ms && !signal_pending(current)) { + unsigned int queues_cancelable = 0; + int i; + + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { + struct ublk_queue *ubq = ublk_get_queue(ub, i); + + queues_cancelable += !!ubq_has_idle_io(ubq); + } + + /* + * Each queue needs at least one active command for + * notifying ublk server + */ + if (queues_cancelable == ub->dev_info.nr_hw_queues) + break; + + msleep(UBLK_REQUEUE_DELAY_MS); + elapsed += UBLK_REQUEUE_DELAY_MS; + } + + if (signal_pending(current)) + ret = -EINTR; + else if (elapsed >= timeout_ms) + ret = -EBUSY; + else + ret = 0; + + return ret; +} + +static int ublk_ctrl_quiesce_dev(struct ublk_device *ub, + const struct ublksrv_ctrl_cmd *header) +{ + /* zero means wait forever */ + u64 timeout_ms = header->data[0]; + struct gendisk *disk; + int i, ret = -ENODEV; + + if (!(ub->dev_info.flags & UBLK_F_QUIESCE)) + return -EOPNOTSUPP; + + mutex_lock(&ub->mutex); + disk = ublk_get_disk(ub); + if (!disk) + goto unlock; + if (ub->dev_info.state == UBLK_S_DEV_DEAD) + goto put_disk; + + ret = 0; + /* already in expected state */ + if (ub->dev_info.state != UBLK_S_DEV_LIVE) + goto put_disk; + + /* Mark all queues as canceling */ + blk_mq_quiesce_queue(disk->queue); + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { + struct ublk_queue *ubq = ublk_get_queue(ub, i); + + ubq->canceling = true; + } + blk_mq_unquiesce_queue(disk->queue); + + if (!timeout_ms) + timeout_ms = UINT_MAX; + ret = ublk_wait_for_idle_io(ub, timeout_ms); + +put_disk: + ublk_put_disk(disk); +unlock: + mutex_unlock(&ub->mutex); + + /* Cancel pending uring_cmd */ + if (!ret) + ublk_cancel_dev(ub); + return ret; +} + /* * All control commands are sent via /dev/ublk-control, so we have to check * the destination device's permission @@ -3191,6 +3436,8 @@ static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub, 
case UBLK_CMD_SET_PARAMS: case UBLK_CMD_START_USER_RECOVERY: case UBLK_CMD_END_USER_RECOVERY: + case UBLK_CMD_UPDATE_SIZE: + case UBLK_CMD_QUIESCE_DEV: mask = MAY_READ | MAY_WRITE; break; default: @@ -3282,6 +3529,13 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, case UBLK_CMD_END_USER_RECOVERY: ret = ublk_ctrl_end_recovery(ub, header); break; + case UBLK_CMD_UPDATE_SIZE: + ublk_ctrl_set_size(ub, header); + ret = 0; + break; + case UBLK_CMD_QUIESCE_DEV: + ret = ublk_ctrl_quiesce_dev(ub, header); + break; default: ret = -EOPNOTSUPP; break; @@ -3315,6 +3569,7 @@ static int __init ublk_init(void) BUILD_BUG_ON((u64)UBLKSRV_IO_BUF_OFFSET + UBLKSRV_IO_BUF_TOTAL_SIZE < UBLKSRV_IO_BUF_OFFSET); + BUILD_BUG_ON(sizeof(struct ublk_auto_buf_reg) != 8); init_waitqueue_head(&ublk_idr_wq); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 7cffea01d868..30bca8cb7106 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -571,7 +571,7 @@ static int virtblk_submit_zone_report(struct virtio_blk *vblk, vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_ZONE_REPORT); vbr->out_hdr.sector = cpu_to_virtio64(vblk->vdev, sector); - err = blk_rq_map_kern(q, req, report_buf, report_len, GFP_KERNEL); + err = blk_rq_map_kern(req, report_buf, report_len, GFP_KERNEL); if (err) goto out; @@ -817,7 +817,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str) vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID); vbr->out_hdr.sector = 0; - err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL); + err = blk_rq_map_kern(req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL); if (err) goto out; diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c new file mode 100644 index 000000000000..553b1a713ab9 --- /dev/null +++ b/drivers/block/zloop.c @@ -0,0 +1,1385 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2025, Christoph Hellwig. 
+ * Copyright (c) 2025, Western Digital Corporation or its affiliates. + * + * Zoned Loop Device driver - exports a zoned block device using one file per + * zone as backing storage. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/blk-mq.h> +#include <linux/blkzoned.h> +#include <linux/pagemap.h> +#include <linux/miscdevice.h> +#include <linux/falloc.h> +#include <linux/mutex.h> +#include <linux/parser.h> +#include <linux/seq_file.h> + +/* + * Options for adding (and removing) a device. + */ +enum { + ZLOOP_OPT_ERR = 0, + ZLOOP_OPT_ID = (1 << 0), + ZLOOP_OPT_CAPACITY = (1 << 1), + ZLOOP_OPT_ZONE_SIZE = (1 << 2), + ZLOOP_OPT_ZONE_CAPACITY = (1 << 3), + ZLOOP_OPT_NR_CONV_ZONES = (1 << 4), + ZLOOP_OPT_BASE_DIR = (1 << 5), + ZLOOP_OPT_NR_QUEUES = (1 << 6), + ZLOOP_OPT_QUEUE_DEPTH = (1 << 7), + ZLOOP_OPT_BUFFERED_IO = (1 << 8), +}; + +static const match_table_t zloop_opt_tokens = { + { ZLOOP_OPT_ID, "id=%d" }, + { ZLOOP_OPT_CAPACITY, "capacity_mb=%u" }, + { ZLOOP_OPT_ZONE_SIZE, "zone_size_mb=%u" }, + { ZLOOP_OPT_ZONE_CAPACITY, "zone_capacity_mb=%u" }, + { ZLOOP_OPT_NR_CONV_ZONES, "conv_zones=%u" }, + { ZLOOP_OPT_BASE_DIR, "base_dir=%s" }, + { ZLOOP_OPT_NR_QUEUES, "nr_queues=%u" }, + { ZLOOP_OPT_QUEUE_DEPTH, "queue_depth=%u" }, + { ZLOOP_OPT_BUFFERED_IO, "buffered_io" }, + { ZLOOP_OPT_ERR, NULL } +}; + +/* Default values for the "add" operation. */ +#define ZLOOP_DEF_ID -1 +#define ZLOOP_DEF_ZONE_SIZE ((256ULL * SZ_1M) >> SECTOR_SHIFT) +#define ZLOOP_DEF_NR_ZONES 64 +#define ZLOOP_DEF_NR_CONV_ZONES 8 +#define ZLOOP_DEF_BASE_DIR "/var/local/zloop" +#define ZLOOP_DEF_NR_QUEUES 1 +#define ZLOOP_DEF_QUEUE_DEPTH 128 +#define ZLOOP_DEF_BUFFERED_IO false + +/* Arbitrary limit on the zone size (16GB). 
*/ +#define ZLOOP_MAX_ZONE_SIZE_MB 16384 + +struct zloop_options { + unsigned int mask; + int id; + sector_t capacity; + sector_t zone_size; + sector_t zone_capacity; + unsigned int nr_conv_zones; + char *base_dir; + unsigned int nr_queues; + unsigned int queue_depth; + bool buffered_io; +}; + +/* + * Device states. + */ +enum { + Zlo_creating = 0, + Zlo_live, + Zlo_deleting, +}; + +enum zloop_zone_flags { + ZLOOP_ZONE_CONV = 0, + ZLOOP_ZONE_SEQ_ERROR, +}; + +struct zloop_zone { + struct file *file; + + unsigned long flags; + struct mutex lock; + enum blk_zone_cond cond; + sector_t start; + sector_t wp; + + gfp_t old_gfp_mask; +}; + +struct zloop_device { + unsigned int id; + unsigned int state; + + struct blk_mq_tag_set tag_set; + struct gendisk *disk; + + struct workqueue_struct *workqueue; + bool buffered_io; + + const char *base_dir; + struct file *data_dir; + + unsigned int zone_shift; + sector_t zone_size; + sector_t zone_capacity; + unsigned int nr_zones; + unsigned int nr_conv_zones; + unsigned int block_size; + + struct zloop_zone zones[] __counted_by(nr_zones); +}; + +struct zloop_cmd { + struct work_struct work; + atomic_t ref; + sector_t sector; + sector_t nr_sectors; + long ret; + struct kiocb iocb; + struct bio_vec *bvec; +}; + +static DEFINE_IDR(zloop_index_idr); +static DEFINE_MUTEX(zloop_ctl_mutex); + +static unsigned int rq_zone_no(struct request *rq) +{ + struct zloop_device *zlo = rq->q->queuedata; + + return blk_rq_pos(rq) >> zlo->zone_shift; +} + +static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no) +{ + struct zloop_zone *zone = &zlo->zones[zone_no]; + struct kstat stat; + sector_t file_sectors; + int ret; + + lockdep_assert_held(&zone->lock); + + ret = vfs_getattr(&zone->file->f_path, &stat, STATX_SIZE, 0); + if (ret < 0) { + pr_err("Failed to get zone %u file stat (err=%d)\n", + zone_no, ret); + set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags); + return ret; + } + + file_sectors = stat.size >> SECTOR_SHIFT; + if 
(file_sectors > zlo->zone_capacity) { + pr_err("Zone %u file too large (%llu sectors > %llu)\n", + zone_no, file_sectors, zlo->zone_capacity); + return -EINVAL; + } + + if (file_sectors & ((zlo->block_size >> SECTOR_SHIFT) - 1)) { + pr_err("Zone %u file size not aligned to block size %u\n", + zone_no, zlo->block_size); + return -EINVAL; + } + + if (!file_sectors) { + zone->cond = BLK_ZONE_COND_EMPTY; + zone->wp = zone->start; + } else if (file_sectors == zlo->zone_capacity) { + zone->cond = BLK_ZONE_COND_FULL; + zone->wp = zone->start + zlo->zone_size; + } else { + zone->cond = BLK_ZONE_COND_CLOSED; + zone->wp = zone->start + file_sectors; + } + + return 0; +} + +static int zloop_open_zone(struct zloop_device *zlo, unsigned int zone_no) +{ + struct zloop_zone *zone = &zlo->zones[zone_no]; + int ret = 0; + + if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) + return -EIO; + + mutex_lock(&zone->lock); + + if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) { + ret = zloop_update_seq_zone(zlo, zone_no); + if (ret) + goto unlock; + } + + switch (zone->cond) { + case BLK_ZONE_COND_EXP_OPEN: + break; + case BLK_ZONE_COND_EMPTY: + case BLK_ZONE_COND_CLOSED: + case BLK_ZONE_COND_IMP_OPEN: + zone->cond = BLK_ZONE_COND_EXP_OPEN; + break; + case BLK_ZONE_COND_FULL: + default: + ret = -EIO; + break; + } + +unlock: + mutex_unlock(&zone->lock); + + return ret; +} + +static int zloop_close_zone(struct zloop_device *zlo, unsigned int zone_no) +{ + struct zloop_zone *zone = &zlo->zones[zone_no]; + int ret = 0; + + if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) + return -EIO; + + mutex_lock(&zone->lock); + + if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) { + ret = zloop_update_seq_zone(zlo, zone_no); + if (ret) + goto unlock; + } + + switch (zone->cond) { + case BLK_ZONE_COND_CLOSED: + break; + case BLK_ZONE_COND_IMP_OPEN: + case BLK_ZONE_COND_EXP_OPEN: + if (zone->wp == zone->start) + zone->cond = BLK_ZONE_COND_EMPTY; + else + zone->cond = BLK_ZONE_COND_CLOSED; + 
break; + case BLK_ZONE_COND_EMPTY: + case BLK_ZONE_COND_FULL: + default: + ret = -EIO; + break; + } + +unlock: + mutex_unlock(&zone->lock); + + return ret; +} + +static int zloop_reset_zone(struct zloop_device *zlo, unsigned int zone_no) +{ + struct zloop_zone *zone = &zlo->zones[zone_no]; + int ret = 0; + + if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) + return -EIO; + + mutex_lock(&zone->lock); + + if (!test_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags) && + zone->cond == BLK_ZONE_COND_EMPTY) + goto unlock; + + if (vfs_truncate(&zone->file->f_path, 0)) { + set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags); + ret = -EIO; + goto unlock; + } + + zone->cond = BLK_ZONE_COND_EMPTY; + zone->wp = zone->start; + clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags); + +unlock: + mutex_unlock(&zone->lock); + + return ret; +} + +static int zloop_reset_all_zones(struct zloop_device *zlo) +{ + unsigned int i; + int ret; + + for (i = zlo->nr_conv_zones; i < zlo->nr_zones; i++) { + ret = zloop_reset_zone(zlo, i); + if (ret) + return ret; + } + + return 0; +} + +static int zloop_finish_zone(struct zloop_device *zlo, unsigned int zone_no) +{ + struct zloop_zone *zone = &zlo->zones[zone_no]; + int ret = 0; + + if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) + return -EIO; + + mutex_lock(&zone->lock); + + if (!test_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags) && + zone->cond == BLK_ZONE_COND_FULL) + goto unlock; + + if (vfs_truncate(&zone->file->f_path, zlo->zone_size << SECTOR_SHIFT)) { + set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags); + ret = -EIO; + goto unlock; + } + + zone->cond = BLK_ZONE_COND_FULL; + zone->wp = zone->start + zlo->zone_size; + clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags); + + unlock: + mutex_unlock(&zone->lock); + + return ret; +} + +static void zloop_put_cmd(struct zloop_cmd *cmd) +{ + struct request *rq = blk_mq_rq_from_pdu(cmd); + + if (!atomic_dec_and_test(&cmd->ref)) + return; + kfree(cmd->bvec); + cmd->bvec = NULL; + if (likely(!blk_should_fake_timeout(rq->q))) + 
blk_mq_complete_request(rq); +} + +static void zloop_rw_complete(struct kiocb *iocb, long ret) +{ + struct zloop_cmd *cmd = container_of(iocb, struct zloop_cmd, iocb); + + cmd->ret = ret; + zloop_put_cmd(cmd); +} + +static void zloop_rw(struct zloop_cmd *cmd) +{ + struct request *rq = blk_mq_rq_from_pdu(cmd); + struct zloop_device *zlo = rq->q->queuedata; + unsigned int zone_no = rq_zone_no(rq); + sector_t sector = blk_rq_pos(rq); + sector_t nr_sectors = blk_rq_sectors(rq); + bool is_append = req_op(rq) == REQ_OP_ZONE_APPEND; + bool is_write = req_op(rq) == REQ_OP_WRITE || is_append; + int rw = is_write ? ITER_SOURCE : ITER_DEST; + struct req_iterator rq_iter; + struct zloop_zone *zone; + struct iov_iter iter; + struct bio_vec tmp; + sector_t zone_end; + int nr_bvec = 0; + int ret; + + atomic_set(&cmd->ref, 2); + cmd->sector = sector; + cmd->nr_sectors = nr_sectors; + cmd->ret = 0; + + /* We should never get an I/O beyond the device capacity. */ + if (WARN_ON_ONCE(zone_no >= zlo->nr_zones)) { + ret = -EIO; + goto out; + } + zone = &zlo->zones[zone_no]; + zone_end = zone->start + zlo->zone_capacity; + + /* + * The block layer should never send requests that are not fully + * contained within the zone. + */ + if (WARN_ON_ONCE(sector + nr_sectors > zone->start + zlo->zone_size)) { + ret = -EIO; + goto out; + } + + if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) { + mutex_lock(&zone->lock); + ret = zloop_update_seq_zone(zlo, zone_no); + mutex_unlock(&zone->lock); + if (ret) + goto out; + } + + if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) { + mutex_lock(&zone->lock); + + if (is_append) { + sector = zone->wp; + cmd->sector = sector; + } + + /* + * Write operations must be aligned to the write pointer and + * fully contained within the zone capacity. 
+ */ + if (sector != zone->wp || zone->wp + nr_sectors > zone_end) { + pr_err("Zone %u: unaligned write: sect %llu, wp %llu\n", + zone_no, sector, zone->wp); + ret = -EIO; + goto unlock; + } + + /* Implicitly open the target zone. */ + if (zone->cond == BLK_ZONE_COND_CLOSED || + zone->cond == BLK_ZONE_COND_EMPTY) + zone->cond = BLK_ZONE_COND_IMP_OPEN; + + /* + * Advance the write pointer of sequential zones. If the write + * fails, the wp position will be corrected when the next I/O + * copmpletes. + */ + zone->wp += nr_sectors; + if (zone->wp == zone_end) + zone->cond = BLK_ZONE_COND_FULL; + } + + rq_for_each_bvec(tmp, rq, rq_iter) + nr_bvec++; + + if (rq->bio != rq->biotail) { + struct bio_vec *bvec; + + cmd->bvec = kmalloc_array(nr_bvec, sizeof(*cmd->bvec), GFP_NOIO); + if (!cmd->bvec) { + ret = -EIO; + goto unlock; + } + + /* + * The bios of the request may be started from the middle of + * the 'bvec' because of bio splitting, so we can't directly + * copy bio->bi_iov_vec to new bvec. The rq_for_each_bvec + * API will take care of all details for us. 
+ */ + bvec = cmd->bvec; + rq_for_each_bvec(tmp, rq, rq_iter) { + *bvec = tmp; + bvec++; + } + iov_iter_bvec(&iter, rw, cmd->bvec, nr_bvec, blk_rq_bytes(rq)); + } else { + /* + * Same here, this bio may be started from the middle of the + * 'bvec' because of bio splitting, so offset from the bvec + * must be passed to iov iterator + */ + iov_iter_bvec(&iter, rw, + __bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter), + nr_bvec, blk_rq_bytes(rq)); + iter.iov_offset = rq->bio->bi_iter.bi_bvec_done; + } + + cmd->iocb.ki_pos = (sector - zone->start) << SECTOR_SHIFT; + cmd->iocb.ki_filp = zone->file; + cmd->iocb.ki_complete = zloop_rw_complete; + if (!zlo->buffered_io) + cmd->iocb.ki_flags = IOCB_DIRECT; + cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); + + if (rw == ITER_SOURCE) + ret = zone->file->f_op->write_iter(&cmd->iocb, &iter); + else + ret = zone->file->f_op->read_iter(&cmd->iocb, &iter); +unlock: + if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) + mutex_unlock(&zone->lock); +out: + if (ret != -EIOCBQUEUED) + zloop_rw_complete(&cmd->iocb, ret); + zloop_put_cmd(cmd); +} + +static void zloop_handle_cmd(struct zloop_cmd *cmd) +{ + struct request *rq = blk_mq_rq_from_pdu(cmd); + struct zloop_device *zlo = rq->q->queuedata; + + switch (req_op(rq)) { + case REQ_OP_READ: + case REQ_OP_WRITE: + case REQ_OP_ZONE_APPEND: + /* + * zloop_rw() always executes asynchronously or completes + * directly. 
+ */ + zloop_rw(cmd); + return; + case REQ_OP_FLUSH: + /* + * Sync the entire FS containing the zone files instead of + * walking all files + */ + cmd->ret = sync_filesystem(file_inode(zlo->data_dir)->i_sb); + break; + case REQ_OP_ZONE_RESET: + cmd->ret = zloop_reset_zone(zlo, rq_zone_no(rq)); + break; + case REQ_OP_ZONE_RESET_ALL: + cmd->ret = zloop_reset_all_zones(zlo); + break; + case REQ_OP_ZONE_FINISH: + cmd->ret = zloop_finish_zone(zlo, rq_zone_no(rq)); + break; + case REQ_OP_ZONE_OPEN: + cmd->ret = zloop_open_zone(zlo, rq_zone_no(rq)); + break; + case REQ_OP_ZONE_CLOSE: + cmd->ret = zloop_close_zone(zlo, rq_zone_no(rq)); + break; + default: + WARN_ON_ONCE(1); + pr_err("Unsupported operation %d\n", req_op(rq)); + cmd->ret = -EOPNOTSUPP; + break; + } + + blk_mq_complete_request(rq); +} + +static void zloop_cmd_workfn(struct work_struct *work) +{ + struct zloop_cmd *cmd = container_of(work, struct zloop_cmd, work); + int orig_flags = current->flags; + + current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO; + zloop_handle_cmd(cmd); + current->flags = orig_flags; +} + +static void zloop_complete_rq(struct request *rq) +{ + struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq); + struct zloop_device *zlo = rq->q->queuedata; + unsigned int zone_no = cmd->sector >> zlo->zone_shift; + struct zloop_zone *zone = &zlo->zones[zone_no]; + blk_status_t sts = BLK_STS_OK; + + switch (req_op(rq)) { + case REQ_OP_READ: + if (cmd->ret < 0) + pr_err("Zone %u: failed read sector %llu, %llu sectors\n", + zone_no, cmd->sector, cmd->nr_sectors); + + if (cmd->ret >= 0 && cmd->ret != blk_rq_bytes(rq)) { + /* short read */ + struct bio *bio; + + __rq_for_each_bio(bio, rq) + zero_fill_bio(bio); + } + break; + case REQ_OP_WRITE: + case REQ_OP_ZONE_APPEND: + if (cmd->ret < 0) + pr_err("Zone %u: failed %swrite sector %llu, %llu sectors\n", + zone_no, + req_op(rq) == REQ_OP_WRITE ? 
"" : "append ", + cmd->sector, cmd->nr_sectors); + + if (cmd->ret >= 0 && cmd->ret != blk_rq_bytes(rq)) { + pr_err("Zone %u: partial write %ld/%u B\n", + zone_no, cmd->ret, blk_rq_bytes(rq)); + cmd->ret = -EIO; + } + + if (cmd->ret < 0 && !test_bit(ZLOOP_ZONE_CONV, &zone->flags)) { + /* + * A write to a sequential zone file failed: mark the + * zone as having an error. This will be corrected and + * cleared when the next IO is submitted. + */ + set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags); + break; + } + if (req_op(rq) == REQ_OP_ZONE_APPEND) + rq->__sector = cmd->sector; + + break; + default: + break; + } + + if (cmd->ret < 0) + sts = errno_to_blk_status(cmd->ret); + blk_mq_end_request(rq, sts); +} + +static blk_status_t zloop_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + struct request *rq = bd->rq; + struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq); + struct zloop_device *zlo = rq->q->queuedata; + + if (zlo->state == Zlo_deleting) + return BLK_STS_IOERR; + + blk_mq_start_request(rq); + + INIT_WORK(&cmd->work, zloop_cmd_workfn); + queue_work(zlo->workqueue, &cmd->work); + + return BLK_STS_OK; +} + +static const struct blk_mq_ops zloop_mq_ops = { + .queue_rq = zloop_queue_rq, + .complete = zloop_complete_rq, +}; + +static int zloop_open(struct gendisk *disk, blk_mode_t mode) +{ + struct zloop_device *zlo = disk->private_data; + int ret; + + ret = mutex_lock_killable(&zloop_ctl_mutex); + if (ret) + return ret; + + if (zlo->state != Zlo_live) + ret = -ENXIO; + mutex_unlock(&zloop_ctl_mutex); + return ret; +} + +static int zloop_report_zones(struct gendisk *disk, sector_t sector, + unsigned int nr_zones, report_zones_cb cb, void *data) +{ + struct zloop_device *zlo = disk->private_data; + struct blk_zone blkz = {}; + unsigned int first, i; + int ret; + + first = disk_zone_no(disk, sector); + if (first >= zlo->nr_zones) + return 0; + nr_zones = min(nr_zones, zlo->nr_zones - first); + + for (i = 0; i < nr_zones; i++) { + unsigned int zone_no 
= first + i; + struct zloop_zone *zone = &zlo->zones[zone_no]; + + mutex_lock(&zone->lock); + + if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) { + ret = zloop_update_seq_zone(zlo, zone_no); + if (ret) { + mutex_unlock(&zone->lock); + return ret; + } + } + + blkz.start = zone->start; + blkz.len = zlo->zone_size; + blkz.wp = zone->wp; + blkz.cond = zone->cond; + if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) { + blkz.type = BLK_ZONE_TYPE_CONVENTIONAL; + blkz.capacity = zlo->zone_size; + } else { + blkz.type = BLK_ZONE_TYPE_SEQWRITE_REQ; + blkz.capacity = zlo->zone_capacity; + } + + mutex_unlock(&zone->lock); + + ret = cb(&blkz, i, data); + if (ret) + return ret; + } + + return nr_zones; +} + +static void zloop_free_disk(struct gendisk *disk) +{ + struct zloop_device *zlo = disk->private_data; + unsigned int i; + + for (i = 0; i < zlo->nr_zones; i++) { + struct zloop_zone *zone = &zlo->zones[i]; + + mapping_set_gfp_mask(zone->file->f_mapping, + zone->old_gfp_mask); + fput(zone->file); + } + + fput(zlo->data_dir); + destroy_workqueue(zlo->workqueue); + kfree(zlo->base_dir); + kvfree(zlo); +} + +static const struct block_device_operations zloop_fops = { + .owner = THIS_MODULE, + .open = zloop_open, + .report_zones = zloop_report_zones, + .free_disk = zloop_free_disk, +}; + +__printf(3, 4) +static struct file *zloop_filp_open_fmt(int oflags, umode_t mode, + const char *fmt, ...) +{ + struct file *file; + va_list ap; + char *p; + + va_start(ap, fmt); + p = kvasprintf(GFP_KERNEL, fmt, ap); + va_end(ap); + + if (!p) + return ERR_PTR(-ENOMEM); + file = filp_open(p, oflags, mode); + kfree(p); + return file; +} + +static int zloop_get_block_size(struct zloop_device *zlo, + struct zloop_zone *zone) +{ + struct block_device *sb_bdev = zone->file->f_mapping->host->i_sb->s_bdev; + struct kstat st; + + /* + * If the FS block size is lower than or equal to 4K, use that as the + * device block size. 
Otherwise, fallback to the FS direct IO alignment + * constraint if that is provided, and to the FS underlying device + * physical block size if the direct IO alignment is unknown. + */ + if (file_inode(zone->file)->i_sb->s_blocksize <= SZ_4K) + zlo->block_size = file_inode(zone->file)->i_sb->s_blocksize; + else if (!vfs_getattr(&zone->file->f_path, &st, STATX_DIOALIGN, 0) && + (st.result_mask & STATX_DIOALIGN)) + zlo->block_size = st.dio_offset_align; + else if (sb_bdev) + zlo->block_size = bdev_physical_block_size(sb_bdev); + else + zlo->block_size = SECTOR_SIZE; + + if (zlo->zone_capacity & ((zlo->block_size >> SECTOR_SHIFT) - 1)) { + pr_err("Zone capacity is not aligned to block size %u\n", + zlo->block_size); + return -EINVAL; + } + + return 0; +} + +static int zloop_init_zone(struct zloop_device *zlo, struct zloop_options *opts, + unsigned int zone_no, bool restore) +{ + struct zloop_zone *zone = &zlo->zones[zone_no]; + int oflags = O_RDWR; + struct kstat stat; + sector_t file_sectors; + int ret; + + mutex_init(&zone->lock); + zone->start = (sector_t)zone_no << zlo->zone_shift; + + if (!restore) + oflags |= O_CREAT; + + if (!opts->buffered_io) + oflags |= O_DIRECT; + + if (zone_no < zlo->nr_conv_zones) { + /* Conventional zone file. 
*/ + set_bit(ZLOOP_ZONE_CONV, &zone->flags); + zone->cond = BLK_ZONE_COND_NOT_WP; + zone->wp = U64_MAX; + + zone->file = zloop_filp_open_fmt(oflags, 0600, "%s/%u/cnv-%06u", + zlo->base_dir, zlo->id, zone_no); + if (IS_ERR(zone->file)) { + pr_err("Failed to open zone %u file %s/%u/cnv-%06u (err=%ld)", + zone_no, zlo->base_dir, zlo->id, zone_no, + PTR_ERR(zone->file)); + return PTR_ERR(zone->file); + } + + if (!zlo->block_size) { + ret = zloop_get_block_size(zlo, zone); + if (ret) + return ret; + } + + ret = vfs_getattr(&zone->file->f_path, &stat, STATX_SIZE, 0); + if (ret < 0) { + pr_err("Failed to get zone %u file stat\n", zone_no); + return ret; + } + file_sectors = stat.size >> SECTOR_SHIFT; + + if (restore && file_sectors != zlo->zone_size) { + pr_err("Invalid conventional zone %u file size (%llu sectors != %llu)\n", + zone_no, file_sectors, zlo->zone_capacity); + return ret; + } + + ret = vfs_truncate(&zone->file->f_path, + zlo->zone_size << SECTOR_SHIFT); + if (ret < 0) { + pr_err("Failed to truncate zone %u file (err=%d)\n", + zone_no, ret); + return ret; + } + + return 0; + } + + /* Sequential zone file. 
*/ + zone->file = zloop_filp_open_fmt(oflags, 0600, "%s/%u/seq-%06u", + zlo->base_dir, zlo->id, zone_no); + if (IS_ERR(zone->file)) { + pr_err("Failed to open zone %u file %s/%u/seq-%06u (err=%ld)", + zone_no, zlo->base_dir, zlo->id, zone_no, + PTR_ERR(zone->file)); + return PTR_ERR(zone->file); + } + + if (!zlo->block_size) { + ret = zloop_get_block_size(zlo, zone); + if (ret) + return ret; + } + + zloop_get_block_size(zlo, zone); + + mutex_lock(&zone->lock); + ret = zloop_update_seq_zone(zlo, zone_no); + mutex_unlock(&zone->lock); + + return ret; +} + +static bool zloop_dev_exists(struct zloop_device *zlo) +{ + struct file *cnv, *seq; + bool exists; + + cnv = zloop_filp_open_fmt(O_RDONLY, 0600, "%s/%u/cnv-%06u", + zlo->base_dir, zlo->id, 0); + seq = zloop_filp_open_fmt(O_RDONLY, 0600, "%s/%u/seq-%06u", + zlo->base_dir, zlo->id, 0); + exists = !IS_ERR(cnv) || !IS_ERR(seq); + + if (!IS_ERR(cnv)) + fput(cnv); + if (!IS_ERR(seq)) + fput(seq); + + return exists; +} + +static int zloop_ctl_add(struct zloop_options *opts) +{ + struct queue_limits lim = { + .max_hw_sectors = SZ_1M >> SECTOR_SHIFT, + .max_hw_zone_append_sectors = SZ_1M >> SECTOR_SHIFT, + .chunk_sectors = opts->zone_size, + .features = BLK_FEAT_ZONED, + }; + unsigned int nr_zones, i, j; + struct zloop_device *zlo; + int ret = -EINVAL; + bool restore; + + __module_get(THIS_MODULE); + + nr_zones = opts->capacity >> ilog2(opts->zone_size); + if (opts->nr_conv_zones >= nr_zones) { + pr_err("Invalid number of conventional zones %u\n", + opts->nr_conv_zones); + goto out; + } + + zlo = kvzalloc(struct_size(zlo, zones, nr_zones), GFP_KERNEL); + if (!zlo) { + ret = -ENOMEM; + goto out; + } + zlo->state = Zlo_creating; + + ret = mutex_lock_killable(&zloop_ctl_mutex); + if (ret) + goto out_free_dev; + + /* Allocate id, if @opts->id >= 0, we're requesting that specific id */ + if (opts->id >= 0) { + ret = idr_alloc(&zloop_index_idr, zlo, + opts->id, opts->id + 1, GFP_KERNEL); + if (ret == -ENOSPC) + ret = -EEXIST; + } 
else { + ret = idr_alloc(&zloop_index_idr, zlo, 0, 0, GFP_KERNEL); + } + mutex_unlock(&zloop_ctl_mutex); + if (ret < 0) + goto out_free_dev; + + zlo->id = ret; + zlo->zone_shift = ilog2(opts->zone_size); + zlo->zone_size = opts->zone_size; + if (opts->zone_capacity) + zlo->zone_capacity = opts->zone_capacity; + else + zlo->zone_capacity = zlo->zone_size; + zlo->nr_zones = nr_zones; + zlo->nr_conv_zones = opts->nr_conv_zones; + zlo->buffered_io = opts->buffered_io; + + zlo->workqueue = alloc_workqueue("zloop%d", WQ_UNBOUND | WQ_FREEZABLE, + opts->nr_queues * opts->queue_depth, zlo->id); + if (!zlo->workqueue) { + ret = -ENOMEM; + goto out_free_idr; + } + + if (opts->base_dir) + zlo->base_dir = kstrdup(opts->base_dir, GFP_KERNEL); + else + zlo->base_dir = kstrdup(ZLOOP_DEF_BASE_DIR, GFP_KERNEL); + if (!zlo->base_dir) { + ret = -ENOMEM; + goto out_destroy_workqueue; + } + + zlo->data_dir = zloop_filp_open_fmt(O_RDONLY | O_DIRECTORY, 0, "%s/%u", + zlo->base_dir, zlo->id); + if (IS_ERR(zlo->data_dir)) { + ret = PTR_ERR(zlo->data_dir); + pr_warn("Failed to open directory %s/%u (err=%d)\n", + zlo->base_dir, zlo->id, ret); + goto out_free_base_dir; + } + + /* + * If we already have zone files, we are restoring a device created by a + * previous add operation. In this case, zloop_init_zone() will check + * that the zone files are consistent with the zone configuration given. 
+ */ + restore = zloop_dev_exists(zlo); + for (i = 0; i < nr_zones; i++) { + ret = zloop_init_zone(zlo, opts, i, restore); + if (ret) + goto out_close_files; + } + + lim.physical_block_size = zlo->block_size; + lim.logical_block_size = zlo->block_size; + + zlo->tag_set.ops = &zloop_mq_ops; + zlo->tag_set.nr_hw_queues = opts->nr_queues; + zlo->tag_set.queue_depth = opts->queue_depth; + zlo->tag_set.numa_node = NUMA_NO_NODE; + zlo->tag_set.cmd_size = sizeof(struct zloop_cmd); + zlo->tag_set.driver_data = zlo; + + ret = blk_mq_alloc_tag_set(&zlo->tag_set); + if (ret) { + pr_err("blk_mq_alloc_tag_set failed (err=%d)\n", ret); + goto out_close_files; + } + + zlo->disk = blk_mq_alloc_disk(&zlo->tag_set, &lim, zlo); + if (IS_ERR(zlo->disk)) { + pr_err("blk_mq_alloc_disk failed (err=%d)\n", ret); + ret = PTR_ERR(zlo->disk); + goto out_cleanup_tags; + } + zlo->disk->flags = GENHD_FL_NO_PART; + zlo->disk->fops = &zloop_fops; + zlo->disk->private_data = zlo; + sprintf(zlo->disk->disk_name, "zloop%d", zlo->id); + set_capacity(zlo->disk, (u64)lim.chunk_sectors * zlo->nr_zones); + + ret = blk_revalidate_disk_zones(zlo->disk); + if (ret) + goto out_cleanup_disk; + + ret = add_disk(zlo->disk); + if (ret) { + pr_err("add_disk failed (err=%d)\n", ret); + goto out_cleanup_disk; + } + + mutex_lock(&zloop_ctl_mutex); + zlo->state = Zlo_live; + mutex_unlock(&zloop_ctl_mutex); + + pr_info("Added device %d: %u zones of %llu MB, %u B block size\n", + zlo->id, zlo->nr_zones, + ((sector_t)zlo->zone_size << SECTOR_SHIFT) >> 20, + zlo->block_size); + + return 0; + +out_cleanup_disk: + put_disk(zlo->disk); +out_cleanup_tags: + blk_mq_free_tag_set(&zlo->tag_set); +out_close_files: + for (j = 0; j < i; j++) { + struct zloop_zone *zone = &zlo->zones[j]; + + if (!IS_ERR_OR_NULL(zone->file)) + fput(zone->file); + } + fput(zlo->data_dir); +out_free_base_dir: + kfree(zlo->base_dir); +out_destroy_workqueue: + destroy_workqueue(zlo->workqueue); +out_free_idr: + mutex_lock(&zloop_ctl_mutex); + 
idr_remove(&zloop_index_idr, zlo->id); + mutex_unlock(&zloop_ctl_mutex); +out_free_dev: + kvfree(zlo); +out: + module_put(THIS_MODULE); + if (ret == -ENOENT) + ret = -EINVAL; + return ret; +} + +static int zloop_ctl_remove(struct zloop_options *opts) +{ + struct zloop_device *zlo; + int ret; + + if (!(opts->mask & ZLOOP_OPT_ID)) { + pr_err("No ID specified\n"); + return -EINVAL; + } + + ret = mutex_lock_killable(&zloop_ctl_mutex); + if (ret) + return ret; + + zlo = idr_find(&zloop_index_idr, opts->id); + if (!zlo || zlo->state == Zlo_creating) { + ret = -ENODEV; + } else if (zlo->state == Zlo_deleting) { + ret = -EINVAL; + } else { + idr_remove(&zloop_index_idr, zlo->id); + zlo->state = Zlo_deleting; + } + + mutex_unlock(&zloop_ctl_mutex); + if (ret) + return ret; + + del_gendisk(zlo->disk); + put_disk(zlo->disk); + blk_mq_free_tag_set(&zlo->tag_set); + + pr_info("Removed device %d\n", opts->id); + + module_put(THIS_MODULE); + + return 0; +} + +static int zloop_parse_options(struct zloop_options *opts, const char *buf) +{ + substring_t args[MAX_OPT_ARGS]; + char *options, *o, *p; + unsigned int token; + int ret = 0; + + /* Set defaults. */ + opts->mask = 0; + opts->id = ZLOOP_DEF_ID; + opts->capacity = ZLOOP_DEF_ZONE_SIZE * ZLOOP_DEF_NR_ZONES; + opts->zone_size = ZLOOP_DEF_ZONE_SIZE; + opts->nr_conv_zones = ZLOOP_DEF_NR_CONV_ZONES; + opts->nr_queues = ZLOOP_DEF_NR_QUEUES; + opts->queue_depth = ZLOOP_DEF_QUEUE_DEPTH; + opts->buffered_io = ZLOOP_DEF_BUFFERED_IO; + + if (!buf) + return 0; + + /* Skip leading spaces before the options. */ + while (isspace(*buf)) + buf++; + + options = o = kstrdup(buf, GFP_KERNEL); + if (!options) + return -ENOMEM; + + /* Parse the options, doing only some light invalid value checks. 
*/ + while ((p = strsep(&o, ",\n")) != NULL) { + if (!*p) + continue; + + token = match_token(p, zloop_opt_tokens, args); + opts->mask |= token; + switch (token) { + case ZLOOP_OPT_ID: + if (match_int(args, &opts->id)) { + ret = -EINVAL; + goto out; + } + break; + case ZLOOP_OPT_CAPACITY: + if (match_uint(args, &token)) { + ret = -EINVAL; + goto out; + } + if (!token) { + pr_err("Invalid capacity\n"); + ret = -EINVAL; + goto out; + } + opts->capacity = + ((sector_t)token * SZ_1M) >> SECTOR_SHIFT; + break; + case ZLOOP_OPT_ZONE_SIZE: + if (match_uint(args, &token)) { + ret = -EINVAL; + goto out; + } + if (!token || token > ZLOOP_MAX_ZONE_SIZE_MB || + !is_power_of_2(token)) { + pr_err("Invalid zone size %u\n", token); + ret = -EINVAL; + goto out; + } + opts->zone_size = + ((sector_t)token * SZ_1M) >> SECTOR_SHIFT; + break; + case ZLOOP_OPT_ZONE_CAPACITY: + if (match_uint(args, &token)) { + ret = -EINVAL; + goto out; + } + if (!token) { + pr_err("Invalid zone capacity\n"); + ret = -EINVAL; + goto out; + } + opts->zone_capacity = + ((sector_t)token * SZ_1M) >> SECTOR_SHIFT; + break; + case ZLOOP_OPT_NR_CONV_ZONES: + if (match_uint(args, &token)) { + ret = -EINVAL; + goto out; + } + opts->nr_conv_zones = token; + break; + case ZLOOP_OPT_BASE_DIR: + p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } + kfree(opts->base_dir); + opts->base_dir = p; + break; + case ZLOOP_OPT_NR_QUEUES: + if (match_uint(args, &token)) { + ret = -EINVAL; + goto out; + } + if (!token) { + pr_err("Invalid number of queues\n"); + ret = -EINVAL; + goto out; + } + opts->nr_queues = min(token, num_online_cpus()); + break; + case ZLOOP_OPT_QUEUE_DEPTH: + if (match_uint(args, &token)) { + ret = -EINVAL; + goto out; + } + if (!token) { + pr_err("Invalid queue depth\n"); + ret = -EINVAL; + goto out; + } + opts->queue_depth = token; + break; + case ZLOOP_OPT_BUFFERED_IO: + opts->buffered_io = true; + break; + case ZLOOP_OPT_ERR: + default: + pr_warn("unknown parameter or missing value 
'%s'\n", p); + ret = -EINVAL; + goto out; + } + } + + ret = -EINVAL; + if (opts->capacity <= opts->zone_size) { + pr_err("Invalid capacity\n"); + goto out; + } + + if (opts->zone_capacity > opts->zone_size) { + pr_err("Invalid zone capacity\n"); + goto out; + } + + ret = 0; +out: + kfree(options); + return ret; +} + +enum { + ZLOOP_CTL_ADD, + ZLOOP_CTL_REMOVE, +}; + +static struct zloop_ctl_op { + int code; + const char *name; +} zloop_ctl_ops[] = { + { ZLOOP_CTL_ADD, "add" }, + { ZLOOP_CTL_REMOVE, "remove" }, + { -1, NULL }, +}; + +static ssize_t zloop_ctl_write(struct file *file, const char __user *ubuf, + size_t count, loff_t *pos) +{ + struct zloop_options opts = { }; + struct zloop_ctl_op *op; + const char *buf, *opts_buf; + int i, ret; + + if (count > PAGE_SIZE) + return -ENOMEM; + + buf = memdup_user_nul(ubuf, count); + if (IS_ERR(buf)) + return PTR_ERR(buf); + + for (i = 0; i < ARRAY_SIZE(zloop_ctl_ops); i++) { + op = &zloop_ctl_ops[i]; + if (!op->name) { + pr_err("Invalid operation\n"); + ret = -EINVAL; + goto out; + } + if (!strncmp(buf, op->name, strlen(op->name))) + break; + } + + if (count <= strlen(op->name)) + opts_buf = NULL; + else + opts_buf = buf + strlen(op->name); + + ret = zloop_parse_options(&opts, opts_buf); + if (ret) { + pr_err("Failed to parse options\n"); + goto out; + } + + switch (op->code) { + case ZLOOP_CTL_ADD: + ret = zloop_ctl_add(&opts); + break; + case ZLOOP_CTL_REMOVE: + ret = zloop_ctl_remove(&opts); + break; + default: + pr_err("Invalid operation\n"); + ret = -EINVAL; + goto out; + } + +out: + kfree(opts.base_dir); + kfree(buf); + return ret ? 
ret : count; +} + +static int zloop_ctl_show(struct seq_file *seq_file, void *private) +{ + const struct match_token *tok; + int i; + + /* Add operation */ + seq_printf(seq_file, "%s ", zloop_ctl_ops[0].name); + for (i = 0; i < ARRAY_SIZE(zloop_opt_tokens); i++) { + tok = &zloop_opt_tokens[i]; + if (!tok->pattern) + break; + if (i) + seq_putc(seq_file, ','); + seq_puts(seq_file, tok->pattern); + } + seq_putc(seq_file, '\n'); + + /* Remove operation */ + seq_puts(seq_file, zloop_ctl_ops[1].name); + seq_puts(seq_file, " id=%d\n"); + + return 0; +} + +static int zloop_ctl_open(struct inode *inode, struct file *file) +{ + file->private_data = NULL; + return single_open(file, zloop_ctl_show, NULL); +} + +static int zloop_ctl_release(struct inode *inode, struct file *file) +{ + return single_release(inode, file); +} + +static const struct file_operations zloop_ctl_fops = { + .owner = THIS_MODULE, + .open = zloop_ctl_open, + .release = zloop_ctl_release, + .write = zloop_ctl_write, + .read = seq_read, +}; + +static struct miscdevice zloop_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = "zloop-control", + .fops = &zloop_ctl_fops, +}; + +static int __init zloop_init(void) +{ + int ret; + + ret = misc_register(&zloop_misc); + if (ret) { + pr_err("Failed to register misc device: %d\n", ret); + return ret; + } + pr_info("Module loaded\n"); + + return 0; +} + +static void __exit zloop_exit(void) +{ + misc_deregister(&zloop_misc); + idr_destroy(&zloop_index_idr); +} + +module_init(zloop_init); +module_exit(zloop_exit); + +MODULE_DESCRIPTION("Zoned loopback device"); +MODULE_LICENSE("GPL"); diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index a42dedb78e0a..256b451bbe06 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -3014,9 +3014,8 @@ static void btusb_coredump_qca(struct hci_dev *hdev) static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) { int ret = 0; + unsigned int skip = 0; u8 pkt_type; - u8 *sk_ptr; - 
unsigned int sk_len; u16 seqno; u32 dump_size; @@ -3025,18 +3024,13 @@ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) struct usb_device *udev = btdata->udev; pkt_type = hci_skb_pkt_type(skb); - sk_ptr = skb->data; - sk_len = skb->len; + skip = sizeof(struct hci_event_hdr); + if (pkt_type == HCI_ACLDATA_PKT) + skip += sizeof(struct hci_acl_hdr); - if (pkt_type == HCI_ACLDATA_PKT) { - sk_ptr += HCI_ACL_HDR_SIZE; - sk_len -= HCI_ACL_HDR_SIZE; - } - - sk_ptr += HCI_EVENT_HDR_SIZE; - sk_len -= HCI_EVENT_HDR_SIZE; + skb_pull(skb, skip); + dump_hdr = (struct qca_dump_hdr *)skb->data; - dump_hdr = (struct qca_dump_hdr *)sk_ptr; seqno = le16_to_cpu(dump_hdr->seqno); if (seqno == 0) { set_bit(BTUSB_HW_SSR_ACTIVE, &btdata->flags); @@ -3056,16 +3050,15 @@ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) btdata->qca_dump.ram_dump_size = dump_size; btdata->qca_dump.ram_dump_seqno = 0; - sk_ptr += offsetof(struct qca_dump_hdr, data0); - sk_len -= offsetof(struct qca_dump_hdr, data0); + + skb_pull(skb, offsetof(struct qca_dump_hdr, data0)); usb_disable_autosuspend(udev); bt_dev_info(hdev, "%s memdump size(%u)\n", (pkt_type == HCI_ACLDATA_PKT) ? "ACL" : "event", dump_size); } else { - sk_ptr += offsetof(struct qca_dump_hdr, data); - sk_len -= offsetof(struct qca_dump_hdr, data); + skb_pull(skb, offsetof(struct qca_dump_hdr, data)); } if (!btdata->qca_dump.ram_dump_size) { @@ -3085,7 +3078,6 @@ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) return ret; } - skb_pull(skb, skb->len - sk_len); hci_devcd_append(hdev, skb); btdata->qca_dump.ram_dump_seqno++; if (seqno == QCA_LAST_SEQUENCE_NUM) { @@ -3113,68 +3105,58 @@ out: /* Return: true if the ACL packet is a dump packet, false otherwise. 
*/ static bool acl_pkt_is_dump_qca(struct hci_dev *hdev, struct sk_buff *skb) { - u8 *sk_ptr; - unsigned int sk_len; - struct hci_event_hdr *event_hdr; struct hci_acl_hdr *acl_hdr; struct qca_dump_hdr *dump_hdr; + struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); + bool is_dump = false; - sk_ptr = skb->data; - sk_len = skb->len; - - acl_hdr = hci_acl_hdr(skb); - if (le16_to_cpu(acl_hdr->handle) != QCA_MEMDUMP_ACL_HANDLE) + if (!clone) return false; - sk_ptr += HCI_ACL_HDR_SIZE; - sk_len -= HCI_ACL_HDR_SIZE; - event_hdr = (struct hci_event_hdr *)sk_ptr; - - if ((event_hdr->evt != HCI_VENDOR_PKT) || - (event_hdr->plen != (sk_len - HCI_EVENT_HDR_SIZE))) - return false; + acl_hdr = skb_pull_data(clone, sizeof(*acl_hdr)); + if (!acl_hdr || (le16_to_cpu(acl_hdr->handle) != QCA_MEMDUMP_ACL_HANDLE)) + goto out; - sk_ptr += HCI_EVENT_HDR_SIZE; - sk_len -= HCI_EVENT_HDR_SIZE; + event_hdr = skb_pull_data(clone, sizeof(*event_hdr)); + if (!event_hdr || (event_hdr->evt != HCI_VENDOR_PKT)) + goto out; - dump_hdr = (struct qca_dump_hdr *)sk_ptr; - if ((sk_len < offsetof(struct qca_dump_hdr, data)) || - (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) || - (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) - return false; + dump_hdr = skb_pull_data(clone, sizeof(*dump_hdr)); + if (!dump_hdr || (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) || + (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) + goto out; - return true; + is_dump = true; +out: + consume_skb(clone); + return is_dump; } /* Return: true if the event packet is a dump packet, false otherwise. 
*/ static bool evt_pkt_is_dump_qca(struct hci_dev *hdev, struct sk_buff *skb) { - u8 *sk_ptr; - unsigned int sk_len; - struct hci_event_hdr *event_hdr; struct qca_dump_hdr *dump_hdr; + struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); + bool is_dump = false; - sk_ptr = skb->data; - sk_len = skb->len; - - event_hdr = hci_event_hdr(skb); - - if ((event_hdr->evt != HCI_VENDOR_PKT) - || (event_hdr->plen != (sk_len - HCI_EVENT_HDR_SIZE))) + if (!clone) return false; - sk_ptr += HCI_EVENT_HDR_SIZE; - sk_len -= HCI_EVENT_HDR_SIZE; + event_hdr = skb_pull_data(clone, sizeof(*event_hdr)); + if (!event_hdr || (event_hdr->evt != HCI_VENDOR_PKT)) + goto out; - dump_hdr = (struct qca_dump_hdr *)sk_ptr; - if ((sk_len < offsetof(struct qca_dump_hdr, data)) || - (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) || - (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) - return false; + dump_hdr = skb_pull_data(clone, sizeof(*dump_hdr)); + if (!dump_hdr || (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) || + (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) + goto out; - return true; + is_dump = true; +out: + consume_skb(clone); + return is_dump; } static int btusb_recv_acl_qca(struct hci_dev *hdev, struct sk_buff *skb) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index b163e043c687..21a10552da61 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -3677,8 +3677,7 @@ static void cdrom_sysctl_register(void) static void cdrom_sysctl_unregister(void) { - if (cdrom_sysctl_header) - unregister_sysctl_table(cdrom_sysctl_header); + unregister_sysctl_table(cdrom_sysctl_header); } #else /* CONFIG_SYSCTL */ diff --git a/drivers/clk/clk-s2mps11.c b/drivers/clk/clk-s2mps11.c index 014db6386624..8ddf3a9a53df 100644 --- a/drivers/clk/clk-s2mps11.c +++ b/drivers/clk/clk-s2mps11.c @@ -137,6 +137,8 @@ static int s2mps11_clk_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; + clk_data->num = S2MPS11_CLKS_NUM; + switch (hwid) { case S2MPS11X: s2mps11_reg = 
S2MPS11_REG_RTC_CTRL; @@ -186,7 +188,6 @@ static int s2mps11_clk_probe(struct platform_device *pdev) clk_data->hws[i] = &s2mps11_clks[i].hw; } - clk_data->num = S2MPS11_CLKS_NUM; of_clk_add_hw_provider(s2mps11_clks->clk_np, of_clk_hw_onecell_get, clk_data); diff --git a/drivers/clk/rockchip/clk-rk3576.c b/drivers/clk/rockchip/clk-rk3576.c index 595e010341f7..be703f250197 100644 --- a/drivers/clk/rockchip/clk-rk3576.c +++ b/drivers/clk/rockchip/clk-rk3576.c @@ -541,6 +541,8 @@ static struct rockchip_clk_branch rk3576_clk_branches[] __initdata = { RK3576_CLKGATE_CON(5), 14, GFLAGS), GATE(CLK_OTPC_AUTO_RD_G, "clk_otpc_auto_rd_g", "xin24m", 0, RK3576_CLKGATE_CON(5), 15, GFLAGS), + GATE(CLK_OTP_PHY_G, "clk_otp_phy_g", "xin24m", 0, + RK3576_CLKGATE_CON(6), 0, GFLAGS), COMPOSITE(CLK_MIPI_CAMERAOUT_M0, "clk_mipi_cameraout_m0", mux_24m_spll_gpll_cpll_p, 0, RK3576_CLKSEL_CON(38), 8, 2, MFLAGS, 0, 8, DFLAGS, RK3576_CLKGATE_CON(6), 3, GFLAGS), diff --git a/drivers/clk/sunxi-ng/ccu-sun20i-d1.c b/drivers/clk/sunxi-ng/ccu-sun20i-d1.c index bb66c906ebbb..e83d4fd40240 100644 --- a/drivers/clk/sunxi-ng/ccu-sun20i-d1.c +++ b/drivers/clk/sunxi-ng/ccu-sun20i-d1.c @@ -412,19 +412,23 @@ static const struct clk_parent_data mmc0_mmc1_parents[] = { { .hw = &pll_periph0_2x_clk.common.hw }, { .hw = &pll_audio1_div2_clk.common.hw }, }; -static SUNXI_CCU_MP_DATA_WITH_MUX_GATE(mmc0_clk, "mmc0", mmc0_mmc1_parents, 0x830, - 0, 4, /* M */ - 8, 2, /* P */ - 24, 3, /* mux */ - BIT(31), /* gate */ - 0); - -static SUNXI_CCU_MP_DATA_WITH_MUX_GATE(mmc1_clk, "mmc1", mmc0_mmc1_parents, 0x834, - 0, 4, /* M */ - 8, 2, /* P */ - 24, 3, /* mux */ - BIT(31), /* gate */ - 0); +static SUNXI_CCU_MP_DATA_WITH_MUX_GATE_POSTDIV(mmc0_clk, "mmc0", + mmc0_mmc1_parents, 0x830, + 0, 4, /* M */ + 8, 2, /* P */ + 24, 3, /* mux */ + BIT(31), /* gate */ + 2, /* post-div */ + 0); + +static SUNXI_CCU_MP_DATA_WITH_MUX_GATE_POSTDIV(mmc1_clk, "mmc1", + mmc0_mmc1_parents, 0x834, + 0, 4, /* M */ + 8, 2, /* P */ + 24, 3, /* mux */ + 
BIT(31), /* gate */ + 2, /* post-div */ + 0); static const struct clk_parent_data mmc2_parents[] = { { .fw_name = "hosc" }, @@ -433,12 +437,14 @@ static const struct clk_parent_data mmc2_parents[] = { { .hw = &pll_periph0_800M_clk.common.hw }, { .hw = &pll_audio1_div2_clk.common.hw }, }; -static SUNXI_CCU_MP_DATA_WITH_MUX_GATE(mmc2_clk, "mmc2", mmc2_parents, 0x838, - 0, 4, /* M */ - 8, 2, /* P */ - 24, 3, /* mux */ - BIT(31), /* gate */ - 0); +static SUNXI_CCU_MP_DATA_WITH_MUX_GATE_POSTDIV(mmc2_clk, "mmc2", mmc2_parents, + 0x838, + 0, 4, /* M */ + 8, 2, /* P */ + 24, 3, /* mux */ + BIT(31), /* gate */ + 2, /* post-div */ + 0); static SUNXI_CCU_GATE_HWS(bus_mmc0_clk, "bus-mmc0", psi_ahb_hws, 0x84c, BIT(0), 0); diff --git a/drivers/clk/sunxi-ng/ccu_mp.h b/drivers/clk/sunxi-ng/ccu_mp.h index b35aeec70484..bb09c649bfa3 100644 --- a/drivers/clk/sunxi-ng/ccu_mp.h +++ b/drivers/clk/sunxi-ng/ccu_mp.h @@ -52,6 +52,28 @@ struct ccu_mp { } \ } +#define SUNXI_CCU_MP_DATA_WITH_MUX_GATE_POSTDIV(_struct, _name, _parents, \ + _reg, \ + _mshift, _mwidth, \ + _pshift, _pwidth, \ + _muxshift, _muxwidth, \ + _gate, _postdiv, _flags)\ + struct ccu_mp _struct = { \ + .enable = _gate, \ + .m = _SUNXI_CCU_DIV(_mshift, _mwidth), \ + .p = _SUNXI_CCU_DIV(_pshift, _pwidth), \ + .mux = _SUNXI_CCU_MUX(_muxshift, _muxwidth), \ + .fixed_post_div = _postdiv, \ + .common = { \ + .reg = _reg, \ + .features = CCU_FEATURE_FIXED_POSTDIV, \ + .hw.init = CLK_HW_INIT_PARENTS_DATA(_name, \ + _parents, \ + &ccu_mp_ops, \ + _flags), \ + } \ + } + #define SUNXI_CCU_MP_WITH_MUX_GATE(_struct, _name, _parents, _reg, \ _mshift, _mwidth, \ _pshift, _pwidth, \ @@ -109,8 +131,7 @@ struct ccu_mp { _mshift, _mwidth, \ _pshift, _pwidth, \ _muxshift, _muxwidth, \ - _gate, _features, \ - _flags) \ + _gate, _flags, _features) \ struct ccu_mp _struct = { \ .enable = _gate, \ .m = _SUNXI_CCU_DIV(_mshift, _mwidth), \ diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 5f8d010516f0..b1ef4546346d 100644 
--- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -320,8 +320,9 @@ void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence, count++; dma_resv_list_set(fobj, i, fence, usage); - /* pointer update must be visible before we extend the num_fences */ - smp_store_mb(fobj->num_fences, count); + /* fence update must be visible before we extend the num_fences */ + smp_wmb(); + fobj->num_fences = count; } EXPORT_SYMBOL(dma_resv_add_fence); diff --git a/drivers/dma/amd/ptdma/ptdma-dmaengine.c b/drivers/dma/amd/ptdma/ptdma-dmaengine.c index 715ac3ae067b..81339664036f 100644 --- a/drivers/dma/amd/ptdma/ptdma-dmaengine.c +++ b/drivers/dma/amd/ptdma/ptdma-dmaengine.c @@ -342,6 +342,9 @@ static void pt_cmd_callback_work(void *data, int err) struct pt_dma_chan *chan; unsigned long flags; + if (!desc) + return; + dma_chan = desc->vd.tx.chan; chan = to_pt_chan(dma_chan); @@ -355,16 +358,14 @@ static void pt_cmd_callback_work(void *data, int err) desc->status = DMA_ERROR; spin_lock_irqsave(&chan->vc.lock, flags); - if (desc) { - if (desc->status != DMA_COMPLETE) { - if (desc->status != DMA_ERROR) - desc->status = DMA_COMPLETE; + if (desc->status != DMA_COMPLETE) { + if (desc->status != DMA_ERROR) + desc->status = DMA_COMPLETE; - dma_cookie_complete(tx_desc); - dma_descriptor_unmap(tx_desc); - } else { - tx_desc = NULL; - } + dma_cookie_complete(tx_desc); + dma_descriptor_unmap(tx_desc); + } else { + tx_desc = NULL; } spin_unlock_irqrestore(&chan->vc.lock, flags); diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index d891dfca358e..91b2fbc0b864 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -841,9 +841,9 @@ static int dmatest_func(void *data) } else { dma_async_issue_pending(chan); - wait_event_timeout(thread->done_wait, - done->done, - msecs_to_jiffies(params->timeout)); + wait_event_freezable_timeout(thread->done_wait, + done->done, + msecs_to_jiffies(params->timeout)); status = dma_async_is_tx_complete(chan, cookie, NULL, 
NULL); diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c index 756d67325db5..66bfa28d984e 100644 --- a/drivers/dma/fsl-edma-main.c +++ b/drivers/dma/fsl-edma-main.c @@ -57,7 +57,7 @@ static irqreturn_t fsl_edma3_tx_handler(int irq, void *dev_id) intr = edma_readl_chreg(fsl_chan, ch_int); if (!intr) - return IRQ_HANDLED; + return IRQ_NONE; edma_writel_chreg(fsl_chan, 1, ch_int); diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index ff94ee892339..6d12033649f8 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -222,7 +222,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) struct idxd_wq *wq; struct device *dev, *fdev; int rc = 0; - struct iommu_sva *sva; + struct iommu_sva *sva = NULL; unsigned int pasid; struct idxd_cdev *idxd_cdev; @@ -317,7 +317,7 @@ failed_set_pasid: if (device_user_pasid_enabled(idxd)) idxd_xa_pasid_remove(ctx); failed_get_pasid: - if (device_user_pasid_enabled(idxd)) + if (device_user_pasid_enabled(idxd) && !IS_ERR_OR_NULL(sva)) iommu_sva_unbind_device(sva); failed: mutex_unlock(&wq->wq_lock); @@ -407,6 +407,9 @@ static int idxd_cdev_mmap(struct file *filp, struct vm_area_struct *vma) if (!idxd->user_submission_safe && !capable(CAP_SYS_RAWIO)) return -EPERM; + if (current->mm != ctx->mm) + return -EPERM; + rc = check_vma(wq, vma, __func__); if (rc < 0) return rc; @@ -473,6 +476,9 @@ static ssize_t idxd_cdev_write(struct file *filp, const char __user *buf, size_t ssize_t written = 0; int i; + if (current->mm != ctx->mm) + return -EPERM; + for (i = 0; i < len/sizeof(struct dsa_hw_desc); i++) { int rc = idxd_submit_user_descriptor(ctx, udesc + i); @@ -493,6 +499,9 @@ static __poll_t idxd_cdev_poll(struct file *filp, struct idxd_device *idxd = wq->idxd; __poll_t out = 0; + if (current->mm != ctx->mm) + return POLLNVAL; + poll_wait(filp, &wq->err_queue, wait); spin_lock(&idxd->dev_lock); if (idxd->sw_err.valid) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c 
index fca1d2924999..760b7d81fcd8 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -155,6 +155,25 @@ static void idxd_cleanup_interrupts(struct idxd_device *idxd) pci_free_irq_vectors(pdev); } +static void idxd_clean_wqs(struct idxd_device *idxd) +{ + struct idxd_wq *wq; + struct device *conf_dev; + int i; + + for (i = 0; i < idxd->max_wqs; i++) { + wq = idxd->wqs[i]; + if (idxd->hw.wq_cap.op_config) + bitmap_free(wq->opcap_bmap); + kfree(wq->wqcfg); + conf_dev = wq_confdev(wq); + put_device(conf_dev); + kfree(wq); + } + bitmap_free(idxd->wq_enable_map); + kfree(idxd->wqs); +} + static int idxd_setup_wqs(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; @@ -169,8 +188,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd) idxd->wq_enable_map = bitmap_zalloc_node(idxd->max_wqs, GFP_KERNEL, dev_to_node(dev)); if (!idxd->wq_enable_map) { - kfree(idxd->wqs); - return -ENOMEM; + rc = -ENOMEM; + goto err_bitmap; } for (i = 0; i < idxd->max_wqs; i++) { @@ -189,10 +208,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd) conf_dev->bus = &dsa_bus_type; conf_dev->type = &idxd_wq_device_type; rc = dev_set_name(conf_dev, "wq%d.%d", idxd->id, wq->id); - if (rc < 0) { - put_device(conf_dev); + if (rc < 0) goto err; - } mutex_init(&wq->wq_lock); init_waitqueue_head(&wq->err_queue); @@ -203,7 +220,6 @@ static int idxd_setup_wqs(struct idxd_device *idxd) wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES; wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev)); if (!wq->wqcfg) { - put_device(conf_dev); rc = -ENOMEM; goto err; } @@ -211,9 +227,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd) if (idxd->hw.wq_cap.op_config) { wq->opcap_bmap = bitmap_zalloc(IDXD_MAX_OPCAP_BITS, GFP_KERNEL); if (!wq->opcap_bmap) { - put_device(conf_dev); rc = -ENOMEM; - goto err; + goto err_opcap_bmap; } bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS); } @@ -224,15 +239,46 @@ static int idxd_setup_wqs(struct idxd_device 
*idxd) return 0; - err: +err_opcap_bmap: + kfree(wq->wqcfg); + +err: + put_device(conf_dev); + kfree(wq); + while (--i >= 0) { wq = idxd->wqs[i]; + if (idxd->hw.wq_cap.op_config) + bitmap_free(wq->opcap_bmap); + kfree(wq->wqcfg); conf_dev = wq_confdev(wq); put_device(conf_dev); + kfree(wq); + } + bitmap_free(idxd->wq_enable_map); + +err_bitmap: + kfree(idxd->wqs); + return rc; } +static void idxd_clean_engines(struct idxd_device *idxd) +{ + struct idxd_engine *engine; + struct device *conf_dev; + int i; + + for (i = 0; i < idxd->max_engines; i++) { + engine = idxd->engines[i]; + conf_dev = engine_confdev(engine); + put_device(conf_dev); + kfree(engine); + } + kfree(idxd->engines); +} + static int idxd_setup_engines(struct idxd_device *idxd) { struct idxd_engine *engine; @@ -263,6 +309,7 @@ static int idxd_setup_engines(struct idxd_device *idxd) rc = dev_set_name(conf_dev, "engine%d.%d", idxd->id, engine->id); if (rc < 0) { put_device(conf_dev); + kfree(engine); goto err; } @@ -276,10 +323,26 @@ static int idxd_setup_engines(struct idxd_device *idxd) engine = idxd->engines[i]; conf_dev = engine_confdev(engine); put_device(conf_dev); + kfree(engine); } + kfree(idxd->engines); + return rc; } +static void idxd_clean_groups(struct idxd_device *idxd) +{ + struct idxd_group *group; + int i; + + for (i = 0; i < idxd->max_groups; i++) { + group = idxd->groups[i]; + put_device(group_confdev(group)); + kfree(group); + } + kfree(idxd->groups); +} + static int idxd_setup_groups(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; @@ -310,6 +373,7 @@ static int idxd_setup_groups(struct idxd_device *idxd) rc = dev_set_name(conf_dev, "group%d.%d", idxd->id, group->id); if (rc < 0) { put_device(conf_dev); + kfree(group); goto err; } @@ -334,20 +398,18 @@ static int idxd_setup_groups(struct idxd_device *idxd) while (--i >= 0) { group = idxd->groups[i]; put_device(group_confdev(group)); + kfree(group); } + kfree(idxd->groups); + return rc; } static void 
idxd_cleanup_internals(struct idxd_device *idxd) { - int i; - - for (i = 0; i < idxd->max_groups; i++) - put_device(group_confdev(idxd->groups[i])); - for (i = 0; i < idxd->max_engines; i++) - put_device(engine_confdev(idxd->engines[i])); - for (i = 0; i < idxd->max_wqs; i++) - put_device(wq_confdev(idxd->wqs[i])); + idxd_clean_groups(idxd); + idxd_clean_engines(idxd); + idxd_clean_wqs(idxd); destroy_workqueue(idxd->wq); } @@ -390,7 +452,7 @@ static int idxd_init_evl(struct idxd_device *idxd) static int idxd_setup_internals(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; - int rc, i; + int rc; init_waitqueue_head(&idxd->cmd_waitq); @@ -421,14 +483,11 @@ static int idxd_setup_internals(struct idxd_device *idxd) err_evl: destroy_workqueue(idxd->wq); err_wkq_create: - for (i = 0; i < idxd->max_groups; i++) - put_device(group_confdev(idxd->groups[i])); + idxd_clean_groups(idxd); err_group: - for (i = 0; i < idxd->max_engines; i++) - put_device(engine_confdev(idxd->engines[i])); + idxd_clean_engines(idxd); err_engine: - for (i = 0; i < idxd->max_wqs; i++) - put_device(wq_confdev(idxd->wqs[i])); + idxd_clean_wqs(idxd); err_wqs: return rc; } @@ -528,6 +587,17 @@ static void idxd_read_caps(struct idxd_device *idxd) idxd->hw.iaa_cap.bits = ioread64(idxd->reg_base + IDXD_IAACAP_OFFSET); } +static void idxd_free(struct idxd_device *idxd) +{ + if (!idxd) + return; + + put_device(idxd_confdev(idxd)); + bitmap_free(idxd->opcap_bmap); + ida_free(&idxd_ida, idxd->id); + kfree(idxd); +} + static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data) { struct device *dev = &pdev->dev; @@ -545,28 +615,34 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_d idxd_dev_set_type(&idxd->idxd_dev, idxd->data->type); idxd->id = ida_alloc(&idxd_ida, GFP_KERNEL); if (idxd->id < 0) - return NULL; + goto err_ida; idxd->opcap_bmap = bitmap_zalloc_node(IDXD_MAX_OPCAP_BITS, GFP_KERNEL, dev_to_node(dev)); - if 
(!idxd->opcap_bmap) { - ida_free(&idxd_ida, idxd->id); - return NULL; - } + if (!idxd->opcap_bmap) + goto err_opcap; device_initialize(conf_dev); conf_dev->parent = dev; conf_dev->bus = &dsa_bus_type; conf_dev->type = idxd->data->dev_type; rc = dev_set_name(conf_dev, "%s%d", idxd->data->name_prefix, idxd->id); - if (rc < 0) { - put_device(conf_dev); - return NULL; - } + if (rc < 0) + goto err_name; spin_lock_init(&idxd->dev_lock); spin_lock_init(&idxd->cmd_lock); return idxd; + +err_name: + put_device(conf_dev); + bitmap_free(idxd->opcap_bmap); +err_opcap: + ida_free(&idxd_ida, idxd->id); +err_ida: + kfree(idxd); + + return NULL; } static int idxd_enable_system_pasid(struct idxd_device *idxd) @@ -1190,7 +1266,7 @@ int idxd_pci_probe_alloc(struct idxd_device *idxd, struct pci_dev *pdev, err: pci_iounmap(pdev, idxd->reg_base); err_iomap: - put_device(idxd_confdev(idxd)); + idxd_free(idxd); err_idxd_alloc: pci_disable_device(pdev); return rc; @@ -1232,7 +1308,6 @@ static void idxd_shutdown(struct pci_dev *pdev) static void idxd_remove(struct pci_dev *pdev) { struct idxd_device *idxd = pci_get_drvdata(pdev); - struct idxd_irq_entry *irq_entry; idxd_unregister_devices(idxd); /* @@ -1245,20 +1320,12 @@ static void idxd_remove(struct pci_dev *pdev) get_device(idxd_confdev(idxd)); device_unregister(idxd_confdev(idxd)); idxd_shutdown(pdev); - if (device_pasid_enabled(idxd)) - idxd_disable_system_pasid(idxd); idxd_device_remove_debugfs(idxd); - - irq_entry = idxd_get_ie(idxd, 0); - free_irq(irq_entry->vector, irq_entry); - pci_free_irq_vectors(pdev); + idxd_cleanup(idxd); pci_iounmap(pdev, idxd->reg_base); - if (device_user_pasid_enabled(idxd)) - idxd_disable_sva(pdev); - pci_disable_device(pdev); - destroy_workqueue(idxd->wq); - perfmon_pmu_remove(idxd); put_device(idxd_confdev(idxd)); + idxd_free(idxd); + pci_disable_device(pdev); } static struct pci_driver idxd_pci_driver = { diff --git a/drivers/dma/mediatek/mtk-cqdma.c b/drivers/dma/mediatek/mtk-cqdma.c index 
d5ddb4e30e71..47c8adfdc155 100644 --- a/drivers/dma/mediatek/mtk-cqdma.c +++ b/drivers/dma/mediatek/mtk-cqdma.c @@ -420,15 +420,11 @@ static struct virt_dma_desc *mtk_cqdma_find_active_desc(struct dma_chan *c, { struct mtk_cqdma_vchan *cvc = to_cqdma_vchan(c); struct virt_dma_desc *vd; - unsigned long flags; - spin_lock_irqsave(&cvc->pc->lock, flags); list_for_each_entry(vd, &cvc->pc->queue, node) if (vd->tx.cookie == cookie) { - spin_unlock_irqrestore(&cvc->pc->lock, flags); return vd; } - spin_unlock_irqrestore(&cvc->pc->lock, flags); list_for_each_entry(vd, &cvc->vc.desc_issued, node) if (vd->tx.cookie == cookie) @@ -452,9 +448,11 @@ static enum dma_status mtk_cqdma_tx_status(struct dma_chan *c, if (ret == DMA_COMPLETE || !txstate) return ret; + spin_lock_irqsave(&cvc->pc->lock, flags); spin_lock_irqsave(&cvc->vc.lock, flags); vd = mtk_cqdma_find_active_desc(c, cookie); spin_unlock_irqrestore(&cvc->vc.lock, flags); + spin_unlock_irqrestore(&cvc->pc->lock, flags); if (vd) { cvd = to_cqdma_vdesc(vd); diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index b223a7aacb0c..b6255c0601bb 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -1091,8 +1091,11 @@ static void udma_check_tx_completion(struct work_struct *work) u32 residue_diff; ktime_t time_diff; unsigned long delay; + unsigned long flags; while (1) { + spin_lock_irqsave(&uc->vc.lock, flags); + if (uc->desc) { /* Get previous residue and time stamp */ residue_diff = uc->tx_drain.residue; @@ -1127,6 +1130,8 @@ static void udma_check_tx_completion(struct work_struct *work) break; } + spin_unlock_irqrestore(&uc->vc.lock, flags); + usleep_range(ktime_to_us(delay), ktime_to_us(delay) + 10); continue; @@ -1143,6 +1148,8 @@ static void udma_check_tx_completion(struct work_struct *work) break; } + + spin_unlock_irqrestore(&uc->vc.lock, flags); } static irqreturn_t udma_ring_irq_handler(int irq, void *data) @@ -4246,7 +4253,6 @@ static struct dma_chan *udma_of_xlate(struct 
of_phandle_args *dma_spec, struct of_dma *ofdma) { struct udma_dev *ud = ofdma->of_dma_data; - dma_cap_mask_t mask = ud->ddev.cap_mask; struct udma_filter_param filter_param; struct dma_chan *chan; @@ -4278,7 +4284,7 @@ static struct dma_chan *udma_of_xlate(struct of_phandle_args *dma_spec, } } - chan = __dma_request_channel(&mask, udma_dma_filter_fn, &filter_param, + chan = __dma_request_channel(&ud->ddev.cap_mask, udma_dma_filter_fn, &filter_param, ofdma->of_node); if (!chan) { dev_err(ud->dev, "get channel fail in %s.\n", __func__); diff --git a/drivers/firmware/samsung/exynos-acpm.c b/drivers/firmware/samsung/exynos-acpm.c index a85b2dbdd9f0..15e991b99f5a 100644 --- a/drivers/firmware/samsung/exynos-acpm.c +++ b/drivers/firmware/samsung/exynos-acpm.c @@ -185,6 +185,29 @@ struct acpm_match_data { #define handle_to_acpm_info(h) container_of(h, struct acpm_info, handle) /** + * acpm_get_saved_rx() - get the response if it was already saved. + * @achan: ACPM channel info. + * @xfer: reference to the transfer to get response for. + * @tx_seqnum: xfer TX sequence number. + */ +static void acpm_get_saved_rx(struct acpm_chan *achan, + const struct acpm_xfer *xfer, u32 tx_seqnum) +{ + const struct acpm_rx_data *rx_data = &achan->rx_data[tx_seqnum - 1]; + u32 rx_seqnum; + + if (!rx_data->response) + return; + + rx_seqnum = FIELD_GET(ACPM_PROTOCOL_SEQNUM, rx_data->cmd[0]); + + if (rx_seqnum == tx_seqnum) { + memcpy(xfer->rxd, rx_data->cmd, xfer->rxlen); + clear_bit(rx_seqnum - 1, achan->bitmap_seqnum); + } +} + +/** * acpm_get_rx() - get response from RX queue. * @achan: ACPM channel info. * @xfer: reference to the transfer to get response for. @@ -204,15 +227,16 @@ static int acpm_get_rx(struct acpm_chan *achan, const struct acpm_xfer *xfer) rx_front = readl(achan->rx.front); i = readl(achan->rx.rear); - /* Bail out if RX is empty. 
*/ - if (i == rx_front) + tx_seqnum = FIELD_GET(ACPM_PROTOCOL_SEQNUM, xfer->txd[0]); + + if (i == rx_front) { + acpm_get_saved_rx(achan, xfer, tx_seqnum); return 0; + } base = achan->rx.base; mlen = achan->mlen; - tx_seqnum = FIELD_GET(ACPM_PROTOCOL_SEQNUM, xfer->txd[0]); - /* Drain RX queue. */ do { /* Read RX seqnum. */ @@ -259,16 +283,8 @@ static int acpm_get_rx(struct acpm_chan *achan, const struct acpm_xfer *xfer) * If the response was not in this iteration of the queue, check if the * RX data was previously saved. */ - rx_data = &achan->rx_data[tx_seqnum - 1]; - if (!rx_set && rx_data->response) { - rx_seqnum = FIELD_GET(ACPM_PROTOCOL_SEQNUM, - rx_data->cmd[0]); - - if (rx_seqnum == tx_seqnum) { - memcpy(xfer->rxd, rx_data->cmd, xfer->rxlen); - clear_bit(rx_seqnum - 1, achan->bitmap_seqnum); - } - } + if (!rx_set) + acpm_get_saved_rx(achan, xfer, tx_seqnum); return 0; } diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 442435ded020..13cc120cf11f 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -1204,6 +1204,8 @@ static int pca953x_restore_context(struct pca953x_chip *chip) guard(mutex)(&chip->i2c_lock); + if (chip->client->irq > 0) + enable_irq(chip->client->irq); regcache_cache_only(chip->regmap, false); regcache_mark_dirty(chip->regmap); ret = pca953x_regcache_sync(chip); @@ -1216,6 +1218,10 @@ static int pca953x_restore_context(struct pca953x_chip *chip) static void pca953x_save_context(struct pca953x_chip *chip) { guard(mutex)(&chip->i2c_lock); + + /* Disable IRQ to prevent early triggering while regmap "cache only" is on */ + if (chip->client->irq > 0) + disable_irq(chip->client->irq); regcache_cache_only(chip->regmap, true); } diff --git a/drivers/gpio/gpio-virtuser.c b/drivers/gpio/gpio-virtuser.c index 13407fd4f0eb..eab6726953b4 100644 --- a/drivers/gpio/gpio-virtuser.c +++ b/drivers/gpio/gpio-virtuser.c @@ -401,10 +401,15 @@ static ssize_t gpio_virtuser_direction_do_write(struct file *file, char 
buf[32], *trimmed; int ret, dir, val = 0; - ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count); + if (count >= sizeof(buf)) + return -EINVAL; + + ret = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, user_buf, count); if (ret < 0) return ret; + buf[ret] = '\0'; + trimmed = strim(buf); if (strcmp(trimmed, "input") == 0) { @@ -623,12 +628,15 @@ static ssize_t gpio_virtuser_consumer_write(struct file *file, char buf[GPIO_VIRTUSER_NAME_BUF_LEN + 2]; int ret; + if (count >= sizeof(buf)) + return -EINVAL; + ret = simple_write_to_buffer(buf, GPIO_VIRTUSER_NAME_BUF_LEN, ppos, user_buf, count); if (ret < 0) return ret; - buf[strlen(buf) - 1] = '\0'; + buf[ret] = '\0'; ret = gpiod_set_consumer_name(data->ad.desc, buf); if (ret) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index cd4fecbb41f2..113c5d90f2df 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -742,6 +742,12 @@ EXPORT_SYMBOL_GPL(gpiochip_query_valid_mask); bool gpiochip_line_is_valid(const struct gpio_chip *gc, unsigned int offset) { + /* + * hog pins are requested before registering GPIO chip + */ + if (!gc->gpiodev) + return true; + /* No mask means all valid */ if (likely(!gc->gpiodev->valid_mask)) return true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index cfdf558b48b6..02138aa55793 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -109,7 +109,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct drm_exec exec; int r; - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = amdgpu_vm_lock_pd(vm, &exec, 0); if (likely(!r)) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index e74e26b6a4f2..fec9a007533a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -752,6 
+752,18 @@ static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gmc.vram_type = vram_type; adev->gmc.vram_vendor = vram_vendor; + /* The mall_size is already calculated as mall_size_per_umc * num_umc. + * However, for gfx1151, which features a 2-to-1 UMC mapping, + * the result must be multiplied by 2 to determine the actual mall size. + */ + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(11, 5, 1): + adev->gmc.mall_size *= 2; + break; + default: + break; + } + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index a1171e6152ed..f11df9c2ec13 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -1023,6 +1023,10 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst, ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | VCN_RB1_DB_CTRL__EN_MASK); + /* Keeping one read-back to ensure all register writes are done, otherwise + * it may introduce race conditions */ + RREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL); + return 0; } @@ -1205,6 +1209,10 @@ static int vcn_v4_0_5_start(struct amdgpu_vcn_inst *vinst) WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + /* Keeping one read-back to ensure all register writes are done, otherwise + * it may introduce race conditions */ + RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + return 0; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 64df8ca448b3..a187cdb43e7e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -372,6 +372,8 @@ get_crtc_by_otg_inst(struct amdgpu_device *adev, static inline bool is_dc_timing_adjust_needed(struct dm_crtc_state *old_state, struct dm_crtc_state 
*new_state) { + if (new_state->stream->adjust.timing_adjust_pending) + return true; if (new_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED) return true; else if (amdgpu_dm_crtc_vrr_active(old_state) != amdgpu_dm_crtc_vrr_active(new_state)) @@ -3467,11 +3469,6 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) return 0; } - - /* leave display off for S4 sequence */ - if (adev->in_s4) - return 0; - /* Recreate dc_state - DC invalidates it when setting power state to S3. */ dc_state_release(dm_state->context); dm_state->context = dc_state_create(dm->dc, NULL); @@ -12763,7 +12760,8 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( /* The reply is stored in the top nibble of the command. */ payload->reply[0] = (adev->dm.dmub_notify->aux_reply.command >> 4) & 0xF; - if (!payload->write && p_notify->aux_reply.length) + /*write req may receive a byte indicating partially written number as well*/ + if (p_notify->aux_reply.length) memcpy(payload->data, p_notify->aux_reply.data, p_notify->aux_reply.length); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 074b79fd5822..5cdbc86ef8f5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -62,6 +62,7 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, enum aux_return_code_type operation_result; struct amdgpu_device *adev; struct ddc_service *ddc; + uint8_t copy[16]; if (WARN_ON(msg->size > 16)) return -E2BIG; @@ -77,6 +78,11 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, (msg->request & DP_AUX_I2C_WRITE_STATUS_UPDATE) != 0; payload.defer_delay = 0; + if (payload.write) { + memcpy(copy, msg->buffer, msg->size); + payload.data = copy; + } + result = dc_link_aux_transfer_raw(TO_DM_AUX(aux)->ddc_service, &payload, &operation_result); @@ -100,9 +106,9 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, */ if 
(payload.write && result >= 0) { if (result) { - /*one byte indicating partially written bytes. Force 0 to retry*/ - drm_info(adev_to_drm(adev), "amdgpu: AUX partially written\n"); - result = 0; + /*one byte indicating partially written bytes*/ + drm_dbg_dp(adev_to_drm(adev), "amdgpu: AUX partially written\n"); + result = payload.data[0]; } else if (!payload.reply[0]) /*I2C_ACK|AUX_ACK*/ result = msg->size; @@ -127,11 +133,11 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, break; } - drm_info(adev_to_drm(adev), "amdgpu: DP AUX transfer fail:%d\n", operation_result); + drm_dbg_dp(adev_to_drm(adev), "amdgpu: DP AUX transfer fail:%d\n", operation_result); } if (payload.reply[0]) - drm_info(adev_to_drm(adev), "amdgpu: AUX reply command not ACK: 0x%02x.", + drm_dbg_dp(adev_to_drm(adev), "amdgpu: AUX reply command not ACK: 0x%02x.", payload.reply[0]); return result; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 28d1353f403d..ba4ce8a63158 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -439,9 +439,12 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, * Don't adjust DRR while there's bandwidth optimizations pending to * avoid conflicting with firmware updates. 
*/ - if (dc->ctx->dce_version > DCE_VERSION_MAX) - if (dc->optimized_required || dc->wm_optimized_required) + if (dc->ctx->dce_version > DCE_VERSION_MAX) { + if (dc->optimized_required || dc->wm_optimized_required) { + stream->adjust.timing_adjust_pending = true; return false; + } + } dc_exit_ips_for_hw_access(dc); @@ -3168,7 +3171,8 @@ static void copy_stream_update_to_stream(struct dc *dc, if (update->crtc_timing_adjust) { if (stream->adjust.v_total_min != update->crtc_timing_adjust->v_total_min || - stream->adjust.v_total_max != update->crtc_timing_adjust->v_total_max) + stream->adjust.v_total_max != update->crtc_timing_adjust->v_total_max || + stream->adjust.timing_adjust_pending) update->crtc_timing_adjust->timing_adjust_pending = true; stream->adjust = *update->crtc_timing_adjust; update->crtc_timing_adjust->timing_adjust_pending = false; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index d9159ca55412..92f0a099d089 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -195,9 +195,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .dcn_downspread_percent = 0.5, .gpuvm_min_page_size_bytes = 4096, .hostvm_min_page_size_bytes = 4096, - .do_urgent_latency_adjustment = 1, + .do_urgent_latency_adjustment = 0, .urgent_latency_adjustment_fabric_clock_component_us = 0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, }; void dcn35_build_wm_range_table_fpu(struct clk_mgr *clk_mgr) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 0c8ec30ea672..731fbd4bc600 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -910,7 +910,7 @@ static 
void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm } //TODO : Could be possibly moved to a common helper layer. -static bool dml21_wrapper_get_plane_id(const struct dc_state *context, const struct dc_plane_state *plane, unsigned int *plane_id) +static bool dml21_wrapper_get_plane_id(const struct dc_state *context, unsigned int stream_id, const struct dc_plane_state *plane, unsigned int *plane_id) { int i, j; @@ -918,10 +918,12 @@ static bool dml21_wrapper_get_plane_id(const struct dc_state *context, const str return false; for (i = 0; i < context->stream_count; i++) { - for (j = 0; j < context->stream_status[i].plane_count; j++) { - if (context->stream_status[i].plane_states[j] == plane) { - *plane_id = (i << 16) | j; - return true; + if (context->streams[i]->stream_id == stream_id) { + for (j = 0; j < context->stream_status[i].plane_count; j++) { + if (context->stream_status[i].plane_states[j] == plane) { + *plane_id = (i << 16) | j; + return true; + } } } } @@ -944,14 +946,14 @@ static unsigned int map_stream_to_dml21_display_cfg(const struct dml2_context *d return location; } -static unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, +static unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, unsigned int stream_id, const struct dc_plane_state *plane, const struct dc_state *context) { unsigned int plane_id; int i = 0; int location = -1; - if (!dml21_wrapper_get_plane_id(context, plane, &plane_id)) { + if (!dml21_wrapper_get_plane_id(context, stream_id, plane, &plane_id)) { ASSERT(false); return -1; } @@ -1037,7 +1039,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location; } else { for (plane_index = 0; plane_index < context->stream_status[stream_index].plane_count; plane_index++) { - disp_cfg_plane_location = map_plane_to_dml21_display_cfg(dml_ctx, 
context->stream_status[stream_index].plane_states[plane_index], context); + disp_cfg_plane_location = map_plane_to_dml21_display_cfg(dml_ctx, context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], context); if (disp_cfg_plane_location < 0) disp_cfg_plane_location = dml_dispcfg->num_planes++; @@ -1048,7 +1050,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s populate_dml21_plane_config_from_plane_state(dml_ctx, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location], context->stream_status[stream_index].plane_states[plane_index], context, stream_index); dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location; - if (dml21_wrapper_get_plane_id(context, context->stream_status[stream_index].plane_states[plane_index], &dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location])) + if (dml21_wrapper_get_plane_id(context, context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], &dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location])) dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true; /* apply forced pstate policy */ diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c index 1236e0f9a256..712aff7e17f7 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c @@ -120,10 +120,11 @@ void dpp401_set_cursor_attributes( enum dc_cursor_color_format color_format = cursor_attributes->color_format; int cur_rom_en = 0; - // DCN4 should always do Cursor degamma for Cursor Color modes if (color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA || color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) { - cur_rom_en = 1; + if 
(cursor_attributes->attribute_flags.bits.ENABLE_CURSOR_DEGAMMA) { + cur_rom_en = 1; + } } REG_UPDATE_3(CURSOR0_CONTROL, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 5489f3d431f6..3af6a3402b89 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1980,9 +1980,9 @@ void dcn401_program_pipe( dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->hubp_regs.det_size); } - if (pipe_ctx->update_flags.raw || - (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.raw) || - pipe_ctx->stream->update_flags.raw) + if (pipe_ctx->plane_state && (pipe_ctx->update_flags.raw || + pipe_ctx->plane_state->update_flags.raw || + pipe_ctx->stream->update_flags.raw)) dc->hwss.update_dchubp_dpp(dc, pipe_ctx, context); if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable || diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 268626e73c54..53c961f86d43 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -148,6 +148,7 @@ void link_blank_dp_stream(struct dc_link *link, bool hw_init) void link_set_all_streams_dpms_off_for_link(struct dc_link *link) { struct pipe_ctx *pipes[MAX_PIPES]; + struct dc_stream_state *streams[MAX_PIPES]; struct dc_state *state = link->dc->current_state; uint8_t count; int i; @@ -160,10 +161,18 @@ void link_set_all_streams_dpms_off_for_link(struct dc_link *link) link_get_master_pipes_with_dpms_on(link, state, &count, pipes); + /* The subsequent call to dc_commit_updates_for_stream for a full update + * will release the current state and swap to a new state. Releasing the + * current state results in the stream pointers in the pipe_ctx structs + * to be zero'd. Hence, cache all streams prior to dc_commit_updates_for_stream. 
+ */ + for (i = 0; i < count; i++) + streams[i] = pipes[i]->stream; + for (i = 0; i < count; i++) { - stream_update.stream = pipes[i]->stream; + stream_update.stream = streams[i]; dc_commit_updates_for_stream(link->ctx->dc, NULL, 0, - pipes[i]->stream, &stream_update, + streams[i], &stream_update, state); } diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 13bc4c290b17..9edb3247c767 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -6596,6 +6596,7 @@ static void drm_reset_display_info(struct drm_connector *connector) info->has_hdmi_infoframe = false; info->rgb_quant_range_selectable = false; memset(&info->hdmi, 0, sizeof(info->hdmi)); + memset(&connector->hdr_sink_metadata, 0, sizeof(connector->hdr_sink_metadata)); info->edid_hdmi_rgb444_dc_modes = 0; info->edid_hdmi_ycbcr444_dc_modes = 0; diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index de424e670995..4b2f32889f00 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -1118,6 +1118,10 @@ static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, lockdep_assert_held(&gpusvm->notifier_lock); if (range->flags.has_dma_mapping) { + struct drm_gpusvm_range_flags flags = { + .__flags = range->flags.__flags, + }; + for (i = 0, j = 0; i < npages; j++) { struct drm_pagemap_device_addr *addr = &range->dma_addr[j]; @@ -1131,8 +1135,12 @@ static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, dev, *addr); i += 1 << addr->order; } - range->flags.has_devmem_pages = false; - range->flags.has_dma_mapping = false; + + /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ + flags.has_devmem_pages = false; + flags.has_dma_mapping = false; + WRITE_ONCE(range->flags.__flags, flags.__flags); + range->dpagemap = NULL; } } @@ -1334,6 +1342,7 @@ int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, int err = 0; struct dev_pagemap *pagemap; struct drm_pagemap *dpagemap; + struct 
drm_gpusvm_range_flags flags; retry: hmm_range.notifier_seq = mmu_interval_read_begin(notifier); @@ -1378,7 +1387,8 @@ map_pages: */ drm_gpusvm_notifier_lock(gpusvm); - if (range->flags.unmapped) { + flags.__flags = range->flags.__flags; + if (flags.unmapped) { drm_gpusvm_notifier_unlock(gpusvm); err = -EFAULT; goto err_free; @@ -1454,6 +1464,11 @@ map_pages: goto err_unmap; } + if (ctx->devmem_only) { + err = -EFAULT; + goto err_unmap; + } + addr = dma_map_page(gpusvm->drm->dev, page, 0, PAGE_SIZE << order, @@ -1469,14 +1484,17 @@ map_pages: } i += 1 << order; num_dma_mapped = i; - range->flags.has_dma_mapping = true; + flags.has_dma_mapping = true; } if (zdd) { - range->flags.has_devmem_pages = true; + flags.has_devmem_pages = true; range->dpagemap = dpagemap; } + /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ + WRITE_ONCE(range->flags.__flags, flags.__flags); + drm_gpusvm_notifier_unlock(gpusvm); kvfree(pfns); set_seqno: @@ -1765,6 +1783,8 @@ int drm_gpusvm_migrate_to_devmem(struct drm_gpusvm *gpusvm, goto err_finalize; /* Upon success bind devmem allocation to range and zdd */ + devmem_allocation->timeslice_expiration = get_jiffies_64() + + msecs_to_jiffies(ctx->timeslice_ms); zdd->devmem_allocation = devmem_allocation; /* Owns ref */ err_finalize: @@ -1985,6 +2005,13 @@ static int __drm_gpusvm_migrate_to_ram(struct vm_area_struct *vas, void *buf; int i, err = 0; + if (page) { + zdd = page->zone_device_data; + if (time_before64(get_jiffies_64(), + zdd->devmem_allocation->timeslice_expiration)) + return 0; + } + start = ALIGN_DOWN(fault_addr, size); end = ALIGN(fault_addr + 1, size); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index ae3343c81a64..5e784db9f315 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -305,36 +305,20 @@ void __shmem_writeback(size_t size, struct address_space *mapping) .range_end = LLONG_MAX, .for_reclaim = 1, }; 
- unsigned long i; + struct folio *folio = NULL; + int error = 0; /* * Leave mmapings intact (GTT will have been revoked on unbinding, - * leaving only CPU mmapings around) and add those pages to the LRU + * leaving only CPU mmapings around) and add those folios to the LRU * instead of invoking writeback so they are aged and paged out * as normal. */ - - /* Begin writeback on each dirty page */ - for (i = 0; i < size >> PAGE_SHIFT; i++) { - struct page *page; - - page = find_lock_page(mapping, i); - if (!page) - continue; - - if (!page_mapped(page) && clear_page_dirty_for_io(page)) { - int ret; - - SetPageReclaim(page); - ret = mapping->a_ops->writepage(page, &wbc); - if (!PageWriteback(page)) - ClearPageReclaim(page); - if (!ret) - goto put; - } - unlock_page(page); -put: - put_page(page); + while ((folio = writeback_iter(mapping, &wbc, folio, &error))) { + if (folio_mapped(folio)) + folio_redirty_for_writepage(&wbc, folio); + else + error = shmem_writeout(folio, &wbc); } } diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c index 7752d8ac85f0..c08fa93e50a3 100644 --- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c +++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c @@ -75,7 +75,7 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi, unsigned long long venc_freq; unsigned long long hdmi_freq; - vclk_freq = mode->clock * 1000; + vclk_freq = mode->clock * 1000ULL; /* For 420, pixel clock is half unlike venc clock */ if (encoder_hdmi->output_bus_fmt == MEDIA_BUS_FMT_UYYVYY8_0_5X24) @@ -123,7 +123,7 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri struct meson_encoder_hdmi *encoder_hdmi = bridge_to_meson_encoder_hdmi(bridge); struct meson_drm *priv = encoder_hdmi->priv; bool is_hdmi2_sink = display_info->hdmi.scdc.supported; - unsigned long long clock = mode->clock * 1000; + unsigned long long clock = mode->clock * 1000ULL; unsigned long long phy_freq; unsigned 
long long vclk_freq; unsigned long long venc_freq; diff --git a/drivers/gpu/drm/tiny/panel-mipi-dbi.c b/drivers/gpu/drm/tiny/panel-mipi-dbi.c index 0460ecaef4bd..23914a9f7fd3 100644 --- a/drivers/gpu/drm/tiny/panel-mipi-dbi.c +++ b/drivers/gpu/drm/tiny/panel-mipi-dbi.c @@ -390,7 +390,10 @@ static int panel_mipi_dbi_spi_probe(struct spi_device *spi) spi_set_drvdata(spi, drm); - drm_client_setup(drm, NULL); + if (bpp == 16) + drm_client_setup_with_fourcc(drm, DRM_FORMAT_RGB565); + else + drm_client_setup_with_fourcc(drm, DRM_FORMAT_RGB888); return 0; } diff --git a/drivers/gpu/drm/ttm/ttm_backup.c b/drivers/gpu/drm/ttm/ttm_backup.c index 9e2d72c447ee..ffaab68bd5dd 100644 --- a/drivers/gpu/drm/ttm/ttm_backup.c +++ b/drivers/gpu/drm/ttm/ttm_backup.c @@ -120,13 +120,13 @@ ttm_backup_backup_page(struct file *backup, struct page *page, .for_reclaim = 1, }; folio_set_reclaim(to_folio); - ret = mapping->a_ops->writepage(folio_file_page(to_folio, idx), &wbc); + ret = shmem_writeout(to_folio, &wbc); if (!folio_test_writeback(to_folio)) folio_clear_reclaim(to_folio); /* - * If writepage succeeds, it unlocks the folio. - * writepage() errors are otherwise dropped, since writepage() - * is only best effort here. + * If writeout succeeds, it unlocks the folio. errors + * are otherwise dropped, since writeout is only best + * effort here. 
*/ if (ret) folio_unlock(to_folio); diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h index 167fb0f742de..5a47991b4b81 100644 --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h @@ -47,6 +47,10 @@ #define MI_LRI_FORCE_POSTED REG_BIT(12) #define MI_LRI_LEN(x) (((x) & 0xff) + 1) +#define MI_STORE_REGISTER_MEM (__MI_INSTR(0x24) | XE_INSTR_NUM_DW(4)) +#define MI_SRM_USE_GGTT REG_BIT(22) +#define MI_SRM_ADD_CS_OFFSET REG_BIT(19) + #define MI_FLUSH_DW __MI_INSTR(0x26) #define MI_FLUSH_DW_PROTECTED_MEM_EN REG_BIT(22) #define MI_FLUSH_DW_STORE_INDEX REG_BIT(21) diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index fb8ec317b6ee..891f928d80ce 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -43,6 +43,10 @@ #define XEHPC_BCS8_RING_BASE 0x3ee000 #define GSCCS_RING_BASE 0x11a000 +#define ENGINE_ID(base) XE_REG((base) + 0x8c) +#define ENGINE_INSTANCE_ID REG_GENMASK(9, 4) +#define ENGINE_CLASS_ID REG_GENMASK(2, 0) + #define RING_TAIL(base) XE_REG((base) + 0x30) #define TAIL_ADDR REG_GENMASK(20, 3) @@ -154,6 +158,7 @@ #define STOP_RING REG_BIT(8) #define RING_CTX_TIMESTAMP(base) XE_REG((base) + 0x3a8) +#define RING_CTX_TIMESTAMP_UDW(base) XE_REG((base) + 0x3ac) #define CSBE_DEBUG_STATUS(base) XE_REG((base) + 0x3fc) #define RING_FORCE_TO_NONPRIV(base, i) XE_REG(((base) + 0x4d0) + (i) * 4) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index da1f198ac107..181913967ac9 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -157,6 +157,7 @@ #define XEHPG_SC_INSTDONE_EXTRA2 XE_REG_MCR(0x7108) #define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED) +#define SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE REG_BIT(12) #define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6) #define 
COMMON_SLICE_CHICKEN3 XE_REG(0x7304, XE_REG_OPTION_MASKED) diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h index 57944f90bbf6..994af591a2e8 100644 --- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -11,7 +11,9 @@ #define CTX_RING_TAIL (0x06 + 1) #define CTX_RING_START (0x08 + 1) #define CTX_RING_CTL (0x0a + 1) +#define CTX_BB_PER_CTX_PTR (0x12 + 1) #define CTX_TIMESTAMP (0x22 + 1) +#define CTX_TIMESTAMP_UDW (0x24 + 1) #define CTX_INDIRECT_RING_STATE (0x26 + 1) #define CTX_PDP0_UDW (0x30 + 1) #define CTX_PDP0_LDW (0x32 + 1) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 9f8667ebba85..0482f26aa480 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -330,6 +330,8 @@ struct xe_device { u8 has_sriov:1; /** @info.has_usm: Device has unified shared memory support */ u8 has_usm:1; + /** @info.has_64bit_timestamp: Device supports 64-bit timestamps */ + u8 has_64bit_timestamp:1; /** @info.is_dgfx: is discrete device */ u8 is_dgfx:1; /** diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 606922d9dd73..cd9b1c32f30f 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -830,7 +830,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) { struct xe_device *xe = gt_to_xe(q->gt); struct xe_lrc *lrc; - u32 old_ts, new_ts; + u64 old_ts, new_ts; int idx; /* diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 31bc2022bfc2..769781d577df 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -941,7 +941,7 @@ static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job) return xe_sched_invalidate_job(job, 2); } - ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]); + ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(q->lrc[0])); 
ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]); /* diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index df3ceddede07..03bfba696b37 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -24,6 +24,7 @@ #include "xe_hw_fence.h" #include "xe_map.h" #include "xe_memirq.h" +#include "xe_mmio.h" #include "xe_sriov.h" #include "xe_trace_lrc.h" #include "xe_vm.h" @@ -650,6 +651,7 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) #define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8) #define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8) #define LRC_PARALLEL_PPHWSP_OFFSET 2048 +#define LRC_ENGINE_ID_PPHWSP_OFFSET 2096 #define LRC_PPHWSP_SIZE SZ_4K u32 xe_lrc_regs_offset(struct xe_lrc *lrc) @@ -684,7 +686,7 @@ static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc) static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc) { - /* The start seqno is stored in the driver-defined portion of PPHWSP */ + /* This is stored in the driver-defined portion of PPHWSP */ return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET; } @@ -694,11 +696,21 @@ static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc) return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET; } +static inline u32 __xe_lrc_engine_id_offset(struct xe_lrc *lrc) +{ + return xe_lrc_pphwsp_offset(lrc) + LRC_ENGINE_ID_PPHWSP_OFFSET; +} + static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc) { return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32); } +static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc) +{ + return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP_UDW * sizeof(u32); +} + static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) { /* Indirect ring state page is at the very end of LRC */ @@ -726,8 +738,10 @@ DECL_MAP_ADDR_HELPERS(regs) DECL_MAP_ADDR_HELPERS(start_seqno) DECL_MAP_ADDR_HELPERS(ctx_job_timestamp) DECL_MAP_ADDR_HELPERS(ctx_timestamp) 
+DECL_MAP_ADDR_HELPERS(ctx_timestamp_udw) DECL_MAP_ADDR_HELPERS(parallel) DECL_MAP_ADDR_HELPERS(indirect_ring) +DECL_MAP_ADDR_HELPERS(engine_id) #undef DECL_MAP_ADDR_HELPERS @@ -743,18 +757,37 @@ u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc) } /** + * xe_lrc_ctx_timestamp_udw_ggtt_addr() - Get ctx timestamp udw GGTT address + * @lrc: Pointer to the lrc. + * + * Returns: ctx timestamp udw GGTT address + */ +u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc) +{ + return __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc); +} + +/** * xe_lrc_ctx_timestamp() - Read ctx timestamp value * @lrc: Pointer to the lrc. * * Returns: ctx timestamp value */ -u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc) +u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc) { struct xe_device *xe = lrc_to_xe(lrc); struct iosys_map map; + u32 ldw, udw = 0; map = __xe_lrc_ctx_timestamp_map(lrc); - return xe_map_read32(xe, &map); + ldw = xe_map_read32(xe, &map); + + if (xe->info.has_64bit_timestamp) { + map = __xe_lrc_ctx_timestamp_udw_map(lrc); + udw = xe_map_read32(xe, &map); + } + + return (u64)udw << 32 | ldw; } /** @@ -864,7 +897,7 @@ static void *empty_lrc_data(struct xe_hw_engine *hwe) static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) { - u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile); + u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt)); xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc)); xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc)); @@ -877,6 +910,65 @@ static void xe_lrc_finish(struct xe_lrc *lrc) xe_bo_unpin(lrc->bo); xe_bo_unlock(lrc->bo); xe_bo_put(lrc->bo); + xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo); +} + +/* + * xe_lrc_setup_utilization() - Setup wa bb to assist in calculating active + * context run ticks. + * @lrc: Pointer to the lrc. + * + * Context Timestamp (CTX_TIMESTAMP) in the LRC accumulates the run ticks of the + * context, but only gets updated when the context switches out. 
In order to + * check how long a context has been active before it switches out, two things + * are required: + * + * (1) Determine if the context is running: + * To do so, we program the WA BB to set an initial value for CTX_TIMESTAMP in + * the LRC. The value chosen is 1 since 0 is the initial value when the LRC is + * initialized. During a query, we just check for this value to determine if the + * context is active. If the context switched out, it would overwrite this + * location with the actual CTX_TIMESTAMP MMIO value. Note that WA BB runs as + * the last part of context restore, so reusing this LRC location will not + * clobber anything. + * + * (2) Calculate the time that the context has been active for: + * The CTX_TIMESTAMP ticks only when the context is active. If a context is + * active, we just use the CTX_TIMESTAMP MMIO as the new value of utilization. + * While doing so, we need to read the CTX_TIMESTAMP MMIO for the specific + * engine instance. Since we do not know which instance the context is running + * on until it is scheduled, we also read the ENGINE_ID MMIO in the WA BB and + * store it in the PPHWSP. 
+ */ +#define CONTEXT_ACTIVE 1ULL +static void xe_lrc_setup_utilization(struct xe_lrc *lrc) +{ + u32 *cmd; + + cmd = lrc->bb_per_ctx_bo->vmap.vaddr; + + *cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; + *cmd++ = ENGINE_ID(0).addr; + *cmd++ = __xe_lrc_engine_id_ggtt_addr(lrc); + *cmd++ = 0; + + *cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); + *cmd++ = __xe_lrc_ctx_timestamp_ggtt_addr(lrc); + *cmd++ = 0; + *cmd++ = lower_32_bits(CONTEXT_ACTIVE); + + if (lrc_to_xe(lrc)->info.has_64bit_timestamp) { + *cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); + *cmd++ = __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc); + *cmd++ = 0; + *cmd++ = upper_32_bits(CONTEXT_ACTIVE); + } + + *cmd++ = MI_BATCH_BUFFER_END; + + xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, + xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1); + } #define PVC_CTX_ASID (0x2e + 1) @@ -893,31 +985,40 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, void *init_data = NULL; u32 arb_enable; u32 lrc_size; + u32 bo_flags; int err; kref_init(&lrc->refcount); + lrc->gt = gt; lrc->flags = 0; lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class); if (xe_gt_has_indirect_ring_state(gt)) lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; + bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE; + /* * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address * via VM bind calls. 
*/ lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + bo_flags); if (IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); + lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, + ttm_bo_type_kernel, + bo_flags); + if (IS_ERR(lrc->bb_per_ctx_bo)) { + err = PTR_ERR(lrc->bb_per_ctx_bo); + goto err_lrc_finish; + } + lrc->size = lrc_size; - lrc->tile = gt_to_tile(hwe->gt); lrc->ring.size = ring_size; lrc->ring.tail = 0; - lrc->ctx_timestamp = 0; xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, hwe->fence_irq, hwe->name); @@ -990,7 +1091,10 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) | _MASKED_BIT_ENABLE(CTX_CTRL_PXP_ENABLE)); + lrc->ctx_timestamp = 0; xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0); + if (lrc_to_xe(lrc)->info.has_64bit_timestamp) + xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0); if (xe->info.has_asid && vm) xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); @@ -1019,6 +1123,8 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, map = __xe_lrc_start_seqno_map(lrc); xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); + xe_lrc_setup_utilization(lrc); + return 0; err_lrc_finish: @@ -1238,6 +1344,21 @@ struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc) return __xe_lrc_parallel_map(lrc); } +/** + * xe_lrc_engine_id() - Read engine id value + * @lrc: Pointer to the lrc. 
+ * + * Returns: engine id value + */ +static u32 xe_lrc_engine_id(struct xe_lrc *lrc) +{ + struct xe_device *xe = lrc_to_xe(lrc); + struct iosys_map map; + + map = __xe_lrc_engine_id_map(lrc); + return xe_map_read32(xe, &map); +} + static int instr_dw(u32 cmd_header) { /* GFXPIPE "SINGLE_DW" opcodes are a single dword */ @@ -1684,7 +1805,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset; snapshot->lrc_snapshot = NULL; - snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); + snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc)); snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); return snapshot; } @@ -1784,22 +1905,74 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) kfree(snapshot); } +static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts) +{ + u16 class = REG_FIELD_GET(ENGINE_CLASS_ID, engine_id); + u16 instance = REG_FIELD_GET(ENGINE_INSTANCE_ID, engine_id); + struct xe_hw_engine *hwe; + u64 val; + + hwe = xe_gt_hw_engine(lrc->gt, class, instance, false); + if (xe_gt_WARN_ONCE(lrc->gt, !hwe || xe_hw_engine_is_reserved(hwe), + "Unexpected engine class:instance %d:%d for context utilization\n", + class, instance)) + return -1; + + if (lrc_to_xe(lrc)->info.has_64bit_timestamp) + val = xe_mmio_read64_2x32(&hwe->gt->mmio, + RING_CTX_TIMESTAMP(hwe->mmio_base)); + else + val = xe_mmio_read32(&hwe->gt->mmio, + RING_CTX_TIMESTAMP(hwe->mmio_base)); + + *reg_ctx_ts = val; + + return 0; +} + /** * xe_lrc_update_timestamp() - Update ctx timestamp * @lrc: Pointer to the lrc. * @old_ts: Old timestamp value * * Populate @old_ts current saved ctx timestamp, read new ctx timestamp and - * update saved value. + * update saved value. 
With support for active contexts, the calculation may be + * slightly racy, so follow a read-again logic to ensure that the context is + * still active before returning the right timestamp. * * Returns: New ctx timestamp value */ -u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts) +u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts) { + u64 lrc_ts, reg_ts; + u32 engine_id; + *old_ts = lrc->ctx_timestamp; - lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); + lrc_ts = xe_lrc_ctx_timestamp(lrc); + /* CTX_TIMESTAMP mmio read is invalid on VF, so return the LRC value */ + if (IS_SRIOV_VF(lrc_to_xe(lrc))) { + lrc->ctx_timestamp = lrc_ts; + goto done; + } + + if (lrc_ts == CONTEXT_ACTIVE) { + engine_id = xe_lrc_engine_id(lrc); + if (!get_ctx_timestamp(lrc, engine_id, &reg_ts)) + lrc->ctx_timestamp = reg_ts; + + /* read lrc again to ensure context is still active */ + lrc_ts = xe_lrc_ctx_timestamp(lrc); + } + + /* + * If context switched out, just use the lrc_ts. Note that this needs to + * be a separate if condition. 
+ */ + if (lrc_ts != CONTEXT_ACTIVE) + lrc->ctx_timestamp = lrc_ts; +done: trace_xe_lrc_update_timestamp(lrc, *old_ts); return lrc->ctx_timestamp; diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index 0b40f349ab95..eb6e8de8c939 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -120,7 +120,8 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot); u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc); -u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc); +u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc); +u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc); u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc); u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc); @@ -136,6 +137,6 @@ u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc); * * Returns the current LRC timestamp */ -u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts); +u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts); #endif diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index 71ecb453f811..ae24cf6f8dd9 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -25,8 +25,8 @@ struct xe_lrc { /** @size: size of lrc including any indirect ring state page */ u32 size; - /** @tile: tile which this LRC belongs to */ - struct xe_tile *tile; + /** @gt: gt which this LRC belongs to */ + struct xe_gt *gt; /** @flags: LRC flags */ #define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1 @@ -52,7 +52,10 @@ struct xe_lrc { struct xe_hw_fence_ctx fence_ctx; /** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */ - u32 ctx_timestamp; + u64 ctx_timestamp; + + /** @bb_per_ctx_bo: buffer object for per context batch wa buffer */ + struct xe_bo *bb_per_ctx_bo; }; struct xe_lrc_snapshot; diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 70a36e777546..46301f341773 100644 --- 
a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -75,12 +75,12 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) * is fine as it's going to the root tile's mmio, that's * guaranteed to be initialized earlier in xe_mmio_probe_early() */ - mtcfg = xe_mmio_read64_2x32(mmio, XEHP_MTCFG_ADDR); + mtcfg = xe_mmio_read32(mmio, XEHP_MTCFG_ADDR); tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; if (tile_count < xe->info.tile_count) { drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n", - xe->info.tile_count, tile_count); + xe->info.tile_count, tile_count); xe->info.tile_count = tile_count; /* @@ -128,7 +128,7 @@ int xe_mmio_probe_early(struct xe_device *xe) */ xe->mmio.size = pci_resource_len(pdev, GTTMMADR_BAR); xe->mmio.regs = pci_iomap(pdev, GTTMMADR_BAR, 0); - if (xe->mmio.regs == NULL) { + if (!xe->mmio.regs) { drm_err(&xe->drm, "failed to map registers\n"); return -EIO; } @@ -309,8 +309,8 @@ u64 xe_mmio_read64_2x32(struct xe_mmio *mmio, struct xe_reg reg) return (u64)udw << 32 | ldw; } -static int __xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, - u32 *out_val, bool atomic, bool expect_match) +static int __xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, + u32 timeout_us, u32 *out_val, bool atomic, bool expect_match) { ktime_t cur = ktime_get_raw(); const ktime_t end = ktime_add_us(cur, timeout_us); diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 31dade91a089..0c737413fcb6 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -775,22 +775,23 @@ void xe_mocs_init(struct xe_gt *gt) void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); + enum xe_force_wake_domains domain; struct xe_mocs_info table; unsigned int fw_ref, flags; flags = get_mocs_settings(xe, &table); + domain = flags & HAS_LNCF_MOCS ? 
XE_FORCEWAKE_ALL : XE_FW_GT; xe_pm_runtime_get_noresume(xe); - fw_ref = xe_force_wake_get(gt_to_fw(gt), - flags & HAS_LNCF_MOCS ? - XE_FORCEWAKE_ALL : XE_FW_GT); - if (!fw_ref) + fw_ref = xe_force_wake_get(gt_to_fw(gt), domain); + + if (!xe_force_wake_ref_has_domain(fw_ref, domain)) goto err_fw; table.ops->dump(&table, flags, gt, p); - xe_force_wake_put(gt_to_fw(gt), fw_ref); err_fw: + xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_pm_runtime_put(xe); } diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 9f4632e39a1a..e861c694f336 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -29,9 +29,6 @@ struct xe_modparam xe_modparam = { module_param_named(svm_notifier_size, xe_modparam.svm_notifier_size, uint, 0600); MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size(in MiB), must be power of 2"); -module_param_named(always_migrate_to_vram, xe_modparam.always_migrate_to_vram, bool, 0444); -MODULE_PARM_DESC(always_migrate_to_vram, "Always migrate to VRAM on GPU fault"); - module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444); MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h index 84339e509c80..5a3bfea8b7b4 100644 --- a/drivers/gpu/drm/xe/xe_module.h +++ b/drivers/gpu/drm/xe/xe_module.h @@ -12,7 +12,6 @@ struct xe_modparam { bool force_execlist; bool probe_display; - bool always_migrate_to_vram; u32 force_vram_bar_size; int guc_log_level; char *guc_firmware_path; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 818f023166d5..f4d108dc49b1 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -140,6 +140,7 @@ static const struct xe_graphics_desc graphics_xelpg = { .has_indirect_ring_state = 1, \ .has_range_tlb_invalidation = 1, \ .has_usm = 1, \ + .has_64bit_timestamp = 1, \ .va_bits = 48, \ .vm_max_level = 4, \ .hw_engine_mask = \ @@ 
-668,6 +669,7 @@ static int xe_info_init(struct xe_device *xe, xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation; xe->info.has_usm = graphics_desc->has_usm; + xe->info.has_64bit_timestamp = graphics_desc->has_64bit_timestamp; for_each_remote_tile(tile, xe, id) { int err; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index e9b9bbc138d3..ca6b10d35573 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -21,6 +21,7 @@ struct xe_graphics_desc { u8 has_indirect_ring_state:1; u8 has_range_tlb_invalidation:1; u8 has_usm:1; + u8 has_64bit_timestamp:1; }; struct xe_media_desc { diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index ffaf0d02dc7d..856038553b81 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -2232,11 +2232,19 @@ static void op_commit(struct xe_vm *vm, } case DRM_GPUVA_OP_DRIVER: { + /* WRITE_ONCE pairs with READ_ONCE in xe_svm.c */ + if (op->subop == XE_VMA_SUBOP_MAP_RANGE) { - op->map_range.range->tile_present |= BIT(tile->id); - op->map_range.range->tile_invalidated &= ~BIT(tile->id); + WRITE_ONCE(op->map_range.range->tile_present, + op->map_range.range->tile_present | + BIT(tile->id)); + WRITE_ONCE(op->map_range.range->tile_invalidated, + op->map_range.range->tile_invalidated & + ~BIT(tile->id)); } else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) { - op->unmap_range.range->tile_present &= ~BIT(tile->id); + WRITE_ONCE(op->unmap_range.range->tile_present, + op->unmap_range.range->tile_present & + ~BIT(tile->id)); } break; } diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index a7582b097ae6..bc1689db4cd7 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -234,13 +234,10 @@ static u32 get_ppgtt_flag(struct xe_sched_job *job) static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i) { - dw[i++] = MI_COPY_MEM_MEM | 
MI_COPY_MEM_MEM_SRC_GGTT | - MI_COPY_MEM_MEM_DST_GGTT; + dw[i++] = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; + dw[i++] = RING_CTX_TIMESTAMP(0).addr; dw[i++] = xe_lrc_ctx_job_timestamp_ggtt_addr(lrc); dw[i++] = 0; - dw[i++] = xe_lrc_ctx_timestamp_ggtt_addr(lrc); - dw[i++] = 0; - dw[i++] = MI_NOOP; return i; } diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c index 8184390f9c7b..86d47aaf0358 100644 --- a/drivers/gpu/drm/xe/xe_shrinker.c +++ b/drivers/gpu/drm/xe/xe_shrinker.c @@ -227,7 +227,7 @@ struct xe_shrinker *xe_shrinker_create(struct xe_device *xe) if (!shrinker) return ERR_PTR(-ENOMEM); - shrinker->shrink = shrinker_alloc(0, "xe system shrinker"); + shrinker->shrink = shrinker_alloc(0, "drm-xe_gem:%s", xe->drm.unique); if (!shrinker->shrink) { kfree(shrinker); return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 24c578e1170e..975094c1a582 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -15,8 +15,17 @@ static bool xe_svm_range_in_vram(struct xe_svm_range *range) { - /* Not reliable without notifier lock */ - return range->base.flags.has_devmem_pages; + /* + * Advisory only check whether the range is currently backed by VRAM + * memory. 
+ */ + + struct drm_gpusvm_range_flags flags = { + /* Pairs with WRITE_ONCE in drm_gpusvm.c */ + .__flags = READ_ONCE(range->base.flags.__flags), + }; + + return flags.has_devmem_pages; } static bool xe_svm_range_has_vram_binding(struct xe_svm_range *range) @@ -645,9 +654,16 @@ void xe_svm_fini(struct xe_vm *vm) } static bool xe_svm_range_is_valid(struct xe_svm_range *range, - struct xe_tile *tile) + struct xe_tile *tile, + bool devmem_only) { - return (range->tile_present & ~range->tile_invalidated) & BIT(tile->id); + /* + * Advisory only check whether the range currently has a valid mapping, + * READ_ONCE pairs with WRITE_ONCE in xe_pt.c + */ + return ((READ_ONCE(range->tile_present) & + ~READ_ONCE(range->tile_invalidated)) & BIT(tile->id)) && + (!devmem_only || xe_svm_range_in_vram(range)); } static struct xe_vram_region *tile_to_vr(struct xe_tile *tile) @@ -712,6 +728,36 @@ unlock: return err; } +static bool supports_4K_migration(struct xe_device *xe) +{ + if (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) + return false; + + return true; +} + +static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, + struct xe_vma *vma) +{ + struct xe_vm *vm = range_to_vm(&range->base); + u64 range_size = xe_svm_range_size(range); + + if (!range->base.flags.migrate_devmem) + return false; + + if (xe_svm_range_in_vram(range)) { + drm_dbg(&vm->xe->drm, "Range is already in VRAM\n"); + return false; + } + + if (range_size <= SZ_64K && !supports_4K_migration(vm->xe)) { + drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n"); + return false; + } + + return true; +} + /** * xe_svm_handle_pagefault() - SVM handle page fault * @vm: The VM. @@ -735,11 +781,16 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR), .check_pages_threshold = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? 
SZ_64K : 0, + .devmem_only = atomic && IS_DGFX(vm->xe) && + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR), + .timeslice_ms = atomic && IS_DGFX(vm->xe) && + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? 5 : 0, }; struct xe_svm_range *range; struct drm_gpusvm_range *r; struct drm_exec exec; struct dma_fence *fence; + int migrate_try_count = ctx.devmem_only ? 3 : 1; ktime_t end = 0; int err; @@ -758,24 +809,31 @@ retry: if (IS_ERR(r)) return PTR_ERR(r); + if (ctx.devmem_only && !r->flags.migrate_devmem) + return -EACCES; + range = to_xe_range(r); - if (xe_svm_range_is_valid(range, tile)) + if (xe_svm_range_is_valid(range, tile, ctx.devmem_only)) return 0; range_debug(range, "PAGE FAULT"); - /* XXX: Add migration policy, for now migrate range once */ - if (!range->skip_migrate && range->base.flags.migrate_devmem && - xe_svm_range_size(range) >= SZ_64K) { - range->skip_migrate = true; - + if (--migrate_try_count >= 0 && + xe_svm_range_needs_migrate_to_vram(range, vma)) { err = xe_svm_alloc_vram(vm, tile, range, &ctx); + ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ if (err) { - drm_dbg(&vm->xe->drm, - "VRAM allocation failed, falling back to " - "retrying fault, asid=%u, errno=%pe\n", - vm->usm.asid, ERR_PTR(err)); - goto retry; + if (migrate_try_count || !ctx.devmem_only) { + drm_dbg(&vm->xe->drm, + "VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n", + vm->usm.asid, ERR_PTR(err)); + goto retry; + } else { + drm_err(&vm->xe->drm, + "VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n", + vm->usm.asid, ERR_PTR(err)); + return err; + } } } @@ -783,15 +841,23 @@ retry: err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx); /* Corner where CPU mappings have changed */ if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) { - if (err == -EOPNOTSUPP) { - range_debug(range, "PAGE FAULT - EVICT PAGES"); - drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base); + ctx.timeslice_ms <<= 1; /* Double timeslice if we have 
to retry */ + if (migrate_try_count > 0 || !ctx.devmem_only) { + if (err == -EOPNOTSUPP) { + range_debug(range, "PAGE FAULT - EVICT PAGES"); + drm_gpusvm_range_evict(&vm->svm.gpusvm, + &range->base); + } + drm_dbg(&vm->xe->drm, + "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n", + vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); + range_debug(range, "PAGE FAULT - RETRY PAGES"); + goto retry; + } else { + drm_err(&vm->xe->drm, + "Get pages failed, retry count exceeded, asid=%u, gpusvm=%p, errno=%pe\n", + vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); } - drm_dbg(&vm->xe->drm, - "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n", - vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); - range_debug(range, "PAGE FAULT - RETRY PAGES"); - goto retry; } if (err) { range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT"); @@ -815,6 +881,7 @@ retry_bind: drm_exec_fini(&exec); err = PTR_ERR(fence); if (err == -EAGAIN) { + ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ range_debug(range, "PAGE FAULT - RETRY BIND"); goto retry; } @@ -825,9 +892,6 @@ retry_bind: } drm_exec_fini(&exec); - if (xe_modparam.always_migrate_to_vram) - range->skip_migrate = false; - dma_fence_wait(fence, false); dma_fence_put(fence); diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h index be306fe7aaa4..fe58ac2f4baa 100644 --- a/drivers/gpu/drm/xe/xe_svm.h +++ b/drivers/gpu/drm/xe/xe_svm.h @@ -36,11 +36,6 @@ struct xe_svm_range { * range. Protected by GPU SVM notifier lock. */ u8 tile_invalidated; - /** - * @skip_migrate: Skip migration to VRAM, protected by GPU fault handler - * locking. 
- */ - u8 skip_migrate :1; }; #if IS_ENABLED(CONFIG_DRM_GPUSVM) diff --git a/drivers/gpu/drm/xe/xe_trace_lrc.h b/drivers/gpu/drm/xe/xe_trace_lrc.h index 5c669a0b2180..d525cbee1e34 100644 --- a/drivers/gpu/drm/xe/xe_trace_lrc.h +++ b/drivers/gpu/drm/xe/xe_trace_lrc.h @@ -19,12 +19,12 @@ #define __dev_name_lrc(lrc) dev_name(gt_to_xe((lrc)->fence_ctx.gt)->drm.dev) TRACE_EVENT(xe_lrc_update_timestamp, - TP_PROTO(struct xe_lrc *lrc, uint32_t old), + TP_PROTO(struct xe_lrc *lrc, uint64_t old), TP_ARGS(lrc, old), TP_STRUCT__entry( __field(struct xe_lrc *, lrc) - __field(u32, old) - __field(u32, new) + __field(u64, old) + __field(u64, new) __string(name, lrc->fence_ctx.name) __string(device_id, __dev_name_lrc(lrc)) ), @@ -36,7 +36,7 @@ TRACE_EVENT(xe_lrc_update_timestamp, __assign_str(name); __assign_str(device_id); ), - TP_printk("lrc=:%p lrc->name=%s old=%u new=%u device_id:%s", + TP_printk("lrc=:%p lrc->name=%s old=%llu new=%llu device_id:%s", __entry->lrc, __get_str(name), __entry->old, __entry->new, __get_str(device_id)) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 24f644c0a673..2f833f0d575f 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -815,6 +815,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) }, + { XE_RTP_NAME("22021007897"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) + }, /* Xe3_LPG */ { XE_RTP_NAME("14021490052"), diff --git a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c index 25f0ebfcbd5f..0a9b44ce4904 100644 --- a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c +++ b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c @@ -83,6 +83,9 @@ static int amd_sfh_hid_client_deinit(struct amd_mp2_dev *privdata) case ALS_IDX: 
privdata->dev_en.is_als_present = false; break; + case SRA_IDX: + privdata->dev_en.is_sra_present = false; + break; } if (cl_data->sensor_sts[i] == SENSOR_ENABLED) { @@ -134,9 +137,6 @@ static int amd_sfh1_1_hid_client_init(struct amd_mp2_dev *privdata) for (i = 0; i < cl_data->num_hid_devices; i++) { cl_data->sensor_sts[i] = SENSOR_DISABLED; - if (cl_data->num_hid_devices == 1 && cl_data->sensor_idx[0] == SRA_IDX) - break; - if (cl_data->sensor_idx[i] == SRA_IDX) { info.sensor_idx = cl_data->sensor_idx[i]; writel(0, privdata->mmio + amd_get_p2c_val(privdata, 0)); @@ -145,8 +145,10 @@ static int amd_sfh1_1_hid_client_init(struct amd_mp2_dev *privdata) (privdata, cl_data->sensor_idx[i], ENABLE_SENSOR); cl_data->sensor_sts[i] = (status == 0) ? SENSOR_ENABLED : SENSOR_DISABLED; - if (cl_data->sensor_sts[i] == SENSOR_ENABLED) + if (cl_data->sensor_sts[i] == SENSOR_ENABLED) { + cl_data->is_any_sensor_enabled = true; privdata->dev_en.is_sra_present = true; + } continue; } @@ -238,6 +240,8 @@ static int amd_sfh1_1_hid_client_init(struct amd_mp2_dev *privdata) cleanup: amd_sfh_hid_client_deinit(privdata); for (i = 0; i < cl_data->num_hid_devices; i++) { + if (cl_data->sensor_idx[i] == SRA_IDX) + continue; devm_kfree(dev, cl_data->feature_report[i]); devm_kfree(dev, in_data->input_report[i]); devm_kfree(dev, cl_data->report_descr[i]); diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index 2e96ec6a3073..9a06f9b0e4ef 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -38,6 +38,9 @@ dispatch_hid_bpf_device_event(struct hid_device *hdev, enum hid_report_type type struct hid_bpf_ops *e; int ret; + if (unlikely(hdev->bpf.destroyed)) + return ERR_PTR(-ENODEV); + if (type >= HID_REPORT_TYPES) return ERR_PTR(-EINVAL); @@ -93,6 +96,9 @@ int dispatch_hid_bpf_raw_requests(struct hid_device *hdev, struct hid_bpf_ops *e; int ret, idx; + if (unlikely(hdev->bpf.destroyed)) + return -ENODEV; + if (rtype >= 
HID_REPORT_TYPES) return -EINVAL; @@ -130,6 +136,9 @@ int dispatch_hid_bpf_output_report(struct hid_device *hdev, struct hid_bpf_ops *e; int ret, idx; + if (unlikely(hdev->bpf.destroyed)) + return -ENODEV; + idx = srcu_read_lock(&hdev->bpf.srcu); list_for_each_entry_srcu(e, &hdev->bpf.prog_list, list, srcu_read_lock_held(&hdev->bpf.srcu)) { diff --git a/drivers/hid/bpf/progs/XPPen__ACK05.bpf.c b/drivers/hid/bpf/progs/XPPen__ACK05.bpf.c index 1a0aeea6a081..a754710fc90b 100644 --- a/drivers/hid/bpf/progs/XPPen__ACK05.bpf.c +++ b/drivers/hid/bpf/progs/XPPen__ACK05.bpf.c @@ -157,6 +157,7 @@ static const __u8 fixed_rdesc_vendor[] = { ReportCount(5) // padding Input(Const) // Byte 4 in report - just exists so we get to be a tablet pad + UsagePage_Digitizers Usage_Dig_BarrelSwitch // BTN_STYLUS ReportCount(1) ReportSize(1) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 288a2b864cc4..1062731315a2 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -41,6 +41,10 @@ #define USB_VENDOR_ID_ACTIONSTAR 0x2101 #define USB_DEVICE_ID_ACTIONSTAR_1011 0x1011 +#define USB_VENDOR_ID_ADATA_XPG 0x125f +#define USB_VENDOR_ID_ADATA_XPG_WL_GAMING_MOUSE 0x7505 +#define USB_VENDOR_ID_ADATA_XPG_WL_GAMING_MOUSE_DONGLE 0x7506 + #define USB_VENDOR_ID_ADS_TECH 0x06e1 #define USB_DEVICE_ID_ADS_TECH_RADIO_SI470X 0xa155 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 646171598e41..0731473cc9b1 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -27,6 +27,8 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_AASHIMA, USB_DEVICE_ID_AASHIMA_GAMEPAD), HID_QUIRK_BADPAD }, { HID_USB_DEVICE(USB_VENDOR_ID_AASHIMA, USB_DEVICE_ID_AASHIMA_PREDATOR), HID_QUIRK_BADPAD }, + { HID_USB_DEVICE(USB_VENDOR_ID_ADATA_XPG, USB_VENDOR_ID_ADATA_XPG_WL_GAMING_MOUSE), HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_ADATA_XPG, USB_VENDOR_ID_ADATA_XPG_WL_GAMING_MOUSE_DONGLE), HID_QUIRK_ALWAYS_POLL }, { 
HID_USB_DEVICE(USB_VENDOR_ID_AFATECH, USB_DEVICE_ID_AFATECH_AF9016), HID_QUIRK_FULLSPEED_INTERVAL }, { HID_USB_DEVICE(USB_VENDOR_ID_AIREN, USB_DEVICE_ID_AIREN_SLIMPLUS), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_AKAI_09E8, USB_DEVICE_ID_AKAI_09E8_MIDIMIX), HID_QUIRK_NO_INIT_REPORTS }, diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index dfd9d22ed559..949d307c66a8 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -1150,11 +1150,9 @@ static void steam_client_ll_close(struct hid_device *hdev) struct steam_device *steam = hdev->driver_data; unsigned long flags; - bool connected; spin_lock_irqsave(&steam->lock, flags); steam->client_opened--; - connected = steam->connected && !steam->client_opened; spin_unlock_irqrestore(&steam->lock, flags); schedule_work(&steam->unregister_work); diff --git a/drivers/hid/hid-thrustmaster.c b/drivers/hid/hid-thrustmaster.c index 3b81468a1df2..0bf70664c35e 100644 --- a/drivers/hid/hid-thrustmaster.c +++ b/drivers/hid/hid-thrustmaster.c @@ -174,6 +174,7 @@ static void thrustmaster_interrupts(struct hid_device *hdev) u8 ep_addr[2] = {b_ep, 0}; if (!usb_check_int_endpoints(usbif, ep_addr)) { + kfree(send_buf); hid_err(hdev, "Unexpected non-int endpoint\n"); return; } diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c index a367df6ea01f..61a4019ddc74 100644 --- a/drivers/hid/hid-uclogic-core.c +++ b/drivers/hid/hid-uclogic-core.c @@ -142,11 +142,12 @@ static int uclogic_input_configured(struct hid_device *hdev, suffix = "System Control"; break; } - } - - if (suffix) + } else { hi->input->name = devm_kasprintf(&hdev->dev, GFP_KERNEL, "%s %s", hdev->name, suffix); + if (!hi->input->name) + return -ENOMEM; + } return 0; } diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 1556d4287fa5..eaf099b2efdb 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -70,10 +70,16 @@ static void wacom_wac_queue_flush(struct hid_device *hdev, { while 
(!kfifo_is_empty(fifo)) { int size = kfifo_peek_len(fifo); - u8 *buf = kzalloc(size, GFP_KERNEL); + u8 *buf; unsigned int count; int err; + buf = kzalloc(size, GFP_KERNEL); + if (!buf) { + kfifo_skip(fifo); + continue; + } + count = kfifo_out(fifo, buf, size); if (count != size) { // Hard to say what is the "right" action in this @@ -81,6 +87,7 @@ static void wacom_wac_queue_flush(struct hid_device *hdev, // to flush seems reasonable enough, however. hid_warn(hdev, "%s: removed fifo entry with unexpected size\n", __func__); + kfree(buf); continue; } err = hid_report_raw_event(hdev, HID_INPUT_REPORT, buf, size, false); @@ -2361,6 +2368,8 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) unsigned int connect_mask = HID_CONNECT_HIDRAW; features->pktlen = wacom_compute_pktlen(hdev); + if (!features->pktlen) + return -ENODEV; if (!devres_open_group(&hdev->dev, wacom, GFP_KERNEL)) return -ENOMEM; diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index fb8cd8469328..35f26fa1ffe7 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -1077,68 +1077,10 @@ int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer, EXPORT_SYMBOL(vmbus_sendpacket); /* - * vmbus_sendpacket_pagebuffer - Send a range of single-page buffer - * packets using a GPADL Direct packet type. This interface allows you - * to control notifying the host. This will be useful for sending - * batched data. Also the sender can control the send flags - * explicitly. 
- */ -int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, - struct hv_page_buffer pagebuffers[], - u32 pagecount, void *buffer, u32 bufferlen, - u64 requestid) -{ - int i; - struct vmbus_channel_packet_page_buffer desc; - u32 descsize; - u32 packetlen; - u32 packetlen_aligned; - struct kvec bufferlist[3]; - u64 aligned_data = 0; - - if (pagecount > MAX_PAGE_BUFFER_COUNT) - return -EINVAL; - - /* - * Adjust the size down since vmbus_channel_packet_page_buffer is the - * largest size we support - */ - descsize = sizeof(struct vmbus_channel_packet_page_buffer) - - ((MAX_PAGE_BUFFER_COUNT - pagecount) * - sizeof(struct hv_page_buffer)); - packetlen = descsize + bufferlen; - packetlen_aligned = ALIGN(packetlen, sizeof(u64)); - - /* Setup the descriptor */ - desc.type = VM_PKT_DATA_USING_GPA_DIRECT; - desc.flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; - desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */ - desc.length8 = (u16)(packetlen_aligned >> 3); - desc.transactionid = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */ - desc.reserved = 0; - desc.rangecount = pagecount; - - for (i = 0; i < pagecount; i++) { - desc.range[i].len = pagebuffers[i].len; - desc.range[i].offset = pagebuffers[i].offset; - desc.range[i].pfn = pagebuffers[i].pfn; - } - - bufferlist[0].iov_base = &desc; - bufferlist[0].iov_len = descsize; - bufferlist[1].iov_base = buffer; - bufferlist[1].iov_len = bufferlen; - bufferlist[2].iov_base = &aligned_data; - bufferlist[2].iov_len = (packetlen_aligned - packetlen); - - return hv_ringbuffer_write(channel, bufferlist, 3, requestid, NULL); -} -EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer); - -/* - * vmbus_sendpacket_multipagebuffer - Send a multi-page buffer packet + * vmbus_sendpacket_mpb_desc - Send one or more multi-page buffer packets * using a GPADL Direct packet type. - * The buffer includes the vmbus descriptor. + * The desc argument must include space for the VMBus descriptor. 
The + * rangecount field must already be set. */ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, struct vmbus_packet_mpb_array *desc, @@ -1160,7 +1102,6 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, desc->length8 = (u16)(packetlen_aligned >> 3); desc->transactionid = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */ desc->reserved = 0; - desc->rangecount = 1; bufferlist[0].iov_base = desc; bufferlist[0].iov_len = desc_size; diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c index 8e0267c7cc29..f21f9877c040 100644 --- a/drivers/i2c/busses/i2c-designware-pcidrv.c +++ b/drivers/i2c/busses/i2c-designware-pcidrv.c @@ -278,9 +278,11 @@ static int i2c_dw_pci_probe(struct pci_dev *pdev, if ((dev->flags & MODEL_MASK) == MODEL_AMD_NAVI_GPU) { dev->slave = i2c_new_ccgx_ucsi(&dev->adapter, dev->irq, &dgpu_node); - if (IS_ERR(dev->slave)) + if (IS_ERR(dev->slave)) { + i2c_del_adapter(&dev->adapter); return dev_err_probe(device, PTR_ERR(dev->slave), "register UCSI failed\n"); + } } pm_runtime_set_autosuspend_delay(device, 1000); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index b4e3e4beb7f4..d4263385850a 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1352,6 +1352,9 @@ static void ib_device_notify_register(struct ib_device *device) down_read(&devices_rwsem); + /* Mark for userspace that device is ready */ + kobject_uevent(&device->dev.kobj, KOBJ_ADD); + ret = rdma_nl_notify_event(device, 0, RDMA_REGISTER_EVENT); if (ret) goto out; @@ -1468,10 +1471,9 @@ int ib_register_device(struct ib_device *device, const char *name, return ret; } dev_set_uevent_suppress(&device->dev, false); - /* Mark for userspace that device is ready */ - kobject_uevent(&device->dev.kobj, KOBJ_ADD); ib_device_notify_register(device); + ib_device_put(device); return 0; diff --git a/drivers/infiniband/hw/irdma/main.c 
b/drivers/infiniband/hw/irdma/main.c index 1ee8969595d3..7599e31b5743 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -221,7 +221,7 @@ static int irdma_init_interrupts(struct irdma_pci_f *rf, struct ice_pf *pf) break; if (i < IRDMA_MIN_MSIX) { - for (; i > 0; i--) + while (--i >= 0) ice_free_rdma_qvector(pf, &rf->msix_entries[i]); kfree(rf->msix_entries); @@ -255,6 +255,8 @@ static void irdma_remove(struct auxiliary_device *aux_dev) ice_rdma_update_vsi_filter(pf, iwdev->vsi_num, false); irdma_deinit_interrupts(iwdev->rf, pf); + kfree(iwdev->rf); + pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(pf->pdev->devfn)); } diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index eeb932e58730..1e8c92826de2 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -4871,5 +4871,4 @@ void irdma_ib_dealloc_device(struct ib_device *ibdev) irdma_rt_deinit_hw(iwdev); irdma_ctrl_deinit_hw(iwdev->rf); - kfree(iwdev->rf); } diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index b9f4a2937c3a..2098de762bf5 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -90,7 +90,7 @@ static int create_file(const char *name, umode_t mode, int error; inode_lock(d_inode(parent)); - *dentry = lookup_one_len(name, parent, strlen(name)); + *dentry = lookup_noperm(&QSTR(name), parent); if (!IS_ERR(*dentry)) error = qibfs_mknod(d_inode(parent), *dentry, mode, fops, data); @@ -433,7 +433,7 @@ static int remove_device_files(struct super_block *sb, char unit[10]; snprintf(unit, sizeof(unit), "%u", dd->unit); - dir = lookup_one_len_unlocked(unit, sb->s_root, strlen(unit)); + dir = lookup_noperm_unlocked(&QSTR(unit), sb->s_root); if (IS_ERR(dir)) { pr_err("Lookup of %s failed\n", unit); diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index fec87c9030ab..fffd144d509e 
100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -56,11 +56,8 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, udata, cq->queue->buf, cq->queue->buf_size, &cq->queue->ip); - if (err) { - vfree(cq->queue->buf); - kfree(cq->queue); + if (err) return err; - } cq->is_user = uresp; diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 57a5ff3d1992..1008858f78e2 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -290,6 +290,8 @@ static const struct xpad_device { { 0x1038, 0x1430, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x1038, 0x1431, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x10f5, 0x7005, "Turtle Beach Recon Controller", 0, XTYPE_XBOXONE }, + { 0x10f5, 0x7008, "Turtle Beach Recon Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, + { 0x10f5, 0x7073, "Turtle Beach Stealth Ultra Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x11c9, 0x55f0, "Nacon GC-100XF", 0, XTYPE_XBOX360 }, { 0x11ff, 0x0511, "PXN V900", 0, XTYPE_XBOX360 }, { 0x1209, 0x2882, "Ardwiino Controller", 0, XTYPE_XBOX360 }, @@ -354,6 +356,7 @@ static const struct xpad_device { { 0x1ee9, 0x1590, "ZOTAC Gaming Zone", 0, XTYPE_XBOX360 }, { 0x20d6, 0x2001, "BDA Xbox Series X Wired Controller", 0, XTYPE_XBOXONE }, { 0x20d6, 0x2009, "PowerA Enhanced Wired Controller for Xbox Series X|S", 0, XTYPE_XBOXONE }, + { 0x20d6, 0x2064, "PowerA Wired Controller for Xbox", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x20d6, 0x281f, "PowerA Wired Controller For Xbox 360", 0, XTYPE_XBOX360 }, { 0x20d6, 0x400b, "PowerA FUSION Pro 4 Wired Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x20d6, 0x890b, "PowerA MOGA XP-Ultra Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, diff --git a/drivers/input/rmi4/rmi_f34.c b/drivers/input/rmi4/rmi_f34.c index d760af4cc12e..f1947f03b06a 100644 --- a/drivers/input/rmi4/rmi_f34.c +++ 
b/drivers/input/rmi4/rmi_f34.c @@ -4,6 +4,7 @@ * Copyright (C) 2016 Zodiac Inflight Innovations */ +#include "linux/device.h" #include <linux/kernel.h> #include <linux/rmi.h> #include <linux/firmware.h> @@ -289,39 +290,30 @@ static int rmi_f34_update_firmware(struct f34_data *f34, return rmi_f34_flash_firmware(f34, syn_fw); } -static int rmi_f34_status(struct rmi_function *fn) -{ - struct f34_data *f34 = dev_get_drvdata(&fn->dev); - - /* - * The status is the percentage complete, or once complete, - * zero for success or a negative return code. - */ - return f34->update_status; -} - static ssize_t rmi_driver_bootloader_id_show(struct device *dev, struct device_attribute *dattr, char *buf) { struct rmi_driver_data *data = dev_get_drvdata(dev); - struct rmi_function *fn = data->f34_container; + struct rmi_function *fn; struct f34_data *f34; - if (fn) { - f34 = dev_get_drvdata(&fn->dev); - - if (f34->bl_version == 5) - return sysfs_emit(buf, "%c%c\n", - f34->bootloader_id[0], - f34->bootloader_id[1]); - else - return sysfs_emit(buf, "V%d.%d\n", - f34->bootloader_id[1], - f34->bootloader_id[0]); - } + fn = data->f34_container; + if (!fn) + return -ENODEV; - return 0; + f34 = dev_get_drvdata(&fn->dev); + if (!f34) + return -ENODEV; + + if (f34->bl_version == 5) + return sysfs_emit(buf, "%c%c\n", + f34->bootloader_id[0], + f34->bootloader_id[1]); + else + return sysfs_emit(buf, "V%d.%d\n", + f34->bootloader_id[1], + f34->bootloader_id[0]); } static DEVICE_ATTR(bootloader_id, 0444, rmi_driver_bootloader_id_show, NULL); @@ -334,13 +326,16 @@ static ssize_t rmi_driver_configuration_id_show(struct device *dev, struct rmi_function *fn = data->f34_container; struct f34_data *f34; - if (fn) { - f34 = dev_get_drvdata(&fn->dev); + fn = data->f34_container; + if (!fn) + return -ENODEV; + + f34 = dev_get_drvdata(&fn->dev); + if (!f34) + return -ENODEV; - return sysfs_emit(buf, "%s\n", f34->configuration_id); - } - return 0; + return sysfs_emit(buf, "%s\n", f34->configuration_id); } 
static DEVICE_ATTR(configuration_id, 0444, @@ -356,10 +351,14 @@ static int rmi_firmware_update(struct rmi_driver_data *data, if (!data->f34_container) { dev_warn(dev, "%s: No F34 present!\n", __func__); - return -EINVAL; + return -ENODEV; } f34 = dev_get_drvdata(&data->f34_container->dev); + if (!f34) { + dev_warn(dev, "%s: No valid F34 present!\n", __func__); + return -ENODEV; + } if (f34->bl_version >= 7) { if (data->pdt_props & HAS_BSR) { @@ -485,10 +484,18 @@ static ssize_t rmi_driver_update_fw_status_show(struct device *dev, char *buf) { struct rmi_driver_data *data = dev_get_drvdata(dev); - int update_status = 0; + struct f34_data *f34; + int update_status = -ENODEV; - if (data->f34_container) - update_status = rmi_f34_status(data->f34_container); + /* + * The status is the percentage complete, or once complete, + * zero for success or a negative return code. + */ + if (data->f34_container) { + f34 = dev_get_drvdata(&data->f34_container->dev); + if (f34) + update_status = f34->update_status; + } return sysfs_emit(buf, "%d\n", update_status); } @@ -508,33 +515,21 @@ static const struct attribute_group rmi_firmware_attr_group = { .attrs = rmi_firmware_attrs, }; -static int rmi_f34_probe(struct rmi_function *fn) +static int rmi_f34v5_probe(struct f34_data *f34) { - struct f34_data *f34; - unsigned char f34_queries[9]; + struct rmi_function *fn = f34->fn; + u8 f34_queries[9]; bool has_config_id; - u8 version = fn->fd.function_version; - int ret; - - f34 = devm_kzalloc(&fn->dev, sizeof(struct f34_data), GFP_KERNEL); - if (!f34) - return -ENOMEM; - - f34->fn = fn; - dev_set_drvdata(&fn->dev, f34); - - /* v5 code only supported version 0, try V7 probe */ - if (version > 0) - return rmi_f34v7_probe(f34); + int error; f34->bl_version = 5; - ret = rmi_read_block(fn->rmi_dev, fn->fd.query_base_addr, - f34_queries, sizeof(f34_queries)); - if (ret) { + error = rmi_read_block(fn->rmi_dev, fn->fd.query_base_addr, + f34_queries, sizeof(f34_queries)); + if (error) { 
dev_err(&fn->dev, "%s: Failed to query properties\n", __func__); - return ret; + return error; } snprintf(f34->bootloader_id, sizeof(f34->bootloader_id), @@ -560,11 +555,11 @@ static int rmi_f34_probe(struct rmi_function *fn) f34->v5.config_blocks); if (has_config_id) { - ret = rmi_read_block(fn->rmi_dev, fn->fd.control_base_addr, - f34_queries, sizeof(f34_queries)); - if (ret) { + error = rmi_read_block(fn->rmi_dev, fn->fd.control_base_addr, + f34_queries, sizeof(f34_queries)); + if (error) { dev_err(&fn->dev, "Failed to read F34 config ID\n"); - return ret; + return error; } snprintf(f34->configuration_id, sizeof(f34->configuration_id), @@ -573,12 +568,34 @@ static int rmi_f34_probe(struct rmi_function *fn) f34_queries[2], f34_queries[3]); rmi_dbg(RMI_DEBUG_FN, &fn->dev, "Configuration ID: %s\n", - f34->configuration_id); + f34->configuration_id); } return 0; } +static int rmi_f34_probe(struct rmi_function *fn) +{ + struct f34_data *f34; + u8 version = fn->fd.function_version; + int error; + + f34 = devm_kzalloc(&fn->dev, sizeof(struct f34_data), GFP_KERNEL); + if (!f34) + return -ENOMEM; + + f34->fn = fn; + + /* v5 code only supported version 0 */ + error = version == 0 ? 
rmi_f34v5_probe(f34) : rmi_f34v7_probe(f34); + if (error) + return error; + + dev_set_drvdata(&fn->dev, f34); + + return 0; +} + int rmi_f34_create_sysfs(struct rmi_device *rmi_dev) { return sysfs_create_group(&rmi_dev->dev.kobj, &rmi_firmware_attr_group); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 4f91a740c15f..9d728800a862 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3366,10 +3366,12 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain, int ret; for_each_group_device(group, device) { - ret = domain->ops->set_dev_pasid(domain, device->dev, - pasid, old); - if (ret) - goto err_revert; + if (device->dev->iommu->max_pasids > 0) { + ret = domain->ops->set_dev_pasid(domain, device->dev, + pasid, old); + if (ret) + goto err_revert; + } } return 0; @@ -3379,15 +3381,18 @@ err_revert: for_each_group_device(group, device) { if (device == last_gdev) break; - /* - * If no old domain, undo the succeeded devices/pasid. - * Otherwise, rollback the succeeded devices/pasid to the old - * domain. And it is a driver bug to fail attaching with a - * previously good domain. - */ - if (!old || WARN_ON(old->ops->set_dev_pasid(old, device->dev, + if (device->dev->iommu->max_pasids > 0) { + /* + * If no old domain, undo the succeeded devices/pasid. + * Otherwise, rollback the succeeded devices/pasid to + * the old domain. And it is a driver bug to fail + * attaching with a previously good domain. 
+ */ + if (!old || + WARN_ON(old->ops->set_dev_pasid(old, device->dev, pasid, domain))) - iommu_remove_dev_pasid(device->dev, pasid, domain); + iommu_remove_dev_pasid(device->dev, pasid, domain); + } } return ret; } @@ -3398,8 +3403,10 @@ static void __iommu_remove_group_pasid(struct iommu_group *group, { struct group_device *device; - for_each_group_device(group, device) - iommu_remove_dev_pasid(device->dev, pasid, domain); + for_each_group_device(group, device) { + if (device->dev->iommu->max_pasids > 0) + iommu_remove_dev_pasid(device->dev, pasid, domain); + } } /* @@ -3440,7 +3447,13 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, mutex_lock(&group->mutex); for_each_group_device(group, device) { - if (pasid >= device->dev->iommu->max_pasids) { + /* + * Skip PASID validation for devices without PASID support + * (max_pasids = 0). These devices cannot issue transactions + * with PASID, so they don't affect group's PASID usage. + */ + if ((device->dev->iommu->max_pasids > 0) && + (pasid >= device->dev->iommu->max_pasids)) { ret = -EINVAL; goto out_unlock; } diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index dc98c39d2b20..cc6a6c1585d2 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -252,7 +252,7 @@ static void __init gicv2m_teardown(void) static struct msi_parent_ops gicv2m_msi_parent_ops = { .supported_flags = GICV2M_MSI_FLAGS_SUPPORTED, .required_flags = GICV2M_MSI_FLAGS_REQUIRED, - .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, + .chip_flags = MSI_CHIP_FLAG_SET_EOI, .bus_select_token = DOMAIN_BUS_NEXUS, .bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI, .prefix = "GICv2m-", diff --git a/drivers/irqchip/irq-gic-v3-its-msi-parent.c b/drivers/irqchip/irq-gic-v3-its-msi-parent.c index bdb04c808148..c5a7eb1c0419 100644 --- a/drivers/irqchip/irq-gic-v3-its-msi-parent.c +++ b/drivers/irqchip/irq-gic-v3-its-msi-parent.c @@ -203,7 +203,7 @@ static bool 
its_init_dev_msi_info(struct device *dev, struct irq_domain *domain, const struct msi_parent_ops gic_v3_its_msi_parent_ops = { .supported_flags = ITS_MSI_FLAGS_SUPPORTED, .required_flags = ITS_MSI_FLAGS_REQUIRED, - .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, + .chip_flags = MSI_CHIP_FLAG_SET_EOI, .bus_select_token = DOMAIN_BUS_NEXUS, .bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI, .prefix = "ITS-", diff --git a/drivers/irqchip/irq-gic-v3-mbi.c b/drivers/irqchip/irq-gic-v3-mbi.c index 34e9ca77a8c3..647b18e24e0c 100644 --- a/drivers/irqchip/irq-gic-v3-mbi.c +++ b/drivers/irqchip/irq-gic-v3-mbi.c @@ -197,7 +197,7 @@ static bool mbi_init_dev_msi_info(struct device *dev, struct irq_domain *domain, static const struct msi_parent_ops gic_v3_mbi_msi_parent_ops = { .supported_flags = MBI_MSI_FLAGS_SUPPORTED, .required_flags = MBI_MSI_FLAGS_REQUIRED, - .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, + .chip_flags = MSI_CHIP_FLAG_SET_EOI, .bus_select_token = DOMAIN_BUS_NEXUS, .bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI, .prefix = "MBI-", diff --git a/drivers/irqchip/irq-mvebu-gicp.c b/drivers/irqchip/irq-mvebu-gicp.c index d67f93f6d750..60b976286636 100644 --- a/drivers/irqchip/irq-mvebu-gicp.c +++ b/drivers/irqchip/irq-mvebu-gicp.c @@ -161,7 +161,7 @@ static const struct irq_domain_ops gicp_domain_ops = { static const struct msi_parent_ops gicp_msi_parent_ops = { .supported_flags = GICP_MSI_FLAGS_SUPPORTED, .required_flags = GICP_MSI_FLAGS_REQUIRED, - .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, + .chip_flags = MSI_CHIP_FLAG_SET_EOI, .bus_select_token = DOMAIN_BUS_GENERIC_MSI, .bus_select_mask = MATCH_PLATFORM_MSI, .prefix = "GICP-", diff --git a/drivers/irqchip/irq-mvebu-odmi.c b/drivers/irqchip/irq-mvebu-odmi.c index 28f7e81df94f..54f6f0811573 100644 --- a/drivers/irqchip/irq-mvebu-odmi.c +++ b/drivers/irqchip/irq-mvebu-odmi.c @@ -157,7 +157,7 @@ static const struct irq_domain_ops odmi_domain_ops = { static 
const struct msi_parent_ops odmi_msi_parent_ops = { .supported_flags = ODMI_MSI_FLAGS_SUPPORTED, .required_flags = ODMI_MSI_FLAGS_REQUIRED, - .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, + .chip_flags = MSI_CHIP_FLAG_SET_EOI, .bus_select_token = DOMAIN_BUS_GENERIC_MSI, .bus_select_mask = MATCH_PLATFORM_MSI, .prefix = "ODMI-", diff --git a/drivers/irqchip/irq-riscv-imsic-state.c b/drivers/irqchip/irq-riscv-imsic-state.c index bdf5cd2037f2..62f76950a113 100644 --- a/drivers/irqchip/irq-riscv-imsic-state.c +++ b/drivers/irqchip/irq-riscv-imsic-state.c @@ -208,17 +208,17 @@ skip: } #ifdef CONFIG_SMP -static void __imsic_local_timer_start(struct imsic_local_priv *lpriv) +static void __imsic_local_timer_start(struct imsic_local_priv *lpriv, unsigned int cpu) { lockdep_assert_held(&lpriv->lock); if (!timer_pending(&lpriv->timer)) { lpriv->timer.expires = jiffies + 1; - add_timer_on(&lpriv->timer, smp_processor_id()); + add_timer_on(&lpriv->timer, cpu); } } #else -static inline void __imsic_local_timer_start(struct imsic_local_priv *lpriv) +static inline void __imsic_local_timer_start(struct imsic_local_priv *lpriv, unsigned int cpu) { } #endif @@ -233,7 +233,7 @@ void imsic_local_sync_all(bool force_all) if (force_all) bitmap_fill(lpriv->dirty_bitmap, imsic->global.nr_ids + 1); if (!__imsic_local_sync(lpriv)) - __imsic_local_timer_start(lpriv); + __imsic_local_timer_start(lpriv, smp_processor_id()); raw_spin_unlock_irqrestore(&lpriv->lock, flags); } @@ -278,7 +278,7 @@ static void __imsic_remote_sync(struct imsic_local_priv *lpriv, unsigned int cpu return; } - __imsic_local_timer_start(lpriv); + __imsic_local_timer_start(lpriv, cpu); } } #else diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 813b38aec3e4..c40db9c161c1 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -293,8 +293,7 @@ static void __write_super(struct cache_sb *sb, struct cache_sb_disk *out, bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META; 
bio->bi_iter.bi_sector = SB_SECTOR; - __bio_add_page(bio, virt_to_page(out), SB_SIZE, - offset_in_page(out)); + bio_add_virt_nofail(bio, out, SB_SIZE); out->offset = cpu_to_le64(sb->offset); diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index f0b5a6931161..d098e75e3461 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1364,7 +1364,7 @@ static void use_bio(struct dm_buffer *b, enum req_op op, sector_t sector, ptr = (char *)b->data + offset; len = n_sectors << SECTOR_SHIFT; - __bio_add_page(bio, virt_to_page(ptr), len, offset_in_page(ptr)); + bio_add_virt_nofail(bio, ptr, len); submit_bio(bio); } diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index cc3d3897ef42..1f626066e8cc 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2557,14 +2557,8 @@ static void dm_integrity_inline_recheck(struct work_struct *w) char *mem; outgoing_bio = bio_alloc_bioset(ic->dev->bdev, 1, REQ_OP_READ, GFP_NOIO, &ic->recheck_bios); - - r = bio_add_page(outgoing_bio, virt_to_page(outgoing_data), ic->sectors_per_block << SECTOR_SHIFT, 0); - if (unlikely(r != (ic->sectors_per_block << SECTOR_SHIFT))) { - bio_put(outgoing_bio); - bio->bi_status = BLK_STS_RESOURCE; - bio_endio(bio); - return; - } + bio_add_virt_nofail(outgoing_bio, outgoing_data, + ic->sectors_per_block << SECTOR_SHIFT); bip = bio_integrity_alloc(outgoing_bio, GFP_NOIO, 1); if (IS_ERR(bip)) { @@ -3211,7 +3205,8 @@ next_chunk: bio = bio_alloc_bioset(ic->dev->bdev, 1, REQ_OP_READ, GFP_NOIO, &ic->recalc_bios); bio->bi_iter.bi_sector = ic->start + SB_SECTORS + range.logical_sector; - __bio_add_page(bio, virt_to_page(recalc_buffer), range.n_sectors << SECTOR_SHIFT, offset_in_page(recalc_buffer)); + bio_add_virt_nofail(bio, recalc_buffer, + range.n_sectors << SECTOR_SHIFT); r = submit_bio_wait(bio); bio_put(bio); if (unlikely(r)) { @@ -3228,7 +3223,8 @@ next_chunk: bio = bio_alloc_bioset(ic->dev->bdev, 1, REQ_OP_WRITE, GFP_NOIO, &ic->recalc_bios); 
bio->bi_iter.bi_sector = ic->start + SB_SECTORS + range.logical_sector; - __bio_add_page(bio, virt_to_page(recalc_buffer), range.n_sectors << SECTOR_SHIFT, offset_in_page(recalc_buffer)); + bio_add_virt_nofail(bio, recalc_buffer, + range.n_sectors << SECTOR_SHIFT); bip = bio_integrity_alloc(bio, GFP_NOIO, 1); if (unlikely(IS_ERR(bip))) { diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 6adc55fd90d3..127138c61be5 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -14,6 +14,7 @@ #include "raid5.h" #include "raid10.h" #include "md-bitmap.h" +#include "dm-core.h" #include <linux/device-mapper.h> @@ -3308,6 +3309,7 @@ size_check: /* Disable/enable discard support on raid set. */ configure_discard_support(rs); + rs->md.dm_gendisk = ti->table->md->disk; mddev_unlock(&rs->md); return 0; @@ -3327,6 +3329,7 @@ static void raid_dtr(struct dm_target *ti) mddev_lock_nointr(&rs->md); md_stop(&rs->md); + rs->md.dm_gendisk = NULL; mddev_unlock(&rs->md); if (work_pending(&rs->md.event_work)) diff --git a/drivers/md/md.c b/drivers/md/md.c index 9daa78c5fe33..0fde115e921f 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -111,32 +111,48 @@ static void md_wakeup_thread_directly(struct md_thread __rcu *thread); /* Default safemode delay: 200 msec */ #define DEFAULT_SAFEMODE_DELAY ((200 * HZ)/1000 +1) /* - * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' - * is 1000 KB/sec, so the extra system load does not show up that much. - * Increase it if you want to have more _guaranteed_ speed. Note that - * the RAID driver will use the maximum available bandwidth if the IO - * subsystem is idle. There is also an 'absolute maximum' reconstruction - * speed limit - in case reconstruction slows down your system despite - * idle IO detection. + * Current RAID-1,4,5,6,10 parallel reconstruction 'guaranteed speed limit' + * is sysctl_speed_limit_min, 1000 KB/sec by default, so the extra system load + * does not show up that much. 
Increase it if you want to have more guaranteed + * speed. Note that the RAID driver will use the maximum bandwidth + * sysctl_speed_limit_max, 200 MB/sec by default, if the IO subsystem is idle. * - * you can change it via /proc/sys/dev/raid/speed_limit_min and _max. - * or /sys/block/mdX/md/sync_speed_{min,max} + * Background sync IO speed control: + * + * - below speed min: + * no limit; + * - above speed min and below speed max: + * a) if mddev is idle, then no limit; + * b) if mddev is busy handling normal IO, then limit inflight sync IO + * to sync_io_depth; + * - above speed max: + * sync IO can't be issued; + * + * Following configurations can be changed via /proc/sys/dev/raid/ for system + * or /sys/block/mdX/md/ for one array. */ - static int sysctl_speed_limit_min = 1000; static int sysctl_speed_limit_max = 200000; -static inline int speed_min(struct mddev *mddev) +static int sysctl_sync_io_depth = 32; + +static int speed_min(struct mddev *mddev) { return mddev->sync_speed_min ? mddev->sync_speed_min : sysctl_speed_limit_min; } -static inline int speed_max(struct mddev *mddev) +static int speed_max(struct mddev *mddev) { return mddev->sync_speed_max ? mddev->sync_speed_max : sysctl_speed_limit_max; } +static int sync_io_depth(struct mddev *mddev) +{ + return mddev->sync_io_depth ? 
+ mddev->sync_io_depth : sysctl_sync_io_depth; +} + static void rdev_uninit_serial(struct md_rdev *rdev) { if (!test_and_clear_bit(CollisionCheck, &rdev->flags)) @@ -293,14 +309,21 @@ static const struct ctl_table raid_table[] = { .procname = "speed_limit_min", .data = &sysctl_speed_limit_min, .maxlen = sizeof(int), - .mode = S_IRUGO|S_IWUSR, + .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "speed_limit_max", .data = &sysctl_speed_limit_max, .maxlen = sizeof(int), - .mode = S_IRUGO|S_IWUSR, + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "sync_io_depth", + .data = &sysctl_sync_io_depth, + .maxlen = sizeof(int), + .mode = 0644, .proc_handler = proc_dointvec, }, }; @@ -5091,7 +5114,7 @@ static ssize_t sync_min_show(struct mddev *mddev, char *page) { return sprintf(page, "%d (%s)\n", speed_min(mddev), - mddev->sync_speed_min ? "local": "system"); + mddev->sync_speed_min ? "local" : "system"); } static ssize_t @@ -5100,7 +5123,7 @@ sync_min_store(struct mddev *mddev, const char *buf, size_t len) unsigned int min; int rv; - if (strncmp(buf, "system", 6)==0) { + if (strncmp(buf, "system", 6) == 0) { min = 0; } else { rv = kstrtouint(buf, 10, &min); @@ -5120,7 +5143,7 @@ static ssize_t sync_max_show(struct mddev *mddev, char *page) { return sprintf(page, "%d (%s)\n", speed_max(mddev), - mddev->sync_speed_max ? "local": "system"); + mddev->sync_speed_max ? "local" : "system"); } static ssize_t @@ -5129,7 +5152,7 @@ sync_max_store(struct mddev *mddev, const char *buf, size_t len) unsigned int max; int rv; - if (strncmp(buf, "system", 6)==0) { + if (strncmp(buf, "system", 6) == 0) { max = 0; } else { rv = kstrtouint(buf, 10, &max); @@ -5146,6 +5169,35 @@ static struct md_sysfs_entry md_sync_max = __ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store); static ssize_t +sync_io_depth_show(struct mddev *mddev, char *page) +{ + return sprintf(page, "%d (%s)\n", sync_io_depth(mddev), + mddev->sync_io_depth ? 
"local" : "system"); +} + +static ssize_t +sync_io_depth_store(struct mddev *mddev, const char *buf, size_t len) +{ + unsigned int max; + int rv; + + if (strncmp(buf, "system", 6) == 0) { + max = 0; + } else { + rv = kstrtouint(buf, 10, &max); + if (rv < 0) + return rv; + if (max == 0) + return -EINVAL; + } + mddev->sync_io_depth = max; + return len; +} + +static struct md_sysfs_entry md_sync_io_depth = +__ATTR_RW(sync_io_depth); + +static ssize_t degraded_show(struct mddev *mddev, char *page) { return sprintf(page, "%d\n", mddev->degraded); @@ -5671,6 +5723,7 @@ static struct attribute *md_redundancy_attrs[] = { &md_mismatches.attr, &md_sync_min.attr, &md_sync_max.attr, + &md_sync_io_depth.attr, &md_sync_speed.attr, &md_sync_force_parallel.attr, &md_sync_completed.attr, @@ -8572,50 +8625,55 @@ void md_cluster_stop(struct mddev *mddev) put_cluster_ops(mddev); } -static int is_mddev_idle(struct mddev *mddev, int init) +static bool is_rdev_holder_idle(struct md_rdev *rdev, bool init) { + unsigned long last_events = rdev->last_events; + + if (!bdev_is_partition(rdev->bdev)) + return true; + + /* + * If rdev is partition, and user doesn't issue IO to the array, the + * array is still not idle if user issues IO to other partitions. + */ + rdev->last_events = part_stat_read_accum(rdev->bdev->bd_disk->part0, + sectors) - + part_stat_read_accum(rdev->bdev, sectors); + + return init || rdev->last_events <= last_events; +} + +/* + * mddev is idle if following conditions are matched since last check: + * 1) mddev doesn't have normal IO completed; + * 2) mddev doesn't have inflight normal IO; + * 3) if any member disk is partition, and other partitions don't have IO + * completed; + * + * Noted this checking rely on IO accounting is enabled. 
+ */ +static bool is_mddev_idle(struct mddev *mddev, int init) +{ + unsigned long last_events = mddev->normal_io_events; + struct gendisk *disk; struct md_rdev *rdev; - int idle; - int curr_events; + bool idle = true; - idle = 1; - rcu_read_lock(); - rdev_for_each_rcu(rdev, mddev) { - struct gendisk *disk = rdev->bdev->bd_disk; + disk = mddev_is_dm(mddev) ? mddev->dm_gendisk : mddev->gendisk; + if (!disk) + return true; - if (!init && !blk_queue_io_stat(disk->queue)) - continue; + mddev->normal_io_events = part_stat_read_accum(disk->part0, sectors); + if (!init && (mddev->normal_io_events > last_events || + bdev_count_inflight(disk->part0))) + idle = false; - curr_events = (int)part_stat_read_accum(disk->part0, sectors) - - atomic_read(&disk->sync_io); - /* sync IO will cause sync_io to increase before the disk_stats - * as sync_io is counted when a request starts, and - * disk_stats is counted when it completes. - * So resync activity will cause curr_events to be smaller than - * when there was no such activity. - * non-sync IO will cause disk_stat to increase without - * increasing sync_io so curr_events will (eventually) - * be larger than it was before. Once it becomes - * substantially larger, the test below will cause - * the array to appear non-idle, and resync will slow - * down. - * If there is a lot of outstanding resync activity when - * we set last_event to curr_events, then all that activity - * completing might cause the array to appear non-idle - * and resync will be slowed down even though there might - * not have been non-resync activity. This will only - * happen once though. 'last_events' will soon reflect - * the state where there is little or no outstanding - * resync requests, and further resync activity will - * always make curr_events less than last_events. 
- * - */ - if (init || curr_events - rdev->last_events > 64) { - rdev->last_events = curr_events; - idle = 0; - } - } + rcu_read_lock(); + rdev_for_each_rcu(rdev, mddev) + if (!is_rdev_holder_idle(rdev, init)) + idle = false; rcu_read_unlock(); + return idle; } @@ -8927,6 +8985,23 @@ static sector_t md_sync_position(struct mddev *mddev, enum sync_action action) } } +static bool sync_io_within_limit(struct mddev *mddev) +{ + int io_sectors; + + /* + * For raid456, sync IO is stripe(4k) per IO, for other levels, it's + * RESYNC_PAGES(64k) per IO. + */ + if (mddev->level == 4 || mddev->level == 5 || mddev->level == 6) + io_sectors = 8; + else + io_sectors = 128; + + return atomic_read(&mddev->recovery_active) < + io_sectors * sync_io_depth(mddev); +} + #define SYNC_MARKS 10 #define SYNC_MARK_STEP (3*HZ) #define UPDATE_FREQUENCY (5*60*HZ) @@ -9195,7 +9270,8 @@ void md_do_sync(struct md_thread *thread) msleep(500); goto repeat; } - if (!is_mddev_idle(mddev, 0)) { + if (!sync_io_within_limit(mddev) && + !is_mddev_idle(mddev, 0)) { /* * Give other IO more of a chance. * The faster the devices, the less we wait. 
diff --git a/drivers/md/md.h b/drivers/md/md.h index 1cf00a04bcdd..d45a9e6ead80 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -132,7 +132,7 @@ struct md_rdev { sector_t sectors; /* Device size (in 512bytes sectors) */ struct mddev *mddev; /* RAID array if running */ - int last_events; /* IO event timestamp */ + unsigned long last_events; /* IO event timestamp */ /* * If meta_bdev is non-NULL, it means that a separate device is @@ -404,7 +404,8 @@ struct mddev { * are happening, so run/ * takeover/stop are not safe */ - struct gendisk *gendisk; + struct gendisk *gendisk; /* mdraid gendisk */ + struct gendisk *dm_gendisk; /* dm-raid gendisk */ struct kobject kobj; int hold_active; @@ -483,6 +484,7 @@ struct mddev { /* if zero, use the system-wide default */ int sync_speed_min; int sync_speed_max; + int sync_io_depth; /* resync even though the same disks are shared among md-devices */ int parallel_resync; @@ -518,6 +520,7 @@ struct mddev { * adding a spare */ + unsigned long normal_io_events; /* IO event timestamp */ atomic_t recovery_active; /* blocks scheduled, but not written */ wait_queue_head_t recovery_wait; sector_t recovery_cp; @@ -714,17 +717,6 @@ static inline int mddev_trylock(struct mddev *mddev) } extern void mddev_unlock(struct mddev *mddev); -static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors) -{ - if (blk_queue_io_stat(bdev->bd_disk->queue)) - atomic_add(nr_sectors, &bdev->bd_disk->sync_io); -} - -static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors) -{ - md_sync_acct(bio->bi_bdev, nr_sectors); -} - struct md_personality { struct md_submodule_head head; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index de9bccbe7337..657d481525be 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2382,7 +2382,6 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio) wbio->bi_end_io = end_sync_write; atomic_inc(&r1_bio->remaining); - 
md_sync_acct(conf->mirrors[i].rdev->bdev, bio_sectors(wbio)); submit_bio_noacct(wbio); } @@ -3055,7 +3054,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, bio = r1_bio->bios[i]; if (bio->bi_end_io == end_sync_read) { read_targets--; - md_sync_acct_bio(bio, nr_sectors); if (read_targets == 1) bio->bi_opf &= ~MD_FAILFAST; submit_bio_noacct(bio); @@ -3064,7 +3062,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, } else { atomic_set(&r1_bio->remaining, 1); bio = r1_bio->bios[r1_bio->read_disk]; - md_sync_acct_bio(bio, nr_sectors); if (read_targets == 1) bio->bi_opf &= ~MD_FAILFAST; submit_bio_noacct(bio); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index ba32bac975b8..dce06bf65016 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2426,7 +2426,6 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) atomic_inc(&conf->mirrors[d].rdev->nr_pending); atomic_inc(&r10_bio->remaining); - md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(tbio)); if (test_bit(FailFast, &conf->mirrors[d].rdev->flags)) tbio->bi_opf |= MD_FAILFAST; @@ -2448,8 +2447,6 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) bio_copy_data(tbio, fbio); d = r10_bio->devs[i].devnum; atomic_inc(&r10_bio->remaining); - md_sync_acct(conf->mirrors[d].replacement->bdev, - bio_sectors(tbio)); submit_bio_noacct(tbio); } @@ -2583,13 +2580,10 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio) d = r10_bio->devs[1].devnum; if (wbio->bi_end_io) { atomic_inc(&conf->mirrors[d].rdev->nr_pending); - md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio)); submit_bio_noacct(wbio); } if (wbio2) { atomic_inc(&conf->mirrors[d].replacement->nr_pending); - md_sync_acct(conf->mirrors[d].replacement->bdev, - bio_sectors(wbio2)); submit_bio_noacct(wbio2); } } @@ -3757,7 +3751,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, 
r10_bio->sectors = nr_sectors; if (bio->bi_end_io == end_sync_read) { - md_sync_acct_bio(bio, nr_sectors); bio->bi_status = 0; submit_bio_noacct(bio); } @@ -4880,7 +4873,6 @@ read_more: r10_bio->sectors = nr_sectors; /* Now submit the read */ - md_sync_acct_bio(read_bio, r10_bio->sectors); atomic_inc(&r10_bio->remaining); read_bio->bi_next = NULL; submit_bio_noacct(read_bio); @@ -4940,7 +4932,6 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio) continue; atomic_inc(&rdev->nr_pending); - md_sync_acct_bio(b, r10_bio->sectors); atomic_inc(&r10_bio->remaining); b->bi_next = NULL; submit_bio_noacct(b); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 6389383166c0..ca5b0e8ba707 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1240,10 +1240,6 @@ again: } if (rdev) { - if (s->syncing || s->expanding || s->expanded - || s->replacing) - md_sync_acct(rdev->bdev, RAID5_STRIPE_SECTORS(conf)); - set_bit(STRIPE_IO_STARTED, &sh->state); bio_init(bi, rdev->bdev, &dev->vec, 1, op | op_flags); @@ -1300,10 +1296,6 @@ again: submit_bio_noacct(bi); } if (rrdev) { - if (s->syncing || s->expanding || s->expanded - || s->replacing) - md_sync_acct(rrdev->bdev, RAID5_STRIPE_SECTORS(conf)); - set_bit(STRIPE_IO_STARTED, &sh->state); bio_init(rbi, rrdev->bdev, &dev->rvec, 1, op | op_flags); diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c index 09b9ab15e499..a20d03fdd6a9 100644 --- a/drivers/mmc/host/sdhci-of-dwcmshc.c +++ b/drivers/mmc/host/sdhci-of-dwcmshc.c @@ -17,6 +17,7 @@ #include <linux/module.h> #include <linux/of.h> #include <linux/platform_device.h> +#include <linux/pm_domain.h> #include <linux/pm_runtime.h> #include <linux/reset.h> #include <linux/sizes.h> @@ -745,6 +746,29 @@ static void dwcmshc_rk35xx_postinit(struct sdhci_host *host, struct dwcmshc_priv } } +static void dwcmshc_rk3576_postinit(struct sdhci_host *host, struct dwcmshc_priv *dwc_priv) +{ + struct device *dev = 
mmc_dev(host->mmc); + int ret; + + /* + * This works around the design of the RK3576's power domains, which + * makes the PD_NVM power domain, which the sdhci controller on the + * RK3576 is in, never come back the same way once it's run-time + * suspended once. This can happen during early kernel boot if no driver + * is using either PD_NVM or its child power domain PD_SDGMAC for a + * short moment, leading to it being turned off to save power. By + * keeping it on, sdhci suspending won't lead to PD_NVM becoming a + * candidate for getting turned off. + */ + ret = dev_pm_genpd_rpm_always_on(dev, true); + if (ret && ret != -EOPNOTSUPP) + dev_warn(dev, "failed to set PD rpm always on, SoC may hang later: %pe\n", + ERR_PTR(ret)); + + dwcmshc_rk35xx_postinit(host, dwc_priv); +} + static int th1520_execute_tuning(struct sdhci_host *host, u32 opcode) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); @@ -1176,6 +1200,18 @@ static const struct dwcmshc_pltfm_data sdhci_dwcmshc_rk35xx_pdata = { .postinit = dwcmshc_rk35xx_postinit, }; +static const struct dwcmshc_pltfm_data sdhci_dwcmshc_rk3576_pdata = { + .pdata = { + .ops = &sdhci_dwcmshc_rk35xx_ops, + .quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN | + SDHCI_QUIRK_BROKEN_TIMEOUT_VAL, + .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN | + SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN, + }, + .init = dwcmshc_rk35xx_init, + .postinit = dwcmshc_rk3576_postinit, +}; + static const struct dwcmshc_pltfm_data sdhci_dwcmshc_th1520_pdata = { .pdata = { .ops = &sdhci_dwcmshc_th1520_ops, @@ -1275,6 +1311,10 @@ static const struct of_device_id sdhci_dwcmshc_dt_ids[] = { .data = &sdhci_dwcmshc_rk35xx_pdata, }, { + .compatible = "rockchip,rk3576-dwcmshc", + .data = &sdhci_dwcmshc_rk3576_pdata, + }, + { .compatible = "rockchip,rk3568-dwcmshc", .data = &sdhci_dwcmshc_rk35xx_pdata, }, diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c index f75c31815ab0..73385ff4c0f3 100644 --- a/drivers/mmc/host/sdhci_am654.c +++ 
b/drivers/mmc/host/sdhci_am654.c @@ -155,6 +155,7 @@ struct sdhci_am654_data { u32 tuning_loop; #define SDHCI_AM654_QUIRK_FORCE_CDTEST BIT(0) +#define SDHCI_AM654_QUIRK_SUPPRESS_V1P8_ENA BIT(1) }; struct window { @@ -166,6 +167,7 @@ struct window { struct sdhci_am654_driver_data { const struct sdhci_pltfm_data *pdata; u32 flags; + u32 quirks; #define IOMUX_PRESENT (1 << 0) #define FREQSEL_2_BIT (1 << 1) #define STRBSEL_4_BIT (1 << 2) @@ -356,6 +358,29 @@ static void sdhci_j721e_4bit_set_clock(struct sdhci_host *host, sdhci_set_clock(host, clock); } +static int sdhci_am654_start_signal_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios) +{ + struct sdhci_host *host = mmc_priv(mmc); + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host); + int ret; + + if ((sdhci_am654->quirks & SDHCI_AM654_QUIRK_SUPPRESS_V1P8_ENA) && + ios->signal_voltage == MMC_SIGNAL_VOLTAGE_180) { + if (!IS_ERR(mmc->supply.vqmmc)) { + ret = mmc_regulator_set_vqmmc(mmc, ios); + if (ret < 0) { + pr_err("%s: Switching to 1.8V signalling voltage failed,\n", + mmc_hostname(mmc)); + return -EIO; + } + } + return 0; + } + + return sdhci_start_signal_voltage_switch(mmc, ios); +} + static u8 sdhci_am654_write_power_on(struct sdhci_host *host, u8 val, int reg) { writeb(val, host->ioaddr + reg); @@ -650,6 +675,12 @@ static const struct sdhci_am654_driver_data sdhci_j721e_4bit_drvdata = { .flags = IOMUX_PRESENT, }; +static const struct sdhci_am654_driver_data sdhci_am62_4bit_drvdata = { + .pdata = &sdhci_j721e_4bit_pdata, + .flags = IOMUX_PRESENT, + .quirks = SDHCI_AM654_QUIRK_SUPPRESS_V1P8_ENA, +}; + static const struct soc_device_attribute sdhci_am654_devices[] = { { .family = "AM65X", .revision = "SR1.0", @@ -872,7 +903,7 @@ static const struct of_device_id sdhci_am654_of_match[] = { }, { .compatible = "ti,am62-sdhci", - .data = &sdhci_j721e_4bit_drvdata, + .data = &sdhci_am62_4bit_drvdata, }, { /* sentinel */ } }; @@ -906,6 
+937,7 @@ static int sdhci_am654_probe(struct platform_device *pdev) pltfm_host = sdhci_priv(host); sdhci_am654 = sdhci_pltfm_priv(pltfm_host); sdhci_am654->flags = drvdata->flags; + sdhci_am654->quirks = drvdata->quirks; clk_xin = devm_clk_get(dev, "clk_xin"); if (IS_ERR(clk_xin)) { @@ -940,6 +972,7 @@ static int sdhci_am654_probe(struct platform_device *pdev) goto err_pltfm_free; } + host->mmc_host_ops.start_signal_voltage_switch = sdhci_am654_start_signal_voltage_switch; host->mmc_host_ops.execute_tuning = sdhci_am654_execute_tuning; pm_runtime_get_noresume(dev); diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index cf0d51805272..f6921368cd14 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -16,6 +16,7 @@ #include <linux/netdevice.h> #include <linux/pci.h> #include <linux/timer.h> +#include <net/netdev_queues.h> MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Kvaser AB <support@kvaser.com>"); @@ -410,10 +411,13 @@ struct kvaser_pciefd_can { void __iomem *reg_base; struct can_berr_counter bec; u8 cmd_seq; + u8 tx_max_count; + u8 tx_idx; + u8 ack_idx; int err_rep_cnt; - int echo_idx; + unsigned int completed_tx_pkts; + unsigned int completed_tx_bytes; spinlock_t lock; /* Locks sensitive registers (e.g. 
MODE) */ - spinlock_t echo_lock; /* Locks the message echo buffer */ struct timer_list bec_poll_timer; struct completion start_comp, flush_comp; }; @@ -714,6 +718,9 @@ static int kvaser_pciefd_open(struct net_device *netdev) int ret; struct kvaser_pciefd_can *can = netdev_priv(netdev); + can->tx_idx = 0; + can->ack_idx = 0; + ret = open_candev(netdev); if (ret) return ret; @@ -745,21 +752,26 @@ static int kvaser_pciefd_stop(struct net_device *netdev) timer_delete(&can->bec_poll_timer); } can->can.state = CAN_STATE_STOPPED; + netdev_reset_queue(netdev); close_candev(netdev); return ret; } +static unsigned int kvaser_pciefd_tx_avail(const struct kvaser_pciefd_can *can) +{ + return can->tx_max_count - (READ_ONCE(can->tx_idx) - READ_ONCE(can->ack_idx)); +} + static int kvaser_pciefd_prepare_tx_packet(struct kvaser_pciefd_tx_packet *p, - struct kvaser_pciefd_can *can, + struct can_priv *can, u8 seq, struct sk_buff *skb) { struct canfd_frame *cf = (struct canfd_frame *)skb->data; int packet_size; - int seq = can->echo_idx; memset(p, 0, sizeof(*p)); - if (can->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT) + if (can->ctrlmode & CAN_CTRLMODE_ONE_SHOT) p->header[1] |= KVASER_PCIEFD_TPACKET_SMS; if (cf->can_id & CAN_RTR_FLAG) @@ -782,7 +794,7 @@ static int kvaser_pciefd_prepare_tx_packet(struct kvaser_pciefd_tx_packet *p, } else { p->header[1] |= FIELD_PREP(KVASER_PCIEFD_RPACKET_DLC_MASK, - can_get_cc_dlc((struct can_frame *)cf, can->can.ctrlmode)); + can_get_cc_dlc((struct can_frame *)cf, can->ctrlmode)); } p->header[1] |= FIELD_PREP(KVASER_PCIEFD_PACKET_SEQ_MASK, seq); @@ -797,22 +809,24 @@ static netdev_tx_t kvaser_pciefd_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct kvaser_pciefd_can *can = netdev_priv(netdev); - unsigned long irq_flags; struct kvaser_pciefd_tx_packet packet; + unsigned int seq = can->tx_idx & (can->can.echo_skb_max - 1); + unsigned int frame_len; int nr_words; - u8 count; if (can_dev_dropped_skb(netdev, skb)) return NETDEV_TX_OK; + if 
(!netif_subqueue_maybe_stop(netdev, 0, kvaser_pciefd_tx_avail(can), 1, 1)) + return NETDEV_TX_BUSY; - nr_words = kvaser_pciefd_prepare_tx_packet(&packet, can, skb); + nr_words = kvaser_pciefd_prepare_tx_packet(&packet, &can->can, seq, skb); - spin_lock_irqsave(&can->echo_lock, irq_flags); /* Prepare and save echo skb in internal slot */ - can_put_echo_skb(skb, netdev, can->echo_idx, 0); - - /* Move echo index to the next slot */ - can->echo_idx = (can->echo_idx + 1) % can->can.echo_skb_max; + WRITE_ONCE(can->can.echo_skb[seq], NULL); + frame_len = can_skb_get_frame_len(skb); + can_put_echo_skb(skb, netdev, seq, frame_len); + netdev_sent_queue(netdev, frame_len); + WRITE_ONCE(can->tx_idx, can->tx_idx + 1); /* Write header to fifo */ iowrite32(packet.header[0], @@ -836,14 +850,7 @@ static netdev_tx_t kvaser_pciefd_start_xmit(struct sk_buff *skb, KVASER_PCIEFD_KCAN_FIFO_LAST_REG); } - count = FIELD_GET(KVASER_PCIEFD_KCAN_TX_NR_PACKETS_CURRENT_MASK, - ioread32(can->reg_base + KVASER_PCIEFD_KCAN_TX_NR_PACKETS_REG)); - /* No room for a new message, stop the queue until at least one - * successful transmit - */ - if (count >= can->can.echo_skb_max || can->can.echo_skb[can->echo_idx]) - netif_stop_queue(netdev); - spin_unlock_irqrestore(&can->echo_lock, irq_flags); + netif_subqueue_maybe_stop(netdev, 0, kvaser_pciefd_tx_avail(can), 1, 1); return NETDEV_TX_OK; } @@ -970,6 +977,8 @@ static int kvaser_pciefd_setup_can_ctrls(struct kvaser_pciefd *pcie) can->kv_pcie = pcie; can->cmd_seq = 0; can->err_rep_cnt = 0; + can->completed_tx_pkts = 0; + can->completed_tx_bytes = 0; can->bec.txerr = 0; can->bec.rxerr = 0; @@ -983,11 +992,10 @@ static int kvaser_pciefd_setup_can_ctrls(struct kvaser_pciefd *pcie) tx_nr_packets_max = FIELD_GET(KVASER_PCIEFD_KCAN_TX_NR_PACKETS_MAX_MASK, ioread32(can->reg_base + KVASER_PCIEFD_KCAN_TX_NR_PACKETS_REG)); + can->tx_max_count = min(KVASER_PCIEFD_CAN_TX_MAX_COUNT, tx_nr_packets_max - 1); can->can.clock.freq = pcie->freq; - can->can.echo_skb_max = 
min(KVASER_PCIEFD_CAN_TX_MAX_COUNT, tx_nr_packets_max - 1); - can->echo_idx = 0; - spin_lock_init(&can->echo_lock); + can->can.echo_skb_max = roundup_pow_of_two(can->tx_max_count); spin_lock_init(&can->lock); can->can.bittiming_const = &kvaser_pciefd_bittiming_const; @@ -1201,7 +1209,7 @@ static int kvaser_pciefd_handle_data_packet(struct kvaser_pciefd *pcie, skb = alloc_canfd_skb(priv->dev, &cf); if (!skb) { priv->dev->stats.rx_dropped++; - return -ENOMEM; + return 0; } cf->len = can_fd_dlc2len(dlc); @@ -1213,7 +1221,7 @@ static int kvaser_pciefd_handle_data_packet(struct kvaser_pciefd *pcie, skb = alloc_can_skb(priv->dev, (struct can_frame **)&cf); if (!skb) { priv->dev->stats.rx_dropped++; - return -ENOMEM; + return 0; } can_frame_set_cc_len((struct can_frame *)cf, dlc, priv->ctrlmode); } @@ -1231,7 +1239,9 @@ static int kvaser_pciefd_handle_data_packet(struct kvaser_pciefd *pcie, priv->dev->stats.rx_packets++; kvaser_pciefd_set_skb_timestamp(pcie, skb, p->timestamp); - return netif_rx(skb); + netif_rx(skb); + + return 0; } static void kvaser_pciefd_change_state(struct kvaser_pciefd_can *can, @@ -1510,19 +1520,21 @@ static int kvaser_pciefd_handle_ack_packet(struct kvaser_pciefd *pcie, netdev_dbg(can->can.dev, "Packet was flushed\n"); } else { int echo_idx = FIELD_GET(KVASER_PCIEFD_PACKET_SEQ_MASK, p->header[0]); - int len; - u8 count; + unsigned int len, frame_len = 0; struct sk_buff *skb; + if (echo_idx != (can->ack_idx & (can->can.echo_skb_max - 1))) + return 0; skb = can->can.echo_skb[echo_idx]; - if (skb) - kvaser_pciefd_set_skb_timestamp(pcie, skb, p->timestamp); - len = can_get_echo_skb(can->can.dev, echo_idx, NULL); - count = FIELD_GET(KVASER_PCIEFD_KCAN_TX_NR_PACKETS_CURRENT_MASK, - ioread32(can->reg_base + KVASER_PCIEFD_KCAN_TX_NR_PACKETS_REG)); + if (!skb) + return 0; + kvaser_pciefd_set_skb_timestamp(pcie, skb, p->timestamp); + len = can_get_echo_skb(can->can.dev, echo_idx, &frame_len); - if (count < can->can.echo_skb_max && 
netif_queue_stopped(can->can.dev)) - netif_wake_queue(can->can.dev); + /* Pairs with barrier in kvaser_pciefd_start_xmit() */ + smp_store_release(&can->ack_idx, can->ack_idx + 1); + can->completed_tx_pkts++; + can->completed_tx_bytes += frame_len; if (!one_shot_fail) { can->can.dev->stats.tx_bytes += len; @@ -1638,32 +1650,51 @@ static int kvaser_pciefd_read_buffer(struct kvaser_pciefd *pcie, int dma_buf) { int pos = 0; int res = 0; + unsigned int i; do { res = kvaser_pciefd_read_packet(pcie, &pos, dma_buf); } while (!res && pos > 0 && pos < KVASER_PCIEFD_DMA_SIZE); + /* Report ACKs in this buffer to BQL en masse for correct periods */ + for (i = 0; i < pcie->nr_channels; ++i) { + struct kvaser_pciefd_can *can = pcie->can[i]; + + if (!can->completed_tx_pkts) + continue; + netif_subqueue_completed_wake(can->can.dev, 0, + can->completed_tx_pkts, + can->completed_tx_bytes, + kvaser_pciefd_tx_avail(can), 1); + can->completed_tx_pkts = 0; + can->completed_tx_bytes = 0; + } + return res; } -static u32 kvaser_pciefd_receive_irq(struct kvaser_pciefd *pcie) +static void kvaser_pciefd_receive_irq(struct kvaser_pciefd *pcie) { + void __iomem *srb_cmd_reg = KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG; u32 irq = ioread32(KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IRQ_REG); - if (irq & KVASER_PCIEFD_SRB_IRQ_DPD0) + iowrite32(irq, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IRQ_REG); + + if (irq & KVASER_PCIEFD_SRB_IRQ_DPD0) { kvaser_pciefd_read_buffer(pcie, 0); + iowrite32(KVASER_PCIEFD_SRB_CMD_RDB0, srb_cmd_reg); /* Rearm buffer */ + } - if (irq & KVASER_PCIEFD_SRB_IRQ_DPD1) + if (irq & KVASER_PCIEFD_SRB_IRQ_DPD1) { kvaser_pciefd_read_buffer(pcie, 1); + iowrite32(KVASER_PCIEFD_SRB_CMD_RDB1, srb_cmd_reg); /* Rearm buffer */ + } if (unlikely(irq & KVASER_PCIEFD_SRB_IRQ_DOF0 || irq & KVASER_PCIEFD_SRB_IRQ_DOF1 || irq & KVASER_PCIEFD_SRB_IRQ_DUF0 || irq & KVASER_PCIEFD_SRB_IRQ_DUF1)) dev_err(&pcie->pci->dev, "DMA IRQ error 0x%08X\n", irq); - - iowrite32(irq, 
KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IRQ_REG); - return irq; } static void kvaser_pciefd_transmit_irq(struct kvaser_pciefd_can *can) @@ -1691,29 +1722,22 @@ static irqreturn_t kvaser_pciefd_irq_handler(int irq, void *dev) struct kvaser_pciefd *pcie = (struct kvaser_pciefd *)dev; const struct kvaser_pciefd_irq_mask *irq_mask = pcie->driver_data->irq_mask; u32 pci_irq = ioread32(KVASER_PCIEFD_PCI_IRQ_ADDR(pcie)); - u32 srb_irq = 0; - u32 srb_release = 0; int i; if (!(pci_irq & irq_mask->all)) return IRQ_NONE; + iowrite32(0, KVASER_PCIEFD_PCI_IEN_ADDR(pcie)); + if (pci_irq & irq_mask->kcan_rx0) - srb_irq = kvaser_pciefd_receive_irq(pcie); + kvaser_pciefd_receive_irq(pcie); for (i = 0; i < pcie->nr_channels; i++) { if (pci_irq & irq_mask->kcan_tx[i]) kvaser_pciefd_transmit_irq(pcie->can[i]); } - if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD0) - srb_release |= KVASER_PCIEFD_SRB_CMD_RDB0; - - if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD1) - srb_release |= KVASER_PCIEFD_SRB_CMD_RDB1; - - if (srb_release) - iowrite32(srb_release, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG); + iowrite32(irq_mask->all, KVASER_PCIEFD_PCI_IEN_ADDR(pcie)); return IRQ_HANDLED; } @@ -1733,13 +1757,22 @@ static void kvaser_pciefd_teardown_can_ctrls(struct kvaser_pciefd *pcie) } } +static void kvaser_pciefd_disable_irq_srcs(struct kvaser_pciefd *pcie) +{ + unsigned int i; + + /* Masking PCI_IRQ is insufficient as running ISR will unmask it */ + iowrite32(0, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IEN_REG); + for (i = 0; i < pcie->nr_channels; ++i) + iowrite32(0, pcie->can[i]->reg_base + KVASER_PCIEFD_KCAN_IEN_REG); +} + static int kvaser_pciefd_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int ret; struct kvaser_pciefd *pcie; const struct kvaser_pciefd_irq_mask *irq_mask; - void __iomem *irq_en_base; pcie = devm_kzalloc(&pdev->dev, sizeof(*pcie), GFP_KERNEL); if (!pcie) @@ -1805,8 +1838,7 @@ static int kvaser_pciefd_probe(struct pci_dev *pdev, 
KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IEN_REG); /* Enable PCI interrupts */ - irq_en_base = KVASER_PCIEFD_PCI_IEN_ADDR(pcie); - iowrite32(irq_mask->all, irq_en_base); + iowrite32(irq_mask->all, KVASER_PCIEFD_PCI_IEN_ADDR(pcie)); /* Ready the DMA buffers */ iowrite32(KVASER_PCIEFD_SRB_CMD_RDB0, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG); @@ -1820,8 +1852,7 @@ static int kvaser_pciefd_probe(struct pci_dev *pdev, return 0; err_free_irq: - /* Disable PCI interrupts */ - iowrite32(0, irq_en_base); + kvaser_pciefd_disable_irq_srcs(pcie); free_irq(pcie->pci->irq, pcie); err_pci_free_irq_vectors: @@ -1844,35 +1875,26 @@ err_disable_pci: return ret; } -static void kvaser_pciefd_remove_all_ctrls(struct kvaser_pciefd *pcie) -{ - int i; - - for (i = 0; i < pcie->nr_channels; i++) { - struct kvaser_pciefd_can *can = pcie->can[i]; - - if (can) { - iowrite32(0, can->reg_base + KVASER_PCIEFD_KCAN_IEN_REG); - unregister_candev(can->can.dev); - timer_delete(&can->bec_poll_timer); - kvaser_pciefd_pwm_stop(can); - free_candev(can->can.dev); - } - } -} - static void kvaser_pciefd_remove(struct pci_dev *pdev) { struct kvaser_pciefd *pcie = pci_get_drvdata(pdev); + unsigned int i; - kvaser_pciefd_remove_all_ctrls(pcie); + for (i = 0; i < pcie->nr_channels; ++i) { + struct kvaser_pciefd_can *can = pcie->can[i]; - /* Disable interrupts */ - iowrite32(0, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CTRL_REG); - iowrite32(0, KVASER_PCIEFD_PCI_IEN_ADDR(pcie)); + unregister_candev(can->can.dev); + timer_delete(&can->bec_poll_timer); + kvaser_pciefd_pwm_stop(can); + } + kvaser_pciefd_disable_irq_srcs(pcie); free_irq(pcie->pci->irq, pcie); pci_free_irq_vectors(pcie->pci); + + for (i = 0; i < pcie->nr_channels; ++i) + free_candev(pcie->can[i]->can.dev); + pci_iounmap(pdev, pcie->reg_base); pci_release_regions(pdev); pci_disable_device(pdev); diff --git a/drivers/net/can/slcan/slcan-core.c b/drivers/net/can/slcan/slcan-core.c index 24c6622d36bd..58ff2ec1d975 100644 
--- a/drivers/net/can/slcan/slcan-core.c +++ b/drivers/net/can/slcan/slcan-core.c @@ -71,12 +71,21 @@ MODULE_AUTHOR("Dario Binacchi <dario.binacchi@amarulasolutions.com>"); #define SLCAN_CMD_LEN 1 #define SLCAN_SFF_ID_LEN 3 #define SLCAN_EFF_ID_LEN 8 +#define SLCAN_DATA_LENGTH_LEN 1 +#define SLCAN_ERROR_LEN 1 #define SLCAN_STATE_LEN 1 #define SLCAN_STATE_BE_RXCNT_LEN 3 #define SLCAN_STATE_BE_TXCNT_LEN 3 -#define SLCAN_STATE_FRAME_LEN (1 + SLCAN_CMD_LEN + \ - SLCAN_STATE_BE_RXCNT_LEN + \ - SLCAN_STATE_BE_TXCNT_LEN) +#define SLCAN_STATE_MSG_LEN (SLCAN_CMD_LEN + \ + SLCAN_STATE_LEN + \ + SLCAN_STATE_BE_RXCNT_LEN + \ + SLCAN_STATE_BE_TXCNT_LEN) +#define SLCAN_ERROR_MSG_LEN_MIN (SLCAN_CMD_LEN + \ + SLCAN_ERROR_LEN + \ + SLCAN_DATA_LENGTH_LEN) +#define SLCAN_FRAME_MSG_LEN_MIN (SLCAN_CMD_LEN + \ + SLCAN_SFF_ID_LEN + \ + SLCAN_DATA_LENGTH_LEN) struct slcan { struct can_priv can; @@ -176,6 +185,9 @@ static void slcan_bump_frame(struct slcan *sl) u32 tmpid; char *cmd = sl->rbuff; + if (sl->rcount < SLCAN_FRAME_MSG_LEN_MIN) + return; + skb = alloc_can_skb(sl->dev, &cf); if (unlikely(!skb)) { sl->dev->stats.rx_dropped++; @@ -281,7 +293,7 @@ static void slcan_bump_state(struct slcan *sl) return; } - if (state == sl->can.state || sl->rcount < SLCAN_STATE_FRAME_LEN) + if (state == sl->can.state || sl->rcount != SLCAN_STATE_MSG_LEN) return; cmd += SLCAN_STATE_BE_RXCNT_LEN + SLCAN_CMD_LEN + 1; @@ -328,6 +340,9 @@ static void slcan_bump_err(struct slcan *sl) bool rx_errors = false, tx_errors = false, rx_over_errors = false; int i, len; + if (sl->rcount < SLCAN_ERROR_MSG_LEN_MIN) + return; + /* get len from sanitized ASCII value */ len = cmd[1]; if (len >= '0' && len < '9') @@ -456,8 +471,7 @@ static void slcan_bump(struct slcan *sl) static void slcan_unesc(struct slcan *sl, unsigned char s) { if ((s == '\r') || (s == '\a')) { /* CR or BEL ends the pdu */ - if (!test_and_clear_bit(SLF_ERROR, &sl->flags) && - sl->rcount > 4) + if (!test_and_clear_bit(SLF_ERROR, &sl->flags)) 
slcan_bump(sl); sl->rcount = 0; diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 9eb39cfa5fb2..7216eb8f9493 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -326,6 +326,26 @@ static void b53_get_vlan_entry(struct b53_device *dev, u16 vid, } } +static void b53_set_eap_mode(struct b53_device *dev, int port, int mode) +{ + u64 eap_conf; + + if (is5325(dev) || is5365(dev) || dev->chip_id == BCM5389_DEVICE_ID) + return; + + b53_read64(dev, B53_EAP_PAGE, B53_PORT_EAP_CONF(port), &eap_conf); + + if (is63xx(dev)) { + eap_conf &= ~EAP_MODE_MASK_63XX; + eap_conf |= (u64)mode << EAP_MODE_SHIFT_63XX; + } else { + eap_conf &= ~EAP_MODE_MASK; + eap_conf |= (u64)mode << EAP_MODE_SHIFT; + } + + b53_write64(dev, B53_EAP_PAGE, B53_PORT_EAP_CONF(port), eap_conf); +} + static void b53_set_forwarding(struct b53_device *dev, int enable) { u8 mgmt; @@ -586,6 +606,13 @@ int b53_setup_port(struct dsa_switch *ds, int port) b53_port_set_mcast_flood(dev, port, true); b53_port_set_learning(dev, port, false); + /* Force all traffic to go to the CPU port to prevent the ASIC from + * trying to forward to bridged ports on matching FDB entries, then + * dropping frames because it isn't allowed to forward there. 
+ */ + if (dsa_is_user_port(ds, port)) + b53_set_eap_mode(dev, port, EAP_MODE_SIMPLIFIED); + return 0; } EXPORT_SYMBOL(b53_setup_port); @@ -2042,6 +2069,9 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, pvlan |= BIT(i); } + /* Disable redirection of unknown SA to the CPU port */ + b53_set_eap_mode(dev, port, EAP_MODE_BASIC); + /* Configure the local port VLAN control membership to include * remote ports and update the local port bitmask */ @@ -2077,6 +2107,9 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) pvlan &= ~BIT(i); } + /* Enable redirection of unknown SA to the CPU port */ + b53_set_eap_mode(dev, port, EAP_MODE_SIMPLIFIED); + b53_write16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(port), pvlan); dev->ports[port].vlan_ctl_mask = pvlan; diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index bfbcb66bef66..5f7a0e5c5709 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -50,6 +50,9 @@ /* Jumbo Frame Registers */ #define B53_JUMBO_PAGE 0x40 +/* EAP Registers */ +#define B53_EAP_PAGE 0x42 + /* EEE Control Registers Page */ #define B53_EEE_PAGE 0x92 @@ -481,6 +484,17 @@ #define JMS_MAX_SIZE 9724 /************************************************************************* + * EAP Page Registers + *************************************************************************/ +#define B53_PORT_EAP_CONF(i) (0x20 + 8 * (i)) +#define EAP_MODE_SHIFT 51 +#define EAP_MODE_SHIFT_63XX 50 +#define EAP_MODE_MASK (0x3ull << EAP_MODE_SHIFT) +#define EAP_MODE_MASK_63XX (0x3ull << EAP_MODE_SHIFT_63XX) +#define EAP_MODE_BASIC 0 +#define EAP_MODE_SIMPLIFIED 3 + +/************************************************************************* * EEE Configuration Page Registers *************************************************************************/ diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 89f0796894af..f95a9aac56ee 
100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -265,16 +265,70 @@ static void ksz_phylink_mac_link_down(struct phylink_config *config, unsigned int mode, phy_interface_t interface); +/** + * ksz_phylink_mac_disable_tx_lpi() - Callback to signal LPI support (Dummy) + * @config: phylink config structure + * + * This function is a dummy handler. See ksz_phylink_mac_enable_tx_lpi() for + * a detailed explanation of EEE/LPI handling in KSZ switches. + */ +static void ksz_phylink_mac_disable_tx_lpi(struct phylink_config *config) +{ +} + +/** + * ksz_phylink_mac_enable_tx_lpi() - Callback to signal LPI support (Dummy) + * @config: phylink config structure + * @timer: timer value before entering LPI (unused) + * @tx_clock_stop: whether to stop the TX clock in LPI mode (unused) + * + * This function signals to phylink that the driver architecture supports + * LPI management, enabling phylink to control EEE advertisement during + * negotiation according to IEEE Std 802.3 (Clause 78). + * + * Hardware Management of EEE/LPI State: + * For KSZ switch ports with integrated PHYs (e.g., KSZ9893R ports 1-2), + * observation and testing suggest that the actual EEE / Low Power Idle (LPI) + * state transitions are managed autonomously by the hardware based on + * the auto-negotiation results. (Note: While the datasheet describes EEE + * operation based on negotiation, it doesn't explicitly detail the internal + * MAC/PHY interaction, so autonomous hardware management of the MAC state + * for LPI is inferred from observed behavior). + * This hardware control, consistent with the switch's ability to operate + * autonomously via strapping, means MAC-level software intervention is not + * required or exposed for managing the LPI state once EEE is negotiated. + * (Ref: KSZ9893R Data Sheet DS00002420D, primarily Section 4.7.5 explaining + * EEE, also Sections 4.1.7 on Auto-Negotiation and 3.2.1 on Configuration + * Straps). 
+ * + * Additionally, ports configured as MAC interfaces (e.g., KSZ9893R port 3) + * lack documented MAC-level LPI control. + * + * Therefore, this callback performs no action and serves primarily to inform + * phylink of LPI awareness and to document the inferred hardware behavior. + * + * Returns: 0 (Always success) + */ +static int ksz_phylink_mac_enable_tx_lpi(struct phylink_config *config, + u32 timer, bool tx_clock_stop) +{ + return 0; +} + static const struct phylink_mac_ops ksz88x3_phylink_mac_ops = { .mac_config = ksz88x3_phylink_mac_config, .mac_link_down = ksz_phylink_mac_link_down, .mac_link_up = ksz8_phylink_mac_link_up, + .mac_disable_tx_lpi = ksz_phylink_mac_disable_tx_lpi, + .mac_enable_tx_lpi = ksz_phylink_mac_enable_tx_lpi, }; static const struct phylink_mac_ops ksz8_phylink_mac_ops = { .mac_config = ksz_phylink_mac_config, .mac_link_down = ksz_phylink_mac_link_down, .mac_link_up = ksz8_phylink_mac_link_up, + .mac_disable_tx_lpi = ksz_phylink_mac_disable_tx_lpi, + .mac_enable_tx_lpi = ksz_phylink_mac_enable_tx_lpi, }; static const struct ksz_dev_ops ksz88xx_dev_ops = { @@ -358,6 +412,8 @@ static const struct phylink_mac_ops ksz9477_phylink_mac_ops = { .mac_config = ksz_phylink_mac_config, .mac_link_down = ksz_phylink_mac_link_down, .mac_link_up = ksz9477_phylink_mac_link_up, + .mac_disable_tx_lpi = ksz_phylink_mac_disable_tx_lpi, + .mac_enable_tx_lpi = ksz_phylink_mac_enable_tx_lpi, }; static const struct ksz_dev_ops ksz9477_dev_ops = { @@ -401,6 +457,8 @@ static const struct phylink_mac_ops lan937x_phylink_mac_ops = { .mac_config = ksz_phylink_mac_config, .mac_link_down = ksz_phylink_mac_link_down, .mac_link_up = ksz9477_phylink_mac_link_up, + .mac_disable_tx_lpi = ksz_phylink_mac_disable_tx_lpi, + .mac_enable_tx_lpi = ksz_phylink_mac_enable_tx_lpi, }; static const struct ksz_dev_ops lan937x_dev_ops = { @@ -2016,6 +2074,18 @@ static void ksz_phylink_get_caps(struct dsa_switch *ds, int port, if (dev->dev_ops->get_caps) dev->dev_ops->get_caps(dev, 
port, config); + + if (ds->ops->support_eee && ds->ops->support_eee(ds, port)) { + memcpy(config->lpi_interfaces, config->supported_interfaces, + sizeof(config->lpi_interfaces)); + + config->lpi_capabilities = MAC_100FD; + if (dev->info->gbit_capable[port]) + config->lpi_capabilities |= MAC_1000FD; + + /* EEE is fully operational */ + config->eee_enabled_default = true; + } } void ksz_r_mib_stats64(struct ksz_device *dev, int port) @@ -3008,31 +3078,6 @@ static u32 ksz_get_phy_flags(struct dsa_switch *ds, int port) if (!port) return MICREL_KSZ8_P1_ERRATA; break; - case KSZ8567_CHIP_ID: - /* KSZ8567R Errata DS80000752C Module 4 */ - case KSZ8765_CHIP_ID: - case KSZ8794_CHIP_ID: - case KSZ8795_CHIP_ID: - /* KSZ879x/KSZ877x/KSZ876x Errata DS80000687C Module 2 */ - case KSZ9477_CHIP_ID: - /* KSZ9477S Errata DS80000754A Module 4 */ - case KSZ9567_CHIP_ID: - /* KSZ9567S Errata DS80000756A Module 4 */ - case KSZ9896_CHIP_ID: - /* KSZ9896C Errata DS80000757A Module 3 */ - case KSZ9897_CHIP_ID: - case LAN9646_CHIP_ID: - /* KSZ9897R Errata DS80000758C Module 4 */ - /* Energy Efficient Ethernet (EEE) feature select must be manually disabled - * The EEE feature is enabled by default, but it is not fully - * operational. It must be manually disabled through register - * controls. If not disabled, the PHY ports can auto-negotiate - * to enable EEE, and this feature can cause link drops when - * linked to another device supporting EEE. - * - * The same item appears in the errata for all switches above. 
- */ - return MICREL_NO_EEE; } return 0; @@ -3466,6 +3511,20 @@ static int ksz_max_mtu(struct dsa_switch *ds, int port) return -EOPNOTSUPP; } +/** + * ksz_support_eee - Determine Energy Efficient Ethernet (EEE) support for a + * port + * @ds: Pointer to the DSA switch structure + * @port: Port number to check + * + * This function also documents devices where EEE was initially advertised but + * later withdrawn due to reliability issues, as described in official errata + * documents. These devices are explicitly listed to record known limitations, + * even if there is no technical necessity for runtime checks. + * + * Returns: true if the internal PHY on the given port supports fully + * operational EEE, false otherwise. + */ static bool ksz_support_eee(struct dsa_switch *ds, int port) { struct ksz_device *dev = ds->priv; @@ -3475,15 +3534,35 @@ static bool ksz_support_eee(struct dsa_switch *ds, int port) switch (dev->chip_id) { case KSZ8563_CHIP_ID: + case KSZ9563_CHIP_ID: + case KSZ9893_CHIP_ID: + return true; case KSZ8567_CHIP_ID: + /* KSZ8567R Errata DS80000752C Module 4 */ + case KSZ8765_CHIP_ID: + case KSZ8794_CHIP_ID: + case KSZ8795_CHIP_ID: + /* KSZ879x/KSZ877x/KSZ876x Errata DS80000687C Module 2 */ case KSZ9477_CHIP_ID: - case KSZ9563_CHIP_ID: + /* KSZ9477S Errata DS80000754A Module 4 */ case KSZ9567_CHIP_ID: - case KSZ9893_CHIP_ID: + /* KSZ9567S Errata DS80000756A Module 4 */ case KSZ9896_CHIP_ID: + /* KSZ9896C Errata DS80000757A Module 3 */ case KSZ9897_CHIP_ID: case LAN9646_CHIP_ID: - return true; + /* KSZ9897R Errata DS80000758C Module 4 */ + /* Energy Efficient Ethernet (EEE) feature select must be + * manually disabled + * The EEE feature is enabled by default, but it is not fully + * operational. It must be manually disabled through register + * controls. If not disabled, the PHY ports can auto-negotiate + * to enable EEE, and this feature can cause link drops when + * linked to another device supporting EEE. 
+ * + * The same item appears in the errata for all switches above. + */ + break; } return false; diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index f8454f3b6f9c..f674c400f05b 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -2081,6 +2081,7 @@ static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port, switch (state) { case BR_STATE_DISABLED: case BR_STATE_BLOCKING: + case BR_STATE_LISTENING: /* From UM10944 description of DRPDTAG (why put this there?): * "Management traffic flows to the port regardless of the state * of the INGRESS flag". So BPDUs are still be allowed to pass. @@ -2090,11 +2091,6 @@ static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port, mac[port].egress = false; mac[port].dyn_learn = false; break; - case BR_STATE_LISTENING: - mac[port].ingress = true; - mac[port].egress = false; - mac[port].dyn_learn = false; - break; case BR_STATE_LEARNING: mac[port].ingress = true; mac[port].egress = false; diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index d748dc6de923..1e9ab65218ff 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -614,7 +614,6 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) struct airoha_queue_entry *e = &q->entry[q->tail]; struct airoha_qdma_desc *desc = &q->desc[q->tail]; u32 hash, reason, msg1 = le32_to_cpu(desc->msg1); - dma_addr_t dma_addr = le32_to_cpu(desc->addr); struct page *page = virt_to_head_page(e->buf); u32 desc_ctrl = le32_to_cpu(desc->ctrl); struct airoha_gdm_port *port; @@ -623,22 +622,16 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) if (!(desc_ctrl & QDMA_DESC_DONE_MASK)) break; - if (!dma_addr) - break; - - len = FIELD_GET(QDMA_DESC_LEN_MASK, desc_ctrl); - if (!len) - break; - q->tail = (q->tail + 1) % q->ndesc; q->queued--; - 
dma_sync_single_for_cpu(eth->dev, dma_addr, + dma_sync_single_for_cpu(eth->dev, e->dma_addr, SKB_WITH_OVERHEAD(q->buf_size), dir); + len = FIELD_GET(QDMA_DESC_LEN_MASK, desc_ctrl); data_len = q->skb ? q->buf_size : SKB_WITH_OVERHEAD(q->buf_size); - if (data_len < len) + if (!len || data_len < len) goto free_frag; p = airoha_qdma_get_gdm_port(eth, desc); @@ -701,9 +694,12 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) q->skb = NULL; continue; free_frag: - page_pool_put_full_page(q->page_pool, page, true); - dev_kfree_skb(q->skb); - q->skb = NULL; + if (q->skb) { + dev_kfree_skb(q->skb); + q->skb = NULL; + } else { + page_pool_put_full_page(q->page_pool, page, true); + } } airoha_qdma_fill_rx_queue(q); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 86a5de44b6f3..6afc2ab6fad2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -14013,13 +14013,28 @@ static void bnxt_unlock_sp(struct bnxt *bp) netdev_unlock(bp->dev); } +/* Same as bnxt_lock_sp() with additional rtnl_lock */ +static void bnxt_rtnl_lock_sp(struct bnxt *bp) +{ + clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state); + rtnl_lock(); + netdev_lock(bp->dev); +} + +static void bnxt_rtnl_unlock_sp(struct bnxt *bp) +{ + set_bit(BNXT_STATE_IN_SP_TASK, &bp->state); + netdev_unlock(bp->dev); + rtnl_unlock(); +} + /* Only called from bnxt_sp_task() */ static void bnxt_reset(struct bnxt *bp, bool silent) { - bnxt_lock_sp(bp); + bnxt_rtnl_lock_sp(bp); if (test_bit(BNXT_STATE_OPEN, &bp->state)) bnxt_reset_task(bp, silent); - bnxt_unlock_sp(bp); + bnxt_rtnl_unlock_sp(bp); } /* Only called from bnxt_sp_task() */ @@ -14027,9 +14042,9 @@ static void bnxt_rx_ring_reset(struct bnxt *bp) { int i; - bnxt_lock_sp(bp); + bnxt_rtnl_lock_sp(bp); if (!test_bit(BNXT_STATE_OPEN, &bp->state)) { - bnxt_unlock_sp(bp); + bnxt_rtnl_unlock_sp(bp); return; } /* Disable and flush TPA before resetting the RX ring */ 
@@ -14068,7 +14083,7 @@ static void bnxt_rx_ring_reset(struct bnxt *bp) } if (bp->flags & BNXT_FLAG_TPA) bnxt_set_tpa(bp, true); - bnxt_unlock_sp(bp); + bnxt_rtnl_unlock_sp(bp); } static void bnxt_fw_fatal_close(struct bnxt *bp) @@ -14960,15 +14975,17 @@ static void bnxt_fw_reset_task(struct work_struct *work) bp->fw_reset_state = BNXT_FW_RESET_STATE_OPENING; fallthrough; case BNXT_FW_RESET_STATE_OPENING: - while (!netdev_trylock(bp->dev)) { + while (!rtnl_trylock()) { bnxt_queue_fw_reset_work(bp, HZ / 10); return; } + netdev_lock(bp->dev); rc = bnxt_open(bp->dev); if (rc) { netdev_err(bp->dev, "bnxt_open() failed during FW reset\n"); bnxt_fw_reset_abort(bp, rc); netdev_unlock(bp->dev); + rtnl_unlock(); goto ulp_start; } @@ -14988,6 +15005,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) bnxt_dl_health_fw_status_update(bp, true); } netdev_unlock(bp->dev); + rtnl_unlock(); bnxt_ulp_start(bp, 0); bnxt_reenable_sriov(bp); netdev_lock(bp->dev); @@ -15936,7 +15954,7 @@ err_reset: rc); napi_enable_locked(&bnapi->napi); bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons); - bnxt_reset_task(bp, true); + netif_close(dev); return rc; } @@ -16752,6 +16770,7 @@ static int bnxt_resume(struct device *device) struct bnxt *bp = netdev_priv(dev); int rc = 0; + rtnl_lock(); netdev_lock(dev); rc = pci_enable_device(bp->pdev); if (rc) { @@ -16796,6 +16815,7 @@ static int bnxt_resume(struct device *device) resume_exit: netdev_unlock(bp->dev); + rtnl_unlock(); bnxt_ulp_start(bp, rc); if (!rc) bnxt_reenable_sriov(bp); @@ -16961,6 +16981,7 @@ static void bnxt_io_resume(struct pci_dev *pdev) int err; netdev_info(bp->dev, "PCI Slot Resume\n"); + rtnl_lock(); netdev_lock(netdev); err = bnxt_hwrm_func_qcaps(bp); @@ -16978,6 +16999,7 @@ static void bnxt_io_resume(struct pci_dev *pdev) netif_device_attach(netdev); netdev_unlock(netdev); + rtnl_unlock(); bnxt_ulp_start(bp, err); if (!err) bnxt_reenable_sriov(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index a8e930d5dbb0..7564705d6478 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -20,6 +20,7 @@ #include <asm/byteorder.h> #include <linux/bitmap.h> #include <linux/auxiliary_bus.h> +#include <net/netdev_lock.h> #include "bnxt_hsi.h" #include "bnxt.h" @@ -309,14 +310,12 @@ void bnxt_ulp_irq_stop(struct bnxt *bp) if (!ulp->msix_requested) return; - netdev_lock(bp->dev); - ops = rcu_dereference(ulp->ulp_ops); + ops = netdev_lock_dereference(ulp->ulp_ops, bp->dev); if (!ops || !ops->ulp_irq_stop) return; if (test_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) reset = true; ops->ulp_irq_stop(ulp->handle, reset); - netdev_unlock(bp->dev); } } @@ -335,8 +334,7 @@ void bnxt_ulp_irq_restart(struct bnxt *bp, int err) if (!ulp->msix_requested) return; - netdev_lock(bp->dev); - ops = rcu_dereference(ulp->ulp_ops); + ops = netdev_lock_dereference(ulp->ulp_ops, bp->dev); if (!ops || !ops->ulp_irq_restart) return; @@ -348,7 +346,6 @@ void bnxt_ulp_irq_restart(struct bnxt *bp, int err) bnxt_fill_msix_vecs(bp, ent); } ops->ulp_irq_restart(ulp->handle, ent); - netdev_unlock(bp->dev); kfree(ent); } } diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 1fe8ec37491b..e1e8bd2ec155 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -997,22 +997,15 @@ static void macb_update_stats(struct macb *bp) static int macb_halt_tx(struct macb *bp) { - unsigned long halt_time, timeout; - u32 status; + u32 status; macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(THALT)); - timeout = jiffies + usecs_to_jiffies(MACB_HALT_TIMEOUT); - do { - halt_time = jiffies; - status = macb_readl(bp, TSR); - if (!(status & MACB_BIT(TGO))) - return 0; - - udelay(250); - } while (time_before(halt_time, timeout)); - - return -ETIMEDOUT; + /* Poll TSR until TGO is cleared or timeout. 
*/ + return read_poll_timeout_atomic(macb_readl, status, + !(status & MACB_BIT(TGO)), + 250, MACB_HALT_TIMEOUT, false, + bp, TSR); } static void macb_tx_unmap(struct macb *bp, struct macb_tx_skb *tx_skb, int budget) diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index 625245b0845c..eba73246f986 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -67,6 +67,8 @@ #define TSNEP_TX_TYPE_XDP_NDO_MAP_PAGE (TSNEP_TX_TYPE_XDP_NDO | TSNEP_TX_TYPE_MAP_PAGE) #define TSNEP_TX_TYPE_XDP (TSNEP_TX_TYPE_XDP_TX | TSNEP_TX_TYPE_XDP_NDO) #define TSNEP_TX_TYPE_XSK BIT(12) +#define TSNEP_TX_TYPE_TSTAMP BIT(13) +#define TSNEP_TX_TYPE_SKB_TSTAMP (TSNEP_TX_TYPE_SKB | TSNEP_TX_TYPE_TSTAMP) #define TSNEP_XDP_TX BIT(0) #define TSNEP_XDP_REDIRECT BIT(1) @@ -386,8 +388,7 @@ static void tsnep_tx_activate(struct tsnep_tx *tx, int index, int length, if (entry->skb) { entry->properties = length & TSNEP_DESC_LENGTH_MASK; entry->properties |= TSNEP_DESC_INTERRUPT_FLAG; - if ((entry->type & TSNEP_TX_TYPE_SKB) && - (skb_shinfo(entry->skb)->tx_flags & SKBTX_IN_PROGRESS)) + if ((entry->type & TSNEP_TX_TYPE_SKB_TSTAMP) == TSNEP_TX_TYPE_SKB_TSTAMP) entry->properties |= TSNEP_DESC_EXTENDED_WRITEBACK_FLAG; /* toggle user flag to prevent false acknowledge @@ -479,7 +480,8 @@ static int tsnep_tx_map_frag(skb_frag_t *frag, struct tsnep_tx_entry *entry, return mapped; } -static int tsnep_tx_map(struct sk_buff *skb, struct tsnep_tx *tx, int count) +static int tsnep_tx_map(struct sk_buff *skb, struct tsnep_tx *tx, int count, + bool do_tstamp) { struct device *dmadev = tx->adapter->dmadev; struct tsnep_tx_entry *entry; @@ -505,6 +507,9 @@ static int tsnep_tx_map(struct sk_buff *skb, struct tsnep_tx *tx, int count) entry->type = TSNEP_TX_TYPE_SKB_INLINE; mapped = 0; } + + if (do_tstamp) + entry->type |= TSNEP_TX_TYPE_TSTAMP; } else { skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; @@ -558,11 +563,12 @@ 
static int tsnep_tx_unmap(struct tsnep_tx *tx, int index, int count) static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb, struct tsnep_tx *tx) { - int count = 1; struct tsnep_tx_entry *entry; + bool do_tstamp = false; + int count = 1; int length; - int i; int retval; + int i; if (skb_shinfo(skb)->nr_frags > 0) count += skb_shinfo(skb)->nr_frags; @@ -579,7 +585,13 @@ static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb, entry = &tx->entry[tx->write]; entry->skb = skb; - retval = tsnep_tx_map(skb, tx, count); + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + tx->adapter->hwtstamp_config.tx_type == HWTSTAMP_TX_ON) { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + do_tstamp = true; + } + + retval = tsnep_tx_map(skb, tx, count, do_tstamp); if (retval < 0) { tsnep_tx_unmap(tx, tx->write, count); dev_kfree_skb_any(entry->skb); @@ -591,9 +603,6 @@ static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb, } length = retval; - if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) - skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - for (i = 0; i < count; i++) tsnep_tx_activate(tx, (tx->write + i) & TSNEP_RING_MASK, length, i == count - 1); @@ -844,8 +853,7 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget) length = tsnep_tx_unmap(tx, tx->read, count); - if ((entry->type & TSNEP_TX_TYPE_SKB) && - (skb_shinfo(entry->skb)->tx_flags & SKBTX_IN_PROGRESS) && + if (((entry->type & TSNEP_TX_TYPE_SKB_TSTAMP) == TSNEP_TX_TYPE_SKB_TSTAMP) && (__le32_to_cpu(entry->desc_wb->properties) & TSNEP_DESC_EXTENDED_WRITEBACK_FLAG)) { struct skb_shared_hwtstamps hwtstamps; diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c index a0bcfb5a713d..ff3295b60a69 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c @@ -61,6 +61,8 @@ static int hbg_reset_prepare(struct hbg_priv *priv, enum hbg_reset_type type) return -EBUSY; } + 
netif_device_detach(priv->netdev); + priv->reset_type = type; set_bit(HBG_NIC_STATE_RESETTING, &priv->state); clear_bit(HBG_NIC_STATE_RESET_FAIL, &priv->state); @@ -91,6 +93,8 @@ static int hbg_reset_done(struct hbg_priv *priv, enum hbg_reset_type type) return ret; } + netif_device_attach(priv->netdev); + dev_info(&priv->pdev->dev, "reset done\n"); return ret; } @@ -117,16 +121,13 @@ void hbg_err_reset(struct hbg_priv *priv) if (running) dev_close(priv->netdev); - hbg_reset(priv); - - /* in hbg_pci_err_detected(), we will detach first, - * so we need to attach before open - */ - if (!netif_device_present(priv->netdev)) - netif_device_attach(priv->netdev); + if (hbg_reset(priv)) + goto err_unlock; if (running) dev_open(priv->netdev, NULL); + +err_unlock: rtnl_unlock(); } @@ -160,7 +161,6 @@ static pci_ers_result_t hbg_pci_err_slot_reset(struct pci_dev *pdev) pci_save_state(pdev); hbg_err_reset(priv); - netif_device_attach(netdev); return PCI_ERS_RESULT_RECOVERED; } diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c index 8f1107b85fbb..55520053270a 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c @@ -317,6 +317,9 @@ static void hbg_update_stats_by_info(struct hbg_priv *priv, const struct hbg_ethtool_stats *stats; u32 i; + if (test_bit(HBG_NIC_STATE_RESETTING, &priv->state)) + return; + for (i = 0; i < info_len; i++) { stats = &info[i]; if (!stats->reg) diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index 22371011c249..2410aee59fb2 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -1321,12 +1321,18 @@ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr) */ if (!primary_lag) { lag->primary = true; + if (!ice_is_switchdev_running(lag->pf)) + return; + /* Configure primary's SWID to be shared */ 
ice_lag_primary_swid(lag, true); primary_lag = lag; } else { u16 swid; + if (!ice_is_switchdev_running(primary_lag->pf)) + return; + swid = primary_lag->pf->hw.port_info->sw_id; ice_lag_set_swid(swid, lag, true); ice_lag_add_prune_list(primary_lag, lag->pf); diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index 7c3006eb68dd..6446d0fcc052 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -4275,7 +4275,6 @@ static int ice_vc_repr_add_mac(struct ice_vf *vf, u8 *msg) } ice_vfhw_mac_add(vf, &al->list[i]); - vf->num_mac++; break; } diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index aef0e9775a33..70dbf80f3bb7 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -143,6 +143,7 @@ enum idpf_vport_state { * @vport_id: Vport identifier * @link_speed_mbps: Link speed in mbps * @vport_idx: Relative vport index + * @max_tx_hdr_size: Max header length hardware can support * @state: See enum idpf_vport_state * @netstats: Packet and byte stats * @stats_lock: Lock to protect stats update @@ -153,6 +154,7 @@ struct idpf_netdev_priv { u32 vport_id; u32 link_speed_mbps; u16 vport_idx; + u16 max_tx_hdr_size; enum idpf_vport_state state; struct rtnl_link_stats64 netstats; spinlock_t stats_lock; diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 82f09b4030bc..3a033ce19cda 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -723,6 +723,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) np->vport = vport; np->vport_idx = vport->idx; np->vport_id = vport->vport_id; + np->max_tx_hdr_size = idpf_get_max_tx_hdr_size(adapter); vport->netdev = netdev; return idpf_init_mac_addr(vport, netdev); @@ -740,6 +741,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) 
np->adapter = adapter; np->vport_idx = vport->idx; np->vport_id = vport->vport_id; + np->max_tx_hdr_size = idpf_get_max_tx_hdr_size(adapter); spin_lock_init(&np->stats_lock); @@ -2203,8 +2205,8 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb, struct net_device *netdev, netdev_features_t features) { - struct idpf_vport *vport = idpf_netdev_to_vport(netdev); - struct idpf_adapter *adapter = vport->adapter; + struct idpf_netdev_priv *np = netdev_priv(netdev); + u16 max_tx_hdr_size = np->max_tx_hdr_size; size_t len; /* No point in doing any of this if neither checksum nor GSO are @@ -2227,7 +2229,7 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb, goto unsupported; len = skb_network_header_len(skb); - if (unlikely(len > idpf_get_max_tx_hdr_size(adapter))) + if (unlikely(len > max_tx_hdr_size)) goto unsupported; if (!skb->encapsulation) @@ -2240,7 +2242,7 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb, /* IPLEN can support at most 127 dwords */ len = skb_inner_network_header_len(skb); - if (unlikely(len > idpf_get_max_tx_hdr_size(adapter))) + if (unlikely(len > max_tx_hdr_size)) goto unsupported; /* No need to validate L4LEN as TCP is the only protocol with a diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index bdf52cef3891..2d5f5c9f91ce 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -4025,6 +4025,14 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) return budget; } + /* Switch to poll mode in the tear-down path after sending disable + * queues virtchnl message, as the interrupts will be disabled after + * that. 
+ */ + if (unlikely(q_vector->num_txq && idpf_queue_has(POLL_MODE, + q_vector->tx[0]))) + return budget; + work_done = min_t(int, work_done, budget - 1); /* Exit the polling mode, but don't re-enable interrupts if stack might @@ -4035,15 +4043,7 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) else idpf_vport_intr_set_wb_on_itr(q_vector); - /* Switch to poll mode in the tear-down path after sending disable - * queues virtchnl message, as the interrupts will be disabled after - * that - */ - if (unlikely(q_vector->num_txq && idpf_queue_has(POLL_MODE, - q_vector->tx[0]))) - return budget; - else - return work_done; + return work_done; } /** diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 0b27a695008b..971993586fb4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -717,6 +717,11 @@ int cgx_get_rx_stats(void *cgxd, int lmac_id, int idx, u64 *rx_stat) if (!is_lmac_valid(cgx, lmac_id)) return -ENODEV; + + /* pass lmac as 0 for CGX_CMR_RX_STAT9-12 */ + if (idx >= CGX_RX_STAT_GLOBAL_INDEX) + lmac_id = 0; + *rx_stat = cgx_read(cgx, lmac_id, CGXX_CMRX_RX_STAT0 + (idx * 8)); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c index 7fa98aeb3663..4a3370a40dd8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c @@ -13,19 +13,26 @@ /* RVU LMTST */ #define LMT_TBL_OP_READ 0 #define LMT_TBL_OP_WRITE 1 -#define LMT_MAP_TABLE_SIZE (128 * 1024) #define LMT_MAPTBL_ENTRY_SIZE 16 +#define LMT_MAX_VFS 256 + +#define LMT_MAP_ENTRY_ENA BIT_ULL(20) +#define LMT_MAP_ENTRY_LINES GENMASK_ULL(18, 16) /* Function to perform operations (read/write) on lmtst map table */ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val, int lmt_tbl_op) { void __iomem 
*lmt_map_base; - u64 tbl_base; + u64 tbl_base, cfg; + int pfs, vfs; tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE); + cfg = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_CFG); + vfs = 1 << (cfg & 0xF); + pfs = 1 << ((cfg >> 4) & 0x7); - lmt_map_base = ioremap_wc(tbl_base, LMT_MAP_TABLE_SIZE); + lmt_map_base = ioremap_wc(tbl_base, pfs * vfs * LMT_MAPTBL_ENTRY_SIZE); if (!lmt_map_base) { dev_err(rvu->dev, "Failed to setup lmt map table mapping!!\n"); return -ENOMEM; @@ -35,6 +42,13 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val, *val = readq(lmt_map_base + index); } else { writeq((*val), (lmt_map_base + index)); + + cfg = FIELD_PREP(LMT_MAP_ENTRY_ENA, 0x1); + /* 2048 LMTLINES */ + cfg |= FIELD_PREP(LMT_MAP_ENTRY_LINES, 0x6); + + writeq(cfg, (lmt_map_base + (index + 8))); + /* Flushing the AP interceptor cache to make APR_LMT_MAP_ENTRY_S * changes effective. Write 1 for flush and read is being used as a * barrier and sets up a data dependency. Write to 0 after a write @@ -52,7 +66,7 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val, #define LMT_MAP_TBL_W1_OFF 8 static u32 rvu_get_lmtst_tbl_index(struct rvu *rvu, u16 pcifunc) { - return ((rvu_get_pf(pcifunc) * rvu->hw->total_vfs) + + return ((rvu_get_pf(pcifunc) * LMT_MAX_VFS) + (pcifunc & RVU_PFVF_FUNC_MASK)) * LMT_MAPTBL_ENTRY_SIZE; } @@ -69,7 +83,7 @@ static int rvu_get_lmtaddr(struct rvu *rvu, u16 pcifunc, mutex_lock(&rvu->rsrc_lock); rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_ADDR_REQ, iova); - pf = rvu_get_pf(pcifunc) & 0x1F; + pf = rvu_get_pf(pcifunc) & RVU_PFVF_PF_MASK; val = BIT_ULL(63) | BIT_ULL(14) | BIT_ULL(13) | pf << 8 | ((pcifunc & RVU_PFVF_FUNC_MASK) & 0xFF); rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_TXN_REQ, val); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index a1f9ec03c2ce..c827da626471 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ 
b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -553,6 +553,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, u64 lmt_addr, val, tbl_base; int pf, vf, num_vfs, hw_vfs; void __iomem *lmt_map_base; + int apr_pfs, apr_vfs; int buf_size = 10240; size_t off = 0; int index = 0; @@ -568,8 +569,12 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, return -ENOMEM; tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE); + val = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_CFG); + apr_vfs = 1 << (val & 0xF); + apr_pfs = 1 << ((val >> 4) & 0x7); - lmt_map_base = ioremap_wc(tbl_base, 128 * 1024); + lmt_map_base = ioremap_wc(tbl_base, apr_pfs * apr_vfs * + LMT_MAPTBL_ENTRY_SIZE); if (!lmt_map_base) { dev_err(rvu->dev, "Failed to setup lmt map table mapping!!\n"); kfree(buf); @@ -591,7 +596,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, off += scnprintf(&buf[off], buf_size - 1 - off, "PF%d \t\t\t", pf); - index = pf * rvu->hw->total_vfs * LMT_MAPTBL_ENTRY_SIZE; + index = pf * apr_vfs * LMT_MAPTBL_ENTRY_SIZE; off += scnprintf(&buf[off], buf_size - 1 - off, " 0x%llx\t\t", (tbl_base + index)); lmt_addr = readq(lmt_map_base + index); @@ -604,7 +609,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, /* Reading num of VFs per PF */ rvu_get_pf_numvfs(rvu, pf, &num_vfs, &hw_vfs); for (vf = 0; vf < num_vfs; vf++) { - index = (pf * rvu->hw->total_vfs * 16) + + index = (pf * apr_vfs * LMT_MAPTBL_ENTRY_SIZE) + ((vf + 1) * LMT_MAPTBL_ENTRY_SIZE); off += scnprintf(&buf[off], buf_size - 1 - off, "PF%d:VF%d \t\t", pf, vf); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c index f3b9daffaec3..4c7e0f345cb5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c @@ -531,7 +531,8 @@ static int cn10k_mcs_write_tx_secy(struct otx2_nic *pfvf, if 
(sw_tx_sc->encrypt) sectag_tci |= (MCS_TCI_E | MCS_TCI_C); - policy = FIELD_PREP(MCS_TX_SECY_PLCY_MTU, secy->netdev->mtu); + policy = FIELD_PREP(MCS_TX_SECY_PLCY_MTU, + pfvf->netdev->mtu + OTX2_ETH_HLEN); /* Write SecTag excluding AN bits(1..0) */ policy |= FIELD_PREP(MCS_TX_SECY_PLCY_ST_TCI, sectag_tci >> 2); policy |= FIELD_PREP(MCS_TX_SECY_PLCY_ST_OFFSET, tag_offset); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 1e88422825be..d6b4b74e4002 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -356,6 +356,7 @@ struct otx2_flow_config { struct list_head flow_list_tc; u8 ucast_flt_cnt; bool ntuple; + u16 ntuple_cnt; }; struct dev_hw_ops { diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c index 33ec9a7f7c03..e13ae5484c19 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c @@ -41,6 +41,7 @@ static int otx2_dl_mcam_count_set(struct devlink *devlink, u32 id, if (!pfvf->flow_cfg) return 0; + pfvf->flow_cfg->ntuple_cnt = ctx->val.vu16; otx2_alloc_mcam_entries(pfvf, ctx->val.vu16); return 0; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 010385b29988..45b8c9230184 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -315,7 +315,7 @@ static void otx2_get_pauseparam(struct net_device *netdev, struct otx2_nic *pfvf = netdev_priv(netdev); struct cgx_pause_frm_cfg *req, *rsp; - if (is_otx2_lbkvf(pfvf->pdev)) + if (is_otx2_lbkvf(pfvf->pdev) || is_otx2_sdp_rep(pfvf->pdev)) return; mutex_lock(&pfvf->mbox.lock); @@ -347,7 +347,7 @@ static int otx2_set_pauseparam(struct net_device 
*netdev, if (pause->autoneg) return -EOPNOTSUPP; - if (is_otx2_lbkvf(pfvf->pdev)) + if (is_otx2_lbkvf(pfvf->pdev) || is_otx2_sdp_rep(pfvf->pdev)) return -EOPNOTSUPP; if (pause->rx_pause) @@ -941,8 +941,8 @@ static u32 otx2_get_link(struct net_device *netdev) { struct otx2_nic *pfvf = netdev_priv(netdev); - /* LBK link is internal and always UP */ - if (is_otx2_lbkvf(pfvf->pdev)) + /* LBK and SDP links are internal and always UP */ + if (is_otx2_lbkvf(pfvf->pdev) || is_otx2_sdp_rep(pfvf->pdev)) return 1; return pfvf->linfo.link_up; } @@ -1413,7 +1413,7 @@ static int otx2vf_get_link_ksettings(struct net_device *netdev, { struct otx2_nic *pfvf = netdev_priv(netdev); - if (is_otx2_lbkvf(pfvf->pdev)) { + if (is_otx2_lbkvf(pfvf->pdev) || is_otx2_sdp_rep(pfvf->pdev)) { cmd->base.duplex = DUPLEX_FULL; cmd->base.speed = SPEED_100000; } else { diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c index 47bfd1fb37d4..64c6d9162ef6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c @@ -247,7 +247,7 @@ int otx2_mcam_entry_init(struct otx2_nic *pfvf) mutex_unlock(&pfvf->mbox.lock); /* Allocate entries for Ntuple filters */ - count = otx2_alloc_mcam_entries(pfvf, OTX2_DEFAULT_FLOWCOUNT); + count = otx2_alloc_mcam_entries(pfvf, flow_cfg->ntuple_cnt); if (count <= 0) { otx2_clear_ntuple_flow_info(pfvf, flow_cfg); return 0; @@ -307,6 +307,7 @@ int otx2_mcam_flow_init(struct otx2_nic *pf) INIT_LIST_HEAD(&pf->flow_cfg->flow_list_tc); pf->flow_cfg->ucast_flt_cnt = OTX2_DEFAULT_UNICAST_FLOWS; + pf->flow_cfg->ntuple_cnt = OTX2_DEFAULT_FLOWCOUNT; /* Allocate bare minimum number of MCAM entries needed for * unicast and ntuple filters. 
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 7ef3ba477d49..9b28be4c4a5d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -729,9 +729,12 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) } #ifdef CONFIG_DCB - err = otx2_dcbnl_set_ops(netdev); - if (err) - goto err_free_zc_bmap; + /* Priority flow control is not supported for LBK and SDP vf(s) */ + if (!(is_otx2_lbkvf(vf->pdev) || is_otx2_sdp_rep(vf->pdev))) { + err = otx2_dcbnl_set_ops(netdev); + if (err) + goto err_free_zc_bmap; + } #endif otx2_qos_init(vf, qos_txqs); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 22a532695fb0..6c92072b4c28 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -4748,7 +4748,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) } if (mtk_is_netsys_v3_or_greater(mac->hw) && - MTK_HAS_CAPS(mac->hw->soc->caps, MTK_ESW_BIT) && + MTK_HAS_CAPS(mac->hw->soc->caps, MTK_ESW) && id == MTK_GMAC1_ID) { mac->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3506024c2453..9bd166f489e7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4349,6 +4349,10 @@ static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev if (netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER) netdev_warn(netdev, "Disabling HW_VLAN CTAG FILTERING, not supported in switchdev mode\n"); + features &= ~NETIF_F_HW_MACSEC; + if (netdev->features & NETIF_F_HW_MACSEC) + netdev_warn(netdev, "Disabling HW MACsec offload, not supported in switchdev mode\n"); + return features; } diff 
--git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 464821dd492d..a2033837182e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3014,6 +3014,9 @@ static int mlxsw_sp_neigh_rif_made_sync(struct mlxsw_sp *mlxsw_sp, .rif = rif, }; + if (!mlxsw_sp_dev_lower_is_port(mlxsw_sp_rif_dev(rif))) + return 0; + neigh_for_each(&arp_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms); if (rms.err) goto err_arp; diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index e2d6bfb5d693..a70b88037a20 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -3495,6 +3495,7 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, struct pci_dev *pdev) { struct lan743x_tx *tx; + u32 sgmii_ctl; int index; int ret; @@ -3507,6 +3508,15 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, spin_lock_init(&adapter->eth_syslock_spinlock); mutex_init(&adapter->sgmii_rw_lock); pci11x1x_set_rfe_rd_fifo_threshold(adapter); + sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL); + if (adapter->is_sgmii_en) { + sgmii_ctl |= SGMII_CTL_SGMII_ENABLE_; + sgmii_ctl &= ~SGMII_CTL_SGMII_POWER_DN_; + } else { + sgmii_ctl &= ~SGMII_CTL_SGMII_ENABLE_; + sgmii_ctl |= SGMII_CTL_SGMII_POWER_DN_; + } + lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); } else { adapter->max_tx_channels = LAN743X_MAX_TX_CHANNELS; adapter->used_tx_channels = LAN743X_USED_TX_CHANNELS; @@ -3558,7 +3568,6 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, static int lan743x_mdiobus_init(struct lan743x_adapter *adapter) { - u32 sgmii_ctl; int ret; adapter->mdiobus = devm_mdiobus_alloc(&adapter->pdev->dev); @@ -3570,10 +3579,6 @@ static int lan743x_mdiobus_init(struct lan743x_adapter *adapter) adapter->mdiobus->priv = (void *)adapter; if 
(adapter->is_pci11x1x) { if (adapter->is_sgmii_en) { - sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL); - sgmii_ctl |= SGMII_CTL_SGMII_ENABLE_; - sgmii_ctl &= ~SGMII_CTL_SGMII_POWER_DN_; - lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); netif_dbg(adapter, drv, adapter->netdev, "SGMII operation\n"); adapter->mdiobus->read = lan743x_mdiobus_read_c22; @@ -3584,10 +3589,6 @@ static int lan743x_mdiobus_init(struct lan743x_adapter *adapter) netif_dbg(adapter, drv, adapter->netdev, "lan743x-mdiobus-c45\n"); } else { - sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL); - sgmii_ctl &= ~SGMII_CTL_SGMII_ENABLE_; - sgmii_ctl |= SGMII_CTL_SGMII_POWER_DN_; - lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); netif_dbg(adapter, drv, adapter->netdev, "RGMII operation\n"); // Only C22 support when RGMII I/F diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 99df00c30b8c..b5d744d2586f 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -203,7 +203,7 @@ static struct pci_driver qede_pci_driver = { }; static struct qed_eth_cb_ops qede_ll_ops = { - { + .common = { #ifdef CONFIG_RFS_ACCEL .arfs_filter_op = qede_arfs_filter_op, #endif diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c index 28d24d59efb8..d57b976b9040 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c @@ -1484,8 +1484,11 @@ static int qlcnic_sriov_channel_cfg_cmd(struct qlcnic_adapter *adapter, u8 cmd_o } cmd_op = (cmd.rsp.arg[0] & 0xff); - if (cmd.rsp.arg[0] >> 25 == 2) - return 2; + if (cmd.rsp.arg[0] >> 25 == 2) { + ret = 2; + goto out; + } + if (cmd_op == QLCNIC_BC_CMD_CHANNEL_INIT) set_bit(QLC_BC_VF_STATE, &vf->state); else diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c 
index 85723a78793a..6c7e8655a7eb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -964,7 +964,7 @@ static int sun8i_dwmac_set_syscon(struct device *dev, /* of_mdio_parse_addr returns a valid (0 ~ 31) PHY * address. No need to mask it again. */ - reg |= 1 << H3_EPHY_ADDR_SHIFT; + reg |= ret << H3_EPHY_ADDR_SHIFT; } else { /* For SoCs without internal PHY the PHY selection bit should be * set to 0 (external PHY). diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 1e6d2335293d..30665ffe78cf 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2685,7 +2685,7 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) port->slave.mac_addr); if (!is_valid_ether_addr(port->slave.mac_addr)) { eth_random_addr(port->slave.mac_addr); - dev_err(dev, "Use random MAC address\n"); + dev_info(dev, "Use random MAC address\n"); } } diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index aed45abafb1b..490d34233d38 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -434,14 +434,20 @@ static int wx_host_interface_command_r(struct wx *wx, u32 *buffer, wr32m(wx, WX_SW2FW_MBOX_CMD, WX_SW2FW_MBOX_CMD_VLD, WX_SW2FW_MBOX_CMD_VLD); /* polling reply from FW */ - err = read_poll_timeout(wx_poll_fw_reply, reply, reply, 1000, 50000, - true, wx, buffer, send_cmd); + err = read_poll_timeout(wx_poll_fw_reply, reply, reply, 2000, + timeout * 1000, true, wx, buffer, send_cmd); if (err) { wx_err(wx, "Polling from FW messages timeout, cmd: 0x%x, index: %d\n", send_cmd, wx->swfw_index); goto rel_out; } + if (hdr->cmd_or_resp.ret_status == 0x80) { + wx_err(wx, "Unknown FW command: 0x%x\n", send_cmd); + err = -EINVAL; + goto rel_out; + } + /* expect no reply from FW then return */ if (!return_data) goto 
rel_out; diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c index 4b9921b7bb11..a054b259d435 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c @@ -99,9 +99,15 @@ static int txgbe_calc_eeprom_checksum(struct wx *wx, u16 *checksum) } local_buffer = eeprom_ptrs; - for (i = 0; i < TXGBE_EEPROM_LAST_WORD; i++) + for (i = 0; i < TXGBE_EEPROM_LAST_WORD; i++) { + if (wx->mac.type == wx_mac_aml) { + if (i >= TXGBE_EEPROM_I2C_SRART_PTR && + i < TXGBE_EEPROM_I2C_END_PTR) + local_buffer[i] = 0xffff; + } if (i != wx->eeprom.sw_region_offset + TXGBE_EEPROM_CHECKSUM) *checksum += local_buffer[i]; + } kvfree(eeprom_ptrs); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index 9c1c26234cad..f423012dec22 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -158,6 +158,8 @@ #define TXGBE_EEPROM_VERSION_L 0x1D #define TXGBE_EEPROM_VERSION_H 0x1E #define TXGBE_ISCSI_BOOT_CONFIG 0x07 +#define TXGBE_EEPROM_I2C_SRART_PTR 0x580 +#define TXGBE_EEPROM_I2C_END_PTR 0x800 #define TXGBE_MAX_MSIX_VECTORS 64 #define TXGBE_MAX_FDIR_INDICES 63 diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 70f7cb383228..cb6f5482d203 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -158,7 +158,6 @@ struct hv_netvsc_packet { u8 cp_partial; /* partial copy into send buffer */ u8 rmsg_size; /* RNDIS header and PPI size */ - u8 rmsg_pgcnt; /* page count of RNDIS header and PPI */ u8 page_buf_cnt; u16 q_idx; @@ -893,6 +892,18 @@ struct nvsp_message { sizeof(struct nvsp_message)) #define NETVSC_MIN_IN_MSG_SIZE sizeof(struct vmpacket_descriptor) +/* Maximum # of contiguous data ranges that can make up a trasmitted packet. 
+ * Typically it's the max SKB fragments plus 2 for the rndis packet and the + * linear portion of the SKB. But if MAX_SKB_FRAGS is large, the value may + * need to be limited to MAX_PAGE_BUFFER_COUNT, which is the max # of entries + * in a GPA direct packet sent to netvsp over VMBus. + */ +#if MAX_SKB_FRAGS + 2 < MAX_PAGE_BUFFER_COUNT +#define MAX_DATA_RANGES (MAX_SKB_FRAGS + 2) +#else +#define MAX_DATA_RANGES MAX_PAGE_BUFFER_COUNT +#endif + /* Estimated requestor size: * out_ring_size/min_out_msg_size + in_ring_size/min_in_msg_size */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index d6f5b9ea3109..720104661d7f 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -953,8 +953,7 @@ static void netvsc_copy_to_send_buf(struct netvsc_device *net_device, + pend_size; int i; u32 padding = 0; - u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt : - packet->page_buf_cnt; + u32 page_count = packet->cp_partial ? 1 : packet->page_buf_cnt; u32 remain; /* Add padding */ @@ -1055,6 +1054,42 @@ static int netvsc_dma_map(struct hv_device *hv_dev, return 0; } +/* Build an "array" of mpb entries describing the data to be transferred + * over VMBus. After the desc header fields, each "array" entry is variable + * size, and each entry starts after the end of the previous entry. The + * "offset" and "len" fields for each entry imply the size of the entry. + * + * The pfns are in HV_HYP_PAGE_SIZE, because all communication with Hyper-V + * uses that granularity, even if the system page size of the guest is larger. + * Each entry in the input "pb" array must describe a contiguous range of + * guest physical memory so that the pfns are sequential if the range crosses + * a page boundary. The offset field must be < HV_HYP_PAGE_SIZE. 
+ */ +static inline void netvsc_build_mpb_array(struct hv_page_buffer *pb, + u32 page_buffer_count, + struct vmbus_packet_mpb_array *desc, + u32 *desc_size) +{ + struct hv_mpb_array *mpb_entry = &desc->range; + int i, j; + + for (i = 0; i < page_buffer_count; i++) { + u32 offset = pb[i].offset; + u32 len = pb[i].len; + + mpb_entry->offset = offset; + mpb_entry->len = len; + + for (j = 0; j < HVPFN_UP(offset + len); j++) + mpb_entry->pfn_array[j] = pb[i].pfn + j; + + mpb_entry = (struct hv_mpb_array *)&mpb_entry->pfn_array[j]; + } + + desc->rangecount = page_buffer_count; + *desc_size = (char *)mpb_entry - (char *)desc; +} + static inline int netvsc_send_pkt( struct hv_device *device, struct hv_netvsc_packet *packet, @@ -1097,8 +1132,11 @@ static inline int netvsc_send_pkt( packet->dma_range = NULL; if (packet->page_buf_cnt) { + struct vmbus_channel_packet_page_buffer desc; + u32 desc_size; + if (packet->cp_partial) - pb += packet->rmsg_pgcnt; + pb++; ret = netvsc_dma_map(ndev_ctx->device_ctx, packet, pb); if (ret) { @@ -1106,11 +1144,12 @@ static inline int netvsc_send_pkt( goto exit; } - ret = vmbus_sendpacket_pagebuffer(out_channel, - pb, packet->page_buf_cnt, - &nvmsg, sizeof(nvmsg), - req_id); - + netvsc_build_mpb_array(pb, packet->page_buf_cnt, + (struct vmbus_packet_mpb_array *)&desc, + &desc_size); + ret = vmbus_sendpacket_mpb_desc(out_channel, + (struct vmbus_packet_mpb_array *)&desc, + desc_size, &nvmsg, sizeof(nvmsg), req_id); if (ret) netvsc_dma_unmap(ndev_ctx->device_ctx, packet); } else { @@ -1259,7 +1298,7 @@ int netvsc_send(struct net_device *ndev, packet->send_buf_index = section_index; if (packet->cp_partial) { - packet->page_buf_cnt -= packet->rmsg_pgcnt; + packet->page_buf_cnt--; packet->total_data_buflen = msd_len + packet->rmsg_size; } else { packet->page_buf_cnt = 0; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index c51b318b8a72..d8b169ac0343 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ 
b/drivers/net/hyperv/netvsc_drv.c @@ -326,43 +326,10 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, return txq; } -static u32 fill_pg_buf(unsigned long hvpfn, u32 offset, u32 len, - struct hv_page_buffer *pb) -{ - int j = 0; - - hvpfn += offset >> HV_HYP_PAGE_SHIFT; - offset = offset & ~HV_HYP_PAGE_MASK; - - while (len > 0) { - unsigned long bytes; - - bytes = HV_HYP_PAGE_SIZE - offset; - if (bytes > len) - bytes = len; - pb[j].pfn = hvpfn; - pb[j].offset = offset; - pb[j].len = bytes; - - offset += bytes; - len -= bytes; - - if (offset == HV_HYP_PAGE_SIZE && len) { - hvpfn++; - offset = 0; - j++; - } - } - - return j + 1; -} - static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, struct hv_netvsc_packet *packet, struct hv_page_buffer *pb) { - u32 slots_used = 0; - char *data = skb->data; int frags = skb_shinfo(skb)->nr_frags; int i; @@ -371,28 +338,27 @@ static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, * 2. skb linear data * 3. 
skb fragment data */ - slots_used += fill_pg_buf(virt_to_hvpfn(hdr), - offset_in_hvpage(hdr), - len, - &pb[slots_used]); + pb[0].offset = offset_in_hvpage(hdr); + pb[0].len = len; + pb[0].pfn = virt_to_hvpfn(hdr); packet->rmsg_size = len; - packet->rmsg_pgcnt = slots_used; - slots_used += fill_pg_buf(virt_to_hvpfn(data), - offset_in_hvpage(data), - skb_headlen(skb), - &pb[slots_used]); + pb[1].offset = offset_in_hvpage(skb->data); + pb[1].len = skb_headlen(skb); + pb[1].pfn = virt_to_hvpfn(skb->data); for (i = 0; i < frags; i++) { skb_frag_t *frag = skb_shinfo(skb)->frags + i; + struct hv_page_buffer *cur_pb = &pb[i + 2]; + u64 pfn = page_to_hvpfn(skb_frag_page(frag)); + u32 offset = skb_frag_off(frag); - slots_used += fill_pg_buf(page_to_hvpfn(skb_frag_page(frag)), - skb_frag_off(frag), - skb_frag_size(frag), - &pb[slots_used]); + cur_pb->offset = offset_in_hvpage(offset); + cur_pb->len = skb_frag_size(frag); + cur_pb->pfn = pfn + (offset >> HV_HYP_PAGE_SHIFT); } - return slots_used; + return frags + 2; } static int count_skb_frag_slots(struct sk_buff *skb) @@ -483,7 +449,7 @@ static int netvsc_xmit(struct sk_buff *skb, struct net_device *net, bool xdp_tx) struct net_device *vf_netdev; u32 rndis_msg_size; u32 hash; - struct hv_page_buffer pb[MAX_PAGE_BUFFER_COUNT]; + struct hv_page_buffer pb[MAX_DATA_RANGES]; /* If VF is present and up then redirect packets to it. * Skip the VF if it is marked down or has no carrier. 
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 82747dfacd70..9e73959e61ee 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -225,8 +225,7 @@ static int rndis_filter_send_request(struct rndis_device *dev, struct rndis_request *req) { struct hv_netvsc_packet *packet; - struct hv_page_buffer page_buf[2]; - struct hv_page_buffer *pb = page_buf; + struct hv_page_buffer pb; int ret; /* Setup the packet to send it */ @@ -235,27 +234,14 @@ static int rndis_filter_send_request(struct rndis_device *dev, packet->total_data_buflen = req->request_msg.msg_len; packet->page_buf_cnt = 1; - pb[0].pfn = virt_to_phys(&req->request_msg) >> - HV_HYP_PAGE_SHIFT; - pb[0].len = req->request_msg.msg_len; - pb[0].offset = offset_in_hvpage(&req->request_msg); - - /* Add one page_buf when request_msg crossing page boundary */ - if (pb[0].offset + pb[0].len > HV_HYP_PAGE_SIZE) { - packet->page_buf_cnt++; - pb[0].len = HV_HYP_PAGE_SIZE - - pb[0].offset; - pb[1].pfn = virt_to_phys((void *)&req->request_msg - + pb[0].len) >> HV_HYP_PAGE_SHIFT; - pb[1].offset = 0; - pb[1].len = req->request_msg.msg_len - - pb[0].len; - } + pb.pfn = virt_to_phys(&req->request_msg) >> HV_HYP_PAGE_SHIFT; + pb.len = req->request_msg.msg_len; + pb.offset = offset_in_hvpage(&req->request_msg); trace_rndis_send(dev->ndev, 0, &req->request_msg); rcu_read_lock_bh(); - ret = netvsc_send(dev->ndev, packet, NULL, pb, NULL, false); + ret = netvsc_send(dev->ndev, packet, NULL, &pb, NULL, false); rcu_read_unlock_bh(); return ret; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 24882d30f685..e2c6569d8c45 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2027,12 +2027,6 @@ static int ksz9477_config_init(struct phy_device *phydev) return err; } - /* According to KSZ9477 Errata DS80000754C (Module 4) all EEE modes - * in this switch shall be regarded as broken. 
- */ - if (phydev->dev_flags & MICREL_NO_EEE) - phy_disable_eee(phydev); - return kszphy_config_init(phydev); } @@ -5705,7 +5699,6 @@ static struct phy_driver ksphy_driver[] = { .handle_interrupt = kszphy_handle_interrupt, .suspend = genphy_suspend, .resume = ksz9477_resume, - .get_features = ksz9477_get_features, } }; module_phy_driver(ksphy_driver); diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index d8fc0c79745d..b75ceb90359f 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -1778,8 +1778,8 @@ static void team_change_rx_flags(struct net_device *dev, int change) struct team_port *port; int inc; - rcu_read_lock(); - list_for_each_entry_rcu(port, &team->port_list, list) { + mutex_lock(&team->lock); + list_for_each_entry(port, &team->port_list, list) { if (change & IFF_PROMISC) { inc = dev->flags & IFF_PROMISC ? 1 : -1; dev_set_promiscuity(port->dev, inc); @@ -1789,7 +1789,7 @@ static void team_change_rx_flags(struct net_device *dev, int change) dev_set_allmulti(port->dev, inc); } } - rcu_read_unlock(); + mutex_unlock(&team->lock); } static void team_set_rx_mode(struct net_device *dev) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 3df6aabc7e33..c676979c7ab9 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -3607,8 +3607,6 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu) struct vmxnet3_adapter *adapter = netdev_priv(netdev); int err = 0; - WRITE_ONCE(netdev->mtu, new_mtu); - /* * Reset_work may be in the middle of resetting the device, wait for its * completion. 
@@ -3622,6 +3620,7 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu) /* we need to re-create the rx queue based on the new mtu */ vmxnet3_rq_destroy_all(adapter); + WRITE_ONCE(netdev->mtu, new_mtu); vmxnet3_adjust_rx_ring_size(adapter); err = vmxnet3_rq_create_all(adapter); if (err) { @@ -3638,6 +3637,8 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu) "Closing it\n", err); goto out; } + } else { + WRITE_ONCE(netdev->mtu, new_mtu); } out: diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 844af16ee551..35b4ec91979e 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -1011,6 +1011,7 @@ void mt76_dma_cleanup(struct mt76_dev *dev) int i; mt76_worker_disable(&dev->tx_worker); + napi_disable(&dev->tx_napi); netif_napi_del(&dev->tx_napi); for (i = 0; i < ARRAY_SIZE(dev->phys); i++) { diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index e61da76b2097..14b1f603fb62 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -1924,14 +1924,14 @@ mt7925_mcu_sta_cmd(struct mt76_phy *phy, mt7925_mcu_sta_mld_tlv(skb, info->vif, info->link_sta->sta); mt7925_mcu_sta_eht_mld_tlv(skb, info->vif, info->link_sta->sta); } - - mt7925_mcu_sta_hdr_trans_tlv(skb, info->vif, info->link_sta); } if (!info->enable) { mt7925_mcu_sta_remove_tlv(skb); mt76_connac_mcu_add_tlv(skb, STA_REC_MLD_OFF, sizeof(struct tlv)); + } else { + mt7925_mcu_sta_hdr_trans_tlv(skb, info->vif, info->link_sta); } return mt76_mcu_skb_send_msg(dev, skb, info->cmd, true); diff --git a/drivers/nvme/common/auth.c b/drivers/nvme/common/auth.c index 2c092ec8c0a9..3b6d759bcdf2 100644 --- a/drivers/nvme/common/auth.c +++ b/drivers/nvme/common/auth.c @@ -242,7 +242,7 @@ struct nvme_dhchap_key *nvme_auth_transform_key( { const char *hmac_name; struct crypto_shash *key_tfm; - struct 
shash_desc *shash; + SHASH_DESC_ON_STACK(shash, key_tfm); struct nvme_dhchap_key *transformed_key; int ret, key_len; @@ -267,19 +267,11 @@ struct nvme_dhchap_key *nvme_auth_transform_key( if (IS_ERR(key_tfm)) return ERR_CAST(key_tfm); - shash = kmalloc(sizeof(struct shash_desc) + - crypto_shash_descsize(key_tfm), - GFP_KERNEL); - if (!shash) { - ret = -ENOMEM; - goto out_free_key; - } - key_len = crypto_shash_digestsize(key_tfm); transformed_key = nvme_auth_alloc_key(key_len, key->hash); if (!transformed_key) { ret = -ENOMEM; - goto out_free_shash; + goto out_free_key; } shash->tfm = key_tfm; @@ -299,15 +291,12 @@ struct nvme_dhchap_key *nvme_auth_transform_key( if (ret < 0) goto out_free_transformed_key; - kfree(shash); crypto_free_shash(key_tfm); return transformed_key; out_free_transformed_key: nvme_auth_free_key(transformed_key); -out_free_shash: - kfree(shash); out_free_key: crypto_free_shash(key_tfm); diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index 6115fef74c1e..f6ddbe553289 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -31,6 +31,7 @@ struct nvme_dhchap_queue_context { u32 s1; u32 s2; bool bi_directional; + bool authenticated; u16 transaction; u8 status; u8 dhgroup_id; @@ -682,6 +683,7 @@ static void nvme_auth_reset_dhchap(struct nvme_dhchap_queue_context *chap) static void nvme_auth_free_dhchap(struct nvme_dhchap_queue_context *chap) { nvme_auth_reset_dhchap(chap); + chap->authenticated = false; if (chap->shash_tfm) crypto_free_shash(chap->shash_tfm); if (chap->dh_tfm) @@ -930,12 +932,14 @@ static void nvme_queue_auth_work(struct work_struct *work) } if (!ret) { chap->error = 0; + chap->authenticated = true; if (ctrl->opts->concat && (ret = nvme_auth_secure_concat(ctrl, chap))) { dev_warn(ctrl->device, "%s: qid %d failed to enable secure concatenation\n", __func__, chap->qid); chap->error = ret; + chap->authenticated = false; } return; } @@ -1023,13 +1027,16 @@ static void nvme_ctrl_auth_work(struct 
work_struct *work) return; for (q = 1; q < ctrl->queue_count; q++) { - ret = nvme_auth_negotiate(ctrl, q); - if (ret) { - dev_warn(ctrl->device, - "qid %d: error %d setting up authentication\n", - q, ret); - break; - } + struct nvme_dhchap_queue_context *chap = + &ctrl->dhchap_ctxs[q]; + /* + * Skip re-authentication if the queue had + * not been authenticated initially. + */ + if (!chap->authenticated) + continue; + cancel_work_sync(&chap->auth_work); + queue_work(nvme_auth_wq, &chap->auth_work); } /* @@ -1037,7 +1044,13 @@ static void nvme_ctrl_auth_work(struct work_struct *work) * the controller terminates the connection. */ for (q = 1; q < ctrl->queue_count; q++) { - ret = nvme_auth_wait(ctrl, q); + struct nvme_dhchap_queue_context *chap = + &ctrl->dhchap_ctxs[q]; + if (!chap->authenticated) + continue; + flush_work(&chap->auth_work); + ret = chap->error; + nvme_auth_reset_dhchap(chap); if (ret) dev_warn(ctrl->device, "qid %d: authentication failed\n", q); @@ -1076,6 +1089,7 @@ int nvme_auth_init_ctrl(struct nvme_ctrl *ctrl) chap = &ctrl->dhchap_ctxs[i]; chap->qid = i; chap->ctrl = ctrl; + chap->authenticated = false; INIT_WORK(&chap->auth_work, nvme_queue_auth_work); } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ac53629fce68..f69a232a000a 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -38,6 +38,8 @@ struct nvme_ns_info { u32 nsid; __le32 anagrpid; u8 pi_offset; + u16 endgid; + u64 runs; bool is_shared; bool is_readonly; bool is_ready; @@ -150,6 +152,8 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, unsigned nsid); static void nvme_update_keep_alive(struct nvme_ctrl *ctrl, struct nvme_command *cmd); +static int nvme_get_log_lsi(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, + u8 lsp, u8 csi, void *log, size_t size, u64 offset, u16 lsi); void nvme_queue_scan(struct nvme_ctrl *ctrl) { @@ -664,10 +668,11 @@ static void nvme_free_ns_head(struct kref *ref) struct nvme_ns_head *head = 
container_of(ref, struct nvme_ns_head, ref); - nvme_mpath_remove_disk(head); + nvme_mpath_put_disk(head); ida_free(&head->subsys->ns_ida, head->instance); cleanup_srcu_struct(&head->srcu); nvme_put_subsystem(head->subsys); + kfree(head->plids); kfree(head); } @@ -991,6 +996,18 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, if (req->cmd_flags & REQ_RAHEAD) dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH; + if (op == nvme_cmd_write && ns->head->nr_plids) { + u16 write_stream = req->bio->bi_write_stream; + + if (WARN_ON_ONCE(write_stream > ns->head->nr_plids)) + return BLK_STS_INVAL; + + if (write_stream) { + dsmgmt |= ns->head->plids[write_stream - 1] << 16; + control |= NVME_RW_DTYPE_DPLCMT; + } + } + if (req->cmd_flags & REQ_ATOMIC && !nvme_valid_atomic_write(req)) return BLK_STS_INVAL; @@ -1157,7 +1174,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, req->cmd_flags &= ~REQ_FAILFAST_DRIVER; if (buffer && bufflen) { - ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL); + ret = blk_rq_map_kern(req, buffer, bufflen, GFP_KERNEL); if (ret) goto out; } @@ -1609,6 +1626,7 @@ static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl, info->is_shared = id->nmic & NVME_NS_NMIC_SHARED; info->is_readonly = id->nsattr & NVME_NS_ATTR_RO; info->is_ready = true; + info->endgid = le16_to_cpu(id->endgid); if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) { dev_info(ctrl->device, "Ignoring bogus Namespace Identifiers\n"); @@ -1649,6 +1667,7 @@ static int nvme_ns_info_from_id_cs_indep(struct nvme_ctrl *ctrl, info->is_ready = id->nstat & NVME_NSTAT_NRDY; info->is_rotational = id->nsfeat & NVME_NS_ROTATIONAL; info->no_vwc = id->nsfeat & NVME_NS_VWC_NOT_PRESENT; + info->endgid = le16_to_cpu(id->endgid); } kfree(id); return ret; @@ -1674,7 +1693,7 @@ static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid, int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid, unsigned int dword11, void *buffer, size_t buflen, - u32 *result) 
+ void *result) { return nvme_features(dev, nvme_admin_set_features, fid, dword11, buffer, buflen, result); @@ -1683,7 +1702,7 @@ EXPORT_SYMBOL_GPL(nvme_set_features); int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid, unsigned int dword11, void *buffer, size_t buflen, - u32 *result) + void *result) { return nvme_features(dev, nvme_admin_get_features, fid, dword11, buffer, buflen, result); @@ -2059,7 +2078,21 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id, if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; else - atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; + atomic_bs = (1 + ns->ctrl->awupf) * bs; + + /* + * Set subsystem atomic bs. + */ + if (ns->ctrl->subsys->atomic_bs) { + if (atomic_bs != ns->ctrl->subsys->atomic_bs) { + dev_err_ratelimited(ns->ctrl->device, + "%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n", + ns->disk ? 
ns->disk->disk_name : "?", + ns->ctrl->subsys->atomic_bs, + atomic_bs); + } + } else + ns->ctrl->subsys->atomic_bs = atomic_bs; nvme_update_atomic_write_disk_info(ns, id, lim, bs, atomic_bs); } @@ -2153,6 +2186,148 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns, return ret; } +static int nvme_query_fdp_granularity(struct nvme_ctrl *ctrl, + struct nvme_ns_info *info, u8 fdp_idx) +{ + struct nvme_fdp_config_log hdr, *h; + struct nvme_fdp_config_desc *desc; + size_t size = sizeof(hdr); + void *log, *end; + int i, n, ret; + + ret = nvme_get_log_lsi(ctrl, 0, NVME_LOG_FDP_CONFIGS, 0, + NVME_CSI_NVM, &hdr, size, 0, info->endgid); + if (ret) { + dev_warn(ctrl->device, + "FDP configs log header status:0x%x endgid:%d\n", ret, + info->endgid); + return ret; + } + + size = le32_to_cpu(hdr.sze); + if (size > PAGE_SIZE * MAX_ORDER_NR_PAGES) { + dev_warn(ctrl->device, "FDP config size too large:%zu\n", + size); + return 0; + } + + h = kvmalloc(size, GFP_KERNEL); + if (!h) + return -ENOMEM; + + ret = nvme_get_log_lsi(ctrl, 0, NVME_LOG_FDP_CONFIGS, 0, + NVME_CSI_NVM, h, size, 0, info->endgid); + if (ret) { + dev_warn(ctrl->device, + "FDP configs log status:0x%x endgid:%d\n", ret, + info->endgid); + goto out; + } + + n = le16_to_cpu(h->numfdpc) + 1; + if (fdp_idx > n) { + dev_warn(ctrl->device, "FDP index:%d out of range:%d\n", + fdp_idx, n); + /* Proceed without registering FDP streams */ + ret = 0; + goto out; + } + + log = h + 1; + desc = log; + end = log + size - sizeof(*h); + for (i = 0; i < fdp_idx; i++) { + log += le16_to_cpu(desc->dsze); + desc = log; + if (log >= end) { + dev_warn(ctrl->device, + "FDP invalid config descriptor list\n"); + ret = 0; + goto out; + } + } + + if (le32_to_cpu(desc->nrg) > 1) { + dev_warn(ctrl->device, "FDP NRG > 1 not supported\n"); + ret = 0; + goto out; + } + + info->runs = le64_to_cpu(desc->runs); +out: + kvfree(h); + return ret; +} + +static int nvme_query_fdp_info(struct nvme_ns *ns, struct nvme_ns_info *info) +{ + struct 
nvme_ns_head *head = ns->head; + struct nvme_ctrl *ctrl = ns->ctrl; + struct nvme_fdp_ruh_status *ruhs; + struct nvme_fdp_config fdp; + struct nvme_command c = {}; + size_t size; + int i, ret; + + /* + * The FDP configuration is static for the lifetime of the namespace, + * so return immediately if we've already registered this namespace's + * streams. + */ + if (head->nr_plids) + return 0; + + ret = nvme_get_features(ctrl, NVME_FEAT_FDP, info->endgid, NULL, 0, + &fdp); + if (ret) { + dev_warn(ctrl->device, "FDP get feature status:0x%x\n", ret); + return ret; + } + + if (!(fdp.flags & FDPCFG_FDPE)) + return 0; + + ret = nvme_query_fdp_granularity(ctrl, info, fdp.fdpcidx); + if (!info->runs) + return ret; + + size = struct_size(ruhs, ruhsd, S8_MAX - 1); + ruhs = kzalloc(size, GFP_KERNEL); + if (!ruhs) + return -ENOMEM; + + c.imr.opcode = nvme_cmd_io_mgmt_recv; + c.imr.nsid = cpu_to_le32(head->ns_id); + c.imr.mo = NVME_IO_MGMT_RECV_MO_RUHS; + c.imr.numd = cpu_to_le32(nvme_bytes_to_numd(size)); + ret = nvme_submit_sync_cmd(ns->queue, &c, ruhs, size); + if (ret) { + dev_warn(ctrl->device, "FDP io-mgmt status:0x%x\n", ret); + goto free; + } + + head->nr_plids = le16_to_cpu(ruhs->nruhsd); + if (!head->nr_plids) + goto free; + + head->plids = kcalloc(head->nr_plids, sizeof(*head->plids), + GFP_KERNEL); + if (!head->plids) { + dev_warn(ctrl->device, + "failed to allocate %u FDP placement IDs\n", + head->nr_plids); + head->nr_plids = 0; + ret = -ENOMEM; + goto free; + } + + for (i = 0; i < head->nr_plids; i++) + head->plids[i] = le16_to_cpu(ruhs->ruhsd[i].pid); +free: + kfree(ruhs); + return ret; +} + static int nvme_update_ns_info_block(struct nvme_ns *ns, struct nvme_ns_info *info) { @@ -2190,6 +2365,12 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, goto out; } + if (ns->ctrl->ctratt & NVME_CTRL_ATTR_FDPS) { + ret = nvme_query_fdp_info(ns, info); + if (ret < 0) + goto out; + } + lim = queue_limits_start_update(ns->disk->queue); memflags = 
blk_mq_freeze_queue(ns->disk->queue); @@ -2201,6 +2382,17 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, nvme_set_chunk_sectors(ns, id, &lim); if (!nvme_update_disk_info(ns, id, &lim)) capacity = 0; + + /* + * Validate the max atomic write size fits within the subsystem's + * atomic write capabilities. + */ + if (lim.atomic_write_hw_max > ns->ctrl->subsys->atomic_bs) { + blk_mq_unfreeze_queue(ns->disk->queue, memflags); + ret = -ENXIO; + goto out; + } + nvme_config_discard(ns, &lim); if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && ns->head->ids.csi == NVME_CSI_ZNS) @@ -2223,6 +2415,12 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, if (!nvme_init_integrity(ns->head, &lim, info)) capacity = 0; + lim.max_write_streams = ns->head->nr_plids; + if (lim.max_write_streams) + lim.write_stream_granularity = min(info->runs, U32_MAX); + else + lim.write_stream_granularity = 0; + ret = queue_limits_commit_update(ns->disk->queue, &lim); if (ret) { blk_mq_unfreeze_queue(ns->disk->queue, memflags); @@ -2326,6 +2524,8 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) ns->head->disk->flags |= GENHD_FL_HIDDEN; else nvme_init_integrity(ns->head, &lim, info); + lim.max_write_streams = ns_lim->max_write_streams; + lim.write_stream_granularity = ns_lim->write_stream_granularity; ret = queue_limits_commit_update(ns->head->disk->queue, &lim); set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk)); @@ -3031,7 +3231,6 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) kfree(subsys); return -EINVAL; } - subsys->awupf = le16_to_cpu(id->awupf); nvme_mpath_default_iopolicy(subsys); subsys->dev.class = &nvme_subsys_class; @@ -3084,8 +3283,8 @@ out_unlock: return ret; } -int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi, - void *log, size_t size, u64 offset) +static int nvme_get_log_lsi(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, + u8 lsp, u8 csi, void *log, size_t size, u64 
offset, u16 lsi) { struct nvme_command c = { }; u32 dwlen = nvme_bytes_to_numd(size); @@ -3099,10 +3298,18 @@ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi, c.get_log_page.lpol = cpu_to_le32(lower_32_bits(offset)); c.get_log_page.lpou = cpu_to_le32(upper_32_bits(offset)); c.get_log_page.csi = csi; + c.get_log_page.lsi = cpu_to_le16(lsi); return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size); } +int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi, + void *log, size_t size, u64 offset) +{ + return nvme_get_log_lsi(ctrl, nsid, log_page, lsp, csi, log, size, + offset, 0); +} + static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi, struct nvme_effects_log **log) { @@ -3441,7 +3648,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) dev_pm_qos_expose_latency_tolerance(ctrl->device); else if (!ctrl->apst_enabled && prev_apst_enabled) dev_pm_qos_hide_latency_tolerance(ctrl->device); - + ctrl->awupf = le16_to_cpu(id->awupf); out_free: kfree(id); return ret; @@ -3560,7 +3767,7 @@ static struct nvme_ns_head *nvme_find_ns_head(struct nvme_ctrl *ctrl, */ if (h->ns_id != nsid || !nvme_is_unique_nsid(ctrl, h)) continue; - if (!list_empty(&h->list) && nvme_tryget_ns_head(h)) + if (nvme_tryget_ns_head(h)) return h; } @@ -3804,7 +4011,8 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info) } } else { ret = -EINVAL; - if (!info->is_shared || !head->shared) { + if ((!info->is_shared || !head->shared) && + !list_empty(&head->list)) { dev_err(ctrl->device, "Duplicate unshared namespace %d\n", info->nsid); @@ -4008,7 +4216,8 @@ static void nvme_ns_remove(struct nvme_ns *ns) mutex_lock(&ns->ctrl->subsys->lock); list_del_rcu(&ns->siblings); if (list_empty(&ns->head->list)) { - list_del_init(&ns->head->entry); + if (!nvme_mpath_queue_if_no_path(ns->head)) + list_del_init(&ns->head->entry); last_path = true; } mutex_unlock(&ns->ctrl->subsys->lock); @@ -4029,7 +4238,7 @@ static void 
nvme_ns_remove(struct nvme_ns *ns) synchronize_srcu(&ns->ctrl->srcu); if (last_path) - nvme_mpath_shutdown_disk(ns->head); + nvme_mpath_remove_disk(ns->head); nvme_put_ns(ns); } diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 2257c3c96dd2..fdafa3e9e66f 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1410,9 +1410,8 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) } static void -nvme_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp) +nvme_fc_xmt_ls_rsp_free(struct nvmefc_ls_rcv_op *lsop) { - struct nvmefc_ls_rcv_op *lsop = lsrsp->nvme_fc_private; struct nvme_fc_rport *rport = lsop->rport; struct nvme_fc_lport *lport = rport->lport; unsigned long flags; @@ -1434,6 +1433,14 @@ nvme_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp) } static void +nvme_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp) +{ + struct nvmefc_ls_rcv_op *lsop = lsrsp->nvme_fc_private; + + nvme_fc_xmt_ls_rsp_free(lsop); +} + +static void nvme_fc_xmt_ls_rsp(struct nvmefc_ls_rcv_op *lsop) { struct nvme_fc_rport *rport = lsop->rport; @@ -1450,7 +1457,7 @@ nvme_fc_xmt_ls_rsp(struct nvmefc_ls_rcv_op *lsop) dev_warn(lport->dev, "LLDD rejected LS RSP xmt: LS %d status %d\n", w0->ls_cmd, ret); - nvme_fc_xmt_ls_rsp_done(lsop->lsrsp); + nvme_fc_xmt_ls_rsp_free(lsop); return; } } diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 250f3da67cc9..878ea8b1a0ac 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -10,10 +10,61 @@ #include "nvme.h" bool multipath = true; -module_param(multipath, bool, 0444); +static bool multipath_always_on; + +static int multipath_param_set(const char *val, const struct kernel_param *kp) +{ + int ret; + bool *arg = kp->arg; + + ret = param_set_bool(val, kp); + if (ret) + return ret; + + if (multipath_always_on && !*arg) { + pr_err("Can't disable multipath when multipath_always_on is configured.\n"); + *arg = true; + return -EINVAL; + } + + return 0; +} + +static const struct 
kernel_param_ops multipath_param_ops = { + .set = multipath_param_set, + .get = param_get_bool, +}; + +module_param_cb(multipath, &multipath_param_ops, &multipath, 0444); MODULE_PARM_DESC(multipath, "turn on native support for multiple controllers per subsystem"); +static int multipath_always_on_set(const char *val, + const struct kernel_param *kp) +{ + int ret; + bool *arg = kp->arg; + + ret = param_set_bool(val, kp); + if (ret < 0) + return ret; + + if (*arg) + multipath = true; + + return 0; +} + +static const struct kernel_param_ops multipath_always_on_ops = { + .set = multipath_always_on_set, + .get = param_get_bool, +}; + +module_param_cb(multipath_always_on, &multipath_always_on_ops, + &multipath_always_on, 0444); +MODULE_PARM_DESC(multipath_always_on, + "create multipath node always except for private namespace with non-unique nsid; note that this also implicitly enables native multipath support"); + static const char *nvme_iopolicy_names[] = { [NVME_IOPOLICY_NUMA] = "numa", [NVME_IOPOLICY_RR] = "round-robin", @@ -442,7 +493,17 @@ static bool nvme_available_path(struct nvme_ns_head *head) break; } } - return false; + + /* + * If "head->delayed_removal_secs" is configured (i.e., non-zero), do + * not immediately fail I/O. Instead, requeue the I/O for the configured + * duration, anticipating that if there's a transient link failure then + * it may recover within this time window. This parameter is exported to + * userspace via sysfs, and its default value is zero. It is internally + * mapped to NVME_NSHEAD_QUEUE_IF_NO_PATH. When delayed_removal_secs is + * non-zero, this flag is set to true. When zero, the flag is cleared. 
+ */ + return nvme_mpath_queue_if_no_path(head); } static void nvme_ns_head_submit_bio(struct bio *bio) @@ -617,6 +678,40 @@ static void nvme_requeue_work(struct work_struct *work) } } +static void nvme_remove_head(struct nvme_ns_head *head) +{ + if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { + /* + * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared + * to allow multipath to fail all I/O. + */ + kblockd_schedule_work(&head->requeue_work); + + nvme_cdev_del(&head->cdev, &head->cdev_device); + synchronize_srcu(&head->srcu); + del_gendisk(head->disk); + nvme_put_ns_head(head); + } +} + +static void nvme_remove_head_work(struct work_struct *work) +{ + struct nvme_ns_head *head = container_of(to_delayed_work(work), + struct nvme_ns_head, remove_work); + bool remove = false; + + mutex_lock(&head->subsys->lock); + if (list_empty(&head->list)) { + list_del_init(&head->entry); + remove = true; + } + mutex_unlock(&head->subsys->lock); + if (remove) + nvme_remove_head(head); + + module_put(THIS_MODULE); +} + int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) { struct queue_limits lim; @@ -626,19 +721,31 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) spin_lock_init(&head->requeue_lock); INIT_WORK(&head->requeue_work, nvme_requeue_work); INIT_WORK(&head->partition_scan_work, nvme_partition_scan_work); + INIT_DELAYED_WORK(&head->remove_work, nvme_remove_head_work); + head->delayed_removal_secs = 0; /* - * Add a multipath node if the subsystems supports multiple controllers. - * We also do this for private namespaces as the namespace sharing flag - * could change after a rescan. + * If "multipath_always_on" is enabled, a multipath node is added + * regardless of whether the disk is single/multi ported, and whether + * the namespace is shared or private. 
If "multipath_always_on" is not + * enabled, a multipath node is added only if the subsystem supports + * multiple controllers and the "multipath" option is configured. In + * either case, for private namespaces, we ensure that the NSID is + * unique. */ - if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || - !nvme_is_unique_nsid(ctrl, head) || !multipath) + if (!multipath_always_on) { + if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || + !multipath) + return 0; + } + + if (!nvme_is_unique_nsid(ctrl, head)) return 0; blk_set_stacking_limits(&lim); lim.dma_alignment = 3; - lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | BLK_FEAT_POLL; + lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | + BLK_FEAT_POLL | BLK_FEAT_ATOMIC_WRITES; if (head->ids.csi == NVME_CSI_ZNS) lim.features |= BLK_FEAT_ZONED; @@ -659,6 +766,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) set_bit(GD_SUPPRESS_PART_SCAN, &head->disk->state); sprintf(head->disk->disk_name, "nvme%dn%d", ctrl->subsys->instance, head->instance); + nvme_tryget_ns_head(head); return 0; } @@ -1015,6 +1123,49 @@ static ssize_t numa_nodes_show(struct device *dev, struct device_attribute *attr } DEVICE_ATTR_RO(numa_nodes); +static ssize_t delayed_removal_secs_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gendisk *disk = dev_to_disk(dev); + struct nvme_ns_head *head = disk->private_data; + int ret; + + mutex_lock(&head->subsys->lock); + ret = sysfs_emit(buf, "%u\n", head->delayed_removal_secs); + mutex_unlock(&head->subsys->lock); + return ret; +} + +static ssize_t delayed_removal_secs_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gendisk *disk = dev_to_disk(dev); + struct nvme_ns_head *head = disk->private_data; + unsigned int sec; + int ret; + + ret = kstrtouint(buf, 0, &sec); + if (ret < 0) + return ret; + + mutex_lock(&head->subsys->lock); + head->delayed_removal_secs = sec; + if 
(sec) + set_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags); + else + clear_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags); + mutex_unlock(&head->subsys->lock); + /* + * Ensure that update to NVME_NSHEAD_QUEUE_IF_NO_PATH is seen + * by its reader. + */ + synchronize_srcu(&head->srcu); + + return count; +} + +DEVICE_ATTR_RW(delayed_removal_secs); + static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl, struct nvme_ana_group_desc *desc, void *data) { @@ -1136,23 +1287,43 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid) #endif } -void nvme_mpath_shutdown_disk(struct nvme_ns_head *head) +void nvme_mpath_remove_disk(struct nvme_ns_head *head) { - if (!head->disk) - return; - if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { - nvme_cdev_del(&head->cdev, &head->cdev_device); + bool remove = false; + + mutex_lock(&head->subsys->lock); + /* + * We are called when all paths have been removed, and at that point + * head->list is expected to be empty. However, nvme_remove_ns() and + * nvme_init_ns_head() can run concurrently and so if head->delayed_ + * removal_secs is configured, it is possible that by the time we reach + * this point, head->list may no longer be empty. Therefore, we recheck + * head->list here. If it is no longer empty then we skip enqueuing the + * delayed head removal work. + */ + if (!list_empty(&head->list)) + goto out; + + if (head->delayed_removal_secs) { /* - * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared - * to allow multipath to fail all I/O. + * Ensure that no one could remove this module while the head + * remove work is pending. 
*/ - synchronize_srcu(&head->srcu); - kblockd_schedule_work(&head->requeue_work); - del_gendisk(head->disk); + if (!try_module_get(THIS_MODULE)) + goto out; + queue_delayed_work(nvme_wq, &head->remove_work, + head->delayed_removal_secs * HZ); + } else { + list_del_init(&head->entry); + remove = true; } +out: + mutex_unlock(&head->subsys->lock); + if (remove) + nvme_remove_head(head); } -void nvme_mpath_remove_disk(struct nvme_ns_head *head) +void nvme_mpath_put_disk(struct nvme_ns_head *head) { if (!head->disk) return; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 51e078642127..ad0c1f834f09 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -410,6 +410,7 @@ struct nvme_ctrl { enum nvme_ctrl_type cntrltype; enum nvme_dctype dctype; + u16 awupf; /* 0's based value. */ }; static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl) @@ -442,11 +443,11 @@ struct nvme_subsystem { u8 cmic; enum nvme_subsys_type subtype; u16 vendor_id; - u16 awupf; /* 0's based awupf value. 
*/ struct ida ns_ida; #ifdef CONFIG_NVME_MULTIPATH enum nvme_iopolicy iopolicy; #endif + u32 atomic_bs; }; /* @@ -496,6 +497,9 @@ struct nvme_ns_head { struct device cdev_device; struct gendisk *disk; + + u16 nr_plids; + u16 *plids; #ifdef CONFIG_NVME_MULTIPATH struct bio_list requeue_list; spinlock_t requeue_lock; @@ -503,7 +507,10 @@ struct nvme_ns_head { struct work_struct partition_scan_work; struct mutex lock; unsigned long flags; -#define NVME_NSHEAD_DISK_LIVE 0 + struct delayed_work remove_work; + unsigned int delayed_removal_secs; +#define NVME_NSHEAD_DISK_LIVE 0 +#define NVME_NSHEAD_QUEUE_IF_NO_PATH 1 struct nvme_ns __rcu *current_path[]; #endif }; @@ -896,10 +903,10 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, int qid, nvme_submit_flags_t flags); int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid, unsigned int dword11, void *buffer, size_t buflen, - u32 *result); + void *result); int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid, unsigned int dword11, void *buffer, size_t buflen, - u32 *result); + void *result); int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); @@ -960,7 +967,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head); void nvme_mpath_add_sysfs_link(struct nvme_ns_head *ns); void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns); void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid); -void nvme_mpath_remove_disk(struct nvme_ns_head *head); +void nvme_mpath_put_disk(struct nvme_ns_head *head); int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id); void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl); void nvme_mpath_update(struct nvme_ctrl *ctrl); @@ -969,7 +976,7 @@ void nvme_mpath_stop(struct nvme_ctrl *ctrl); bool nvme_mpath_clear_current_path(struct nvme_ns *ns); void nvme_mpath_revalidate_paths(struct nvme_ns *ns); void 
nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl); -void nvme_mpath_shutdown_disk(struct nvme_ns_head *head); +void nvme_mpath_remove_disk(struct nvme_ns_head *head); void nvme_mpath_start_request(struct request *rq); void nvme_mpath_end_request(struct request *rq); @@ -986,12 +993,19 @@ extern struct device_attribute dev_attr_ana_grpid; extern struct device_attribute dev_attr_ana_state; extern struct device_attribute dev_attr_queue_depth; extern struct device_attribute dev_attr_numa_nodes; +extern struct device_attribute dev_attr_delayed_removal_secs; extern struct device_attribute subsys_attr_iopolicy; static inline bool nvme_disk_is_ns_head(struct gendisk *disk) { return disk->fops == &nvme_ns_head_ops; } +static inline bool nvme_mpath_queue_if_no_path(struct nvme_ns_head *head) +{ + if (test_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags)) + return true; + return false; +} #else #define multipath false static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl) @@ -1012,7 +1026,7 @@ static inline int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, static inline void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid) { } -static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head) +static inline void nvme_mpath_put_disk(struct nvme_ns_head *head) { } static inline void nvme_mpath_add_sysfs_link(struct nvme_ns *ns) @@ -1031,7 +1045,7 @@ static inline void nvme_mpath_revalidate_paths(struct nvme_ns *ns) static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) { } -static inline void nvme_mpath_shutdown_disk(struct nvme_ns_head *head) +static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head) { } static inline void nvme_trace_bio_complete(struct request *req) @@ -1079,6 +1093,10 @@ static inline bool nvme_disk_is_ns_head(struct gendisk *disk) { return false; } +static inline bool nvme_mpath_queue_if_no_path(struct nvme_ns_head *head) +{ + return false; +} #endif /* CONFIG_NVME_MULTIPATH */ int nvme_ns_get_unique_id(struct 
nvme_ns *ns, u8 id[16], diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 2e30e9be7408..e0bfe04a2bc2 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -18,6 +18,7 @@ #include <linux/mm.h> #include <linux/module.h> #include <linux/mutex.h> +#include <linux/nodemask.h> #include <linux/once.h> #include <linux/pci.h> #include <linux/suspend.h> @@ -34,16 +35,31 @@ #define SQ_SIZE(q) ((q)->q_depth << (q)->sqes) #define CQ_SIZE(q) ((q)->q_depth * sizeof(struct nvme_completion)) -#define SGES_PER_PAGE (NVME_CTRL_PAGE_SIZE / sizeof(struct nvme_sgl_desc)) +/* Optimisation for I/Os between 4k and 128k */ +#define NVME_SMALL_POOL_SIZE 256 /* * These can be higher, but we need to ensure that any command doesn't * require an sg allocation that needs more than a page of data. */ #define NVME_MAX_KB_SZ 8192 -#define NVME_MAX_SEGS 128 -#define NVME_MAX_META_SEGS 15 -#define NVME_MAX_NR_ALLOCATIONS 5 +#define NVME_MAX_NR_DESCRIPTORS 5 + +/* + * For data SGLs we support a single descriptors worth of SGL entries, but for + * now we also limit it to avoid an allocation larger than PAGE_SIZE for the + * scatterlist. + */ +#define NVME_MAX_SEGS \ + min(NVME_CTRL_PAGE_SIZE / sizeof(struct nvme_sgl_desc), \ + (PAGE_SIZE / sizeof(struct scatterlist))) + +/* + * For metadata SGLs, only the small descriptor is supported, and the first + * entry is the segment descriptor, which for the data pointer sits in the SQE. + */ +#define NVME_MAX_META_SEGS \ + ((NVME_SMALL_POOL_SIZE / sizeof(struct nvme_sgl_desc)) - 1) static int use_threaded_interrupts; module_param(use_threaded_interrupts, int, 0444); @@ -112,6 +128,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); static void nvme_delete_io_queues(struct nvme_dev *dev); static void nvme_update_attrs(struct nvme_dev *dev); +struct nvme_descriptor_pools { + struct dma_pool *large; + struct dma_pool *small; +}; + /* * Represents an NVM Express device. Each nvme_dev is a PCI function. 
*/ @@ -121,8 +142,6 @@ struct nvme_dev { struct blk_mq_tag_set admin_tagset; u32 __iomem *dbs; struct device *dev; - struct dma_pool *prp_page_pool; - struct dma_pool *prp_small_pool; unsigned online_queues; unsigned max_qid; unsigned io_queues[HCTX_MAX_TYPES]; @@ -162,6 +181,7 @@ struct nvme_dev { unsigned int nr_allocated_queues; unsigned int nr_write_queues; unsigned int nr_poll_queues; + struct nvme_descriptor_pools descriptor_pools[]; }; static int io_queue_depth_set(const char *val, const struct kernel_param *kp) @@ -191,6 +211,7 @@ static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl) */ struct nvme_queue { struct nvme_dev *dev; + struct nvme_descriptor_pools descriptor_pools; spinlock_t sq_lock; void *sq_cmds; /* only used for poll queues: */ @@ -219,30 +240,30 @@ struct nvme_queue { struct completion delete_done; }; -union nvme_descriptor { - struct nvme_sgl_desc *sg_list; - __le64 *prp_list; +/* bits for iod->flags */ +enum nvme_iod_flags { + /* this command has been aborted by the timeout handler */ + IOD_ABORTED = 1U << 0, + + /* uses the small descriptor pool */ + IOD_SMALL_DESCRIPTOR = 1U << 1, }; /* * The nvme_iod describes the data in an I/O. - * - * The sg pointer contains the list of PRP/SGL chunk allocations in addition - * to the actual struct scatterlist. */ struct nvme_iod { struct nvme_request req; struct nvme_command cmd; - bool aborted; - s8 nr_allocations; /* PRP list pool allocations. 
0 means small - pool in use */ + u8 flags; + u8 nr_descriptors; unsigned int dma_len; /* length of single DMA segment mapping */ dma_addr_t first_dma; dma_addr_t meta_dma; struct sg_table sgt; struct sg_table meta_sgt; - union nvme_descriptor meta_list; - union nvme_descriptor list[NVME_MAX_NR_ALLOCATIONS]; + struct nvme_sgl_desc *meta_descriptor; + void *descriptors[NVME_MAX_NR_DESCRIPTORS]; }; static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev) @@ -390,37 +411,85 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, __le32 *dbbuf_db, * as it only leads to a small amount of wasted memory for the lifetime of * the I/O. */ -static int nvme_pci_npages_prp(void) +static __always_inline int nvme_pci_npages_prp(void) { unsigned max_bytes = (NVME_MAX_KB_SZ * 1024) + NVME_CTRL_PAGE_SIZE; unsigned nprps = DIV_ROUND_UP(max_bytes, NVME_CTRL_PAGE_SIZE); return DIV_ROUND_UP(8 * nprps, NVME_CTRL_PAGE_SIZE - 8); } -static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, - unsigned int hctx_idx) +static struct nvme_descriptor_pools * +nvme_setup_descriptor_pools(struct nvme_dev *dev, unsigned numa_node) { - struct nvme_dev *dev = to_nvme_dev(data); - struct nvme_queue *nvmeq = &dev->queues[0]; + struct nvme_descriptor_pools *pools = &dev->descriptor_pools[numa_node]; + size_t small_align = NVME_SMALL_POOL_SIZE; - WARN_ON(hctx_idx != 0); - WARN_ON(dev->admin_tagset.tags[0] != hctx->tags); + if (pools->small) + return pools; /* already initialized */ - hctx->driver_data = nvmeq; - return 0; + pools->large = dma_pool_create_node("nvme descriptor page", dev->dev, + NVME_CTRL_PAGE_SIZE, NVME_CTRL_PAGE_SIZE, 0, numa_node); + if (!pools->large) + return ERR_PTR(-ENOMEM); + + if (dev->ctrl.quirks & NVME_QUIRK_DMAPOOL_ALIGN_512) + small_align = 512; + + pools->small = dma_pool_create_node("nvme descriptor small", dev->dev, + NVME_SMALL_POOL_SIZE, small_align, 0, numa_node); + if (!pools->small) { + dma_pool_destroy(pools->large); + pools->large = NULL; + 
return ERR_PTR(-ENOMEM); + } + + return pools; } -static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, - unsigned int hctx_idx) +static void nvme_release_descriptor_pools(struct nvme_dev *dev) +{ + unsigned i; + + for (i = 0; i < nr_node_ids; i++) { + struct nvme_descriptor_pools *pools = &dev->descriptor_pools[i]; + + dma_pool_destroy(pools->large); + dma_pool_destroy(pools->small); + } +} + +static int nvme_init_hctx_common(struct blk_mq_hw_ctx *hctx, void *data, + unsigned qid) { struct nvme_dev *dev = to_nvme_dev(data); - struct nvme_queue *nvmeq = &dev->queues[hctx_idx + 1]; + struct nvme_queue *nvmeq = &dev->queues[qid]; + struct nvme_descriptor_pools *pools; + struct blk_mq_tags *tags; + + tags = qid ? dev->tagset.tags[qid - 1] : dev->admin_tagset.tags[0]; + WARN_ON(tags != hctx->tags); + pools = nvme_setup_descriptor_pools(dev, hctx->numa_node); + if (IS_ERR(pools)) + return PTR_ERR(pools); - WARN_ON(dev->tagset.tags[hctx_idx] != hctx->tags); + nvmeq->descriptor_pools = *pools; hctx->driver_data = nvmeq; return 0; } +static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, + unsigned int hctx_idx) +{ + WARN_ON(hctx_idx != 0); + return nvme_init_hctx_common(hctx, data, 0); +} + +static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, + unsigned int hctx_idx) +{ + return nvme_init_hctx_common(hctx, data, hctx_idx + 1); +} + static int nvme_pci_init_request(struct blk_mq_tag_set *set, struct request *req, unsigned int hctx_idx, unsigned int numa_node) @@ -537,23 +606,39 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req, return true; } -static void nvme_free_prps(struct nvme_dev *dev, struct request *req) +static inline struct dma_pool *nvme_dma_pool(struct nvme_queue *nvmeq, + struct nvme_iod *iod) +{ + if (iod->flags & IOD_SMALL_DESCRIPTOR) + return nvmeq->descriptor_pools.small; + return nvmeq->descriptor_pools.large; +} + +static void nvme_free_descriptors(struct nvme_queue *nvmeq, struct 
request *req) { const int last_prp = NVME_CTRL_PAGE_SIZE / sizeof(__le64) - 1; struct nvme_iod *iod = blk_mq_rq_to_pdu(req); dma_addr_t dma_addr = iod->first_dma; int i; - for (i = 0; i < iod->nr_allocations; i++) { - __le64 *prp_list = iod->list[i].prp_list; + if (iod->nr_descriptors == 1) { + dma_pool_free(nvme_dma_pool(nvmeq, iod), iod->descriptors[0], + dma_addr); + return; + } + + for (i = 0; i < iod->nr_descriptors; i++) { + __le64 *prp_list = iod->descriptors[i]; dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]); - dma_pool_free(dev->prp_page_pool, prp_list, dma_addr); + dma_pool_free(nvmeq->descriptor_pools.large, prp_list, + dma_addr); dma_addr = next_dma_addr; } } -static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) +static void nvme_unmap_data(struct nvme_dev *dev, struct nvme_queue *nvmeq, + struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); @@ -566,15 +651,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) WARN_ON_ONCE(!iod->sgt.nents); dma_unmap_sgtable(dev->dev, &iod->sgt, rq_dma_dir(req), 0); - - if (iod->nr_allocations == 0) - dma_pool_free(dev->prp_small_pool, iod->list[0].sg_list, - iod->first_dma); - else if (iod->nr_allocations == 1) - dma_pool_free(dev->prp_page_pool, iod->list[0].sg_list, - iod->first_dma); - else - nvme_free_prps(dev, req); + nvme_free_descriptors(nvmeq, req); mempool_free(iod->sgt.sgl, dev->iod_mempool); } @@ -592,11 +669,10 @@ static void nvme_print_sgl(struct scatterlist *sgl, int nents) } } -static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev, +static blk_status_t nvme_pci_setup_prps(struct nvme_queue *nvmeq, struct request *req, struct nvme_rw_command *cmnd) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - struct dma_pool *pool; int length = blk_rq_payload_bytes(req); struct scatterlist *sg = iod->sgt.sgl; int dma_len = sg_dma_len(sg); @@ -604,7 +680,7 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev, int offset = dma_addr & 
(NVME_CTRL_PAGE_SIZE - 1); __le64 *prp_list; dma_addr_t prp_dma; - int nprps, i; + int i; length -= (NVME_CTRL_PAGE_SIZE - offset); if (length <= 0) { @@ -626,30 +702,26 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev, goto done; } - nprps = DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE); - if (nprps <= (256 / 8)) { - pool = dev->prp_small_pool; - iod->nr_allocations = 0; - } else { - pool = dev->prp_page_pool; - iod->nr_allocations = 1; - } + if (DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE) <= + NVME_SMALL_POOL_SIZE / sizeof(__le64)) + iod->flags |= IOD_SMALL_DESCRIPTOR; - prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma); - if (!prp_list) { - iod->nr_allocations = -1; + prp_list = dma_pool_alloc(nvme_dma_pool(nvmeq, iod), GFP_ATOMIC, + &prp_dma); + if (!prp_list) return BLK_STS_RESOURCE; - } - iod->list[0].prp_list = prp_list; + iod->descriptors[iod->nr_descriptors++] = prp_list; iod->first_dma = prp_dma; i = 0; for (;;) { if (i == NVME_CTRL_PAGE_SIZE >> 3) { __le64 *old_prp_list = prp_list; - prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma); + + prp_list = dma_pool_alloc(nvmeq->descriptor_pools.large, + GFP_ATOMIC, &prp_dma); if (!prp_list) goto free_prps; - iod->list[iod->nr_allocations++].prp_list = prp_list; + iod->descriptors[iod->nr_descriptors++] = prp_list; prp_list[0] = old_prp_list[i - 1]; old_prp_list[i - 1] = cpu_to_le64(prp_dma); i = 1; @@ -673,7 +745,7 @@ done: cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma); return BLK_STS_OK; free_prps: - nvme_free_prps(dev, req); + nvme_free_descriptors(nvmeq, req); return BLK_STS_RESOURCE; bad_sgl: WARN(DO_ONCE(nvme_print_sgl, iod->sgt.sgl, iod->sgt.nents), @@ -698,11 +770,10 @@ static void nvme_pci_sgl_set_seg(struct nvme_sgl_desc *sge, sge->type = NVME_SGL_FMT_LAST_SEG_DESC << 4; } -static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, +static blk_status_t nvme_pci_setup_sgls(struct nvme_queue *nvmeq, struct request *req, struct nvme_rw_command *cmd) { struct nvme_iod *iod = 
blk_mq_rq_to_pdu(req); - struct dma_pool *pool; struct nvme_sgl_desc *sg_list; struct scatterlist *sg = iod->sgt.sgl; unsigned int entries = iod->sgt.nents; @@ -717,21 +788,14 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, return BLK_STS_OK; } - if (entries <= (256 / sizeof(struct nvme_sgl_desc))) { - pool = dev->prp_small_pool; - iod->nr_allocations = 0; - } else { - pool = dev->prp_page_pool; - iod->nr_allocations = 1; - } + if (entries <= NVME_SMALL_POOL_SIZE / sizeof(*sg_list)) + iod->flags |= IOD_SMALL_DESCRIPTOR; - sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma); - if (!sg_list) { - iod->nr_allocations = -1; + sg_list = dma_pool_alloc(nvme_dma_pool(nvmeq, iod), GFP_ATOMIC, + &sgl_dma); + if (!sg_list) return BLK_STS_RESOURCE; - } - - iod->list[0].sg_list = sg_list; + iod->descriptors[iod->nr_descriptors++] = sg_list; iod->first_dma = sgl_dma; nvme_pci_sgl_set_seg(&cmd->dptr.sgl, sgl_dma, entries); @@ -785,12 +849,12 @@ static blk_status_t nvme_setup_sgl_simple(struct nvme_dev *dev, static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, struct nvme_command *cmnd) { + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; struct nvme_iod *iod = blk_mq_rq_to_pdu(req); blk_status_t ret = BLK_STS_RESOURCE; int rc; if (blk_rq_nr_phys_segments(req) == 1) { - struct nvme_queue *nvmeq = req->mq_hctx->driver_data; struct bio_vec bv = req_bvec(req); if (!is_pci_p2pdma_page(bv.bv_page)) { @@ -825,9 +889,9 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, } if (nvme_pci_use_sgls(dev, req, iod->sgt.nents)) - ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw); + ret = nvme_pci_setup_sgls(nvmeq, req, &cmnd->rw); else - ret = nvme_pci_setup_prps(dev, req, &cmnd->rw); + ret = nvme_pci_setup_prps(nvmeq, req, &cmnd->rw); if (ret != BLK_STS_OK) goto out_unmap_sg; return BLK_STS_OK; @@ -842,6 +906,7 @@ out_free_sg: static blk_status_t nvme_pci_setup_meta_sgls(struct nvme_dev *dev, struct request *req) { + struct 
nvme_queue *nvmeq = req->mq_hctx->driver_data; struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_rw_command *cmnd = &iod->cmd.rw; struct nvme_sgl_desc *sg_list; @@ -865,12 +930,13 @@ static blk_status_t nvme_pci_setup_meta_sgls(struct nvme_dev *dev, if (rc) goto out_free_sg; - sg_list = dma_pool_alloc(dev->prp_small_pool, GFP_ATOMIC, &sgl_dma); + sg_list = dma_pool_alloc(nvmeq->descriptor_pools.small, GFP_ATOMIC, + &sgl_dma); if (!sg_list) goto out_unmap_sg; entries = iod->meta_sgt.nents; - iod->meta_list.sg_list = sg_list; + iod->meta_descriptor = sg_list; iod->meta_dma = sgl_dma; cmnd->flags = NVME_CMD_SGL_METASEG; @@ -912,7 +978,10 @@ static blk_status_t nvme_pci_setup_meta_mptr(struct nvme_dev *dev, static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req) { - if (nvme_pci_metadata_use_sgls(dev, req)) + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + + if ((iod->cmd.common.flags & NVME_CMD_SGL_METABUF) && + nvme_pci_metadata_use_sgls(dev, req)) return nvme_pci_setup_meta_sgls(dev, req); return nvme_pci_setup_meta_mptr(dev, req); } @@ -922,8 +991,8 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req) struct nvme_iod *iod = blk_mq_rq_to_pdu(req); blk_status_t ret; - iod->aborted = false; - iod->nr_allocations = -1; + iod->flags = 0; + iod->nr_descriptors = 0; iod->sgt.nents = 0; iod->meta_sgt.nents = 0; @@ -947,7 +1016,7 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req) return BLK_STS_OK; out_unmap_data: if (blk_rq_nr_phys_segments(req)) - nvme_unmap_data(dev, req); + nvme_unmap_data(dev, req->mq_hctx->driver_data, req); out_free_cmd: nvme_cleanup_cmd(req); return ret; @@ -1037,6 +1106,7 @@ static void nvme_queue_rqs(struct rq_list *rqlist) } static __always_inline void nvme_unmap_metadata(struct nvme_dev *dev, + struct nvme_queue *nvmeq, struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); @@ -1048,8 +1118,8 @@ static __always_inline void 
nvme_unmap_metadata(struct nvme_dev *dev, return; } - dma_pool_free(dev->prp_small_pool, iod->meta_list.sg_list, - iod->meta_dma); + dma_pool_free(nvmeq->descriptor_pools.small, iod->meta_descriptor, + iod->meta_dma); dma_unmap_sgtable(dev->dev, &iod->meta_sgt, rq_dma_dir(req), 0); mempool_free(iod->meta_sgt.sgl, dev->iod_meta_mempool); } @@ -1060,10 +1130,10 @@ static __always_inline void nvme_pci_unmap_rq(struct request *req) struct nvme_dev *dev = nvmeq->dev; if (blk_integrity_rq(req)) - nvme_unmap_metadata(dev, req); + nvme_unmap_metadata(dev, nvmeq, req); if (blk_rq_nr_phys_segments(req)) - nvme_unmap_data(dev, req); + nvme_unmap_data(dev, nvmeq, req); } static void nvme_pci_complete_rq(struct request *req) @@ -1202,7 +1272,9 @@ static void nvme_poll_irqdisable(struct nvme_queue *nvmeq) WARN_ON_ONCE(test_bit(NVMEQ_POLLED, &nvmeq->flags)); disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); + spin_lock(&nvmeq->cq_poll_lock); nvme_poll_cq(nvmeq, NULL); + spin_unlock(&nvmeq->cq_poll_lock); enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); } @@ -1488,7 +1560,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) * returned to the driver, or if this is the admin queue. 
*/ opcode = nvme_req(req)->cmd->common.opcode; - if (!nvmeq->qid || iod->aborted) { + if (!nvmeq->qid || (iod->flags & IOD_ABORTED)) { dev_warn(dev->ctrl.device, "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, reset controller\n", req->tag, nvme_cid(req), opcode, @@ -1501,7 +1573,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) atomic_inc(&dev->ctrl.abort_limit); return BLK_EH_RESET_TIMER; } - iod->aborted = true; + iod->flags |= IOD_ABORTED; cmd.abort.opcode = nvme_admin_abort_cmd; cmd.abort.cid = nvme_cid(req); @@ -2840,35 +2912,6 @@ static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown) return 0; } -static int nvme_setup_prp_pools(struct nvme_dev *dev) -{ - size_t small_align = 256; - - dev->prp_page_pool = dma_pool_create("prp list page", dev->dev, - NVME_CTRL_PAGE_SIZE, - NVME_CTRL_PAGE_SIZE, 0); - if (!dev->prp_page_pool) - return -ENOMEM; - - if (dev->ctrl.quirks & NVME_QUIRK_DMAPOOL_ALIGN_512) - small_align = 512; - - /* Optimisation for I/Os between 4k and 128k */ - dev->prp_small_pool = dma_pool_create("prp list 256", dev->dev, - 256, small_align, 0); - if (!dev->prp_small_pool) { - dma_pool_destroy(dev->prp_page_pool); - return -ENOMEM; - } - return 0; -} - -static void nvme_release_prp_pools(struct nvme_dev *dev) -{ - dma_pool_destroy(dev->prp_page_pool); - dma_pool_destroy(dev->prp_small_pool); -} - static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev) { size_t meta_size = sizeof(struct scatterlist) * (NVME_MAX_META_SEGS + 1); @@ -3183,7 +3226,8 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, struct nvme_dev *dev; int ret = -ENOMEM; - dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node); + dev = kzalloc_node(struct_size(dev, descriptor_pools, nr_node_ids), + GFP_KERNEL, node); if (!dev) return ERR_PTR(-ENOMEM); INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work); @@ -3258,13 +3302,9 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result) goto 
out_uninit_ctrl; - result = nvme_setup_prp_pools(dev); - if (result) - goto out_dev_unmap; - result = nvme_pci_alloc_iod_mempool(dev); if (result) - goto out_release_prp_pools; + goto out_dev_unmap; dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); @@ -3340,8 +3380,6 @@ out_disable: out_release_iod_mempool: mempool_destroy(dev->iod_mempool); mempool_destroy(dev->iod_meta_mempool); -out_release_prp_pools: - nvme_release_prp_pools(dev); out_dev_unmap: nvme_dev_unmap(dev); out_uninit_ctrl: @@ -3406,7 +3444,7 @@ static void nvme_remove(struct pci_dev *pdev) nvme_free_queues(dev, 0); mempool_destroy(dev->iod_mempool); mempool_destroy(dev->iod_meta_mempool); - nvme_release_prp_pools(dev); + nvme_release_descriptor_pools(dev); nvme_dev_unmap(dev); nvme_uninit_ctrl(&dev->ctrl); } @@ -3737,6 +3775,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, { PCI_DEVICE(0x1e49, 0x0041), /* ZHITAI TiPro7000 NVMe SSD */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, + { PCI_DEVICE(0x025e, 0xf1ac), /* SOLIDIGM P44 pro SSDPFKKW020X7 */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, { PCI_DEVICE(0xc0a9, 0x540a), /* Crucial P2 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1d97, 0x2263), /* Lexar NM610 */ @@ -3805,9 +3845,7 @@ static int __init nvme_init(void) BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64); BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64); BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2); - BUILD_BUG_ON(NVME_MAX_SEGS > SGES_PER_PAGE); - BUILD_BUG_ON(sizeof(struct scatterlist) * NVME_MAX_SEGS > PAGE_SIZE); - BUILD_BUG_ON(nvme_pci_npages_prp() > NVME_MAX_NR_ALLOCATIONS); + BUILD_BUG_ON(nvme_pci_npages_prp() > NVME_MAX_NR_DESCRIPTORS); return pci_register_driver(&nvme_driver); } diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 6d31226f7a4f..29430949ce2f 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -260,6 +260,7 @@ static struct attribute 
*nvme_ns_attrs[] = { &dev_attr_ana_state.attr, &dev_attr_queue_depth.attr, &dev_attr_numa_nodes.attr, + &dev_attr_delayed_removal_secs.attr, #endif &dev_attr_io_passthru_err_log_enabled.attr, NULL, @@ -296,6 +297,12 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj, if (nvme_disk_is_ns_head(dev_to_disk(dev))) return 0; } + if (a == &dev_attr_delayed_removal_secs.attr) { + struct gendisk *disk = dev_to_disk(dev); + + if (!nvme_disk_is_ns_head(disk)) + return 0; + } #endif return a->mode; } @@ -306,13 +313,41 @@ static const struct attribute_group nvme_ns_attr_group = { }; #ifdef CONFIG_NVME_MULTIPATH +/* + * NOTE: The dummy attribute does not appear in sysfs. It exists solely to allow + * control over the visibility of the multipath sysfs node. Without at least one + * attribute defined in nvme_ns_mpath_attrs[], the sysfs implementation does not + * invoke the multipath_sysfs_group_visible() method. As a result, we would not + * be able to control the visibility of the multipath sysfs node. 
+ */ +static struct attribute dummy_attr = { + .name = "dummy", +}; + static struct attribute *nvme_ns_mpath_attrs[] = { + &dummy_attr, NULL, }; +static bool multipath_sysfs_group_visible(struct kobject *kobj) +{ + struct device *dev = container_of(kobj, struct device, kobj); + + return nvme_disk_is_ns_head(dev_to_disk(dev)); +} + +static bool multipath_sysfs_attr_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + return false; +} + +DEFINE_SYSFS_GROUP_VISIBLE(multipath_sysfs) + const struct attribute_group nvme_ns_mpath_attr_group = { .name = "multipath", .attrs = nvme_ns_mpath_attrs, + .is_visible = SYSFS_GROUP_VISIBLE(multipath_sysfs), }; #endif diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index aba365f97cf6..853bc67d045c 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -403,7 +403,7 @@ static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue) } static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, - bool sync, bool last) + bool last) { struct nvme_tcp_queue *queue = req->queue; bool empty; @@ -417,7 +417,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, * are on the same cpu, so we don't introduce contention. 
*/ if (queue->io_cpu == raw_smp_processor_id() && - sync && empty && mutex_trylock(&queue->send_mutex)) { + empty && mutex_trylock(&queue->send_mutex)) { nvme_tcp_send_all(queue); mutex_unlock(&queue->send_mutex); } @@ -770,7 +770,9 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue, req->ttag = pdu->ttag; nvme_tcp_setup_h2c_data_pdu(req); - nvme_tcp_queue_request(req, false, true); + + llist_add(&req->lentry, &queue->req_list); + queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); return 0; } @@ -2385,7 +2387,7 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) if (ret) return ret; - if (ctrl->opts && ctrl->opts->concat && !ctrl->tls_pskid) { + if (ctrl->opts->concat && !ctrl->tls_pskid) { /* See comments for nvme_tcp_key_revoke_needed() */ dev_dbg(ctrl->device, "restart admin queue for secure concatenation\n"); nvme_stop_keep_alive(ctrl); @@ -2637,7 +2639,7 @@ static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg) ctrl->async_req.curr_bio = NULL; ctrl->async_req.data_len = 0; - nvme_tcp_queue_request(&ctrl->async_req, true, true); + nvme_tcp_queue_request(&ctrl->async_req, true); } static void nvme_tcp_complete_timed_out(struct request *rq) @@ -2789,7 +2791,7 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx, nvme_start_request(rq); - nvme_tcp_queue_request(req, true, bd->last); + nvme_tcp_queue_request(req, bd->last); return BLK_STS_OK; } diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index acc138bbf8f2..c7317299078d 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -63,14 +63,9 @@ static void nvmet_execute_create_sq(struct nvmet_req *req) if (status != NVME_SC_SUCCESS) goto complete; - /* - * Note: The NVMe specification allows multiple SQs to use the same CQ. - * However, the target code does not really support that. So for now, - * prevent this and fail the command if sqid and cqid are different. 
- */ - if (!cqid || cqid != sqid) { - pr_err("SQ %u: Unsupported CQID %u\n", sqid, cqid); - status = NVME_SC_CQ_INVALID | NVME_STATUS_DNR; + status = nvmet_check_io_cqid(ctrl, cqid, false); + if (status != NVME_SC_SUCCESS) { + pr_err("SQ %u: Invalid CQID %u\n", sqid, cqid); goto complete; } @@ -79,7 +74,7 @@ static void nvmet_execute_create_sq(struct nvmet_req *req) goto complete; } - status = ctrl->ops->create_sq(ctrl, sqid, sq_flags, qsize, prp1); + status = ctrl->ops->create_sq(ctrl, sqid, cqid, sq_flags, qsize, prp1); complete: nvmet_req_complete(req, status); @@ -96,14 +91,15 @@ static void nvmet_execute_delete_cq(struct nvmet_req *req) goto complete; } - if (!cqid) { - status = NVME_SC_QID_INVALID | NVME_STATUS_DNR; + status = nvmet_check_io_cqid(ctrl, cqid, false); + if (status != NVME_SC_SUCCESS) goto complete; - } - status = nvmet_check_cqid(ctrl, cqid); - if (status != NVME_SC_SUCCESS) + if (!ctrl->cqs[cqid] || nvmet_cq_in_use(ctrl->cqs[cqid])) { + /* Some SQs are still using this CQ */ + status = NVME_SC_QID_INVALID | NVME_STATUS_DNR; goto complete; + } status = ctrl->ops->delete_cq(ctrl, cqid); @@ -127,12 +123,7 @@ static void nvmet_execute_create_cq(struct nvmet_req *req) goto complete; } - if (!cqid) { - status = NVME_SC_QID_INVALID | NVME_STATUS_DNR; - goto complete; - } - - status = nvmet_check_cqid(ctrl, cqid); + status = nvmet_check_io_cqid(ctrl, cqid, true); if (status != NVME_SC_SUCCESS) goto complete; diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index 9429b8218408..b340380f3892 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -280,9 +280,12 @@ void nvmet_destroy_auth(struct nvmet_ctrl *ctrl) bool nvmet_check_auth_status(struct nvmet_req *req) { - if (req->sq->ctrl->host_key && - !req->sq->authenticated) - return false; + if (req->sq->ctrl->host_key) { + if (req->sq->qid > 0) + return true; + if (!req->sq->authenticated) + return false; + } return true; } @@ -290,7 +293,7 @@ int 
nvmet_auth_host_hash(struct nvmet_req *req, u8 *response, unsigned int shash_len) { struct crypto_shash *shash_tfm; - struct shash_desc *shash; + SHASH_DESC_ON_STACK(shash, shash_tfm); struct nvmet_ctrl *ctrl = req->sq->ctrl; const char *hash_name; u8 *challenge = req->sq->dhchap_c1; @@ -342,19 +345,13 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response, req->sq->dhchap_c1, challenge, shash_len); if (ret) - goto out_free_challenge; + goto out; } pr_debug("ctrl %d qid %d host response seq %u transaction %d\n", ctrl->cntlid, req->sq->qid, req->sq->dhchap_s1, req->sq->dhchap_tid); - shash = kzalloc(sizeof(*shash) + crypto_shash_descsize(shash_tfm), - GFP_KERNEL); - if (!shash) { - ret = -ENOMEM; - goto out_free_challenge; - } shash->tfm = shash_tfm; ret = crypto_shash_init(shash); if (ret) @@ -389,8 +386,6 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response, goto out; ret = crypto_shash_final(shash, response); out: - kfree(shash); -out_free_challenge: if (challenge != req->sq->dhchap_c1) kfree(challenge); out_free_response: diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 245475c43127..db7b17d1094e 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -813,11 +813,43 @@ void nvmet_req_complete(struct nvmet_req *req, u16 status) } EXPORT_SYMBOL_GPL(nvmet_req_complete); +void nvmet_cq_init(struct nvmet_cq *cq) +{ + refcount_set(&cq->ref, 1); +} +EXPORT_SYMBOL_GPL(nvmet_cq_init); + +bool nvmet_cq_get(struct nvmet_cq *cq) +{ + return refcount_inc_not_zero(&cq->ref); +} +EXPORT_SYMBOL_GPL(nvmet_cq_get); + +void nvmet_cq_put(struct nvmet_cq *cq) +{ + if (refcount_dec_and_test(&cq->ref)) + nvmet_cq_destroy(cq); +} +EXPORT_SYMBOL_GPL(nvmet_cq_put); + void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid, u16 size) { cq->qid = qid; cq->size = size; + + ctrl->cqs[qid] = cq; +} + +void nvmet_cq_destroy(struct nvmet_cq *cq) +{ + struct nvmet_ctrl *ctrl = cq->ctrl; + + if (ctrl) { + 
ctrl->cqs[cq->qid] = NULL; + nvmet_ctrl_put(cq->ctrl); + cq->ctrl = NULL; + } } void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, @@ -837,37 +869,47 @@ static void nvmet_confirm_sq(struct percpu_ref *ref) complete(&sq->confirm_done); } -u16 nvmet_check_cqid(struct nvmet_ctrl *ctrl, u16 cqid) +u16 nvmet_check_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create) { - if (!ctrl->sqs) + if (!ctrl->cqs) return NVME_SC_INTERNAL | NVME_STATUS_DNR; if (cqid > ctrl->subsys->max_qid) return NVME_SC_QID_INVALID | NVME_STATUS_DNR; - /* - * Note: For PCI controllers, the NVMe specifications allows multiple - * SQs to share a single CQ. However, we do not support this yet, so - * check that there is no SQ defined for a CQ. If one exist, then the - * CQ ID is invalid for creation as well as when the CQ is being - * deleted (as that would mean that the SQ was not deleted before the - * CQ). - */ - if (ctrl->sqs[cqid]) + if ((create && ctrl->cqs[cqid]) || (!create && !ctrl->cqs[cqid])) return NVME_SC_QID_INVALID | NVME_STATUS_DNR; return NVME_SC_SUCCESS; } +u16 nvmet_check_io_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create) +{ + if (!cqid) + return NVME_SC_QID_INVALID | NVME_STATUS_DNR; + return nvmet_check_cqid(ctrl, cqid, create); +} + +bool nvmet_cq_in_use(struct nvmet_cq *cq) +{ + return refcount_read(&cq->ref) > 1; +} +EXPORT_SYMBOL_GPL(nvmet_cq_in_use); + u16 nvmet_cq_create(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid, u16 size) { u16 status; - status = nvmet_check_cqid(ctrl, qid); + status = nvmet_check_cqid(ctrl, qid, true); if (status != NVME_SC_SUCCESS) return status; + if (!kref_get_unless_zero(&ctrl->ref)) + return NVME_SC_INTERNAL | NVME_STATUS_DNR; + cq->ctrl = ctrl; + + nvmet_cq_init(cq); nvmet_cq_setup(ctrl, cq, qid, size); return NVME_SC_SUCCESS; @@ -891,7 +933,7 @@ u16 nvmet_check_sqid(struct nvmet_ctrl *ctrl, u16 sqid, } u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, - u16 sqid, u16 size) + struct nvmet_cq *cq, u16 
sqid, u16 size) { u16 status; int ret; @@ -903,7 +945,7 @@ u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, if (status != NVME_SC_SUCCESS) return status; - ret = nvmet_sq_init(sq); + ret = nvmet_sq_init(sq, cq); if (ret) { status = NVME_SC_INTERNAL | NVME_STATUS_DNR; goto ctrl_put; @@ -935,6 +977,7 @@ void nvmet_sq_destroy(struct nvmet_sq *sq) wait_for_completion(&sq->free_done); percpu_ref_exit(&sq->ref); nvmet_auth_sq_free(sq); + nvmet_cq_put(sq->cq); /* * we must reference the ctrl again after waiting for inflight IO @@ -967,18 +1010,23 @@ static void nvmet_sq_free(struct percpu_ref *ref) complete(&sq->free_done); } -int nvmet_sq_init(struct nvmet_sq *sq) +int nvmet_sq_init(struct nvmet_sq *sq, struct nvmet_cq *cq) { int ret; + if (!nvmet_cq_get(cq)) + return -EINVAL; + ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL); if (ret) { pr_err("percpu_ref init failed!\n"); + nvmet_cq_put(cq); return ret; } init_completion(&sq->free_done); init_completion(&sq->confirm_done); nvmet_auth_sq_init(sq); + sq->cq = cq; return 0; } @@ -1108,13 +1156,13 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req) return ret; } -bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, - struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops) +bool nvmet_req_init(struct nvmet_req *req, struct nvmet_sq *sq, + const struct nvmet_fabrics_ops *ops) { u8 flags = req->cmd->common.flags; u16 status; - req->cq = cq; + req->cq = sq->cq; req->sq = sq; req->ops = ops; req->sg = NULL; @@ -1612,12 +1660,17 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args) if (!ctrl->sqs) goto out_free_changed_ns_list; + ctrl->cqs = kcalloc(subsys->max_qid + 1, sizeof(struct nvmet_cq *), + GFP_KERNEL); + if (!ctrl->cqs) + goto out_free_sqs; + ret = ida_alloc_range(&cntlid_ida, subsys->cntlid_min, subsys->cntlid_max, GFP_KERNEL); if (ret < 0) { args->status = NVME_SC_CONNECT_CTRL_BUSY | NVME_STATUS_DNR; - goto out_free_sqs; + goto out_free_cqs; } 
ctrl->cntlid = ret; @@ -1676,6 +1729,8 @@ init_pr_fail: mutex_unlock(&subsys->lock); nvmet_stop_keep_alive_timer(ctrl); ida_free(&cntlid_ida, ctrl->cntlid); +out_free_cqs: + kfree(ctrl->cqs); out_free_sqs: kfree(ctrl->sqs); out_free_changed_ns_list: @@ -1712,6 +1767,7 @@ static void nvmet_ctrl_free(struct kref *ref) nvmet_async_events_free(ctrl); kfree(ctrl->sqs); + kfree(ctrl->cqs); kfree(ctrl->changed_ns_list); kfree(ctrl); diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index df7207640506..c06f3e04296c 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -119,7 +119,7 @@ static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr, memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE); memcpy(e->traddr, traddr, NVMF_TRADDR_SIZE); memcpy(e->tsas.common, port->disc_addr.tsas.common, NVMF_TSAS_SIZE); - strncpy(e->subnqn, subsys_nqn, NVMF_NQN_SIZE); + strscpy(e->subnqn, subsys_nqn, NVMF_NQN_SIZE); } /* diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index f012bdf89850..7b8d8b397802 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -208,6 +208,14 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) return NVME_SC_CONNECT_CTRL_BUSY | NVME_STATUS_DNR; } + kref_get(&ctrl->ref); + old = cmpxchg(&req->cq->ctrl, NULL, ctrl); + if (old) { + pr_warn("queue already connected!\n"); + req->error_loc = offsetof(struct nvmf_connect_command, opcode); + return NVME_SC_CONNECT_CTRL_BUSY | NVME_STATUS_DNR; + } + /* note: convert queue size from 0's-based value to 1's-based value */ nvmet_cq_setup(ctrl, req->cq, qid, sqsize + 1); nvmet_sq_setup(ctrl, req->sq, qid, sqsize + 1); @@ -239,8 +247,8 @@ static u32 nvmet_connect_result(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq) bool needs_auth = nvmet_has_auth(ctrl, sq); key_serial_t keyid = nvmet_queue_tls_keyid(sq); - /* Do not authenticate I/O 
queues for secure concatenation */ - if (ctrl->concat && sq->qid) + /* Do not authenticate I/O queues */ + if (sq->qid) needs_auth = false; if (keyid) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 7b50130f10f6..254537b93e63 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -816,7 +816,8 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, nvmet_fc_prep_fcp_iodlist(assoc->tgtport, queue); - ret = nvmet_sq_init(&queue->nvme_sq); + nvmet_cq_init(&queue->nvme_cq); + ret = nvmet_sq_init(&queue->nvme_sq, &queue->nvme_cq); if (ret) goto out_fail_iodlist; @@ -826,6 +827,7 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, return queue; out_fail_iodlist: + nvmet_cq_put(&queue->nvme_cq); nvmet_fc_destroy_fcp_iodlist(assoc->tgtport, queue); destroy_workqueue(queue->work_q); out_free_queue: @@ -934,6 +936,7 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) flush_workqueue(queue->work_q); nvmet_sq_destroy(&queue->nvme_sq); + nvmet_cq_put(&queue->nvme_cq); nvmet_fc_tgt_q_put(queue); } @@ -1254,6 +1257,7 @@ nvmet_fc_portentry_bind(struct nvmet_fc_tgtport *tgtport, { lockdep_assert_held(&nvmet_fc_tgtlock); + nvmet_fc_tgtport_get(tgtport); pe->tgtport = tgtport; tgtport->pe = pe; @@ -1273,8 +1277,10 @@ nvmet_fc_portentry_unbind(struct nvmet_fc_port_entry *pe) unsigned long flags; spin_lock_irqsave(&nvmet_fc_tgtlock, flags); - if (pe->tgtport) + if (pe->tgtport) { + nvmet_fc_tgtport_put(pe->tgtport); pe->tgtport->pe = NULL; + } list_del(&pe->pe_list); spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); } @@ -1292,8 +1298,10 @@ nvmet_fc_portentry_unbind_tgt(struct nvmet_fc_tgtport *tgtport) spin_lock_irqsave(&nvmet_fc_tgtlock, flags); pe = tgtport->pe; - if (pe) + if (pe) { + nvmet_fc_tgtport_put(pe->tgtport); pe->tgtport = NULL; + } tgtport->pe = NULL; spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); } @@ -1316,6 +1324,9 @@ nvmet_fc_portentry_rebind_tgt(struct nvmet_fc_tgtport *tgtport) 
list_for_each_entry(pe, &nvmet_fc_portentry_list, pe_list) { if (tgtport->fc_target_port.node_name == pe->node_name && tgtport->fc_target_port.port_name == pe->port_name) { + if (!nvmet_fc_tgtport_get(tgtport)) + continue; + WARN_ON(pe->tgtport); tgtport->pe = pe; pe->tgtport = tgtport; @@ -1580,6 +1591,39 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); } +static void +nvmet_fc_free_pending_reqs(struct nvmet_fc_tgtport *tgtport) +{ + struct nvmet_fc_ls_req_op *lsop; + struct nvmefc_ls_req *lsreq; + struct nvmet_fc_ls_iod *iod; + int i; + + iod = tgtport->iod; + for (i = 0; i < NVMET_LS_CTX_COUNT; iod++, i++) + cancel_work(&iod->work); + + /* + * After this point the connection is lost and thus any pending + * request can't be processed by the normal completion path. This + * is likely a request from nvmet_fc_send_ls_req_async. + */ + while ((lsop = list_first_entry_or_null(&tgtport->ls_req_list, + struct nvmet_fc_ls_req_op, lsreq_list))) { + list_del(&lsop->lsreq_list); + + if (!lsop->req_queued) + continue; + + lsreq = &lsop->ls_req; + fc_dma_unmap_single(tgtport->dev, lsreq->rqstdma, + (lsreq->rqstlen + lsreq->rsplen), + DMA_BIDIRECTIONAL); + nvmet_fc_tgtport_put(tgtport); + kfree(lsop); + } +} + /** * nvmet_fc_unregister_targetport - transport entry point called by an * LLDD to deregister/remove a previously @@ -1608,13 +1652,7 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port) flush_workqueue(nvmet_wq); - /* - * should terminate LS's as well. However, LS's will be generated - * at the tail end of association termination, so they likely don't - * exist yet. And even if they did, it's worthwhile to just let - * them finish and targetport ref counting will clean things up. 
- */ - + nvmet_fc_free_pending_reqs(tgtport); nvmet_fc_tgtport_put(tgtport); return 0; @@ -2531,10 +2569,8 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, fod->data_sg = NULL; fod->data_sg_cnt = 0; - ret = nvmet_req_init(&fod->req, - &fod->queue->nvme_cq, - &fod->queue->nvme_sq, - &nvmet_fc_tgt_fcp_ops); + ret = nvmet_req_init(&fod->req, &fod->queue->nvme_sq, + &nvmet_fc_tgt_fcp_ops); if (!ret) { /* bad SQE content or invalid ctrl state */ /* nvmet layer has already called op done to send rsp. */ @@ -2860,12 +2896,17 @@ nvmet_fc_add_port(struct nvmet_port *port) list_for_each_entry(tgtport, &nvmet_fc_target_list, tgt_list) { if ((tgtport->fc_target_port.node_name == traddr.nn) && (tgtport->fc_target_port.port_name == traddr.pn)) { + if (!nvmet_fc_tgtport_get(tgtport)) + continue; + /* a FC port can only be 1 nvmet port id */ if (!tgtport->pe) { nvmet_fc_portentry_bind(tgtport, pe, port); ret = 0; } else ret = -EALREADY; + + nvmet_fc_tgtport_put(tgtport); break; } } @@ -2881,11 +2922,21 @@ static void nvmet_fc_remove_port(struct nvmet_port *port) { struct nvmet_fc_port_entry *pe = port->priv; + struct nvmet_fc_tgtport *tgtport = NULL; + unsigned long flags; + + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); + if (pe->tgtport && nvmet_fc_tgtport_get(pe->tgtport)) + tgtport = pe->tgtport; + spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); nvmet_fc_portentry_unbind(pe); - /* terminate any outstanding associations */ - __nvmet_fc_free_assocs(pe->tgtport); + if (tgtport) { + /* terminate any outstanding associations */ + __nvmet_fc_free_assocs(tgtport); + nvmet_fc_tgtport_put(tgtport); + } kfree(pe); } @@ -2894,10 +2945,21 @@ static void nvmet_fc_discovery_chg(struct nvmet_port *port) { struct nvmet_fc_port_entry *pe = port->priv; - struct nvmet_fc_tgtport *tgtport = pe->tgtport; + struct nvmet_fc_tgtport *tgtport = NULL; + unsigned long flags; + + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); + if (pe->tgtport && nvmet_fc_tgtport_get(pe->tgtport)) + tgtport 
= pe->tgtport; + spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); + + if (!tgtport) + return; if (tgtport && tgtport->ops->discovery_event) tgtport->ops->discovery_event(&tgtport->fc_target_port); + + nvmet_fc_tgtport_put(tgtport); } static ssize_t diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 641201e62c1b..257b497d515a 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -207,7 +207,6 @@ static LIST_HEAD(fcloop_nports); struct fcloop_lport { struct nvme_fc_local_port *localport; struct list_head lport_list; - struct completion unreg_done; refcount_t ref; }; @@ -215,6 +214,9 @@ struct fcloop_lport_priv { struct fcloop_lport *lport; }; +/* The port is already being removed, avoid double free */ +#define PORT_DELETED 0 + struct fcloop_rport { struct nvme_fc_remote_port *remoteport; struct nvmet_fc_target_port *targetport; @@ -223,6 +225,7 @@ struct fcloop_rport { spinlock_t lock; struct list_head ls_list; struct work_struct ls_work; + unsigned long flags; }; struct fcloop_tport { @@ -233,6 +236,7 @@ struct fcloop_tport { spinlock_t lock; struct list_head ls_list; struct work_struct ls_work; + unsigned long flags; }; struct fcloop_nport { @@ -288,6 +292,9 @@ struct fcloop_ini_fcpreq { spinlock_t inilock; }; +/* SLAB cache for fcloop_lsreq structures */ +static struct kmem_cache *lsreq_cache; + static inline struct fcloop_lsreq * ls_rsp_to_lsreq(struct nvmefc_ls_rsp *lsrsp) { @@ -338,6 +345,7 @@ fcloop_rport_lsrqst_work(struct work_struct *work) * callee may free memory containing tls_req. * do not reference lsreq after this. 
*/ + kmem_cache_free(lsreq_cache, tls_req); spin_lock(&rport->lock); } @@ -349,10 +357,13 @@ fcloop_h2t_ls_req(struct nvme_fc_local_port *localport, struct nvme_fc_remote_port *remoteport, struct nvmefc_ls_req *lsreq) { - struct fcloop_lsreq *tls_req = lsreq->private; struct fcloop_rport *rport = remoteport->private; + struct fcloop_lsreq *tls_req; int ret = 0; + tls_req = kmem_cache_alloc(lsreq_cache, GFP_KERNEL); + if (!tls_req) + return -ENOMEM; tls_req->lsreq = lsreq; INIT_LIST_HEAD(&tls_req->ls_list); @@ -389,14 +400,17 @@ fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, lsrsp->done(lsrsp); - if (remoteport) { - rport = remoteport->private; - spin_lock(&rport->lock); - list_add_tail(&tls_req->ls_list, &rport->ls_list); - spin_unlock(&rport->lock); - queue_work(nvmet_wq, &rport->ls_work); + if (!remoteport) { + kmem_cache_free(lsreq_cache, tls_req); + return 0; } + rport = remoteport->private; + spin_lock(&rport->lock); + list_add_tail(&tls_req->ls_list, &rport->ls_list); + spin_unlock(&rport->lock); + queue_work(nvmet_wq, &rport->ls_work); + return 0; } @@ -422,6 +436,7 @@ fcloop_tport_lsrqst_work(struct work_struct *work) * callee may free memory containing tls_req. * do not reference lsreq after this. 
*/ + kmem_cache_free(lsreq_cache, tls_req); spin_lock(&tport->lock); } @@ -432,8 +447,8 @@ static int fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle, struct nvmefc_ls_req *lsreq) { - struct fcloop_lsreq *tls_req = lsreq->private; struct fcloop_tport *tport = targetport->private; + struct fcloop_lsreq *tls_req; int ret = 0; /* @@ -441,6 +456,10 @@ fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle, * hosthandle ignored as fcloop currently is * 1:1 tgtport vs remoteport */ + + tls_req = kmem_cache_alloc(lsreq_cache, GFP_KERNEL); + if (!tls_req) + return -ENOMEM; tls_req->lsreq = lsreq; INIT_LIST_HEAD(&tls_req->ls_list); @@ -457,6 +476,9 @@ fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle, ret = nvme_fc_rcv_ls_req(tport->remoteport, &tls_req->ls_rsp, lsreq->rqstaddr, lsreq->rqstlen); + if (ret) + kmem_cache_free(lsreq_cache, tls_req); + return ret; } @@ -471,18 +493,30 @@ fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport, struct nvmet_fc_target_port *targetport = rport->targetport; struct fcloop_tport *tport; + if (!targetport) { + /* + * The target port is gone. The target doesn't expect any + * response anymore and the ->done call is not valid + * because the resources have been freed by + * nvmet_fc_free_pending_reqs. + * + * We end up here from delete association exchange: + * nvmet_fc_xmt_disconnect_assoc sends an async request. + */ + kmem_cache_free(lsreq_cache, tls_req); + return 0; + } + memcpy(lsreq->rspaddr, lsrsp->rspbuf, ((lsreq->rsplen < lsrsp->rsplen) ? 
lsreq->rsplen : lsrsp->rsplen)); lsrsp->done(lsrsp); - if (targetport) { - tport = targetport->private; - spin_lock(&tport->lock); - list_add_tail(&tls_req->ls_list, &tport->ls_list); - spin_unlock(&tport->lock); - queue_work(nvmet_wq, &tport->ls_work); - } + tport = targetport->private; + spin_lock(&tport->lock); + list_add_tail(&tls_req->ls_list, &tport->ls_list); + spin_unlock(&tport->lock); + queue_work(nvmet_wq, &tport->ls_work); return 0; } @@ -566,7 +600,8 @@ fcloop_call_host_done(struct nvmefc_fcp_req *fcpreq, } /* release original io reference on tgt struct */ - fcloop_tfcp_req_put(tfcp_req); + if (tfcp_req) + fcloop_tfcp_req_put(tfcp_req); } static bool drop_fabric_opcode; @@ -618,12 +653,13 @@ fcloop_fcp_recv_work(struct work_struct *work) { struct fcloop_fcpreq *tfcp_req = container_of(work, struct fcloop_fcpreq, fcp_rcv_work); - struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + struct nvmefc_fcp_req *fcpreq; unsigned long flags; int ret = 0; bool aborted = false; spin_lock_irqsave(&tfcp_req->reqlock, flags); + fcpreq = tfcp_req->fcpreq; switch (tfcp_req->inistate) { case INI_IO_START: tfcp_req->inistate = INI_IO_ACTIVE; @@ -638,16 +674,19 @@ fcloop_fcp_recv_work(struct work_struct *work) } spin_unlock_irqrestore(&tfcp_req->reqlock, flags); - if (unlikely(aborted)) - ret = -ECANCELED; - else { - if (likely(!check_for_drop(tfcp_req))) - ret = nvmet_fc_rcv_fcp_req(tfcp_req->tport->targetport, - &tfcp_req->tgt_fcp_req, - fcpreq->cmdaddr, fcpreq->cmdlen); - else - pr_info("%s: dropped command ********\n", __func__); + if (unlikely(aborted)) { + /* the abort handler will call fcloop_call_host_done */ + return; + } + + if (unlikely(check_for_drop(tfcp_req))) { + pr_info("%s: dropped command ********\n", __func__); + return; } + + ret = nvmet_fc_rcv_fcp_req(tfcp_req->tport->targetport, + &tfcp_req->tgt_fcp_req, + fcpreq->cmdaddr, fcpreq->cmdlen); if (ret) fcloop_call_host_done(fcpreq, tfcp_req, ret); } @@ -662,15 +701,17 @@ fcloop_fcp_abort_recv_work(struct 
work_struct *work) unsigned long flags; spin_lock_irqsave(&tfcp_req->reqlock, flags); - fcpreq = tfcp_req->fcpreq; switch (tfcp_req->inistate) { case INI_IO_ABORTED: + fcpreq = tfcp_req->fcpreq; + tfcp_req->fcpreq = NULL; break; case INI_IO_COMPLETED: completed = true; break; default: spin_unlock_irqrestore(&tfcp_req->reqlock, flags); + fcloop_tfcp_req_put(tfcp_req); WARN_ON(1); return; } @@ -686,10 +727,6 @@ fcloop_fcp_abort_recv_work(struct work_struct *work) nvmet_fc_rcv_fcp_abort(tfcp_req->tport->targetport, &tfcp_req->tgt_fcp_req); - spin_lock_irqsave(&tfcp_req->reqlock, flags); - tfcp_req->fcpreq = NULL; - spin_unlock_irqrestore(&tfcp_req->reqlock, flags); - fcloop_call_host_done(fcpreq, tfcp_req, -ECANCELED); /* call_host_done releases reference for abort downcall */ } @@ -958,13 +995,16 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, spin_lock(&inireq->inilock); tfcp_req = inireq->tfcp_req; - if (tfcp_req) - fcloop_tfcp_req_get(tfcp_req); + if (tfcp_req) { + if (!fcloop_tfcp_req_get(tfcp_req)) + tfcp_req = NULL; + } spin_unlock(&inireq->inilock); - if (!tfcp_req) + if (!tfcp_req) { /* abort has already been called */ - return; + goto out_host_done; + } /* break initiator/target relationship for io */ spin_lock_irqsave(&tfcp_req->reqlock, flags); @@ -979,7 +1019,7 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, default: spin_unlock_irqrestore(&tfcp_req->reqlock, flags); WARN_ON(1); - return; + goto out_host_done; } spin_unlock_irqrestore(&tfcp_req->reqlock, flags); @@ -993,6 +1033,11 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, */ fcloop_tfcp_req_put(tfcp_req); } + + return; + +out_host_done: + fcloop_call_host_done(fcpreq, tfcp_req, -ECANCELED); } static void @@ -1019,9 +1064,18 @@ fcloop_lport_get(struct fcloop_lport *lport) static void fcloop_nport_put(struct fcloop_nport *nport) { + unsigned long flags; + if (!refcount_dec_and_test(&nport->ref)) return; + spin_lock_irqsave(&fcloop_lock, flags); + 
list_del(&nport->nport_list); + spin_unlock_irqrestore(&fcloop_lock, flags); + + if (nport->lport) + fcloop_lport_put(nport->lport); + kfree(nport); } @@ -1037,9 +1091,6 @@ fcloop_localport_delete(struct nvme_fc_local_port *localport) struct fcloop_lport_priv *lport_priv = localport->private; struct fcloop_lport *lport = lport_priv->lport; - /* release any threads waiting for the unreg to complete */ - complete(&lport->unreg_done); - fcloop_lport_put(lport); } @@ -1047,18 +1098,38 @@ static void fcloop_remoteport_delete(struct nvme_fc_remote_port *remoteport) { struct fcloop_rport *rport = remoteport->private; + bool put_port = false; + unsigned long flags; flush_work(&rport->ls_work); - fcloop_nport_put(rport->nport); + + spin_lock_irqsave(&fcloop_lock, flags); + if (!test_and_set_bit(PORT_DELETED, &rport->flags)) + put_port = true; + rport->nport->rport = NULL; + spin_unlock_irqrestore(&fcloop_lock, flags); + + if (put_port) + fcloop_nport_put(rport->nport); } static void fcloop_targetport_delete(struct nvmet_fc_target_port *targetport) { struct fcloop_tport *tport = targetport->private; + bool put_port = false; + unsigned long flags; flush_work(&tport->ls_work); - fcloop_nport_put(tport->nport); + + spin_lock_irqsave(&fcloop_lock, flags); + if (!test_and_set_bit(PORT_DELETED, &tport->flags)) + put_port = true; + tport->nport->tport = NULL; + spin_unlock_irqrestore(&fcloop_lock, flags); + + if (put_port) + fcloop_nport_put(tport->nport); } #define FCLOOP_HW_QUEUES 4 @@ -1082,7 +1153,6 @@ static struct nvme_fc_port_template fctemplate = { /* sizes of additional private data for data structures */ .local_priv_sz = sizeof(struct fcloop_lport_priv), .remote_priv_sz = sizeof(struct fcloop_rport), - .lsrqst_priv_sz = sizeof(struct fcloop_lsreq), .fcprqst_priv_sz = sizeof(struct fcloop_ini_fcpreq), }; @@ -1105,7 +1175,6 @@ static struct nvmet_fc_target_template tgttemplate = { .target_features = 0, /* sizes of additional private data for data structures */ 
.target_priv_sz = sizeof(struct fcloop_tport), - .lsrqst_priv_sz = sizeof(struct fcloop_lsreq), }; static ssize_t @@ -1170,51 +1239,92 @@ out_free_lport: } static int -__wait_localport_unreg(struct fcloop_lport *lport) +__localport_unreg(struct fcloop_lport *lport) { - int ret; + return nvme_fc_unregister_localport(lport->localport); +} - init_completion(&lport->unreg_done); +static struct fcloop_nport * +__fcloop_nport_lookup(u64 node_name, u64 port_name) +{ + struct fcloop_nport *nport; - ret = nvme_fc_unregister_localport(lport->localport); + list_for_each_entry(nport, &fcloop_nports, nport_list) { + if (nport->node_name != node_name || + nport->port_name != port_name) + continue; - if (!ret) - wait_for_completion(&lport->unreg_done); + if (fcloop_nport_get(nport)) + return nport; - return ret; + break; + } + + return NULL; } +static struct fcloop_nport * +fcloop_nport_lookup(u64 node_name, u64 port_name) +{ + struct fcloop_nport *nport; + unsigned long flags; + + spin_lock_irqsave(&fcloop_lock, flags); + nport = __fcloop_nport_lookup(node_name, port_name); + spin_unlock_irqrestore(&fcloop_lock, flags); + + return nport; +} + +static struct fcloop_lport * +__fcloop_lport_lookup(u64 node_name, u64 port_name) +{ + struct fcloop_lport *lport; + + list_for_each_entry(lport, &fcloop_lports, lport_list) { + if (lport->localport->node_name != node_name || + lport->localport->port_name != port_name) + continue; + + if (fcloop_lport_get(lport)) + return lport; + + break; + } + + return NULL; +} + +static struct fcloop_lport * +fcloop_lport_lookup(u64 node_name, u64 port_name) +{ + struct fcloop_lport *lport; + unsigned long flags; + + spin_lock_irqsave(&fcloop_lock, flags); + lport = __fcloop_lport_lookup(node_name, port_name); + spin_unlock_irqrestore(&fcloop_lock, flags); + + return lport; +} static ssize_t fcloop_delete_local_port(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct fcloop_lport *tlport, *lport = NULL; + struct 
fcloop_lport *lport; u64 nodename, portname; - unsigned long flags; int ret; ret = fcloop_parse_nm_options(dev, &nodename, &portname, buf); if (ret) return ret; - spin_lock_irqsave(&fcloop_lock, flags); - - list_for_each_entry(tlport, &fcloop_lports, lport_list) { - if (tlport->localport->node_name == nodename && - tlport->localport->port_name == portname) { - if (!fcloop_lport_get(tlport)) - break; - lport = tlport; - break; - } - } - spin_unlock_irqrestore(&fcloop_lock, flags); - + lport = fcloop_lport_lookup(nodename, portname); if (!lport) return -ENOENT; - ret = __wait_localport_unreg(lport); + ret = __localport_unreg(lport); fcloop_lport_put(lport); return ret ? ret : count; @@ -1223,8 +1333,8 @@ fcloop_delete_local_port(struct device *dev, struct device_attribute *attr, static struct fcloop_nport * fcloop_alloc_nport(const char *buf, size_t count, bool remoteport) { - struct fcloop_nport *newnport, *nport = NULL; - struct fcloop_lport *tmplport, *lport = NULL; + struct fcloop_nport *newnport, *nport; + struct fcloop_lport *lport; struct fcloop_ctrl_options *opts; unsigned long flags; u32 opts_mask = (remoteport) ? RPORT_OPTS : TGTPORT_OPTS; @@ -1239,10 +1349,8 @@ fcloop_alloc_nport(const char *buf, size_t count, bool remoteport) goto out_free_opts; /* everything there ? 
*/ - if ((opts->mask & opts_mask) != opts_mask) { - ret = -EINVAL; + if ((opts->mask & opts_mask) != opts_mask) goto out_free_opts; - } newnport = kzalloc(sizeof(*newnport), GFP_KERNEL); if (!newnport) @@ -1258,60 +1366,61 @@ fcloop_alloc_nport(const char *buf, size_t count, bool remoteport) refcount_set(&newnport->ref, 1); spin_lock_irqsave(&fcloop_lock, flags); - - list_for_each_entry(tmplport, &fcloop_lports, lport_list) { - if (tmplport->localport->node_name == opts->wwnn && - tmplport->localport->port_name == opts->wwpn) - goto out_invalid_opts; - - if (tmplport->localport->node_name == opts->lpwwnn && - tmplport->localport->port_name == opts->lpwwpn) - lport = tmplport; + lport = __fcloop_lport_lookup(opts->wwnn, opts->wwpn); + if (lport) { + /* invalid configuration */ + fcloop_lport_put(lport); + goto out_free_newnport; } if (remoteport) { - if (!lport) - goto out_invalid_opts; - newnport->lport = lport; - } - - list_for_each_entry(nport, &fcloop_nports, nport_list) { - if (nport->node_name == opts->wwnn && - nport->port_name == opts->wwpn) { - if ((remoteport && nport->rport) || - (!remoteport && nport->tport)) { - nport = NULL; - goto out_invalid_opts; - } - - fcloop_nport_get(nport); - - spin_unlock_irqrestore(&fcloop_lock, flags); - - if (remoteport) - nport->lport = lport; - if (opts->mask & NVMF_OPT_ROLES) - nport->port_role = opts->roles; - if (opts->mask & NVMF_OPT_FCADDR) - nport->port_id = opts->fcaddr; + lport = __fcloop_lport_lookup(opts->lpwwnn, opts->lpwwpn); + if (!lport) { + /* invalid configuration */ goto out_free_newnport; } } - list_add_tail(&newnport->nport_list, &fcloop_nports); + nport = __fcloop_nport_lookup(opts->wwnn, opts->wwpn); + if (nport) { + if ((remoteport && nport->rport) || + (!remoteport && nport->tport)) { + /* invalid configuration */ + goto out_put_nport; + } + + /* found existing nport, discard the new nport */ + kfree(newnport); + } else { + list_add_tail(&newnport->nport_list, &fcloop_nports); + nport = newnport; + 
} + if (opts->mask & NVMF_OPT_ROLES) + nport->port_role = opts->roles; + if (opts->mask & NVMF_OPT_FCADDR) + nport->port_id = opts->fcaddr; + if (lport) { + if (!nport->lport) + nport->lport = lport; + else + fcloop_lport_put(lport); + } spin_unlock_irqrestore(&fcloop_lock, flags); kfree(opts); - return newnport; + return nport; -out_invalid_opts: - spin_unlock_irqrestore(&fcloop_lock, flags); +out_put_nport: + if (lport) + fcloop_lport_put(lport); + fcloop_nport_put(nport); out_free_newnport: + spin_unlock_irqrestore(&fcloop_lock, flags); kfree(newnport); out_free_opts: kfree(opts); - return nport; + return NULL; } static ssize_t @@ -1352,6 +1461,7 @@ fcloop_create_remote_port(struct device *dev, struct device_attribute *attr, rport->nport = nport; rport->lport = nport->lport; nport->rport = rport; + rport->flags = 0; spin_lock_init(&rport->lock); INIT_WORK(&rport->ls_work, fcloop_rport_lsrqst_work); INIT_LIST_HEAD(&rport->ls_list); @@ -1365,21 +1475,18 @@ __unlink_remote_port(struct fcloop_nport *nport) { struct fcloop_rport *rport = nport->rport; + lockdep_assert_held(&fcloop_lock); + if (rport && nport->tport) nport->tport->remoteport = NULL; nport->rport = NULL; - list_del(&nport->nport_list); - return rport; } static int __remoteport_unreg(struct fcloop_nport *nport, struct fcloop_rport *rport) { - if (!rport) - return -EALREADY; - return nvme_fc_unregister_remoteport(rport->remoteport); } @@ -1387,8 +1494,8 @@ static ssize_t fcloop_delete_remote_port(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct fcloop_nport *nport = NULL, *tmpport; - static struct fcloop_rport *rport; + struct fcloop_nport *nport; + struct fcloop_rport *rport; u64 nodename, portname; unsigned long flags; int ret; @@ -1397,24 +1504,24 @@ fcloop_delete_remote_port(struct device *dev, struct device_attribute *attr, if (ret) return ret; - spin_lock_irqsave(&fcloop_lock, flags); - - list_for_each_entry(tmpport, &fcloop_nports, nport_list) { - if 
(tmpport->node_name == nodename && - tmpport->port_name == portname && tmpport->rport) { - nport = tmpport; - rport = __unlink_remote_port(nport); - break; - } - } + nport = fcloop_nport_lookup(nodename, portname); + if (!nport) + return -ENOENT; + spin_lock_irqsave(&fcloop_lock, flags); + rport = __unlink_remote_port(nport); spin_unlock_irqrestore(&fcloop_lock, flags); - if (!nport) - return -ENOENT; + if (!rport) { + ret = -ENOENT; + goto out_nport_put; + } ret = __remoteport_unreg(nport, rport); +out_nport_put: + fcloop_nport_put(nport); + return ret ? ret : count; } @@ -1452,6 +1559,7 @@ fcloop_create_target_port(struct device *dev, struct device_attribute *attr, tport->nport = nport; tport->lport = nport->lport; nport->tport = tport; + tport->flags = 0; spin_lock_init(&tport->lock); INIT_WORK(&tport->ls_work, fcloop_tport_lsrqst_work); INIT_LIST_HEAD(&tport->ls_list); @@ -1465,6 +1573,8 @@ __unlink_target_port(struct fcloop_nport *nport) { struct fcloop_tport *tport = nport->tport; + lockdep_assert_held(&fcloop_lock); + if (tport && nport->rport) nport->rport->targetport = NULL; nport->tport = NULL; @@ -1475,9 +1585,6 @@ __unlink_target_port(struct fcloop_nport *nport) static int __targetport_unreg(struct fcloop_nport *nport, struct fcloop_tport *tport) { - if (!tport) - return -EALREADY; - return nvmet_fc_unregister_targetport(tport->targetport); } @@ -1485,8 +1592,8 @@ static ssize_t fcloop_delete_target_port(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct fcloop_nport *nport = NULL, *tmpport; - struct fcloop_tport *tport = NULL; + struct fcloop_nport *nport; + struct fcloop_tport *tport; u64 nodename, portname; unsigned long flags; int ret; @@ -1495,24 +1602,24 @@ fcloop_delete_target_port(struct device *dev, struct device_attribute *attr, if (ret) return ret; - spin_lock_irqsave(&fcloop_lock, flags); - - list_for_each_entry(tmpport, &fcloop_nports, nport_list) { - if (tmpport->node_name == nodename && - 
tmpport->port_name == portname && tmpport->tport) { - nport = tmpport; - tport = __unlink_target_port(nport); - break; - } - } + nport = fcloop_nport_lookup(nodename, portname); + if (!nport) + return -ENOENT; + spin_lock_irqsave(&fcloop_lock, flags); + tport = __unlink_target_port(nport); spin_unlock_irqrestore(&fcloop_lock, flags); - if (!nport) - return -ENOENT; + if (!tport) { + ret = -ENOENT; + goto out_nport_put; + } ret = __targetport_unreg(nport, tport); +out_nport_put: + fcloop_nport_put(nport); + return ret ? ret : count; } @@ -1578,15 +1685,20 @@ static const struct class fcloop_class = { }; static struct device *fcloop_device; - static int __init fcloop_init(void) { int ret; + lsreq_cache = kmem_cache_create("lsreq_cache", + sizeof(struct fcloop_lsreq), 0, + 0, NULL); + if (!lsreq_cache) + return -ENOMEM; + ret = class_register(&fcloop_class); if (ret) { pr_err("couldn't register class fcloop\n"); - return ret; + goto out_destroy_cache; } fcloop_device = device_create_with_groups( @@ -1604,13 +1716,15 @@ static int __init fcloop_init(void) out_destroy_class: class_unregister(&fcloop_class); +out_destroy_cache: + kmem_cache_destroy(lsreq_cache); return ret; } static void __exit fcloop_exit(void) { - struct fcloop_lport *lport = NULL; - struct fcloop_nport *nport = NULL; + struct fcloop_lport *lport; + struct fcloop_nport *nport; struct fcloop_tport *tport; struct fcloop_rport *rport; unsigned long flags; @@ -1621,7 +1735,7 @@ static void __exit fcloop_exit(void) for (;;) { nport = list_first_entry_or_null(&fcloop_nports, typeof(*nport), nport_list); - if (!nport) + if (!nport || !fcloop_nport_get(nport)) break; tport = __unlink_target_port(nport); @@ -1629,13 +1743,21 @@ static void __exit fcloop_exit(void) spin_unlock_irqrestore(&fcloop_lock, flags); - ret = __targetport_unreg(nport, tport); - if (ret) - pr_warn("%s: Failed deleting target port\n", __func__); + if (tport) { + ret = __targetport_unreg(nport, tport); + if (ret) + pr_warn("%s: Failed 
deleting target port\n", + __func__); + } - ret = __remoteport_unreg(nport, rport); - if (ret) - pr_warn("%s: Failed deleting remote port\n", __func__); + if (rport) { + ret = __remoteport_unreg(nport, rport); + if (ret) + pr_warn("%s: Failed deleting remote port\n", + __func__); + } + + fcloop_nport_put(nport); spin_lock_irqsave(&fcloop_lock, flags); } @@ -1648,7 +1770,7 @@ static void __exit fcloop_exit(void) spin_unlock_irqrestore(&fcloop_lock, flags); - ret = __wait_localport_unreg(lport); + ret = __localport_unreg(lport); if (ret) pr_warn("%s: Failed deleting local port\n", __func__); @@ -1663,6 +1785,7 @@ static void __exit fcloop_exit(void) device_destroy(&fcloop_class, MKDEV(0, 0)); class_unregister(&fcloop_class); + kmem_cache_destroy(lsreq_cache); } module_init(fcloop_init); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index a5c41144667c..f85a8441bcc6 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -33,10 +33,12 @@ struct nvme_loop_ctrl { struct list_head list; struct blk_mq_tag_set tag_set; - struct nvme_loop_iod async_event_iod; struct nvme_ctrl ctrl; struct nvmet_port *port; + + /* Must be last --ends in a flexible-array member. 
*/ + struct nvme_loop_iod async_event_iod; }; static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl) @@ -148,8 +150,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, nvme_start_request(req); iod->cmd.common.flags |= NVME_CMD_SGL_METABUF; iod->req.port = queue->ctrl->port; - if (!nvmet_req_init(&iod->req, &queue->nvme_cq, - &queue->nvme_sq, &nvme_loop_ops)) + if (!nvmet_req_init(&iod->req, &queue->nvme_sq, &nvme_loop_ops)) return BLK_STS_OK; if (blk_rq_nr_phys_segments(req)) { @@ -181,8 +182,7 @@ static void nvme_loop_submit_async_event(struct nvme_ctrl *arg) iod->cmd.common.command_id = NVME_AQ_BLK_MQ_DEPTH; iod->cmd.common.flags |= NVME_CMD_SGL_METABUF; - if (!nvmet_req_init(&iod->req, &queue->nvme_cq, &queue->nvme_sq, - &nvme_loop_ops)) { + if (!nvmet_req_init(&iod->req, &queue->nvme_sq, &nvme_loop_ops)) { dev_err(ctrl->ctrl.device, "failed async event work\n"); return; } @@ -273,6 +273,7 @@ static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) nvme_unquiesce_admin_queue(&ctrl->ctrl); nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); + nvmet_cq_put(&ctrl->queues[0].nvme_cq); nvme_remove_admin_tag_set(&ctrl->ctrl); } @@ -302,6 +303,7 @@ static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl) for (i = 1; i < ctrl->ctrl.queue_count; i++) { clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags); nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); + nvmet_cq_put(&ctrl->queues[i].nvme_cq); } ctrl->ctrl.queue_count = 1; /* @@ -327,9 +329,13 @@ static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl) for (i = 1; i <= nr_io_queues; i++) { ctrl->queues[i].ctrl = ctrl; - ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq); - if (ret) + nvmet_cq_init(&ctrl->queues[i].nvme_cq); + ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq, + &ctrl->queues[i].nvme_cq); + if (ret) { + nvmet_cq_put(&ctrl->queues[i].nvme_cq); goto out_destroy_queues; + } ctrl->ctrl.queue_count++; } @@ -360,9 +366,13 @@ static int 
nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) int error; ctrl->queues[0].ctrl = ctrl; - error = nvmet_sq_init(&ctrl->queues[0].nvme_sq); - if (error) + nvmet_cq_init(&ctrl->queues[0].nvme_cq); + error = nvmet_sq_init(&ctrl->queues[0].nvme_sq, + &ctrl->queues[0].nvme_cq); + if (error) { + nvmet_cq_put(&ctrl->queues[0].nvme_cq); return error; + } ctrl->ctrl.queue_count = 1; error = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set, @@ -401,6 +411,7 @@ out_cleanup_tagset: nvme_remove_admin_tag_set(&ctrl->ctrl); out_free_sq: nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); + nvmet_cq_put(&ctrl->queues[0].nvme_cq); return error; } diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index b6db8b74dc4a..df69a9dee71c 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -141,13 +141,16 @@ static inline struct device *nvmet_ns_dev(struct nvmet_ns *ns) } struct nvmet_cq { + struct nvmet_ctrl *ctrl; u16 qid; u16 size; + refcount_t ref; }; struct nvmet_sq { struct nvmet_ctrl *ctrl; struct percpu_ref ref; + struct nvmet_cq *cq; u16 qid; u16 size; u32 sqhd; @@ -247,6 +250,7 @@ struct nvmet_pr_log_mgr { struct nvmet_ctrl { struct nvmet_subsys *subsys; struct nvmet_sq **sqs; + struct nvmet_cq **cqs; void *drvdata; @@ -424,7 +428,7 @@ struct nvmet_fabrics_ops { u16 (*get_max_queue_size)(const struct nvmet_ctrl *ctrl); /* Operations mandatory for PCI target controllers */ - u16 (*create_sq)(struct nvmet_ctrl *ctrl, u16 sqid, u16 flags, + u16 (*create_sq)(struct nvmet_ctrl *ctrl, u16 sqid, u16 cqid, u16 flags, u16 qsize, u64 prp1); u16 (*delete_sq)(struct nvmet_ctrl *ctrl, u16 sqid); u16 (*create_cq)(struct nvmet_ctrl *ctrl, u16 cqid, u16 flags, @@ -557,8 +561,8 @@ u32 nvmet_fabrics_admin_cmd_data_len(struct nvmet_req *req); u16 nvmet_parse_fabrics_io_cmd(struct nvmet_req *req); u32 nvmet_fabrics_io_cmd_data_len(struct nvmet_req *req); -bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, - struct nvmet_sq *sq, 
const struct nvmet_fabrics_ops *ops); +bool nvmet_req_init(struct nvmet_req *req, struct nvmet_sq *sq, + const struct nvmet_fabrics_ops *ops); void nvmet_req_uninit(struct nvmet_req *req); size_t nvmet_req_transfer_len(struct nvmet_req *req); bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len); @@ -571,18 +575,24 @@ void nvmet_execute_set_features(struct nvmet_req *req); void nvmet_execute_get_features(struct nvmet_req *req); void nvmet_execute_keep_alive(struct nvmet_req *req); -u16 nvmet_check_cqid(struct nvmet_ctrl *ctrl, u16 cqid); +u16 nvmet_check_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create); +u16 nvmet_check_io_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create); +void nvmet_cq_init(struct nvmet_cq *cq); void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid, u16 size); u16 nvmet_cq_create(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid, u16 size); +void nvmet_cq_destroy(struct nvmet_cq *cq); +bool nvmet_cq_get(struct nvmet_cq *cq); +void nvmet_cq_put(struct nvmet_cq *cq); +bool nvmet_cq_in_use(struct nvmet_cq *cq); u16 nvmet_check_sqid(struct nvmet_ctrl *ctrl, u16 sqid, bool create); void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, u16 qid, u16 size); -u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, u16 qid, - u16 size); +u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, + struct nvmet_cq *cq, u16 qid, u16 size); void nvmet_sq_destroy(struct nvmet_sq *sq); -int nvmet_sq_init(struct nvmet_sq *sq); +int nvmet_sq_init(struct nvmet_sq *sq, struct nvmet_cq *cq); void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl); diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index 7fab7f3d79b7..a4295a5b8d28 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -62,8 +62,7 @@ static DEFINE_MUTEX(nvmet_pci_epf_ports_mutex); #define NVMET_PCI_EPF_CQ_RETRY_INTERVAL msecs_to_jiffies(1) enum nvmet_pci_epf_queue_flags { - 
NVMET_PCI_EPF_Q_IS_SQ = 0, /* The queue is a submission queue */ - NVMET_PCI_EPF_Q_LIVE, /* The queue is live */ + NVMET_PCI_EPF_Q_LIVE = 0, /* The queue is live */ NVMET_PCI_EPF_Q_IRQ_ENABLED, /* IRQ is enabled for this queue */ }; @@ -596,9 +595,6 @@ static bool nvmet_pci_epf_should_raise_irq(struct nvmet_pci_epf_ctrl *ctrl, struct nvmet_pci_epf_irq_vector *iv = cq->iv; bool ret; - if (!test_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags)) - return false; - /* IRQ coalescing for the admin queue is not allowed. */ if (!cq->qid) return true; @@ -625,7 +621,8 @@ static void nvmet_pci_epf_raise_irq(struct nvmet_pci_epf_ctrl *ctrl, struct pci_epf *epf = nvme_epf->epf; int ret = 0; - if (!test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags)) + if (!test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags) || + !test_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags)) return; mutex_lock(&ctrl->irq_lock); @@ -636,14 +633,16 @@ static void nvmet_pci_epf_raise_irq(struct nvmet_pci_epf_ctrl *ctrl, switch (nvme_epf->irq_type) { case PCI_IRQ_MSIX: case PCI_IRQ_MSI: + /* + * If we fail to raise an MSI or MSI-X interrupt, it is likely + * because the host is using legacy INTX IRQs (e.g. BIOS, + * grub), but we can fallback to the INTX type only if the + * endpoint controller supports this type. + */ ret = pci_epc_raise_irq(epf->epc, epf->func_no, epf->vfunc_no, nvme_epf->irq_type, cq->vector + 1); - if (!ret) + if (!ret || !nvme_epf->epc_features->intx_capable) break; - /* - * If we got an error, it is likely because the host is using - * legacy IRQs (e.g. BIOS, grub). 
- */ fallthrough; case PCI_IRQ_INTX: ret = pci_epc_raise_irq(epf->epc, epf->func_no, epf->vfunc_no, @@ -656,7 +655,9 @@ static void nvmet_pci_epf_raise_irq(struct nvmet_pci_epf_ctrl *ctrl, } if (ret) - dev_err(ctrl->dev, "Failed to raise IRQ (err=%d)\n", ret); + dev_err_ratelimited(ctrl->dev, + "CQ[%u]: Failed to raise IRQ (err=%d)\n", + cq->qid, ret); unlock: mutex_unlock(&ctrl->irq_lock); @@ -1319,8 +1320,14 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl, set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags); - dev_dbg(ctrl->dev, "CQ[%u]: %u entries of %zu B, IRQ vector %u\n", - cqid, qsize, cq->qes, cq->vector); + if (test_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags)) + dev_dbg(ctrl->dev, + "CQ[%u]: %u entries of %zu B, IRQ vector %u\n", + cqid, qsize, cq->qes, cq->vector); + else + dev_dbg(ctrl->dev, + "CQ[%u]: %u entries of %zu B, IRQ disabled\n", + cqid, qsize, cq->qes); return NVME_SC_SUCCESS; @@ -1344,17 +1351,20 @@ static u16 nvmet_pci_epf_delete_cq(struct nvmet_ctrl *tctrl, u16 cqid) cancel_delayed_work_sync(&cq->work); nvmet_pci_epf_drain_queue(cq); - nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector); + if (test_and_clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags)) + nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector); nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map); + nvmet_cq_put(&cq->nvme_cq); return NVME_SC_SUCCESS; } static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl, - u16 sqid, u16 flags, u16 qsize, u64 pci_addr) + u16 sqid, u16 cqid, u16 flags, u16 qsize, u64 pci_addr) { struct nvmet_pci_epf_ctrl *ctrl = tctrl->drvdata; struct nvmet_pci_epf_queue *sq = &ctrl->sq[sqid]; + struct nvmet_pci_epf_queue *cq = &ctrl->cq[cqid]; u16 status; if (test_bit(NVMET_PCI_EPF_Q_LIVE, &sq->flags)) @@ -1377,7 +1387,8 @@ static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl, sq->qes = ctrl->io_sqes; sq->pci_size = sq->qes * sq->depth; - status = nvmet_sq_create(tctrl, &sq->nvme_sq, sqid, sq->depth); + status = nvmet_sq_create(tctrl, 
&sq->nvme_sq, &cq->nvme_cq, sqid, + sq->depth); if (status != NVME_SC_SUCCESS) return status; @@ -1533,7 +1544,6 @@ static void nvmet_pci_epf_init_queue(struct nvmet_pci_epf_ctrl *ctrl, if (sq) { queue = &ctrl->sq[qid]; - set_bit(NVMET_PCI_EPF_Q_IS_SQ, &queue->flags); } else { queue = &ctrl->cq[qid]; INIT_DELAYED_WORK(&queue->work, nvmet_pci_epf_cq_work); @@ -1594,8 +1604,7 @@ static void nvmet_pci_epf_exec_iod_work(struct work_struct *work) goto complete; } - if (!nvmet_req_init(req, &iod->cq->nvme_cq, &iod->sq->nvme_sq, - &nvmet_pci_epf_fabrics_ops)) + if (!nvmet_req_init(req, &iod->sq->nvme_sq, &nvmet_pci_epf_fabrics_ops)) goto complete; iod->data_len = nvmet_req_transfer_len(req); @@ -1872,8 +1881,8 @@ static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl) qsize = aqa & 0x00000fff; pci_addr = asq & GENMASK_ULL(63, 12); - status = nvmet_pci_epf_create_sq(ctrl->tctrl, 0, NVME_QUEUE_PHYS_CONTIG, - qsize, pci_addr); + status = nvmet_pci_epf_create_sq(ctrl->tctrl, 0, 0, + NVME_QUEUE_PHYS_CONTIG, qsize, pci_addr); if (status != NVME_SC_SUCCESS) { dev_err(ctrl->dev, "Failed to create admin submission queue\n"); nvmet_pci_epf_delete_cq(ctrl->tctrl, 0); diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 2a4536ef6184..432bdf7cd49e 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -976,8 +976,7 @@ static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue, cmd->send_sge.addr, cmd->send_sge.length, DMA_TO_DEVICE); - if (!nvmet_req_init(&cmd->req, &queue->nvme_cq, - &queue->nvme_sq, &nvmet_rdma_ops)) + if (!nvmet_req_init(&cmd->req, &queue->nvme_sq, &nvmet_rdma_ops)) return; status = nvmet_rdma_map_sgl(cmd); @@ -1353,6 +1352,7 @@ static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue) pr_debug("freeing queue %d\n", queue->idx); nvmet_sq_destroy(&queue->nvme_sq); + nvmet_cq_put(&queue->nvme_cq); nvmet_rdma_destroy_queue_ib(queue); if (!queue->nsrq) { @@ -1436,7 +1436,8 @@ 
nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev, goto out_reject; } - ret = nvmet_sq_init(&queue->nvme_sq); + nvmet_cq_init(&queue->nvme_cq); + ret = nvmet_sq_init(&queue->nvme_sq, &queue->nvme_cq); if (ret) { ret = NVME_RDMA_CM_NO_RSC; goto out_free_queue; @@ -1517,6 +1518,7 @@ out_ida_remove: out_destroy_sq: nvmet_sq_destroy(&queue->nvme_sq); out_free_queue: + nvmet_cq_put(&queue->nvme_cq); kfree(queue); out_reject: nvmet_rdma_cm_reject(cm_id, ret); diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 12a5cb8641ca..c6603bd9c95e 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -7,6 +7,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> +#include <linux/crc32c.h> #include <linux/err.h> #include <linux/nvme-tcp.h> #include <linux/nvme-keyring.h> @@ -17,7 +18,6 @@ #include <net/handshake.h> #include <linux/inet.h> #include <linux/llist.h> -#include <crypto/hash.h> #include <trace/events/sock.h> #include "nvmet.h" @@ -172,8 +172,6 @@ struct nvmet_tcp_queue { /* digest state */ bool hdr_digest; bool data_digest; - struct ahash_request *snd_hash; - struct ahash_request *rcv_hash; /* TLS state */ key_serial_t tls_pskid; @@ -294,14 +292,9 @@ static inline u8 nvmet_tcp_ddgst_len(struct nvmet_tcp_queue *queue) return queue->data_digest ? 
NVME_TCP_DIGEST_LENGTH : 0; } -static inline void nvmet_tcp_hdgst(struct ahash_request *hash, - void *pdu, size_t len) +static inline void nvmet_tcp_hdgst(void *pdu, size_t len) { - struct scatterlist sg; - - sg_init_one(&sg, pdu, len); - ahash_request_set_crypt(hash, &sg, pdu + len, len); - crypto_ahash_digest(hash); + put_unaligned_le32(~crc32c(~0, pdu, len), pdu + len); } static int nvmet_tcp_verify_hdgst(struct nvmet_tcp_queue *queue, @@ -318,7 +311,7 @@ static int nvmet_tcp_verify_hdgst(struct nvmet_tcp_queue *queue, } recv_digest = *(__le32 *)(pdu + hdr->hlen); - nvmet_tcp_hdgst(queue->rcv_hash, pdu, len); + nvmet_tcp_hdgst(pdu, len); exp_digest = *(__le32 *)(pdu + hdr->hlen); if (recv_digest != exp_digest) { pr_err("queue %d: header digest error: recv %#x expected %#x\n", @@ -441,12 +434,24 @@ err: return NVME_SC_INTERNAL; } -static void nvmet_tcp_calc_ddgst(struct ahash_request *hash, - struct nvmet_tcp_cmd *cmd) +static void nvmet_tcp_calc_ddgst(struct nvmet_tcp_cmd *cmd) { - ahash_request_set_crypt(hash, cmd->req.sg, - (void *)&cmd->exp_ddgst, cmd->req.transfer_len); - crypto_ahash_digest(hash); + size_t total_len = cmd->req.transfer_len; + struct scatterlist *sg = cmd->req.sg; + u32 crc = ~0; + + while (total_len) { + size_t len = min_t(size_t, total_len, sg->length); + + /* + * Note that the scatterlist does not contain any highmem pages, + * as it was allocated by sgl_alloc() with GFP_KERNEL. 
+ */ + crc = crc32c(crc, sg_virt(sg), len); + total_len -= len; + sg = sg_next(sg); + } + cmd->exp_ddgst = cpu_to_le32(~crc); } static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd) @@ -473,19 +478,18 @@ static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd) if (queue->data_digest) { pdu->hdr.flags |= NVME_TCP_F_DDGST; - nvmet_tcp_calc_ddgst(queue->snd_hash, cmd); + nvmet_tcp_calc_ddgst(cmd); } if (cmd->queue->hdr_digest) { pdu->hdr.flags |= NVME_TCP_F_HDGST; - nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); + nvmet_tcp_hdgst(pdu, sizeof(*pdu)); } } static void nvmet_setup_r2t_pdu(struct nvmet_tcp_cmd *cmd) { struct nvme_tcp_r2t_pdu *pdu = cmd->r2t_pdu; - struct nvmet_tcp_queue *queue = cmd->queue; u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); cmd->offset = 0; @@ -503,14 +507,13 @@ static void nvmet_setup_r2t_pdu(struct nvmet_tcp_cmd *cmd) pdu->r2t_offset = cpu_to_le32(cmd->rbytes_done); if (cmd->queue->hdr_digest) { pdu->hdr.flags |= NVME_TCP_F_HDGST; - nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); + nvmet_tcp_hdgst(pdu, sizeof(*pdu)); } } static void nvmet_setup_response_pdu(struct nvmet_tcp_cmd *cmd) { struct nvme_tcp_rsp_pdu *pdu = cmd->rsp_pdu; - struct nvmet_tcp_queue *queue = cmd->queue; u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); cmd->offset = 0; @@ -523,7 +526,7 @@ static void nvmet_setup_response_pdu(struct nvmet_tcp_cmd *cmd) pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst); if (cmd->queue->hdr_digest) { pdu->hdr.flags |= NVME_TCP_F_HDGST; - nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); + nvmet_tcp_hdgst(pdu, sizeof(*pdu)); } } @@ -857,42 +860,6 @@ static void nvmet_prepare_receive_pdu(struct nvmet_tcp_queue *queue) smp_store_release(&queue->rcv_state, NVMET_TCP_RECV_PDU); } -static void nvmet_tcp_free_crypto(struct nvmet_tcp_queue *queue) -{ - struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash); - - ahash_request_free(queue->rcv_hash); - ahash_request_free(queue->snd_hash); - crypto_free_ahash(tfm); -} - 
-static int nvmet_tcp_alloc_crypto(struct nvmet_tcp_queue *queue) -{ - struct crypto_ahash *tfm; - - tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - - queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL); - if (!queue->snd_hash) - goto free_tfm; - ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL); - - queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL); - if (!queue->rcv_hash) - goto free_snd_hash; - ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL); - - return 0; -free_snd_hash: - ahash_request_free(queue->snd_hash); -free_tfm: - crypto_free_ahash(tfm); - return -ENOMEM; -} - - static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) { struct nvme_tcp_icreq_pdu *icreq = &queue->pdu.icreq; @@ -921,11 +888,6 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) queue->hdr_digest = !!(icreq->digest & NVME_TCP_HDR_DIGEST_ENABLE); queue->data_digest = !!(icreq->digest & NVME_TCP_DATA_DIGEST_ENABLE); - if (queue->hdr_digest || queue->data_digest) { - ret = nvmet_tcp_alloc_crypto(queue); - if (ret) - return ret; - } memset(icresp, 0, sizeof(*icresp)); icresp->hdr.type = nvme_tcp_icresp; @@ -1077,8 +1039,7 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) req = &queue->cmd->req; memcpy(req->cmd, nvme_cmd, sizeof(*nvme_cmd)); - if (unlikely(!nvmet_req_init(req, &queue->nvme_cq, - &queue->nvme_sq, &nvmet_tcp_ops))) { + if (unlikely(!nvmet_req_init(req, &queue->nvme_sq, &nvmet_tcp_ops))) { pr_err("failed cmd %p id %d opcode %d, data_len: %d, status: %04x\n", req->cmd, req->cmd->common.command_id, req->cmd->common.opcode, @@ -1247,7 +1208,7 @@ static void nvmet_tcp_prep_recv_ddgst(struct nvmet_tcp_cmd *cmd) { struct nvmet_tcp_queue *queue = cmd->queue; - nvmet_tcp_calc_ddgst(queue->rcv_hash, cmd); + nvmet_tcp_calc_ddgst(cmd); queue->offset = 0; queue->left = NVME_TCP_DIGEST_LENGTH; queue->rcv_state = NVMET_TCP_RECV_DDGST; @@ -1615,13 +1576,12 @@ static void 
nvmet_tcp_release_queue_work(struct work_struct *w) nvmet_sq_put_tls_key(&queue->nvme_sq); nvmet_tcp_uninit_data_in_cmds(queue); nvmet_sq_destroy(&queue->nvme_sq); + nvmet_cq_put(&queue->nvme_cq); cancel_work_sync(&queue->io_work); nvmet_tcp_free_cmd_data_in_buffers(queue); /* ->sock will be released by fput() */ fput(queue->sock->file); nvmet_tcp_free_cmds(queue); - if (queue->hdr_digest || queue->data_digest) - nvmet_tcp_free_crypto(queue); ida_free(&nvmet_tcp_queue_ida, queue->idx); page_frag_cache_drain(&queue->pf_cache); kfree(queue); @@ -1950,7 +1910,8 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, if (ret) goto out_ida_remove; - ret = nvmet_sq_init(&queue->nvme_sq); + nvmet_cq_init(&queue->nvme_cq); + ret = nvmet_sq_init(&queue->nvme_sq, &queue->nvme_cq); if (ret) goto out_free_connect; @@ -1993,6 +1954,7 @@ out_destroy_sq: mutex_unlock(&nvmet_tcp_queue_mutex); nvmet_sq_destroy(&queue->nvme_sq); out_free_connect: + nvmet_cq_put(&queue->nvme_cq); nvmet_tcp_free_cmd(&queue->connect); out_ida_remove: ida_free(&nvmet_tcp_queue_ida, queue->idx); diff --git a/drivers/phy/phy-can-transceiver.c b/drivers/phy/phy-can-transceiver.c index 2bec70615449..f59caff4b3d4 100644 --- a/drivers/phy/phy-can-transceiver.c +++ b/drivers/phy/phy-can-transceiver.c @@ -93,6 +93,16 @@ static const struct of_device_id can_transceiver_phy_ids[] = { }; MODULE_DEVICE_TABLE(of, can_transceiver_phy_ids); +/* Temporary wrapper until the multiplexer subsystem supports optional muxes */ +static inline struct mux_state * +devm_mux_state_get_optional(struct device *dev, const char *mux_name) +{ + if (!of_property_present(dev->of_node, "mux-states")) + return NULL; + + return devm_mux_state_get(dev, mux_name); +} + static int can_transceiver_phy_probe(struct platform_device *pdev) { struct phy_provider *phy_provider; @@ -114,13 +124,11 @@ static int can_transceiver_phy_probe(struct platform_device *pdev) match = of_match_node(can_transceiver_phy_ids, pdev->dev.of_node); drvdata 
= match->data; - mux_state = devm_mux_state_get(dev, NULL); - if (IS_ERR(mux_state)) { - if (PTR_ERR(mux_state) == -EPROBE_DEFER) - return PTR_ERR(mux_state); - } else { - can_transceiver_phy->mux_state = mux_state; - } + mux_state = devm_mux_state_get_optional(dev, NULL); + if (IS_ERR(mux_state)) + return PTR_ERR(mux_state); + + can_transceiver_phy->mux_state = mux_state; phy = devm_phy_create(dev, dev->of_node, &can_transceiver_phy_ops); diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 45b3b792696e..b33e2e2b5014 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -1754,7 +1754,8 @@ static void qmp_ufs_init_registers(struct qmp_ufs *qmp, const struct qmp_phy_cfg qmp_ufs_init_all(qmp, &cfg->tbls_hs_overlay[i]); } - qmp_ufs_init_all(qmp, &cfg->tbls_hs_b); + if (qmp->mode == PHY_MODE_UFS_HS_B) + qmp_ufs_init_all(qmp, &cfg->tbls_hs_b); } static int qmp_ufs_com_init(struct qmp_ufs *qmp) diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c index 775f4f973a6c..9fdf17e0848a 100644 --- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c +++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c @@ -9,6 +9,7 @@ * Copyright (C) 2014 Cogent Embedded, Inc. */ +#include <linux/cleanup.h> #include <linux/extcon-provider.h> #include <linux/interrupt.h> #include <linux/io.h> @@ -107,7 +108,6 @@ struct rcar_gen3_phy { struct rcar_gen3_chan *ch; u32 int_enable_bits; bool initialized; - bool otg_initialized; bool powered; }; @@ -119,9 +119,8 @@ struct rcar_gen3_chan { struct regulator *vbus; struct reset_control *rstc; struct work_struct work; - struct mutex lock; /* protects rphys[...].powered */ + spinlock_t lock; /* protects access to hardware and driver data structure. 
*/ enum usb_dr_mode dr_mode; - int irq; u32 obint_enable_bits; bool extcon_host; bool is_otg_channel; @@ -320,16 +319,15 @@ static bool rcar_gen3_is_any_rphy_initialized(struct rcar_gen3_chan *ch) return false; } -static bool rcar_gen3_needs_init_otg(struct rcar_gen3_chan *ch) +static bool rcar_gen3_is_any_otg_rphy_initialized(struct rcar_gen3_chan *ch) { - int i; - - for (i = 0; i < NUM_OF_PHYS; i++) { - if (ch->rphys[i].otg_initialized) - return false; + for (enum rcar_gen3_phy_index i = PHY_INDEX_BOTH_HC; i <= PHY_INDEX_EHCI; + i++) { + if (ch->rphys[i].initialized) + return true; } - return true; + return false; } static bool rcar_gen3_are_all_rphys_power_off(struct rcar_gen3_chan *ch) @@ -351,7 +349,9 @@ static ssize_t role_store(struct device *dev, struct device_attribute *attr, bool is_b_device; enum phy_mode cur_mode, new_mode; - if (!ch->is_otg_channel || !rcar_gen3_is_any_rphy_initialized(ch)) + guard(spinlock_irqsave)(&ch->lock); + + if (!ch->is_otg_channel || !rcar_gen3_is_any_otg_rphy_initialized(ch)) return -EIO; if (sysfs_streq(buf, "host")) @@ -389,7 +389,7 @@ static ssize_t role_show(struct device *dev, struct device_attribute *attr, { struct rcar_gen3_chan *ch = dev_get_drvdata(dev); - if (!ch->is_otg_channel || !rcar_gen3_is_any_rphy_initialized(ch)) + if (!ch->is_otg_channel || !rcar_gen3_is_any_otg_rphy_initialized(ch)) return -EIO; return sprintf(buf, "%s\n", rcar_gen3_is_host(ch) ? 
"host" : @@ -402,6 +402,9 @@ static void rcar_gen3_init_otg(struct rcar_gen3_chan *ch) void __iomem *usb2_base = ch->base; u32 val; + if (!ch->is_otg_channel || rcar_gen3_is_any_otg_rphy_initialized(ch)) + return; + /* Should not use functions of read-modify-write a register */ val = readl(usb2_base + USB2_LINECTRL1); val = (val & ~USB2_LINECTRL1_DP_RPD) | USB2_LINECTRL1_DPRPD_EN | @@ -415,7 +418,7 @@ static void rcar_gen3_init_otg(struct rcar_gen3_chan *ch) val = readl(usb2_base + USB2_ADPCTRL); writel(val | USB2_ADPCTRL_IDPULLUP, usb2_base + USB2_ADPCTRL); } - msleep(20); + mdelay(20); writel(0xffffffff, usb2_base + USB2_OBINTSTA); writel(ch->obint_enable_bits, usb2_base + USB2_OBINTEN); @@ -427,16 +430,27 @@ static irqreturn_t rcar_gen3_phy_usb2_irq(int irq, void *_ch) { struct rcar_gen3_chan *ch = _ch; void __iomem *usb2_base = ch->base; - u32 status = readl(usb2_base + USB2_OBINTSTA); + struct device *dev = ch->dev; irqreturn_t ret = IRQ_NONE; + u32 status; + + pm_runtime_get_noresume(dev); + + if (pm_runtime_suspended(dev)) + goto rpm_put; - if (status & ch->obint_enable_bits) { - dev_vdbg(ch->dev, "%s: %08x\n", __func__, status); - writel(ch->obint_enable_bits, usb2_base + USB2_OBINTSTA); - rcar_gen3_device_recognition(ch); - ret = IRQ_HANDLED; + scoped_guard(spinlock, &ch->lock) { + status = readl(usb2_base + USB2_OBINTSTA); + if (status & ch->obint_enable_bits) { + dev_vdbg(dev, "%s: %08x\n", __func__, status); + writel(ch->obint_enable_bits, usb2_base + USB2_OBINTSTA); + rcar_gen3_device_recognition(ch); + ret = IRQ_HANDLED; + } } +rpm_put: + pm_runtime_put_noidle(dev); return ret; } @@ -446,32 +460,23 @@ static int rcar_gen3_phy_usb2_init(struct phy *p) struct rcar_gen3_chan *channel = rphy->ch; void __iomem *usb2_base = channel->base; u32 val; - int ret; - if (!rcar_gen3_is_any_rphy_initialized(channel) && channel->irq >= 0) { - INIT_WORK(&channel->work, rcar_gen3_phy_usb2_work); - ret = request_irq(channel->irq, rcar_gen3_phy_usb2_irq, - IRQF_SHARED, 
dev_name(channel->dev), channel); - if (ret < 0) { - dev_err(channel->dev, "No irq handler (%d)\n", channel->irq); - return ret; - } - } + guard(spinlock_irqsave)(&channel->lock); /* Initialize USB2 part */ val = readl(usb2_base + USB2_INT_ENABLE); val |= USB2_INT_ENABLE_UCOM_INTEN | rphy->int_enable_bits; writel(val, usb2_base + USB2_INT_ENABLE); - writel(USB2_SPD_RSM_TIMSET_INIT, usb2_base + USB2_SPD_RSM_TIMSET); - writel(USB2_OC_TIMSET_INIT, usb2_base + USB2_OC_TIMSET); - - /* Initialize otg part */ - if (channel->is_otg_channel) { - if (rcar_gen3_needs_init_otg(channel)) - rcar_gen3_init_otg(channel); - rphy->otg_initialized = true; + + if (!rcar_gen3_is_any_rphy_initialized(channel)) { + writel(USB2_SPD_RSM_TIMSET_INIT, usb2_base + USB2_SPD_RSM_TIMSET); + writel(USB2_OC_TIMSET_INIT, usb2_base + USB2_OC_TIMSET); } + /* Initialize otg part (only if we initialize a PHY with IRQs). */ + if (rphy->int_enable_bits) + rcar_gen3_init_otg(channel); + rphy->initialized = true; return 0; @@ -484,10 +489,9 @@ static int rcar_gen3_phy_usb2_exit(struct phy *p) void __iomem *usb2_base = channel->base; u32 val; - rphy->initialized = false; + guard(spinlock_irqsave)(&channel->lock); - if (channel->is_otg_channel) - rphy->otg_initialized = false; + rphy->initialized = false; val = readl(usb2_base + USB2_INT_ENABLE); val &= ~rphy->int_enable_bits; @@ -495,9 +499,6 @@ static int rcar_gen3_phy_usb2_exit(struct phy *p) val &= ~USB2_INT_ENABLE_UCOM_INTEN; writel(val, usb2_base + USB2_INT_ENABLE); - if (channel->irq >= 0 && !rcar_gen3_is_any_rphy_initialized(channel)) - free_irq(channel->irq, channel); - return 0; } @@ -509,16 +510,17 @@ static int rcar_gen3_phy_usb2_power_on(struct phy *p) u32 val; int ret = 0; - mutex_lock(&channel->lock); - if (!rcar_gen3_are_all_rphys_power_off(channel)) - goto out; - if (channel->vbus) { ret = regulator_enable(channel->vbus); if (ret) - goto out; + return ret; } + guard(spinlock_irqsave)(&channel->lock); + + if 
(!rcar_gen3_are_all_rphys_power_off(channel)) + goto out; + val = readl(usb2_base + USB2_USBCTR); val |= USB2_USBCTR_PLL_RST; writel(val, usb2_base + USB2_USBCTR); @@ -528,7 +530,6 @@ static int rcar_gen3_phy_usb2_power_on(struct phy *p) out: /* The powered flag should be set for any other phys anyway */ rphy->powered = true; - mutex_unlock(&channel->lock); return 0; } @@ -539,18 +540,20 @@ static int rcar_gen3_phy_usb2_power_off(struct phy *p) struct rcar_gen3_chan *channel = rphy->ch; int ret = 0; - mutex_lock(&channel->lock); - rphy->powered = false; + scoped_guard(spinlock_irqsave, &channel->lock) { + rphy->powered = false; - if (!rcar_gen3_are_all_rphys_power_off(channel)) - goto out; + if (rcar_gen3_are_all_rphys_power_off(channel)) { + u32 val = readl(channel->base + USB2_USBCTR); + + val |= USB2_USBCTR_PLL_RST; + writel(val, channel->base + USB2_USBCTR); + } + } if (channel->vbus) ret = regulator_disable(channel->vbus); -out: - mutex_unlock(&channel->lock); - return ret; } @@ -703,7 +706,7 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct rcar_gen3_chan *channel; struct phy_provider *provider; - int ret = 0, i; + int ret = 0, i, irq; if (!dev->of_node) { dev_err(dev, "This driver needs device tree\n"); @@ -719,8 +722,6 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) return PTR_ERR(channel->base); channel->obint_enable_bits = USB2_OBINT_BITS; - /* get irq number here and request_irq for OTG in phy_init */ - channel->irq = platform_get_irq_optional(pdev, 0); channel->dr_mode = rcar_gen3_get_dr_mode(dev->of_node); if (channel->dr_mode != USB_DR_MODE_UNKNOWN) { channel->is_otg_channel = true; @@ -763,7 +764,7 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) if (phy_data->no_adp_ctrl) channel->obint_enable_bits = USB2_OBINT_IDCHG_EN; - mutex_init(&channel->lock); + spin_lock_init(&channel->lock); for (i = 0; i < NUM_OF_PHYS; i++) { channel->rphys[i].phy = 
devm_phy_create(dev, NULL, phy_data->phy_usb2_ops); @@ -789,6 +790,20 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) channel->vbus = NULL; } + irq = platform_get_irq_optional(pdev, 0); + if (irq < 0 && irq != -ENXIO) { + ret = irq; + goto error; + } else if (irq > 0) { + INIT_WORK(&channel->work, rcar_gen3_phy_usb2_work); + ret = devm_request_irq(dev, irq, rcar_gen3_phy_usb2_irq, + IRQF_SHARED, dev_name(dev), channel); + if (ret < 0) { + dev_err(dev, "Failed to request irq (%d)\n", irq); + goto error; + } + } + provider = devm_of_phy_provider_register(dev, rcar_gen3_phy_usb2_xlate); if (IS_ERR(provider)) { dev_err(dev, "Failed to register PHY provider\n"); diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c b/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c index 08c78c1bafc9..28a052e17366 100644 --- a/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c +++ b/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c @@ -1653,7 +1653,7 @@ static __maybe_unused int samsung_mipi_dcphy_runtime_resume(struct device *dev) return ret; } - clk_prepare_enable(samsung->ref_clk); + ret = clk_prepare_enable(samsung->ref_clk); if (ret) { dev_err(samsung->dev, "Failed to enable reference clock, %d\n", ret); clk_disable_unprepare(samsung->pclk); diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c index fe7c05748356..77236f012a1f 100644 --- a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c +++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c @@ -476,6 +476,8 @@ static const struct ropll_config ropll_tmds_cfg[] = { 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, { 650000, 162, 162, 1, 1, 11, 1, 1, 1, 1, 1, 1, 1, 54, 0, 16, 4, 1, 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, + { 502500, 84, 84, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 11, 1, 4, 5, + 4, 11, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, { 337500, 0x70, 0x70, 1, 1, 0xf, 1, 1, 1, 1, 1, 1, 1, 0x2, 0, 0x01, 5, 1, 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, { 400000, 100, 
100, 1, 1, 11, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0, diff --git a/drivers/phy/starfive/phy-jh7110-usb.c b/drivers/phy/starfive/phy-jh7110-usb.c index cb5454fbe2c8..b505d89860b4 100644 --- a/drivers/phy/starfive/phy-jh7110-usb.c +++ b/drivers/phy/starfive/phy-jh7110-usb.c @@ -18,6 +18,8 @@ #include <linux/usb/of.h> #define USB_125M_CLK_RATE 125000000 +#define USB_CLK_MODE_OFF 0x0 +#define USB_CLK_MODE_RX_NORMAL_PWR BIT(1) #define USB_LS_KEEPALIVE_OFF 0x4 #define USB_LS_KEEPALIVE_ENABLE BIT(4) @@ -78,6 +80,7 @@ static int jh7110_usb2_phy_init(struct phy *_phy) { struct jh7110_usb2_phy *phy = phy_get_drvdata(_phy); int ret; + unsigned int val; ret = clk_set_rate(phy->usb_125m_clk, USB_125M_CLK_RATE); if (ret) @@ -87,6 +90,10 @@ static int jh7110_usb2_phy_init(struct phy *_phy) if (ret) return ret; + val = readl(phy->regs + USB_CLK_MODE_OFF); + val |= USB_CLK_MODE_RX_NORMAL_PWR; + writel(val, phy->regs + USB_CLK_MODE_OFF); + return 0; } diff --git a/drivers/phy/tegra/xusb-tegra186.c b/drivers/phy/tegra/xusb-tegra186.c index fae6242aa730..23a23f2d64e5 100644 --- a/drivers/phy/tegra/xusb-tegra186.c +++ b/drivers/phy/tegra/xusb-tegra186.c @@ -237,6 +237,8 @@ #define DATA0_VAL_PD BIT(1) #define USE_XUSB_AO BIT(4) +#define TEGRA_UTMI_PAD_MAX 4 + #define TEGRA186_LANE(_name, _offset, _shift, _mask, _type) \ { \ .name = _name, \ @@ -269,7 +271,7 @@ struct tegra186_xusb_padctl { /* UTMI bias and tracking */ struct clk *usb2_trk_clk; - unsigned int bias_pad_enable; + DECLARE_BITMAP(utmi_pad_enabled, TEGRA_UTMI_PAD_MAX); /* padctl context */ struct tegra186_xusb_padctl_context context; @@ -603,12 +605,8 @@ static void tegra186_utmi_bias_pad_power_on(struct tegra_xusb_padctl *padctl) u32 value; int err; - mutex_lock(&padctl->lock); - - if (priv->bias_pad_enable++ > 0) { - mutex_unlock(&padctl->lock); + if (!bitmap_empty(priv->utmi_pad_enabled, TEGRA_UTMI_PAD_MAX)) return; - } err = clk_prepare_enable(priv->usb2_trk_clk); if (err < 0) @@ -658,8 +656,6 @@ static void 
tegra186_utmi_bias_pad_power_on(struct tegra_xusb_padctl *padctl) } else { clk_disable_unprepare(priv->usb2_trk_clk); } - - mutex_unlock(&padctl->lock); } static void tegra186_utmi_bias_pad_power_off(struct tegra_xusb_padctl *padctl) @@ -667,17 +663,8 @@ static void tegra186_utmi_bias_pad_power_off(struct tegra_xusb_padctl *padctl) struct tegra186_xusb_padctl *priv = to_tegra186_xusb_padctl(padctl); u32 value; - mutex_lock(&padctl->lock); - - if (WARN_ON(priv->bias_pad_enable == 0)) { - mutex_unlock(&padctl->lock); - return; - } - - if (--priv->bias_pad_enable > 0) { - mutex_unlock(&padctl->lock); + if (!bitmap_empty(priv->utmi_pad_enabled, TEGRA_UTMI_PAD_MAX)) return; - } value = padctl_readl(padctl, XUSB_PADCTL_USB2_BIAS_PAD_CTL1); value |= USB2_PD_TRK; @@ -690,13 +677,13 @@ static void tegra186_utmi_bias_pad_power_off(struct tegra_xusb_padctl *padctl) clk_disable_unprepare(priv->usb2_trk_clk); } - mutex_unlock(&padctl->lock); } static void tegra186_utmi_pad_power_on(struct phy *phy) { struct tegra_xusb_lane *lane = phy_get_drvdata(phy); struct tegra_xusb_padctl *padctl = lane->pad->padctl; + struct tegra186_xusb_padctl *priv = to_tegra186_xusb_padctl(padctl); struct tegra_xusb_usb2_port *port; struct device *dev = padctl->dev; unsigned int index = lane->index; @@ -705,9 +692,16 @@ static void tegra186_utmi_pad_power_on(struct phy *phy) if (!phy) return; + mutex_lock(&padctl->lock); + if (test_bit(index, priv->utmi_pad_enabled)) { + mutex_unlock(&padctl->lock); + return; + } + port = tegra_xusb_find_usb2_port(padctl, index); if (!port) { dev_err(dev, "no port found for USB2 lane %u\n", index); + mutex_unlock(&padctl->lock); return; } @@ -724,18 +718,28 @@ static void tegra186_utmi_pad_power_on(struct phy *phy) value = padctl_readl(padctl, XUSB_PADCTL_USB2_OTG_PADX_CTL1(index)); value &= ~USB2_OTG_PD_DR; padctl_writel(padctl, value, XUSB_PADCTL_USB2_OTG_PADX_CTL1(index)); + + set_bit(index, priv->utmi_pad_enabled); + mutex_unlock(&padctl->lock); } static void 
tegra186_utmi_pad_power_down(struct phy *phy) { struct tegra_xusb_lane *lane = phy_get_drvdata(phy); struct tegra_xusb_padctl *padctl = lane->pad->padctl; + struct tegra186_xusb_padctl *priv = to_tegra186_xusb_padctl(padctl); unsigned int index = lane->index; u32 value; if (!phy) return; + mutex_lock(&padctl->lock); + if (!test_bit(index, priv->utmi_pad_enabled)) { + mutex_unlock(&padctl->lock); + return; + } + dev_dbg(padctl->dev, "power down UTMI pad %u\n", index); value = padctl_readl(padctl, XUSB_PADCTL_USB2_OTG_PADX_CTL0(index)); @@ -748,7 +752,11 @@ static void tegra186_utmi_pad_power_down(struct phy *phy) udelay(2); + clear_bit(index, priv->utmi_pad_enabled); + tegra186_utmi_bias_pad_power_off(padctl); + + mutex_unlock(&padctl->lock); } static int tegra186_xusb_padctl_vbus_override(struct tegra_xusb_padctl *padctl, diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index 79d4814d758d..c89df95aa6ca 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -548,16 +548,16 @@ static int tegra_xusb_port_init(struct tegra_xusb_port *port, err = dev_set_name(&port->dev, "%s-%u", name, index); if (err < 0) - goto unregister; + goto put_device; err = device_add(&port->dev); if (err < 0) - goto unregister; + goto put_device; return 0; -unregister: - device_unregister(&port->dev); +put_device: + put_device(&port->dev); return err; } diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index 82f0cc43bbf4..0eb816395dc6 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -44,7 +44,6 @@ * @pctrl: pinctrl handle. * @chip: gpiochip handle. * @desc: pin controller descriptor - * @restart_nb: restart notifier block. * @irq: parent irq for the TLMM irq_chip. 
* @intr_target_use_scm: route irq to application cpu using scm calls * @lock: Spinlock to protect register resources as well @@ -64,7 +63,6 @@ struct msm_pinctrl { struct pinctrl_dev *pctrl; struct gpio_chip chip; struct pinctrl_desc desc; - struct notifier_block restart_nb; int irq; @@ -1471,10 +1469,9 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl) return 0; } -static int msm_ps_hold_restart(struct notifier_block *nb, unsigned long action, - void *data) +static int msm_ps_hold_restart(struct sys_off_data *data) { - struct msm_pinctrl *pctrl = container_of(nb, struct msm_pinctrl, restart_nb); + struct msm_pinctrl *pctrl = data->cb_data; writel(0, pctrl->regs[0] + PS_HOLD_OFFSET); mdelay(1000); @@ -1485,7 +1482,11 @@ static struct msm_pinctrl *poweroff_pctrl; static void msm_ps_hold_poweroff(void) { - msm_ps_hold_restart(&poweroff_pctrl->restart_nb, 0, NULL); + struct sys_off_data data = { + .cb_data = poweroff_pctrl, + }; + + msm_ps_hold_restart(&data); } static void msm_pinctrl_setup_pm_reset(struct msm_pinctrl *pctrl) @@ -1495,9 +1496,11 @@ static void msm_pinctrl_setup_pm_reset(struct msm_pinctrl *pctrl) for (i = 0; i < pctrl->soc->nfunctions; i++) if (!strcmp(func[i].name, "ps_hold")) { - pctrl->restart_nb.notifier_call = msm_ps_hold_restart; - pctrl->restart_nb.priority = 128; - if (register_restart_handler(&pctrl->restart_nb)) + if (devm_register_sys_off_handler(pctrl->dev, + SYS_OFF_MODE_RESTART, + 128, + msm_ps_hold_restart, + pctrl)) dev_err(pctrl->dev, "failed to setup restart handler.\n"); poweroff_pctrl = pctrl; @@ -1599,8 +1602,6 @@ void msm_pinctrl_remove(struct platform_device *pdev) struct msm_pinctrl *pctrl = platform_get_drvdata(pdev); gpiochip_remove(&pctrl->chip); - - unregister_restart_handler(&pctrl->restart_nb); } EXPORT_SYMBOL(msm_pinctrl_remove); diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c b/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c index 230e6ee96636..d8f1bf5e58a0 100644 --- 
a/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c +++ b/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c @@ -45,7 +45,7 @@ static ssize_t current_password_store(struct kobject *kobj, int length; length = strlen(buf); - if (buf[length-1] == '\n') + if (length && buf[length - 1] == '\n') length--; /* firmware does verifiation of min/max password length, diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index a0eae24ca9e6..162809140f68 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -17,13 +17,13 @@ /* * fujitsu-laptop.c - Fujitsu laptop support, providing access to additional * features made available on a range of Fujitsu laptops including the - * P2xxx/P5xxx/S6xxx/S7xxx series. + * P2xxx/P5xxx/S2xxx/S6xxx/S7xxx series. * * This driver implements a vendor-specific backlight control interface for * Fujitsu laptops and provides support for hotkeys present on certain Fujitsu * laptops. * - * This driver has been tested on a Fujitsu Lifebook S6410, S7020 and + * This driver has been tested on a Fujitsu Lifebook S2110, S6410, S7020 and * P8010. It should work on most P-series and S-series Lifebooks, but * YMMV. 
* @@ -107,7 +107,11 @@ #define KEY2_CODE 0x411 #define KEY3_CODE 0x412 #define KEY4_CODE 0x413 -#define KEY5_CODE 0x420 +#define KEY5_CODE 0x414 +#define KEY6_CODE 0x415 +#define KEY7_CODE 0x416 +#define KEY8_CODE 0x417 +#define KEY9_CODE 0x420 /* Hotkey ringbuffer limits */ #define MAX_HOTKEY_RINGBUFFER_SIZE 100 @@ -560,7 +564,7 @@ static const struct key_entry keymap_default[] = { { KE_KEY, KEY2_CODE, { KEY_PROG2 } }, { KE_KEY, KEY3_CODE, { KEY_PROG3 } }, { KE_KEY, KEY4_CODE, { KEY_PROG4 } }, - { KE_KEY, KEY5_CODE, { KEY_RFKILL } }, + { KE_KEY, KEY9_CODE, { KEY_RFKILL } }, /* Soft keys read from status flags */ { KE_KEY, FLAG_RFKILL, { KEY_RFKILL } }, { KE_KEY, FLAG_TOUCHPAD_TOGGLE, { KEY_TOUCHPAD_TOGGLE } }, @@ -584,6 +588,18 @@ static const struct key_entry keymap_p8010[] = { { KE_END, 0 } }; +static const struct key_entry keymap_s2110[] = { + { KE_KEY, KEY1_CODE, { KEY_PROG1 } }, /* "A" */ + { KE_KEY, KEY2_CODE, { KEY_PROG2 } }, /* "B" */ + { KE_KEY, KEY3_CODE, { KEY_WWW } }, /* "Internet" */ + { KE_KEY, KEY4_CODE, { KEY_EMAIL } }, /* "E-mail" */ + { KE_KEY, KEY5_CODE, { KEY_STOPCD } }, + { KE_KEY, KEY6_CODE, { KEY_PLAYPAUSE } }, + { KE_KEY, KEY7_CODE, { KEY_PREVIOUSSONG } }, + { KE_KEY, KEY8_CODE, { KEY_NEXTSONG } }, + { KE_END, 0 } +}; + static const struct key_entry *keymap = keymap_default; static int fujitsu_laptop_dmi_keymap_override(const struct dmi_system_id *id) @@ -621,6 +637,15 @@ static const struct dmi_system_id fujitsu_laptop_dmi_table[] = { }, .driver_data = (void *)keymap_p8010 }, + { + .callback = fujitsu_laptop_dmi_keymap_override, + .ident = "Fujitsu LifeBook S2110", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK S2110"), + }, + .driver_data = (void *)keymap_s2110 + }, {} }; diff --git a/drivers/platform/x86/intel/pmc/arl.c b/drivers/platform/x86/intel/pmc/arl.c index 320993bd6d31..f9c48738b853 100644 --- a/drivers/platform/x86/intel/pmc/arl.c +++ b/drivers/platform/x86/intel/pmc/arl.c 
@@ -681,6 +681,7 @@ static struct pmc_info arl_pmc_info_list[] = { #define ARL_NPU_PCI_DEV 0xad1d #define ARL_GNA_PCI_DEV 0xae4c +#define ARL_H_NPU_PCI_DEV 0x7d1d #define ARL_H_GNA_PCI_DEV 0x774c /* * Set power state of select devices that do not have drivers to D3 @@ -694,7 +695,7 @@ static void arl_d3_fixup(void) static void arl_h_d3_fixup(void) { - pmc_core_set_device_d3(ARL_NPU_PCI_DEV); + pmc_core_set_device_d3(ARL_H_NPU_PCI_DEV); pmc_core_set_device_d3(ARL_H_GNA_PCI_DEV); } diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 0fc275e461be..00b1e7c79a3d 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -1061,8 +1061,8 @@ static ssize_t current_value_store(struct kobject *kobj, ret = -EINVAL; goto out; } - set_str = kasprintf(GFP_KERNEL, "%s,%s,%s", setting->display_name, - new_setting, tlmi_priv.pwd_admin->signature); + set_str = kasprintf(GFP_KERNEL, "%s,%s,%s", setting->name, + new_setting, tlmi_priv.pwd_admin->signature); if (!set_str) { ret = -ENOMEM; goto out; @@ -1092,7 +1092,7 @@ static ssize_t current_value_store(struct kobject *kobj, goto out; } - set_str = kasprintf(GFP_KERNEL, "%s,%s;", setting->display_name, + set_str = kasprintf(GFP_KERNEL, "%s,%s;", setting->name, new_setting); if (!set_str) { ret = -ENOMEM; @@ -1120,11 +1120,11 @@ static ssize_t current_value_store(struct kobject *kobj, } if (auth_str) - set_str = kasprintf(GFP_KERNEL, "%s,%s,%s", setting->display_name, - new_setting, auth_str); + set_str = kasprintf(GFP_KERNEL, "%s,%s,%s", setting->name, + new_setting, auth_str); else - set_str = kasprintf(GFP_KERNEL, "%s,%s;", setting->display_name, - new_setting); + set_str = kasprintf(GFP_KERNEL, "%s,%s;", setting->name, + new_setting); if (!set_str) { ret = -ENOMEM; goto out; @@ -1629,9 +1629,6 @@ static int tlmi_analyze(struct wmi_device *wdev) continue; } - /* It is not allowed to have '/' for file name. Convert it into '\'. 
*/ - strreplace(item, '/', '\\'); - /* Remove the value part */ strreplace(item, ',', '\0'); @@ -1644,11 +1641,16 @@ static int tlmi_analyze(struct wmi_device *wdev) } setting->wdev = wdev; setting->index = i; + + strscpy(setting->name, item); + /* It is not allowed to have '/' for file name. Convert it into '\'. */ + strreplace(item, '/', '\\'); strscpy(setting->display_name, item); + /* If BIOS selections supported, load those */ if (tlmi_priv.can_get_bios_selections) { - ret = tlmi_get_bios_selections(setting->display_name, - &setting->possible_values); + ret = tlmi_get_bios_selections(setting->name, + &setting->possible_values); if (ret || !setting->possible_values) pr_info("Error retrieving possible values for %d : %s\n", i, setting->display_name); diff --git a/drivers/platform/x86/think-lmi.h b/drivers/platform/x86/think-lmi.h index a80452482227..9b014644d316 100644 --- a/drivers/platform/x86/think-lmi.h +++ b/drivers/platform/x86/think-lmi.h @@ -90,6 +90,7 @@ struct tlmi_attr_setting { struct kobject kobj; struct wmi_device *wdev; int index; + char name[TLMI_SETTINGS_MAXLEN]; char display_name[TLMI_SETTINGS_MAXLEN]; char *possible_values; }; diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 92b21e49faf6..657625dd60a0 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -231,6 +231,7 @@ enum tpacpi_hkey_event_t { /* Thermal events */ TP_HKEY_EV_ALARM_BAT_HOT = 0x6011, /* battery too hot */ TP_HKEY_EV_ALARM_BAT_XHOT = 0x6012, /* battery critically hot */ + TP_HKEY_EV_ALARM_BAT_LIM_CHANGE = 0x6013, /* battery charge limit changed*/ TP_HKEY_EV_ALARM_SENSOR_HOT = 0x6021, /* sensor too hot */ TP_HKEY_EV_ALARM_SENSOR_XHOT = 0x6022, /* sensor critically hot */ TP_HKEY_EV_THM_TABLE_CHANGED = 0x6030, /* windows; thermal table changed */ @@ -3777,6 +3778,10 @@ static bool hotkey_notify_6xxx(const u32 hkey, bool *send_acpi_ev) pr_alert("THERMAL EMERGENCY: battery is extremely 
hot!\n"); /* recommended action: immediate sleep/hibernate */ break; + case TP_HKEY_EV_ALARM_BAT_LIM_CHANGE: + pr_debug("Battery Info: battery charge threshold changed\n"); + /* User changed charging threshold. No action needed */ + return true; case TP_HKEY_EV_ALARM_SENSOR_HOT: pr_crit("THERMAL ALARM: a sensor reports something is too hot!\n"); /* recommended action: warn user through gui, that */ diff --git a/drivers/pmdomain/core.c b/drivers/pmdomain/core.c index 9b2f28b34bb5..d6c1ddb807b2 100644 --- a/drivers/pmdomain/core.c +++ b/drivers/pmdomain/core.c @@ -3126,7 +3126,7 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev, /* Verify that the index is within a valid range. */ num_domains = of_count_phandle_with_args(dev->of_node, "power-domains", "#power-domain-cells"); - if (index >= num_domains) + if (num_domains < 0 || index >= num_domains) return NULL; /* Allocate and register device on the genpd bus. */ diff --git a/drivers/pmdomain/renesas/rcar-gen4-sysc.c b/drivers/pmdomain/renesas/rcar-gen4-sysc.c index 66409cff2083..e001b5c25bed 100644 --- a/drivers/pmdomain/renesas/rcar-gen4-sysc.c +++ b/drivers/pmdomain/renesas/rcar-gen4-sysc.c @@ -338,11 +338,6 @@ static int __init rcar_gen4_sysc_pd_init(void) struct rcar_gen4_sysc_pd *pd; size_t n; - if (!area->name) { - /* Skip NULLified area */ - continue; - } - n = strlen(area->name) + 1; pd = kzalloc(sizeof(*pd) + n, GFP_KERNEL); if (!pd) { diff --git a/drivers/pmdomain/renesas/rcar-sysc.c b/drivers/pmdomain/renesas/rcar-sysc.c index dce1a6d37e80..047495f54e8a 100644 --- a/drivers/pmdomain/renesas/rcar-sysc.c +++ b/drivers/pmdomain/renesas/rcar-sysc.c @@ -396,11 +396,6 @@ static int __init rcar_sysc_pd_init(void) struct rcar_sysc_pd *pd; size_t n; - if (!area->name) { - /* Skip NULLified area */ - continue; - } - n = strlen(area->name) + 1; pd = kzalloc(sizeof(*pd) + n, GFP_KERNEL); if (!pd) { diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 2ccdca4f6960..e63481f24238 100644 --- 
a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -315,6 +315,8 @@ struct ptp_ocp_serial_port { #define OCP_BOARD_ID_LEN 13 #define OCP_SERIAL_LEN 6 #define OCP_SMA_NUM 4 +#define OCP_SIGNAL_NUM 4 +#define OCP_FREQ_NUM 4 enum { PORT_GNSS, @@ -342,8 +344,8 @@ struct ptp_ocp { struct dcf_master_reg __iomem *dcf_out; struct dcf_slave_reg __iomem *dcf_in; struct tod_reg __iomem *nmea_out; - struct frequency_reg __iomem *freq_in[4]; - struct ptp_ocp_ext_src *signal_out[4]; + struct frequency_reg __iomem *freq_in[OCP_FREQ_NUM]; + struct ptp_ocp_ext_src *signal_out[OCP_SIGNAL_NUM]; struct ptp_ocp_ext_src *pps; struct ptp_ocp_ext_src *ts0; struct ptp_ocp_ext_src *ts1; @@ -378,10 +380,12 @@ struct ptp_ocp { u32 utc_tai_offset; u32 ts_window_adjust; u64 fw_cap; - struct ptp_ocp_signal signal[4]; + struct ptp_ocp_signal signal[OCP_SIGNAL_NUM]; struct ptp_ocp_sma_connector sma[OCP_SMA_NUM]; const struct ocp_sma_op *sma_op; struct dpll_device *dpll; + int signals_nr; + int freq_in_nr; }; #define OCP_REQ_TIMESTAMP BIT(0) @@ -2697,6 +2701,8 @@ ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r) bp->eeprom_map = fb_eeprom_map; bp->fw_version = ioread32(&bp->image->version); bp->sma_op = &ocp_fb_sma_op; + bp->signals_nr = 4; + bp->freq_in_nr = 4; ptp_ocp_fb_set_version(bp); @@ -2862,6 +2868,8 @@ ptp_ocp_art_board_init(struct ptp_ocp *bp, struct ocp_resource *r) bp->fw_version = ioread32(&bp->reg->version); bp->fw_tag = 2; bp->sma_op = &ocp_art_sma_op; + bp->signals_nr = 4; + bp->freq_in_nr = 4; /* Enable MAC serial port during initialisation */ iowrite32(1, &bp->board_config->mro50_serial_activate); @@ -2888,6 +2896,8 @@ ptp_ocp_adva_board_init(struct ptp_ocp *bp, struct ocp_resource *r) bp->flash_start = 0xA00000; bp->eeprom_map = fb_eeprom_map; bp->sma_op = &ocp_adva_sma_op; + bp->signals_nr = 2; + bp->freq_in_nr = 2; version = ioread32(&bp->image->version); /* if lower 16 bits are empty, this is the fw loader. 
*/ @@ -4008,7 +4018,7 @@ _signal_summary_show(struct seq_file *s, struct ptp_ocp *bp, int nr) { struct signal_reg __iomem *reg = bp->signal_out[nr]->mem; struct ptp_ocp_signal *signal = &bp->signal[nr]; - char label[8]; + char label[16]; bool on; u32 val; @@ -4031,7 +4041,7 @@ static void _frequency_summary_show(struct seq_file *s, int nr, struct frequency_reg __iomem *reg) { - char label[8]; + char label[16]; bool on; u32 val; @@ -4175,11 +4185,11 @@ ptp_ocp_summary_show(struct seq_file *s, void *data) } if (bp->fw_cap & OCP_CAP_SIGNAL) - for (i = 0; i < 4; i++) + for (i = 0; i < bp->signals_nr; i++) _signal_summary_show(s, bp, i); if (bp->fw_cap & OCP_CAP_FREQ) - for (i = 0; i < 4; i++) + for (i = 0; i < bp->freq_in_nr; i++) _frequency_summary_show(s, i, bp->freq_in[i]); if (bp->irig_out) { diff --git a/drivers/regulator/max20086-regulator.c b/drivers/regulator/max20086-regulator.c index 59eb23d467ec..198d45f8e884 100644 --- a/drivers/regulator/max20086-regulator.c +++ b/drivers/regulator/max20086-regulator.c @@ -132,7 +132,7 @@ static int max20086_regulators_register(struct max20086 *chip) static int max20086_parse_regulators_dt(struct max20086 *chip, bool *boot_on) { - struct of_regulator_match matches[MAX20086_MAX_REGULATORS] = { }; + struct of_regulator_match *matches; struct device_node *node; unsigned int i; int ret; @@ -143,6 +143,11 @@ static int max20086_parse_regulators_dt(struct max20086 *chip, bool *boot_on) return -ENODEV; } + matches = devm_kcalloc(chip->dev, chip->info->num_outputs, + sizeof(*matches), GFP_KERNEL); + if (!matches) + return -ENOMEM; + for (i = 0; i < chip->info->num_outputs; ++i) matches[i].name = max20086_output_names[i]; diff --git a/drivers/remoteproc/qcom_wcnss.c b/drivers/remoteproc/qcom_wcnss.c index 775b056d795a..2c7e519a2254 100644 --- a/drivers/remoteproc/qcom_wcnss.c +++ b/drivers/remoteproc/qcom_wcnss.c @@ -456,7 +456,8 @@ static int wcnss_init_regulators(struct qcom_wcnss *wcnss, if (wcnss->num_pds) { info += 
wcnss->num_pds; /* Handle single power domain case */ - num_vregs += num_pd_vregs - wcnss->num_pds; + if (wcnss->num_pds < num_pd_vregs) + num_vregs += num_pd_vregs - wcnss->num_pds; } else { num_vregs += num_pd_vregs; } diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 5a3c670aec27..5522310bab8d 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -403,6 +403,7 @@ config SCSI_ACARD config SCSI_AHA152X tristate "Adaptec AHA152X/2825 support" depends on ISA && SCSI + depends on !HIGHMEM select SCSI_SPI_ATTRS select CHECK_SIGNATURE help @@ -795,6 +796,7 @@ config SCSI_PPA tristate "IOMEGA parallel port (ppa - older drives)" depends on SCSI && PARPORT_PC depends on HAS_IOPORT + depends on !HIGHMEM help This driver supports older versions of IOMEGA's parallel port ZIP drive (a 100 MB removable media device). @@ -822,6 +824,7 @@ config SCSI_PPA config SCSI_IMM tristate "IOMEGA parallel port (imm - newer drives)" depends on SCSI && PARPORT_PC + depends on !HIGHMEM help This driver supports newer versions of IOMEGA's parallel port ZIP drive (a 100 MB removable media device). 
diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c index 4276f868cd91..e94c0a19c435 100644 --- a/drivers/scsi/aha152x.c +++ b/drivers/scsi/aha152x.c @@ -746,7 +746,6 @@ struct Scsi_Host *aha152x_probe_one(struct aha152x_setup *setup) /* need to have host registered before triggering any interrupt */ list_add_tail(&HOSTDATA(shpnt)->host_list, &aha152x_host_list); - shpnt->no_highmem = true; shpnt->io_port = setup->io_port; shpnt->n_io_port = IO_RANGE; shpnt->irq = setup->irq; diff --git a/drivers/scsi/imm.c b/drivers/scsi/imm.c index 1d4c7310f1a6..0821cf994b98 100644 --- a/drivers/scsi/imm.c +++ b/drivers/scsi/imm.c @@ -1224,7 +1224,6 @@ static int __imm_attach(struct parport *pb) host = scsi_host_alloc(&imm_template, sizeof(imm_struct *)); if (!host) goto out1; - host->no_highmem = true; host->io_port = pb->base; host->n_io_port = ports; host->dma_channel = -1; diff --git a/drivers/scsi/ppa.c b/drivers/scsi/ppa.c index a06329b47851..1ed3171f1797 100644 --- a/drivers/scsi/ppa.c +++ b/drivers/scsi/ppa.c @@ -1104,7 +1104,6 @@ static int __ppa_attach(struct parport *pb) host = scsi_host_alloc(&ppa_template, sizeof(ppa_struct *)); if (!host) goto out1; - host->no_highmem = true; host->io_port = pb->base; host->n_io_port = ports; host->dma_channel = -1; diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c index 2fa45556e1ea..0ddc95bafc71 100644 --- a/drivers/scsi/scsi_ioctl.c +++ b/drivers/scsi/scsi_ioctl.c @@ -601,7 +601,7 @@ static int sg_scsi_ioctl(struct request_queue *q, bool open_for_write, } if (bytes) { - err = blk_rq_map_kern(q, rq, buffer, bytes, GFP_NOIO); + err = blk_rq_map_kern(rq, buffer, bytes, GFP_NOIO); if (err) goto error; } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 1b43013d72c0..144c72f0737a 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -313,8 +313,7 @@ retry: return PTR_ERR(req); if (bufflen) { - ret = blk_rq_map_kern(sdev->request_queue, req, - buffer, bufflen, GFP_NOIO); + 
ret = blk_rq_map_kern(req, buffer, bufflen, GFP_NOIO); if (ret) goto out; } @@ -2004,9 +2003,6 @@ void scsi_init_limits(struct Scsi_Host *shost, struct queue_limits *lim) lim->dma_alignment = max_t(unsigned int, shost->dma_alignment, dma_get_cache_alignment() - 1); - if (shost->no_highmem) - lim->features |= BLK_FEAT_BOUNCE_HIGH; - /* * Propagate the DMA formation properties to the dma-mapping layer as * a courtesy service to the LLDDs. This needs to check that the buses diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 7a447ff600d2..a8db66428f80 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -169,6 +169,7 @@ static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp, unsigned int nr_zones, size_t *buflen) { struct request_queue *q = sdkp->disk->queue; + unsigned int max_segments; size_t bufsize; void *buf; @@ -180,12 +181,15 @@ static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp, * Furthermore, since the report zone command cannot be split, make * sure that the allocated buffer can always be mapped by limiting the * number of pages allocated to the HBA max segments limit. + * Since max segments can be larger than the max inline bio vectors, + * further limit the allocated buffer to BIO_MAX_INLINE_VECS. 
*/ nr_zones = min(nr_zones, sdkp->zone_info.nr_zones); bufsize = roundup((nr_zones + 1) * 64, SECTOR_SIZE); bufsize = min_t(size_t, bufsize, queue_max_hw_sectors(q) << SECTOR_SHIFT); - bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT); + max_segments = min(BIO_MAX_INLINE_VECS, queue_max_segments(q)); + bufsize = min_t(size_t, bufsize, max_segments << PAGE_SHIFT); while (bufsize >= SECTOR_SIZE) { buf = kvzalloc(bufsize, GFP_KERNEL | __GFP_NORETRY); diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 35db061ae3ec..2e6b2412d2c9 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1819,6 +1819,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) return SCSI_MLQUEUE_DEVICE_BUSY; } + payload->rangecount = 1; payload->range.len = length; payload->range.offset = offset_in_hvpg; diff --git a/drivers/soc/samsung/exynos-usi.c b/drivers/soc/samsung/exynos-usi.c index c5661ac19f7b..5f7bdf3bab05 100644 --- a/drivers/soc/samsung/exynos-usi.c +++ b/drivers/soc/samsung/exynos-usi.c @@ -233,7 +233,7 @@ static void exynos_usi_unconfigure(void *data) /* Make sure that we've stopped providing the clock to USI IP */ val = readl(usi->regs + USI_OPTION); val &= ~USI_OPTION_CLKREQ_ON; - val |= ~USI_OPTION_CLKSTOP_ON; + val |= USI_OPTION_CLKSTOP_ON; writel(val, usi->regs + USI_OPTION); /* Set USI block state to reset */ diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c index 6f8a20014e76..39aecd34c641 100644 --- a/drivers/soundwire/bus.c +++ b/drivers/soundwire/bus.c @@ -122,6 +122,10 @@ int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, set_bit(SDW_GROUP13_DEV_NUM, bus->assigned); set_bit(SDW_MASTER_DEV_NUM, bus->assigned); + ret = sdw_irq_create(bus, fwnode); + if (ret) + return ret; + /* * SDW is an enumerable bus, but devices can be powered off. So, * they won't be able to report as present. 
@@ -138,6 +142,7 @@ int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, if (ret < 0) { dev_err(bus->dev, "Finding slaves failed:%d\n", ret); + sdw_irq_delete(bus); return ret; } @@ -156,10 +161,6 @@ int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, bus->params.curr_bank = SDW_BANK0; bus->params.next_bank = SDW_BANK1; - ret = sdw_irq_create(bus, fwnode); - if (ret) - return ret; - return 0; } EXPORT_SYMBOL(sdw_bus_master_add); diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index 067c954cb6ea..863781ba6c16 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0+ // // Copyright 2013 Freescale Semiconductor, Inc. -// Copyright 2020 NXP +// Copyright 2020-2025 NXP // // Freescale DSPI driver // This file contains a driver for the Freescale DSPI @@ -62,6 +62,7 @@ #define SPI_SR_TFIWF BIT(18) #define SPI_SR_RFDF BIT(17) #define SPI_SR_CMDFFF BIT(16) +#define SPI_SR_TXRXS BIT(30) #define SPI_SR_CLEAR (SPI_SR_TCFQF | \ SPI_SR_TFUF | SPI_SR_TFFF | \ SPI_SR_CMDTCF | SPI_SR_SPEF | \ @@ -921,9 +922,20 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr, struct spi_transfer *transfer; bool cs = false; int status = 0; + u32 val = 0; + bool cs_change = false; message->actual_length = 0; + /* Put DSPI in running mode if halted. 
*/ + regmap_read(dspi->regmap, SPI_MCR, &val); + if (val & SPI_MCR_HALT) { + regmap_update_bits(dspi->regmap, SPI_MCR, SPI_MCR_HALT, 0); + while (regmap_read(dspi->regmap, SPI_SR, &val) >= 0 && + !(val & SPI_SR_TXRXS)) + ; + } + list_for_each_entry(transfer, &message->transfers, transfer_list) { dspi->cur_transfer = transfer; dspi->cur_msg = message; @@ -953,6 +965,7 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr, dspi->tx_cmd |= SPI_PUSHR_CMD_CONT; } + cs_change = transfer->cs_change; dspi->tx = transfer->tx_buf; dspi->rx = transfer->rx_buf; dspi->len = transfer->len; @@ -962,6 +975,8 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr, SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF, SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF); + regmap_write(dspi->regmap, SPI_SR, SPI_SR_CLEAR); + spi_take_timestamp_pre(dspi->ctlr, dspi->cur_transfer, dspi->progress, !dspi->irq); @@ -988,6 +1003,15 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr, dspi_deassert_cs(spi, &cs); } + if (status || !cs_change) { + /* Put DSPI in stop mode */ + regmap_update_bits(dspi->regmap, SPI_MCR, + SPI_MCR_HALT, SPI_MCR_HALT); + while (regmap_read(dspi->regmap, SPI_SR, &val) >= 0 && + val & SPI_SR_TXRXS) + ; + } + message->status = status; spi_finalize_current_message(ctlr); @@ -1167,6 +1191,20 @@ static int dspi_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(dspi_pm, dspi_suspend, dspi_resume); +static const struct regmap_range dspi_yes_ranges[] = { + regmap_reg_range(SPI_MCR, SPI_MCR), + regmap_reg_range(SPI_TCR, SPI_CTAR(3)), + regmap_reg_range(SPI_SR, SPI_TXFR3), + regmap_reg_range(SPI_RXFR0, SPI_RXFR3), + regmap_reg_range(SPI_CTARE(0), SPI_CTARE(3)), + regmap_reg_range(SPI_SREX, SPI_SREX), +}; + +static const struct regmap_access_table dspi_access_table = { + .yes_ranges = dspi_yes_ranges, + .n_yes_ranges = ARRAY_SIZE(dspi_yes_ranges), +}; + static const struct regmap_range dspi_volatile_ranges[] = { regmap_reg_range(SPI_MCR, SPI_TCR), 
regmap_reg_range(SPI_SR, SPI_SR), @@ -1184,6 +1222,8 @@ static const struct regmap_config dspi_regmap_config = { .reg_stride = 4, .max_register = 0x88, .volatile_table = &dspi_volatile_table, + .rd_table = &dspi_access_table, + .wr_table = &dspi_access_table, }; static const struct regmap_range dspi_xspi_volatile_ranges[] = { @@ -1205,6 +1245,8 @@ static const struct regmap_config dspi_xspi_regmap_config[] = { .reg_stride = 4, .max_register = 0x13c, .volatile_table = &dspi_xspi_volatile_table, + .rd_table = &dspi_access_table, + .wr_table = &dspi_access_table, }, { .name = "pushr", @@ -1227,6 +1269,8 @@ static int dspi_init(struct fsl_dspi *dspi) if (!spi_controller_is_target(dspi->ctlr)) mcr |= SPI_MCR_HOST; + mcr |= SPI_MCR_HALT; + regmap_write(dspi->regmap, SPI_MCR, mcr); regmap_write(dspi->regmap, SPI_SR, SPI_SR_CLEAR); diff --git a/drivers/spi/spi-loopback-test.c b/drivers/spi/spi-loopback-test.c index 31a878d9458d..7740f94847a8 100644 --- a/drivers/spi/spi-loopback-test.c +++ b/drivers/spi/spi-loopback-test.c @@ -420,7 +420,7 @@ MODULE_LICENSE("GPL"); static void spi_test_print_hex_dump(char *pre, const void *ptr, size_t len) { /* limit the hex_dump */ - if (len < 1024) { + if (len <= 1024) { print_hex_dump(KERN_INFO, pre, DUMP_PREFIX_OFFSET, 16, 1, ptr, len, 0); diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c index f89826d7dc49..aa92fd5a35a9 100644 --- a/drivers/spi/spi-sun4i.c +++ b/drivers/spi/spi-sun4i.c @@ -264,6 +264,9 @@ static int sun4i_spi_transfer_one(struct spi_controller *host, else reg |= SUN4I_CTL_DHB; + /* Now that the settings are correct, enable the interface */ + reg |= SUN4I_CTL_ENABLE; + sun4i_spi_write(sspi, SUN4I_CTL_REG, reg); /* Ensure that we have a parent clock fast enough */ @@ -404,7 +407,7 @@ static int sun4i_spi_runtime_resume(struct device *dev) } sun4i_spi_write(sspi, SUN4I_CTL_REG, - SUN4I_CTL_ENABLE | SUN4I_CTL_MASTER | SUN4I_CTL_TP); + SUN4I_CTL_MASTER | SUN4I_CTL_TP); return 0; diff --git 
a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c index 2a8bb798e95b..795a8482c2c7 100644 --- a/drivers/spi/spi-tegra114.c +++ b/drivers/spi/spi-tegra114.c @@ -728,9 +728,9 @@ static int tegra_spi_set_hw_cs_timing(struct spi_device *spi) u32 inactive_cycles; u8 cs_state; - if ((setup->unit && setup->unit != SPI_DELAY_UNIT_SCK) || - (hold->unit && hold->unit != SPI_DELAY_UNIT_SCK) || - (inactive->unit && inactive->unit != SPI_DELAY_UNIT_SCK)) { + if ((setup->value && setup->unit != SPI_DELAY_UNIT_SCK) || + (hold->value && hold->unit != SPI_DELAY_UNIT_SCK) || + (inactive->value && inactive->unit != SPI_DELAY_UNIT_SCK)) { dev_err(&spi->dev, "Invalid delay unit %d, should be SPI_DELAY_UNIT_SCK\n", SPI_DELAY_UNIT_SCK); diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c index 496abf8e55e0..2841d14914b7 100644 --- a/drivers/thermal/intel/x86_pkg_temp_thermal.c +++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c @@ -329,6 +329,7 @@ static int pkg_temp_thermal_device_add(unsigned int cpu) tj_max = intel_tcc_get_tjmax(cpu); if (tj_max < 0) return tj_max; + tj_max *= 1000; zonedev = kzalloc(sizeof(*zonedev), GFP_KERNEL); if (!zonedev) diff --git a/drivers/usb/gadget/function/f_midi2.c b/drivers/usb/gadget/function/f_midi2.c index 12e866fb311d..0a800ba53816 100644 --- a/drivers/usb/gadget/function/f_midi2.c +++ b/drivers/usb/gadget/function/f_midi2.c @@ -475,7 +475,7 @@ static void reply_ump_stream_ep_info(struct f_midi2_ep *ep) /* reply a UMP EP device info */ static void reply_ump_stream_ep_device(struct f_midi2_ep *ep) { - struct snd_ump_stream_msg_devince_info rep = { + struct snd_ump_stream_msg_device_info rep = { .type = UMP_MSG_TYPE_STREAM, .status = UMP_STREAM_MSG_STATUS_DEVICE_INFO, .manufacture_id = ep->info.manufacturer, diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index d36f3b6992bb..152ee3376550 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -1056,13 
+1056,20 @@ int usb_stor_probe1(struct us_data **pus, goto BadDevice; /* - * Some USB host controllers can't do DMA; they have to use PIO. - * For such controllers we need to make sure the block layer sets - * up bounce buffers in addressable memory. + * Some USB host controllers can't do DMA: They have to use PIO, or they + * have to use a small dedicated local memory area, or they have other + * restrictions on addressable memory. + * + * We can't support these controllers on highmem systems as we don't + * kmap or bounce buffer. */ - if (!hcd_uses_dma(bus_to_hcd(us->pusb_dev->bus)) || - bus_to_hcd(us->pusb_dev->bus)->localmem_pool) - host->no_highmem = true; + if (IS_ENABLED(CONFIG_HIGHMEM) && + (!hcd_uses_dma(bus_to_hcd(us->pusb_dev->bus)) || + bus_to_hcd(us->pusb_dev->bus)->localmem_pool)) { + dev_warn(&intf->dev, "USB Mass Storage not supported on this host controller\n"); + result = -EINVAL; + goto release; + } /* Get the unusual_devs entries and the descriptors */ result = get_device_info(us, id, unusual_dev); @@ -1081,6 +1088,7 @@ int usb_stor_probe1(struct us_data **pus, BadDevice: usb_stor_dbg(us, "storage_probe() failed\n"); +release: release_everything(us); return result; } |