diff options
94 files changed, 1319 insertions, 812 deletions
diff --git a/Documentation/ABI/testing/sysfs-driver-ufs b/Documentation/ABI/testing/sysfs-driver-ufs index d4140dc6c5ba..615453fcc9ff 100644 --- a/Documentation/ABI/testing/sysfs-driver-ufs +++ b/Documentation/ABI/testing/sysfs-driver-ufs @@ -711,7 +711,7 @@ Description: This file shows the thin provisioning type. This is one of The file is read only. -What: /sys/class/scsi_device/*/device/unit_descriptor/physical_memory_resourse_count +What: /sys/class/scsi_device/*/device/unit_descriptor/physical_memory_resource_count Date: February 2018 Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com> Description: This file shows the total physical memory resources. This is diff --git a/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml b/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml index 4dd2dc9c678b..8afbd9ebd73f 100644 --- a/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml @@ -80,6 +80,8 @@ examples: interrupt-parent = <&intc>; interrupts = <296 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "macirq"; + phy-handle = <&phy0>; + phy-mode = "rgmii-id"; resets = <&rst 30>; reset-names = "stmmaceth"; snps,multicast-filter-bins = <0>; @@ -91,7 +93,6 @@ examples: snps,mtl-rx-config = <&gmac0_mtl_rx_setup>; snps,mtl-tx-config = <&gmac0_mtl_tx_setup>; snps,axi-config = <&gmac0_stmmac_axi_setup>; - status = "disabled"; gmac0_mtl_rx_setup: rx-queues-config { snps,rx-queues-to-use = <8>; diff --git a/Documentation/networking/tls.rst b/Documentation/networking/tls.rst index c7904a1bc167..36cc7afc2527 100644 --- a/Documentation/networking/tls.rst +++ b/Documentation/networking/tls.rst @@ -16,11 +16,13 @@ User interface Creating a TLS connection ------------------------- -First create a new TCP socket and set the TLS ULP. +First create a new TCP socket and once the connection is established set the +TLS ULP. .. code-block:: c sock = socket(AF_INET, SOCK_STREAM, 0); + connect(sock, addr, addrlen); setsockopt(sock, SOL_TCP, TCP_ULP, "tls", sizeof("tls")); Setting the TLS ULP allows us to set/get TLS socket options. Currently diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst index 1ac62dc3a66f..e1755610b4bc 100644 --- a/Documentation/process/maintainer-netdev.rst +++ b/Documentation/process/maintainer-netdev.rst @@ -312,7 +312,7 @@ Posting as one thread is discouraged because it confuses patchwork (as of patchwork 2.2.2). Co-posting selftests --------------------- +~~~~~~~~~~~~~~~~~~~~ Selftests should be part of the same series as the code changes. Specifically for fixes both code change and related test should go into diff --git a/MAINTAINERS b/MAINTAINERS index 4bac4ea21b64..fad6cb025a19 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15550,6 +15550,7 @@ F: drivers/net/ethernet/mellanox/mlx4/en_* MELLANOX ETHERNET DRIVER (mlx5e) M: Saeed Mahameed <saeedm@nvidia.com> M: Tariq Toukan <tariqt@nvidia.com> +M: Mark Bloch <mbloch@nvidia.com> L: netdev@vger.kernel.org S: Maintained W: https://www.nvidia.com/networking/ @@ -15619,6 +15620,7 @@ MELLANOX MLX5 core VPI driver M: Saeed Mahameed <saeedm@nvidia.com> M: Leon Romanovsky <leonro@nvidia.com> M: Tariq Toukan <tariqt@nvidia.com> +M: Mark Bloch <mbloch@nvidia.com> L: netdev@vger.kernel.org L: linux-rdma@vger.kernel.org S: Maintained @@ -21198,7 +21200,7 @@ M: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> L: netdev@vger.kernel.org L: linux-renesas-soc@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/net/renesas,r9a09g057-gbeth.yaml +F: Documentation/devicetree/bindings/net/renesas,rzv2h-gbeth.yaml F: drivers/net/ethernet/stmicro/stmmac/dwmac-renesas-gbeth.c RENESAS RZ/V2H(P) USB2PHY PORT RESET DRIVER @@ -22586,9 +22588,11 @@ S: Maintained F: drivers/misc/sgi-xp/ SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS +M: D. Wythe <alibuda@linux.alibaba.com> +M: Dust Li <dust.li@linux.alibaba.com> +M: Sidraya Jayagond <sidraya@linux.ibm.com> M: Wenjia Zhang <wenjia@linux.ibm.com> -M: Jan Karcher <jaka@linux.ibm.com> -R: D. Wythe <alibuda@linux.alibaba.com> +R: Mahanta Jambigi <mjambigi@linux.ibm.com> R: Tony Lu <tonylu@linux.alibaba.com> R: Wen Gu <guwen@linux.alibaba.com> L: linux-rdma@vger.kernel.org diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 2fbee3887d13..d930416d4c90 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -54,6 +54,7 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res) case PCI_ERS_RESULT_CAN_RECOVER: case PCI_ERS_RESULT_RECOVERED: case PCI_ERS_RESULT_NEED_RESET: + case PCI_ERS_RESULT_NONE: return false; default: return true; @@ -78,10 +79,6 @@ static bool is_driver_supported(struct pci_driver *driver) return false; if (!driver->err_handler->error_detected) return false; - if (!driver->err_handler->slot_reset) - return false; - if (!driver->err_handler->resume) - return false; return true; } @@ -106,6 +103,10 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev, struct zpci_dev *zdev = to_zpci(pdev); int rc; + /* The underlying device may have been disabled by the event */ + if (!zdev_enabled(zdev)) + return PCI_ERS_RESULT_NEED_RESET; + pr_info("%s: Unblocking device access for examination\n", pci_name(pdev)); rc = zpci_reset_load_store_blocked(zdev); if (rc) { @@ -114,16 +115,18 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev, return PCI_ERS_RESULT_NEED_RESET; } - if (driver->err_handler->mmio_enabled) { + if (driver->err_handler->mmio_enabled) ers_res = driver->err_handler->mmio_enabled(pdev); - if (ers_result_indicates_abort(ers_res)) { - pr_info("%s: Automatic recovery failed after MMIO re-enable\n", - pci_name(pdev)); - return ers_res; - } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) { - pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); - return ers_res; - } + else + ers_res = PCI_ERS_RESULT_NONE; + + if (ers_result_indicates_abort(ers_res)) { + pr_info("%s: Automatic recovery failed after MMIO re-enable\n", + pci_name(pdev)); + return ers_res; + } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) { + pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); + return ers_res; } pr_debug("%s: Unblocking DMA\n", pci_name(pdev)); @@ -150,7 +153,12 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev, return ers_res; } pdev->error_state = pci_channel_io_normal; - ers_res = driver->err_handler->slot_reset(pdev); + + if (driver->err_handler->slot_reset) + ers_res = driver->err_handler->slot_reset(pdev); + else + ers_res = PCI_ERS_RESULT_NONE; + if (ers_result_indicates_abort(ers_res)) { pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev)); return ers_res; @@ -214,7 +222,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) goto out_unlock; } - if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) { + if (ers_res != PCI_ERS_RESULT_NEED_RESET) { ers_res = zpci_event_do_error_state_clear(pdev, driver); if (ers_result_indicates_abort(ers_res)) { status_str = "failed (abort on MMIO enable)"; @@ -225,6 +233,16 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) if (ers_res == PCI_ERS_RESULT_NEED_RESET) ers_res = zpci_event_do_reset(pdev, driver); + /* + * ers_res can be PCI_ERS_RESULT_NONE either because the driver + * decided to return it, indicating that it abstains from voting + * on how to recover, or because it didn't implement the callback. + * Both cases assume, that if there is nothing else causing a + * disconnect, we recovered successfully. + */ + if (ers_res == PCI_ERS_RESULT_NONE) + ers_res = PCI_ERS_RESULT_RECOVERED; + if (ers_res != PCI_ERS_RESULT_RECOVERED) { pr_err("%s: Automatic recovery failed; operator intervention is required\n", pci_name(pdev)); @@ -273,6 +291,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); struct pci_dev *pdev = NULL; pci_ers_result_t ers_res; + u32 fh = 0; + int rc; zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n", ccdf->fid, ccdf->fh, ccdf->pec); @@ -281,6 +301,15 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) if (zdev) { mutex_lock(&zdev->state_lock); + rc = clp_refresh_fh(zdev->fid, &fh); + if (rc) + goto no_pdev; + if (!fh || ccdf->fh != fh) { + /* Ignore events with stale handles */ + zpci_dbg(3, "err fid:%x, fh:%x (stale %x)\n", + ccdf->fid, fh, ccdf->fh); + goto no_pdev; + } zpci_update_fh(zdev, ccdf->fh); if (zdev->zbus->bus) pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 9979a351577f..81cf3c902e81 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -582,8 +582,8 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u32 port, out_unlock: mutex_unlock(&table->lock); if (ret) - pr_warn("%s: unable to add gid %pI6 error=%d\n", - __func__, gid->raw, ret); + pr_warn_ratelimited("%s: unable to add gid %pI6 error=%d\n", + __func__, gid->raw, ret); return ret; } diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index c752ae9fad6c..b1c44ec1a3f3 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -76,6 +76,17 @@ static int ib_init_umem_odp(struct ib_umem_odp *umem_odp, end = ALIGN(end, page_size); if (unlikely(end < page_size)) return -EOVERFLOW; + /* + * The mmu notifier can be called within reclaim contexts and takes the + * umem_mutex. This is rare to trigger in testing, teach lockdep about + * it. + */ + if (IS_ENABLED(CONFIG_LOCKDEP)) { + fs_reclaim_acquire(GFP_KERNEL); + mutex_lock(&umem_odp->umem_mutex); + mutex_unlock(&umem_odp->umem_mutex); + fs_reclaim_release(GFP_KERNEL); + } nr_entries = (end - start) >> PAGE_SHIFT; if (!(nr_entries * PAGE_SIZE / page_size)) diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index b847084dcd99..a506fafd2b15 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -398,7 +398,7 @@ static int do_get_hw_stats(struct ib_device *ibdev, return ret; /* We don't expose device counters over Vports */ - if (is_mdev_switchdev_mode(dev->mdev) && port_num != 0) + if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0) goto done; if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { @@ -418,7 +418,7 @@ static int do_get_hw_stats(struct ib_device *ibdev, */ goto done; } - ret = mlx5_lag_query_cong_counters(dev->mdev, + ret = mlx5_lag_query_cong_counters(mdev, stats->value + cnts->num_q_counters, cnts->num_cong_counters, diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 2479da8620ca..843dcd312242 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1958,6 +1958,7 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table, /* Level1 is valid for future use, no need to free */ return -ENOMEM; + INIT_LIST_HEAD(&obj_event->obj_sub_list); err = xa_insert(&event->object_ids, key_level2, obj_event, @@ -1966,7 +1967,6 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table, kfree(obj_event); return err; } - INIT_LIST_HEAD(&obj_event->obj_sub_list); } return 0; @@ -2669,7 +2669,7 @@ static void devx_wait_async_destroy(struct mlx5_async_cmd *cmd) void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile) { - struct mlx5_async_cmd async_cmd[MAX_ASYNC_CMDS]; + struct mlx5_async_cmd *async_cmd; struct ib_ucontext *ucontext = ufile->ucontext; struct ib_device *device = ucontext->device; struct mlx5_ib_dev *dev = to_mdev(device); @@ -2678,6 +2678,10 @@ void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile) int head = 0; int tail = 0; + async_cmd = kcalloc(MAX_ASYNC_CMDS, sizeof(*async_cmd), GFP_KERNEL); + if (!async_cmd) + return; + list_for_each_entry(uobject, &ufile->uobjects, list) { WARN_ON(uverbs_try_lock_object(uobject, UVERBS_LOOKUP_WRITE)); @@ -2713,6 +2717,8 @@ void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile) devx_wait_async_destroy(&async_cmd[head % MAX_ASYNC_CMDS]); head++; } + + kfree(async_cmd); } static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ce7610740412..df6557ddbdfc 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1791,6 +1791,33 @@ static void deallocate_uars(struct mlx5_ib_dev *dev, context->devx_uid); } +static int mlx5_ib_enable_lb_mp(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave) +{ + int err; + + err = mlx5_nic_vport_update_local_lb(master, true); + if (err) + return err; + + err = mlx5_nic_vport_update_local_lb(slave, true); + if (err) + goto out; + + return 0; + +out: + mlx5_nic_vport_update_local_lb(master, false); + return err; +} + +static void mlx5_ib_disable_lb_mp(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave) +{ + mlx5_nic_vport_update_local_lb(slave, false); + mlx5_nic_vport_update_local_lb(master, false); +} + int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp) { int err = 0; @@ -3495,6 +3522,8 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, lockdep_assert_held(&mlx5_ib_multiport_mutex); + mlx5_ib_disable_lb_mp(ibdev->mdev, mpi->mdev); + mlx5_core_mp_event_replay(ibdev->mdev, MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, NULL); @@ -3590,6 +3619,10 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, MLX5_DRIVER_EVENT_AFFILIATION_DONE, &key); + err = mlx5_ib_enable_lb_mp(ibdev->mdev, mpi->mdev); + if (err) + goto unbind; + return true; unbind: diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 57f9bc2a4a3a..bd35e75d9ce5 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -2027,23 +2027,50 @@ void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev) } } -static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) +static int mlx5_umr_revoke_mr_with_lock(struct mlx5_ib_mr *mr) { - struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); - struct mlx5_cache_ent *ent = mr->mmkey.cache_ent; - bool is_odp = is_odp_mr(mr); bool is_odp_dma_buf = is_dmabuf_mr(mr) && - !to_ib_umem_dmabuf(mr->umem)->pinned; - bool from_cache = !!ent; - int ret = 0; + !to_ib_umem_dmabuf(mr->umem)->pinned; + bool is_odp = is_odp_mr(mr); + int ret; if (is_odp) mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); if (is_odp_dma_buf) - dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, NULL); + dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, + NULL); + + ret = mlx5r_umr_revoke_mr(mr); + + if (is_odp) { + if (!ret) + to_ib_umem_odp(mr->umem)->private = NULL; + mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex); + } + + if (is_odp_dma_buf) { + if (!ret) + to_ib_umem_dmabuf(mr->umem)->private = NULL; + dma_resv_unlock( + to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); + } - if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) { + return ret; +} + +static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr) +{ + bool is_odp_dma_buf = is_dmabuf_mr(mr) && + !to_ib_umem_dmabuf(mr->umem)->pinned; + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); + struct mlx5_cache_ent *ent = mr->mmkey.cache_ent; + bool is_odp = is_odp_mr(mr); + bool from_cache = !!ent; + int ret; + + if (mr->mmkey.cacheable && !mlx5_umr_revoke_mr_with_lock(mr) && + !cache_ent_find_and_store(dev, mr)) { ent = mr->mmkey.cache_ent; /* upon storing to a clean temp entry - schedule its cleanup */ spin_lock_irq(&ent->mkeys_queue.lock); @@ -2055,7 +2082,7 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) ent->tmp_cleanup_scheduled = true; } spin_unlock_irq(&ent->mkeys_queue.lock); - goto out; + return 0; } if (ent) { @@ -2064,8 +2091,14 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) mr->mmkey.cache_ent = NULL; spin_unlock_irq(&ent->mkeys_queue.lock); } + + if (is_odp) + mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); + + if (is_odp_dma_buf) + dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, + NULL); ret = destroy_mkey(dev, mr); -out: if (is_odp) { if (!ret) to_ib_umem_odp(mr->umem)->private = NULL; @@ -2075,9 +2108,9 @@ out: if (is_odp_dma_buf) { if (!ret) to_ib_umem_dmabuf(mr->umem)->private = NULL; - dma_resv_unlock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); + dma_resv_unlock( + to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); } - return ret; } @@ -2126,7 +2159,7 @@ static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr) } /* Stop DMA */ - rc = mlx5_revoke_mr(mr); + rc = mlx5r_handle_mkey_cleanup(mr); if (rc) return rc; diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index eaa2f9f5f3a9..f6abd64f07f7 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -259,8 +259,8 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) } if (MLX5_CAP_ODP(mr_to_mdev(mr)->mdev, mem_page_fault)) - __xa_erase(&mr_to_mdev(mr)->odp_mkeys, - mlx5_base_mkey(mr->mmkey.key)); + xa_erase(&mr_to_mdev(mr)->odp_mkeys, + mlx5_base_mkey(mr->mmkey.key)); xa_unlock(&imr->implicit_children); /* Freeing a MR is a sleeping operation, so bounce to a work queue */ @@ -532,8 +532,8 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, } if (MLX5_CAP_ODP(dev->mdev, mem_page_fault)) { - ret = __xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), - &mr->mmkey, GFP_KERNEL); + ret = xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), + &mr->mmkey, GFP_KERNEL); if (xa_is_err(ret)) { ret = ERR_PTR(xa_err(ret)); __xa_erase(&imr->implicit_children, idx); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 1378651735f6..23ed2fc688f0 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3705,9 +3705,10 @@ static ssize_t add_target_store(struct device *dev, target_host->max_id = 1; target_host->max_lun = -1LL; target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb; - target_host->max_segment_size = ib_dma_max_seg_size(ibdev); - if (!(ibdev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)) + if (ibdev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) + target_host->max_segment_size = ib_dma_max_seg_size(ibdev); + else target_host->virt_boundary_mask = ~srp_dev->mr_page_mask; target = host_to_target(target_host); diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c index 488e346047c1..77230fbe07be 100644 --- a/drivers/mfd/88pm860x-core.c +++ b/drivers/mfd/88pm860x-core.c @@ -573,7 +573,6 @@ static int device_irq_init(struct pm860x_chip *chip, unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT; int data, mask, ret = -EINVAL; int nr_irqs, irq_base = -1; - struct device_node *node = i2c->dev.of_node; mask = PM8607_B0_MISC1_INV_INT | PM8607_B0_MISC1_INT_CLEAR | PM8607_B0_MISC1_INT_MASK; @@ -624,7 +623,7 @@ static int device_irq_init(struct pm860x_chip *chip, ret = -EBUSY; goto out; } - irq_domain_create_legacy(of_fwnode_handle(node), nr_irqs, chip->irq_base, 0, + irq_domain_create_legacy(dev_fwnode(&i2c->dev), nr_irqs, chip->irq_base, 0, &pm860x_irq_domain_ops, chip); chip->core_irq = i2c->irq; if (!chip->core_irq) diff --git a/drivers/mfd/max8925-core.c b/drivers/mfd/max8925-core.c index 78b16c67a5fc..25377dcce60e 100644 --- a/drivers/mfd/max8925-core.c +++ b/drivers/mfd/max8925-core.c @@ -656,7 +656,6 @@ static int max8925_irq_init(struct max8925_chip *chip, int irq, { unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT; int ret; - struct device_node *node = chip->dev->of_node; /* clear all interrupts */ max8925_reg_read(chip->i2c, MAX8925_CHG_IRQ1); @@ -682,8 +681,9 @@ static int max8925_irq_init(struct max8925_chip *chip, int irq, return -EBUSY; } - irq_domain_create_legacy(of_fwnode_handle(node), MAX8925_NR_IRQS, chip->irq_base, 0, - &max8925_irq_domain_ops, chip); + irq_domain_create_legacy(dev_fwnode(chip->dev), MAX8925_NR_IRQS, + chip->irq_base, 0, &max8925_irq_domain_ops, + chip); /* request irq handler for pmic main irq*/ chip->core_irq = irq; diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c index 232c2bfe8c18..d3ab40651307 100644 --- a/drivers/mfd/twl4030-irq.c +++ b/drivers/mfd/twl4030-irq.c @@ -676,7 +676,6 @@ int twl4030_init_irq(struct device *dev, int irq_num) static struct irq_chip twl4030_irq_chip; int status, i; int irq_base, irq_end, nr_irqs; - struct device_node *node = dev->of_node; /* * TWL core and pwr interrupts must be contiguous because @@ -691,7 +690,7 @@ int twl4030_init_irq(struct device *dev, int irq_num) return irq_base; } - irq_domain_create_legacy(of_fwnode_handle(node), nr_irqs, irq_base, 0, + irq_domain_create_legacy(dev_fwnode(dev), nr_irqs, irq_base, 0, &irq_domain_simple_ops, NULL); irq_end = irq_base + TWL4030_CORE_NR_IRQS; diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h index 7f893bafaa60..c417ed34c057 100644 --- a/drivers/mmc/core/quirks.h +++ b/drivers/mmc/core/quirks.h @@ -44,6 +44,12 @@ static const struct mmc_fixup __maybe_unused mmc_sd_fixups[] = { 0, -1ull, SDIO_ANY_ID, SDIO_ANY_ID, add_quirk_sd, MMC_QUIRK_NO_UHS_DDR50_TUNING, EXT_CSD_REV_ANY), + /* + * Some SD cards reports discard support while they don't + */ + MMC_FIXUP(CID_NAME_ANY, CID_MANFID_SANDISK_SD, 0x5344, add_quirk_sd, + MMC_QUIRK_BROKEN_SD_DISCARD), + END_FIXUP }; @@ -147,12 +153,6 @@ static const struct mmc_fixup __maybe_unused mmc_blk_fixups[] = { MMC_FIXUP("M62704", CID_MANFID_KINGSTON, 0x0100, add_quirk_mmc, MMC_QUIRK_TRIM_BROKEN), - /* - * Some SD cards reports discard support while they don't - */ - MMC_FIXUP(CID_NAME_ANY, CID_MANFID_SANDISK_SD, 0x5344, add_quirk_sd, - MMC_QUIRK_BROKEN_SD_DISCARD), - END_FIXUP }; diff --git a/drivers/mmc/core/sd_uhs2.c b/drivers/mmc/core/sd_uhs2.c index 1c31d0dfa961..de17d1611290 100644 --- a/drivers/mmc/core/sd_uhs2.c +++ b/drivers/mmc/core/sd_uhs2.c @@ -91,8 +91,8 @@ static int sd_uhs2_phy_init(struct mmc_host *host) err = host->ops->uhs2_control(host, UHS2_PHY_INIT); if (err) { - pr_err("%s: failed to initial phy for UHS-II!\n", - mmc_hostname(host)); + pr_debug("%s: failed to initial phy for UHS-II!\n", + mmc_hostname(host)); } return err; diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index 31eb90536bce..d7020e06dd55 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -846,12 +846,18 @@ static inline void msdc_dma_setup(struct msdc_host *host, struct msdc_dma *dma, static void msdc_prepare_data(struct msdc_host *host, struct mmc_data *data) { if (!(data->host_cookie & MSDC_PREPARE_FLAG)) { - data->host_cookie |= MSDC_PREPARE_FLAG; data->sg_count = dma_map_sg(host->dev, data->sg, data->sg_len, mmc_get_dma_dir(data)); + if (data->sg_count) + data->host_cookie |= MSDC_PREPARE_FLAG; } } +static bool msdc_data_prepared(struct mmc_data *data) +{ + return data->host_cookie & MSDC_PREPARE_FLAG; +} + static void msdc_unprepare_data(struct msdc_host *host, struct mmc_data *data) { if (data->host_cookie & MSDC_ASYNC_FLAG) @@ -1483,8 +1489,19 @@ static void msdc_ops_request(struct mmc_host *mmc, struct mmc_request *mrq) WARN_ON(!host->hsq_en && host->mrq); host->mrq = mrq; - if (mrq->data) + if (mrq->data) { msdc_prepare_data(host, mrq->data); + if (!msdc_data_prepared(mrq->data)) { + host->mrq = NULL; + /* + * Failed to prepare DMA area, fail fast before + * starting any commands. + */ + mrq->cmd->error = -ENOSPC; + mmc_request_done(mmc_from_priv(host), mrq); + return; + } + } /* if SBC is required, we have HW option and SW option. * if HW option is enabled, and SBC does not have "special" flags, diff --git a/drivers/mmc/host/sdhci-of-k1.c b/drivers/mmc/host/sdhci-of-k1.c index 6880d3e9ab62..2e5da7c5834c 100644 --- a/drivers/mmc/host/sdhci-of-k1.c +++ b/drivers/mmc/host/sdhci-of-k1.c @@ -276,7 +276,8 @@ static int spacemit_sdhci_probe(struct platform_device *pdev) host->mmc->caps |= MMC_CAP_NEED_RSP_BUSY; - if (spacemit_sdhci_get_clocks(dev, pltfm_host)) + ret = spacemit_sdhci_get_clocks(dev, pltfm_host); + if (ret) goto err_pltfm; ret = sdhci_add_host(host); diff --git a/drivers/mmc/host/sdhci-uhs2.c b/drivers/mmc/host/sdhci-uhs2.c index c53b64d50c0d..0efeb9d0c376 100644 --- a/drivers/mmc/host/sdhci-uhs2.c +++ b/drivers/mmc/host/sdhci-uhs2.c @@ -99,8 +99,8 @@ void sdhci_uhs2_reset(struct sdhci_host *host, u16 mask) /* hw clears the bit when it's done */ if (read_poll_timeout_atomic(sdhci_readw, val, !(val & mask), 10, UHS2_RESET_TIMEOUT_100MS, true, host, SDHCI_UHS2_SW_RESET)) { - pr_warn("%s: %s: Reset 0x%x never completed. %s: clean reset bit.\n", __func__, - mmc_hostname(host->mmc), (int)mask, mmc_hostname(host->mmc)); + pr_debug("%s: %s: Reset 0x%x never completed. %s: clean reset bit.\n", __func__, + mmc_hostname(host->mmc), (int)mask, mmc_hostname(host->mmc)); sdhci_writeb(host, 0, SDHCI_UHS2_SW_RESET); return; } @@ -335,8 +335,8 @@ static int sdhci_uhs2_interface_detect(struct sdhci_host *host) if (read_poll_timeout(sdhci_readl, val, (val & SDHCI_UHS2_IF_DETECT), 100, UHS2_INTERFACE_DETECT_TIMEOUT_100MS, true, host, SDHCI_PRESENT_STATE)) { - pr_warn("%s: not detect UHS2 interface in 100ms.\n", mmc_hostname(host->mmc)); - sdhci_dumpregs(host); + pr_debug("%s: not detect UHS2 interface in 100ms.\n", mmc_hostname(host->mmc)); + sdhci_dbg_dumpregs(host, "UHS2 interface detect timeout in 100ms"); return -EIO; } @@ -345,8 +345,8 @@ static int sdhci_uhs2_interface_detect(struct sdhci_host *host) if (read_poll_timeout(sdhci_readl, val, (val & SDHCI_UHS2_LANE_SYNC), 100, UHS2_LANE_SYNC_TIMEOUT_150MS, true, host, SDHCI_PRESENT_STATE)) { - pr_warn("%s: UHS2 Lane sync fail in 150ms.\n", mmc_hostname(host->mmc)); - sdhci_dumpregs(host); + pr_debug("%s: UHS2 Lane sync fail in 150ms.\n", mmc_hostname(host->mmc)); + sdhci_dbg_dumpregs(host, "UHS2 Lane sync fail in 150ms"); return -EIO; } @@ -417,12 +417,12 @@ static int sdhci_uhs2_do_detect_init(struct mmc_host *mmc) host->ops->uhs2_pre_detect_init(host); if (sdhci_uhs2_interface_detect(host)) { - pr_warn("%s: cannot detect UHS2 interface.\n", mmc_hostname(host->mmc)); + pr_debug("%s: cannot detect UHS2 interface.\n", mmc_hostname(host->mmc)); return -EIO; } if (sdhci_uhs2_init(host)) { - pr_warn("%s: UHS2 init fail.\n", mmc_hostname(host->mmc)); + pr_debug("%s: UHS2 init fail.\n", mmc_hostname(host->mmc)); return -EIO; } @@ -504,8 +504,8 @@ static int sdhci_uhs2_check_dormant(struct sdhci_host *host) if (read_poll_timeout(sdhci_readl, val, (val & SDHCI_UHS2_IN_DORMANT_STATE), 100, UHS2_CHECK_DORMANT_TIMEOUT_100MS, true, host, SDHCI_PRESENT_STATE)) { - pr_warn("%s: UHS2 IN_DORMANT fail in 100ms.\n", mmc_hostname(host->mmc)); - sdhci_dumpregs(host); + pr_debug("%s: UHS2 IN_DORMANT fail in 100ms.\n", mmc_hostname(host->mmc)); + sdhci_dbg_dumpregs(host, "UHS2 IN_DORMANT fail in 100ms"); return -EIO; } return 0; diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index f008167d1863..e116f2db34d5 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2065,15 +2065,10 @@ void sdhci_set_clock(struct sdhci_host *host, unsigned int clock) host->mmc->actual_clock = 0; - clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL); - if (clk & SDHCI_CLOCK_CARD_EN) - sdhci_writew(host, clk & ~SDHCI_CLOCK_CARD_EN, - SDHCI_CLOCK_CONTROL); + sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL); - if (clock == 0) { - sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL); + if (clock == 0) return; - } clk = sdhci_calc_clk(host, clock, &host->mmc->actual_clock); sdhci_enable_clk(host, clk); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index f9d65dd0f2b2..70ada1857a4c 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -900,4 +900,20 @@ void sdhci_switch_external_dma(struct sdhci_host *host, bool en); void sdhci_set_data_timeout_irq(struct sdhci_host *host, bool enable); void __sdhci_set_timeout(struct sdhci_host *host, struct mmc_command *cmd); +#if defined(CONFIG_DYNAMIC_DEBUG) || \ + (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) +#define SDHCI_DBG_ANYWAY 0 +#elif defined(DEBUG) +#define SDHCI_DBG_ANYWAY 1 +#else +#define SDHCI_DBG_ANYWAY 0 +#endif + +#define sdhci_dbg_dumpregs(host, fmt) \ +do { \ + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ + if (DYNAMIC_DEBUG_BRANCH(descriptor) || SDHCI_DBG_ANYWAY) \ + sdhci_dumpregs(host); \ +} while (0) + #endif /* __SDHCI_HW_H */ diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index e1296cbf4ff3..9316de4126cf 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -1269,6 +1269,8 @@ #define MDIO_VEND2_CTRL1_SS13 BIT(13) #endif +#define XGBE_VEND2_MAC_AUTO_SW BIT(9) + /* MDIO mask values */ #define XGBE_AN_CL73_INT_CMPLT BIT(0) #define XGBE_AN_CL73_INC_LINK BIT(1) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 71449edbb76d..1a37ec45e650 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -266,6 +266,10 @@ static void xgbe_an37_set(struct xgbe_prv_data *pdata, bool enable, reg |= MDIO_VEND2_CTRL1_AN_RESTART; XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_CTRL1, reg); + + reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_PCS_DIG_CTRL); + reg |= XGBE_VEND2_MAC_AUTO_SW; + XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_PCS_DIG_CTRL, reg); } static void xgbe_an37_restart(struct xgbe_prv_data *pdata) @@ -894,6 +898,11 @@ static void xgbe_an37_init(struct xgbe_prv_data *pdata) netif_dbg(pdata, link, pdata->netdev, "CL37 AN (%s) initialized\n", (pdata->an_mode == XGBE_AN_MODE_CL37) ? "BaseX" : "SGMII"); + + reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_CTRL1); + reg &= ~MDIO_AN_CTRL1_ENABLE; + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_CTRL1, reg); + } static void xgbe_an73_init(struct xgbe_prv_data *pdata) @@ -1295,6 +1304,10 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata) pdata->phy.link = pdata->phy_if.phy_impl.link_status(pdata, &an_restart); + /* bail out if the link status register read fails */ + if (pdata->phy.link < 0) + return; + if (an_restart) { xgbe_phy_config_aneg(pdata); goto adjust_link; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 7a4dfa4e19c7..23c39e92e783 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -2746,8 +2746,7 @@ static bool xgbe_phy_valid_speed(struct xgbe_prv_data *pdata, int speed) static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) { struct xgbe_phy_data *phy_data = pdata->phy_data; - unsigned int reg; - int ret; + int reg, ret; *an_restart = 0; @@ -2781,11 +2780,20 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) return 0; } - /* Link status is latched low, so read once to clear - * and then read again to get current state - */ - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); + if (reg < 0) + return reg; + + /* Link status is latched low so that momentary link drops + * can be detected. If link was already down read again + * to get the latest state. + */ + + if (!pdata->phy.link && !(reg & MDIO_STAT1_LSTATUS)) { + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); + if (reg < 0) + return reg; + } if (pdata->en_rx_adap) { /* if the link is available and adaptation is done, @@ -2804,9 +2812,7 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) xgbe_phy_set_mode(pdata, phy_data->cur_mode); } - /* check again for the link and adaptation status */ - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); - if ((reg & MDIO_STAT1_LSTATUS) && pdata->rx_adapt_done) + if (pdata->rx_adapt_done) return 1; } else if (reg & MDIO_STAT1_LSTATUS) return 1; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 6359bb87dc13..057379cd43ba 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -183,12 +183,12 @@ #define XGBE_LINK_TIMEOUT 5 #define XGBE_KR_TRAINING_WAIT_ITER 50 -#define XGBE_SGMII_AN_LINK_STATUS BIT(1) +#define XGBE_SGMII_AN_LINK_DUPLEX BIT(1) #define XGBE_SGMII_AN_LINK_SPEED (BIT(2) | BIT(3)) #define XGBE_SGMII_AN_LINK_SPEED_10 0x00 #define XGBE_SGMII_AN_LINK_SPEED_100 0x04 #define XGBE_SGMII_AN_LINK_SPEED_1000 0x08 -#define XGBE_SGMII_AN_LINK_DUPLEX BIT(4) +#define XGBE_SGMII_AN_LINK_STATUS BIT(4) /* ECC correctable error notification window (seconds) */ #define XGBE_ECC_LIMIT 60 diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index cfdb546a09e7..98a4d089270e 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -1861,14 +1861,21 @@ static u16 atl1_alloc_rx_buffers(struct atl1_adapter *adapter) break; } - buffer_info->alloced = 1; - buffer_info->skb = skb; - buffer_info->length = (u16) adapter->rx_buffer_len; page = virt_to_page(skb->data); offset = offset_in_page(skb->data); buffer_info->dma = dma_map_page(&pdev->dev, page, offset, adapter->rx_buffer_len, DMA_FROM_DEVICE); + if (dma_mapping_error(&pdev->dev, buffer_info->dma)) { + kfree_skb(skb); + adapter->soft_stats.rx_dropped++; + break; + } + + buffer_info->alloced = 1; + buffer_info->skb = skb; + buffer_info->length = (u16)adapter->rx_buffer_len; + rfd_desc->buffer_addr = cpu_to_le64(buffer_info->dma); rfd_desc->buf_len = cpu_to_le16(adapter->rx_buffer_len); rfd_desc->coalese = 0; @@ -2183,8 +2190,8 @@ static int atl1_tx_csum(struct atl1_adapter *adapter, struct sk_buff *skb, return 0; } -static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, - struct tx_packet_desc *ptpd) +static bool atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, + struct tx_packet_desc *ptpd) { struct atl1_tpd_ring *tpd_ring = &adapter->tpd_ring; struct atl1_buffer *buffer_info; @@ -2194,6 +2201,7 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, unsigned int nr_frags; unsigned int f; int retval; + u16 first_mapped; u16 next_to_use; u16 data_len; u8 hdr_len; @@ -2201,6 +2209,7 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, buf_len -= skb->data_len; nr_frags = skb_shinfo(skb)->nr_frags; next_to_use = atomic_read(&tpd_ring->next_to_use); + first_mapped = next_to_use; buffer_info = &tpd_ring->buffer_info[next_to_use]; BUG_ON(buffer_info->skb); /* put skb in last TPD */ @@ -2216,6 +2225,8 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, buffer_info->dma = dma_map_page(&adapter->pdev->dev, page, offset, hdr_len, DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) + goto dma_err; if (++next_to_use == tpd_ring->count) next_to_use = 0; @@ -2242,6 +2253,9 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, page, offset, buffer_info->length, DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, + buffer_info->dma)) + goto dma_err; if (++next_to_use == tpd_ring->count) next_to_use = 0; } @@ -2254,6 +2268,8 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, buffer_info->dma = dma_map_page(&adapter->pdev->dev, page, offset, buf_len, DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) + goto dma_err; if (++next_to_use == tpd_ring->count) next_to_use = 0; } @@ -2277,6 +2293,9 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, buffer_info->dma = skb_frag_dma_map(&adapter->pdev->dev, frag, i * ATL1_MAX_TX_BUF_LEN, buffer_info->length, DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, + buffer_info->dma)) + goto dma_err; if (++next_to_use == tpd_ring->count) next_to_use = 0; @@ -2285,6 +2304,22 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, /* last tpd's buffer-info */ buffer_info->skb = skb; + + return true; + + dma_err: + while (first_mapped != next_to_use) { + buffer_info = &tpd_ring->buffer_info[first_mapped]; + dma_unmap_page(&adapter->pdev->dev, + buffer_info->dma, + buffer_info->length, + DMA_TO_DEVICE); + buffer_info->dma = 0; + + if (++first_mapped == tpd_ring->count) + first_mapped = 0; + } + return false; } static void atl1_tx_queue(struct atl1_adapter *adapter, u16 count, @@ -2355,10 +2390,8 @@ static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb, len = skb_headlen(skb); - if (unlikely(skb->len <= 0)) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } + if (unlikely(skb->len <= 0)) + goto drop_packet; nr_frags = skb_shinfo(skb)->nr_frags; for (f = 0; f < nr_frags; f++) { @@ -2371,10 +2404,9 @@ static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb, if (mss) { if (skb->protocol == htons(ETH_P_IP)) { proto_hdr_len = skb_tcp_all_headers(skb); - if (unlikely(proto_hdr_len > len)) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } + if (unlikely(proto_hdr_len > len)) + goto drop_packet; + /* need additional TPD ? */ if (proto_hdr_len != len) count += (len - proto_hdr_len + @@ -2406,23 +2438,26 @@ static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb, } tso = atl1_tso(adapter, skb, ptpd); - if (tso < 0) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } + if (tso < 0) + goto drop_packet; if (!tso) { ret_val = atl1_tx_csum(adapter, skb, ptpd); - if (ret_val < 0) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } + if (ret_val < 0) + goto drop_packet; } - atl1_tx_map(adapter, skb, ptpd); + if (!atl1_tx_map(adapter, skb, ptpd)) + goto drop_packet; + atl1_tx_queue(adapter, count, ptpd); atl1_update_mailbox(adapter); return NETDEV_TX_OK; + +drop_packet: + adapter->soft_stats.tx_errors++; + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; } static int atl1_rings_clean(struct napi_struct *napi, int budget) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 773f5ad972a2..6bc8dfdb3d4b 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1864,10 +1864,10 @@ static int enic_change_mtu(struct net_device *netdev, int new_mtu) if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) return -EOPNOTSUPP; - if (netdev->mtu > enic->port_mtu) + if (new_mtu > enic->port_mtu) netdev_warn(netdev, "interface MTU (%d) set higher than port MTU (%d)\n", - netdev->mtu, enic->port_mtu); + new_mtu, enic->port_mtu); return _enic_change_mtu(netdev, new_mtu); } diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 2ec2c3dab250..b82f121cadad 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -3939,6 +3939,7 @@ static int dpaa2_eth_setup_rx_flow(struct dpaa2_eth_priv *priv, MEM_TYPE_PAGE_ORDER0, NULL); if (err) { dev_err(dev, "xdp_rxq_info_reg_mem_model failed\n"); + xdp_rxq_info_unreg(&fq->channel->xdp_rxq); return err; } @@ -4432,17 +4433,25 @@ static int dpaa2_eth_bind_dpni(struct dpaa2_eth_priv *priv) return -EINVAL; } if (err) - return err; + goto out; } err = dpni_get_qdid(priv->mc_io, 0, priv->mc_token, DPNI_QUEUE_TX, &priv->tx_qdid); if (err) { dev_err(dev, "dpni_get_qdid() failed\n"); - return err; + goto out; } return 0; + +out: + while (i--) { + if (priv->fq[i].type == DPAA2_RX_FQ && + xdp_rxq_info_is_reg(&priv->fq[i].channel->xdp_rxq)) + xdp_rxq_info_unreg(&priv->fq[i].channel->xdp_rxq); + } + return err; } /* Allocate rings for storing incoming frame descriptors */ @@ -4825,6 +4834,17 @@ static void dpaa2_eth_del_ch_napi(struct dpaa2_eth_priv *priv) } } +static void dpaa2_eth_free_rx_xdp_rxq(struct dpaa2_eth_priv *priv) +{ + int i; + + for (i = 0; i < priv->num_fqs; i++) { + if (priv->fq[i].type == DPAA2_RX_FQ && + xdp_rxq_info_is_reg(&priv->fq[i].channel->xdp_rxq)) + xdp_rxq_info_unreg(&priv->fq[i].channel->xdp_rxq); + } +} + static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) { struct device *dev; @@ -5028,6 +5048,7 @@ err_alloc_percpu_extras: free_percpu(priv->percpu_stats); err_alloc_percpu_stats: dpaa2_eth_del_ch_napi(priv); + dpaa2_eth_free_rx_xdp_rxq(priv); err_bind: dpaa2_eth_free_dpbps(priv); err_dpbp_setup: @@ -5080,6 +5101,7 @@ static void dpaa2_eth_remove(struct fsl_mc_device *ls_dev) free_percpu(priv->percpu_extras); dpaa2_eth_del_ch_napi(priv); + dpaa2_eth_free_rx_xdp_rxq(priv); dpaa2_eth_free_dpbps(priv); dpaa2_eth_free_dpio(priv); dpaa2_eth_free_dpni(priv); diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq.c b/drivers/net/ethernet/intel/idpf/idpf_controlq.c index b28991dd1870..48b8e184f3db 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_controlq.c +++ b/drivers/net/ethernet/intel/idpf/idpf_controlq.c @@ -96,7 +96,7 @@ static void idpf_ctlq_init_rxq_bufs(struct idpf_ctlq_info *cq) */ static void idpf_ctlq_shutdown(struct idpf_hw *hw, struct idpf_ctlq_info *cq) { - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); /* free ring buffers and the ring itself */ idpf_ctlq_dealloc_ring_res(hw, cq); @@ -104,8 +104,7 @@ static void idpf_ctlq_shutdown(struct idpf_hw *hw, struct idpf_ctlq_info *cq) /* Set ring_size to 0 to indicate uninitialized queue */ cq->ring_size = 0; - mutex_unlock(&cq->cq_lock); - mutex_destroy(&cq->cq_lock); + spin_unlock(&cq->cq_lock); } /** @@ -173,7 +172,7 @@ int idpf_ctlq_add(struct idpf_hw *hw, idpf_ctlq_init_regs(hw, cq, is_rxq); - mutex_init(&cq->cq_lock); + spin_lock_init(&cq->cq_lock); list_add(&cq->cq_list, &hw->cq_list_head); @@ -272,7 +271,7 @@ int idpf_ctlq_send(struct idpf_hw *hw, struct idpf_ctlq_info *cq, int err = 0; int i; - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); /* Ensure there are enough descriptors to send all messages */ num_desc_avail = IDPF_CTLQ_DESC_UNUSED(cq); @@ -332,7 +331,7 @@ int idpf_ctlq_send(struct idpf_hw *hw, struct idpf_ctlq_info *cq, wr32(hw, cq->reg.tail, cq->next_to_use); err_unlock: - mutex_unlock(&cq->cq_lock); + spin_unlock(&cq->cq_lock); return err; } @@ -364,7 +363,7 @@ int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count, if (*clean_count > cq->ring_size) return -EBADR; - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); ntc = cq->next_to_clean; @@ -397,7 +396,7 @@ int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count, cq->next_to_clean = ntc; - mutex_unlock(&cq->cq_lock); + spin_unlock(&cq->cq_lock); /* Return number of descriptors actually cleaned */ *clean_count = i; @@ -435,7 +434,7 @@ int idpf_ctlq_post_rx_buffs(struct idpf_hw *hw, struct idpf_ctlq_info *cq, if (*buff_count > 0) buffs_avail = true; - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); if (tbp >= cq->ring_size) tbp = 0; @@ -524,7 +523,7 @@ post_buffs_out: wr32(hw, cq->reg.tail, cq->next_to_post); } - mutex_unlock(&cq->cq_lock); + spin_unlock(&cq->cq_lock); /* return the number of buffers that were not posted */ *buff_count = *buff_count - i; @@ -552,7 +551,7 @@ int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg, u16 i; /* take the lock before we start messing with the ring */ - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); ntc = cq->next_to_clean; @@ -614,7 +613,7 @@ int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg, cq->next_to_clean = ntc; - mutex_unlock(&cq->cq_lock); + spin_unlock(&cq->cq_lock); *num_q_msg = i; if (*num_q_msg == 0) diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h index 9642494a67d8..3414c5f9a831 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h +++ b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h @@ -99,7 +99,7 @@ struct idpf_ctlq_info { enum idpf_ctlq_type cq_type; int q_id; - struct mutex cq_lock; /* control queue lock */ + spinlock_t cq_lock; /* control queue lock */ /* used for interrupt processing */ u16 next_to_use; u16 next_to_clean; diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c index 9bdb309b668e..eaf7a2606faa 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c +++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c @@ -47,7 +47,7 @@ static u32 idpf_get_rxfh_key_size(struct net_device *netdev) struct idpf_vport_user_config_data *user_config; if (!idpf_is_cap_ena_all(np->adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) - return -EOPNOTSUPP; + return 0; user_config = &np->adapter->vport_config[np->vport_idx]->user_config; @@ -66,7 +66,7 @@ static u32 idpf_get_rxfh_indir_size(struct net_device *netdev) struct idpf_vport_user_config_data *user_config; if (!idpf_is_cap_ena_all(np->adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) - return -EOPNOTSUPP; + return 0; user_config = &np->adapter->vport_config[np->vport_idx]->user_config; diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 4eb20ec2accb..80382ff4a5fa 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -2314,8 +2314,12 @@ void *idpf_alloc_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem, u64 size) struct idpf_adapter *adapter = hw->back; size_t sz = ALIGN(size, 4096); - mem->va = dma_alloc_coherent(&adapter->pdev->dev, sz, - &mem->pa, GFP_KERNEL); + /* The control queue resources are freed under a spinlock, contiguous + * pages will avoid IOMMU remapping and the use vmap (and vunmap in + * dma_free_*() path. + */ + mem->va = dma_alloc_attrs(&adapter->pdev->dev, sz, &mem->pa, + GFP_KERNEL, DMA_ATTR_FORCE_CONTIGUOUS); mem->size = sz; return mem->va; @@ -2330,8 +2334,8 @@ void idpf_free_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem) { struct idpf_adapter *adapter = hw->back; - dma_free_coherent(&adapter->pdev->dev, mem->size, - mem->va, mem->pa); + dma_free_attrs(&adapter->pdev->dev, mem->size, + mem->va, mem->pa, DMA_ATTR_FORCE_CONTIGUOUS); mem->size = 0; mem->va = NULL; mem->pa = 0; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 686793c539f2..031c332f66c4 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -7115,6 +7115,10 @@ static int igc_probe(struct pci_dev *pdev, adapter->port_num = hw->bus.func; adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); + /* Disable ASPM L1.2 on I226 devices to avoid packet loss */ + if (igc_is_device_id_i226(hw)) + pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2); + err = pci_save_state(pdev); if (err) goto err_ioremap; @@ -7500,6 +7504,9 @@ static int __igc_resume(struct device *dev, bool rpm) pci_enable_wake(pdev, PCI_D3hot, 0); pci_enable_wake(pdev, PCI_D3cold, 0); + if (igc_is_device_id_i226(hw)) + pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2); + if (igc_init_interrupt_scheme(adapter, true)) { netdev_err(netdev, "Unable to allocate memory for queues\n"); return -ENOMEM; @@ -7625,6 +7632,9 @@ static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev) pci_enable_wake(pdev, PCI_D3hot, 0); pci_enable_wake(pdev, PCI_D3cold, 0); + if (igc_is_device_id_i226(hw)) + pci_disable_link_state_locked(pdev, PCIE_LINK_STATE_L1_2); + /* In case of PCI error, adapter loses its HW address * so we should re-assign it here. */ diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index ddca8fc7883e..26119d02a94d 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -3336,7 +3336,7 @@ static int niu_rbr_add_page(struct niu *np, struct rx_ring_info *rp, addr = np->ops->map_page(np->device, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); - if (!addr) { + if (np->ops->mapping_error(np->device, addr)) { __free_page(page); return -ENOMEM; } @@ -6676,6 +6676,8 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb, len = skb_headlen(skb); mapping = np->ops->map_single(np->device, skb->data, len, DMA_TO_DEVICE); + if (np->ops->mapping_error(np->device, mapping)) + goto out_drop; prod = rp->prod; @@ -6717,6 +6719,8 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb, mapping = np->ops->map_page(np->device, skb_frag_page(frag), skb_frag_off(frag), len, DMA_TO_DEVICE); + if (np->ops->mapping_error(np->device, mapping)) + goto out_unmap; rp->tx_buffs[prod].skb = NULL; rp->tx_buffs[prod].mapping = mapping; @@ -6741,6 +6745,19 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb, out: return NETDEV_TX_OK; +out_unmap: + while (i--) { + const skb_frag_t *frag; + + prod = PREVIOUS_TX(rp, prod); + frag = &skb_shinfo(skb)->frags[i]; + np->ops->unmap_page(np->device, rp->tx_buffs[prod].mapping, + skb_frag_size(frag), DMA_TO_DEVICE); + } + + np->ops->unmap_single(np->device, rp->tx_buffs[rp->prod].mapping, + skb_headlen(skb), DMA_TO_DEVICE); + out_drop: rp->tx_errors++; kfree_skb(skb); @@ -9644,6 +9661,11 @@ static void niu_pci_unmap_single(struct device *dev, u64 dma_address, dma_unmap_single(dev, dma_address, size, direction); } +static int niu_pci_mapping_error(struct device *dev, u64 addr) +{ + return dma_mapping_error(dev, addr); +} + static const struct niu_ops niu_pci_ops = { .alloc_coherent = niu_pci_alloc_coherent, .free_coherent = niu_pci_free_coherent, @@ -9651,6 +9673,7 @@ static const struct niu_ops niu_pci_ops = { .unmap_page = niu_pci_unmap_page, .map_single = niu_pci_map_single, .unmap_single = niu_pci_unmap_single, + .mapping_error = niu_pci_mapping_error, }; static void niu_driver_version(void) @@ -10019,6 +10042,11 @@ static void niu_phys_unmap_single(struct device *dev, u64 dma_address, /* Nothing to do. */ } +static int niu_phys_mapping_error(struct device *dev, u64 dma_address) +{ + return false; +} + static const struct niu_ops niu_phys_ops = { .alloc_coherent = niu_phys_alloc_coherent, .free_coherent = niu_phys_free_coherent, @@ -10026,6 +10054,7 @@ static const struct niu_ops niu_phys_ops = { .unmap_page = niu_phys_unmap_page, .map_single = niu_phys_map_single, .unmap_single = niu_phys_unmap_single, + .mapping_error = niu_phys_mapping_error, }; static int niu_of_probe(struct platform_device *op) diff --git a/drivers/net/ethernet/sun/niu.h b/drivers/net/ethernet/sun/niu.h index 04c215f91fc0..0b169c08b0f2 100644 --- a/drivers/net/ethernet/sun/niu.h +++ b/drivers/net/ethernet/sun/niu.h @@ -2879,6 +2879,9 @@ struct tx_ring_info { #define NEXT_TX(tp, index) \ (((index) + 1) < (tp)->pending ? ((index) + 1) : 0) +#define PREVIOUS_TX(tp, index) \ + (((index) - 1) >= 0 ? ((index) - 1) : (((tp)->pending) - 1)) + static inline u32 niu_tx_avail(struct tx_ring_info *tp) { return (tp->pending - @@ -3140,6 +3143,7 @@ struct niu_ops { enum dma_data_direction direction); void (*unmap_single)(struct device *dev, u64 dma_address, size_t size, enum dma_data_direction direction); + int (*mapping_error)(struct device *dev, u64 dma_address); }; struct niu_link_config { diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index c57cc4f27249..55e252789db3 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -1705,6 +1705,7 @@ static void wx_set_rss_queues(struct wx *wx) clear_bit(WX_FLAG_FDIR_HASH, wx->flags); + wx->ring_feature[RING_F_FDIR].indices = 1; /* Use Flow Director in addition to RSS to ensure the best * distribution of flows across cores, even when an FDIR flow * isn't matched. @@ -1746,7 +1747,7 @@ static void wx_set_num_queues(struct wx *wx) */ static int wx_acquire_msix_vectors(struct wx *wx) { - struct irq_affinity affd = { .pre_vectors = 1 }; + struct irq_affinity affd = { .post_vectors = 1 }; int nvecs, i; /* We start by asking for one vector per queue pair */ @@ -1783,16 +1784,24 @@ static int wx_acquire_msix_vectors(struct wx *wx) return nvecs; } - wx->msix_entry->entry = 0; - wx->msix_entry->vector = pci_irq_vector(wx->pdev, 0); nvecs -= 1; for (i = 0; i < nvecs; i++) { wx->msix_q_entries[i].entry = i; - wx->msix_q_entries[i].vector = pci_irq_vector(wx->pdev, i + 1); + wx->msix_q_entries[i].vector = pci_irq_vector(wx->pdev, i); } wx->num_q_vectors = nvecs; + wx->msix_entry->entry = nvecs; + wx->msix_entry->vector = pci_irq_vector(wx->pdev, nvecs); + + if (test_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags)) { + wx->msix_entry->entry = 0; + wx->msix_entry->vector = pci_irq_vector(wx->pdev, 0); + wx->msix_q_entries[0].entry = 0; + wx->msix_q_entries[0].vector = pci_irq_vector(wx->pdev, 1); + } + return 0; } @@ -2291,6 +2300,8 @@ static void wx_set_ivar(struct wx *wx, s8 direction, if (direction == -1) { /* other causes */ + if (test_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags)) + msix_vector = 0; msix_vector |= WX_PX_IVAR_ALLOC_VAL; index = 0; ivar = rd32(wx, WX_PX_MISC_IVAR); @@ -2299,8 +2310,6 @@ static void wx_set_ivar(struct wx *wx, s8 direction, wr32(wx, WX_PX_MISC_IVAR, ivar); } else { /* tx or rx causes */ - if (!(wx->mac.type == wx_mac_em && wx->num_vfs == 7)) - msix_vector += 1; /* offset for queue vectors */ msix_vector |= WX_PX_IVAR_ALLOC_VAL; index = ((16 * (queue & 1)) + (8 * direction)); ivar = rd32(wx, WX_PX_IVAR(queue >> 1)); @@ -2339,7 +2348,7 @@ void wx_write_eitr(struct wx_q_vector *q_vector) itr_reg |= WX_PX_ITR_CNT_WDIS; - wr32(wx, WX_PX_ITR(v_idx + 1), itr_reg); + wr32(wx, WX_PX_ITR(v_idx), itr_reg); } /** @@ -2392,9 +2401,9 @@ void wx_configure_vectors(struct wx *wx) wx_write_eitr(q_vector); } - wx_set_ivar(wx, -1, 0, 0); + wx_set_ivar(wx, -1, 0, v_idx); if (pdev->msix_enabled) - wr32(wx, WX_PX_ITR(0), 1950); + wr32(wx, WX_PX_ITR(v_idx), 1950); } EXPORT_SYMBOL(wx_configure_vectors); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_sriov.c b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c index e8656d9d733b..c82ae137756c 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_sriov.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c @@ -64,6 +64,7 @@ static void wx_sriov_clear_data(struct wx *wx) wr32m(wx, WX_PSR_VM_CTL, WX_PSR_VM_CTL_POOL_MASK, 0); wx->ring_feature[RING_F_VMDQ].offset = 0; + clear_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags); clear_bit(WX_FLAG_SRIOV_ENABLED, wx->flags); /* Disable VMDq flag so device will be set in NM mode */ if (wx->ring_feature[RING_F_VMDQ].limit == 1) @@ -78,6 +79,9 @@ static int __wx_enable_sriov(struct wx *wx, u8 num_vfs) set_bit(WX_FLAG_SRIOV_ENABLED, wx->flags); dev_info(&wx->pdev->dev, "SR-IOV enabled with %d VFs\n", num_vfs); + if (num_vfs == 7 && wx->mac.type == wx_mac_em) + set_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags); + /* Enable VMDq flag so device will be set in VM mode */ set_bit(WX_FLAG_VMDQ_ENABLED, wx->flags); if (!wx->ring_feature[RING_F_VMDQ].limit) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 7730c9fc3e02..c363379126c0 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -1191,6 +1191,7 @@ enum wx_pf_flags { WX_FLAG_VMDQ_ENABLED, WX_FLAG_VLAN_PROMISC, WX_FLAG_SRIOV_ENABLED, + WX_FLAG_IRQ_VECTOR_SHARED, WX_FLAG_FDIR_CAPABLE, WX_FLAG_FDIR_HASH, WX_FLAG_FDIR_PERFECT, @@ -1343,7 +1344,7 @@ struct wx { }; #define WX_INTR_ALL (~0ULL) -#define WX_INTR_Q(i) BIT((i) + 1) +#define WX_INTR_Q(i) BIT((i)) /* register operations */ #define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg))) diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index b5022c49dc5e..e0fc897b0a58 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -161,7 +161,7 @@ static void ngbe_irq_enable(struct wx *wx, bool queues) if (queues) wx_intr_enable(wx, NGBE_INTR_ALL); else - wx_intr_enable(wx, NGBE_INTR_MISC); + wx_intr_enable(wx, NGBE_INTR_MISC(wx)); } /** @@ -286,7 +286,7 @@ static int ngbe_request_msix_irqs(struct wx *wx) * for queue. But when num_vfs == 7, vector[1] is assigned to vf6. * Misc and queue should reuse interrupt vector[0]. */ - if (wx->num_vfs == 7) + if (test_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags)) err = request_irq(wx->msix_entry->vector, ngbe_misc_and_queue, 0, netdev->name, wx); else diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h index bb74263f0498..3b2ca7f47e33 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h @@ -87,7 +87,7 @@ #define NGBE_PX_MISC_IC_TIMESYNC BIT(11) /* time sync */ #define NGBE_INTR_ALL 0x1FF -#define NGBE_INTR_MISC BIT(0) +#define NGBE_INTR_MISC(A) BIT((A)->msix_entry->entry) #define NGBE_PHY_CONFIG(reg_offset) (0x14000 + ((reg_offset) * 4)) #define NGBE_CFG_LAN_SPEED 0x14440 diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c index 7dbcf41750c1..dc87ccad9652 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c @@ -294,6 +294,7 @@ static void txgbe_mac_link_up_aml(struct phylink_config *config, wx_fc_enable(wx, tx_pause, rx_pause); txgbe_reconfig_mac(wx); + txgbe_enable_sec_tx_path(wx); txcfg = rd32(wx, TXGBE_AML_MAC_TX_CFG); txcfg &= ~TXGBE_AML_MAC_TX_CFG_SPEED_MASK; diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c index 20b9a28bcb55..3885283681ec 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c @@ -31,7 +31,7 @@ void txgbe_irq_enable(struct wx *wx, bool queues) wr32(wx, WX_PX_MISC_IEN, misc_ien); /* unmask interrupt */ - wx_intr_enable(wx, TXGBE_INTR_MISC); + wx_intr_enable(wx, TXGBE_INTR_MISC(wx)); if (queues) wx_intr_enable(wx, TXGBE_INTR_QALL(wx)); } @@ -78,7 +78,6 @@ free_queue_irqs: free_irq(wx->msix_q_entries[vector].vector, wx->q_vector[vector]); } - wx_reset_interrupt_capability(wx); return err; } @@ -132,7 +131,7 @@ static irqreturn_t txgbe_misc_irq_handle(int irq, void *data) txgbe->eicr = eicr; if (eicr & TXGBE_PX_MISC_IC_VF_MBOX) { wx_msg_task(txgbe->wx); - wx_intr_enable(wx, TXGBE_INTR_MISC); + wx_intr_enable(wx, TXGBE_INTR_MISC(wx)); } return IRQ_WAKE_THREAD; } @@ -184,7 +183,7 @@ static irqreturn_t txgbe_misc_irq_thread_fn(int irq, void *data) nhandled++; } - wx_intr_enable(wx, TXGBE_INTR_MISC); + wx_intr_enable(wx, TXGBE_INTR_MISC(wx)); return (nhandled > 0 ? IRQ_HANDLED : IRQ_NONE); } @@ -211,6 +210,7 @@ void txgbe_free_misc_irq(struct txgbe *txgbe) free_irq(txgbe->link_irq, txgbe); free_irq(txgbe->misc.irq, txgbe); txgbe_del_irq_domain(txgbe); + txgbe->wx->misc_irq_domain = false; } int txgbe_setup_misc_irq(struct txgbe *txgbe) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index f3d2778b8e35..a5867f3c93fc 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -458,10 +458,14 @@ static int txgbe_open(struct net_device *netdev) wx_configure(wx); - err = txgbe_request_queue_irqs(wx); + err = txgbe_setup_misc_irq(wx->priv); if (err) goto err_free_resources; + err = txgbe_request_queue_irqs(wx); + if (err) + goto err_free_misc_irq; + /* Notify the stack of the actual queue counts. */ err = netif_set_real_num_tx_queues(netdev, wx->num_tx_queues); if (err) @@ -479,6 +483,9 @@ static int txgbe_open(struct net_device *netdev) err_free_irq: wx_free_irq(wx); +err_free_misc_irq: + txgbe_free_misc_irq(wx->priv); + wx_reset_interrupt_capability(wx); err_free_resources: wx_free_resources(wx); err_reset: @@ -519,6 +526,7 @@ static int txgbe_close(struct net_device *netdev) wx_ptp_stop(wx); txgbe_down(wx); wx_free_irq(wx); + txgbe_free_misc_irq(wx->priv); wx_free_resources(wx); txgbe_fdir_filter_exit(wx); wx_control_hw(wx, false); @@ -564,7 +572,6 @@ static void txgbe_shutdown(struct pci_dev *pdev) int txgbe_setup_tc(struct net_device *dev, u8 tc) { struct wx *wx = netdev_priv(dev); - struct txgbe *txgbe = wx->priv; /* Hardware has to reinitialize queues and interrupts to * match packet buffer alignment. Unfortunately, the @@ -575,7 +582,6 @@ int txgbe_setup_tc(struct net_device *dev, u8 tc) else txgbe_reset(wx); - txgbe_free_misc_irq(txgbe); wx_clear_interrupt_scheme(wx); if (tc) @@ -584,7 +590,6 @@ int txgbe_setup_tc(struct net_device *dev, u8 tc) netdev_reset_tc(dev); wx_init_interrupt_scheme(wx); - txgbe_setup_misc_irq(txgbe); if (netif_running(dev)) txgbe_open(dev); @@ -882,13 +887,9 @@ static int txgbe_probe(struct pci_dev *pdev, txgbe_init_fdir(txgbe); - err = txgbe_setup_misc_irq(txgbe); - if (err) - goto err_release_hw; - err = txgbe_init_phy(txgbe); if (err) - goto err_free_misc_irq; + goto err_release_hw; err = register_netdev(netdev); if (err) @@ -916,8 +917,6 @@ static int txgbe_probe(struct pci_dev *pdev, err_remove_phy: txgbe_remove_phy(txgbe); -err_free_misc_irq: - txgbe_free_misc_irq(txgbe); err_release_hw: wx_clear_interrupt_scheme(wx); wx_control_hw(wx, false); @@ -957,7 +956,6 @@ static void txgbe_remove(struct pci_dev *pdev) unregister_netdev(netdev); txgbe_remove_phy(txgbe); - txgbe_free_misc_irq(txgbe); wx_free_isb_resources(wx); pci_release_selected_regions(pdev, diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index 42ec815159e8..41915d7dd372 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -302,8 +302,8 @@ struct txgbe_fdir_filter { #define TXGBE_DEFAULT_RX_WORK 128 #endif -#define TXGBE_INTR_MISC BIT(0) -#define TXGBE_INTR_QALL(A) GENMASK((A)->num_q_vectors, 1) +#define TXGBE_INTR_MISC(A) BIT((A)->num_q_vectors) +#define TXGBE_INTR_QALL(A) (TXGBE_INTR_MISC(A) - 1) #define TXGBE_MAX_EITR GENMASK(11, 3) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index f53e255116ea..e3ca6e91efe1 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -4567,8 +4567,6 @@ static void lan78xx_disconnect(struct usb_interface *intf) if (!dev) return; - netif_napi_del(&dev->napi); - udev = interface_to_usbdev(intf); net = dev->net; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index e53ba600605a..5d674eb9a0f2 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -778,6 +778,26 @@ static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1); } +static int check_mergeable_len(struct net_device *dev, void *mrg_ctx, + unsigned int len) +{ + unsigned int headroom, tailroom, room, truesize; + + truesize = mergeable_ctx_to_truesize(mrg_ctx); + headroom = mergeable_ctx_to_headroom(mrg_ctx); + tailroom = headroom ? sizeof(struct skb_shared_info) : 0; + room = SKB_DATA_ALIGN(headroom + tailroom); + + if (len > truesize - room) { + pr_debug("%s: rx error: len %u exceeds truesize %lu\n", + dev->name, len, (unsigned long)(truesize - room)); + DEV_STATS_INC(dev, rx_length_errors); + return -1; + } + + return 0; +} + static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen, unsigned int headroom, unsigned int len) @@ -1084,7 +1104,7 @@ static bool tx_may_stop(struct virtnet_info *vi, * Since most packets only take 1 or 2 ring slots, stopping the queue * early means 16 slots are typically wasted. */ - if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { + if (sq->vq->num_free < MAX_SKB_FRAGS + 2) { struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); netif_tx_stop_queue(txq); @@ -1116,7 +1136,7 @@ static void check_sq_full_and_disable(struct virtnet_info *vi, } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { /* More just got used, free them then recheck. */ free_old_xmit(sq, txq, false); - if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { + if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) { netif_start_subqueue(dev, qnum); u64_stats_update_begin(&sq->stats.syncp); u64_stats_inc(&sq->stats.wake); @@ -1127,15 +1147,29 @@ static void check_sq_full_and_disable(struct virtnet_info *vi, } } +/* Note that @len is the length of received data without virtio header */ static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi, - struct receive_queue *rq, void *buf, u32 len) + struct receive_queue *rq, void *buf, + u32 len, bool first_buf) { struct xdp_buff *xdp; u32 bufsize; xdp = (struct xdp_buff *)buf; - bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool) + vi->hdr_len; + /* In virtnet_add_recvbuf_xsk, we use part of XDP_PACKET_HEADROOM for + * virtio header and ask the vhost to fill data from + * hard_start + XDP_PACKET_HEADROOM - vi->hdr_len + * The first buffer has virtio header so the remaining region for frame + * data is + * xsk_pool_get_rx_frame_size() + * While other buffers than the first one do not have virtio header, so + * the maximum frame data's length can be + * xsk_pool_get_rx_frame_size() + vi->hdr_len + */ + bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool); + if (!first_buf) + bufsize += vi->hdr_len; if (unlikely(len > bufsize)) { pr_debug("%s: rx error: len %u exceeds truesize %u\n", @@ -1260,7 +1294,7 @@ static int xsk_append_merge_buffer(struct virtnet_info *vi, u64_stats_add(&stats->bytes, len); - xdp = buf_to_xdp(vi, rq, buf, len); + xdp = buf_to_xdp(vi, rq, buf, len, false); if (!xdp) goto err; @@ -1358,7 +1392,7 @@ static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queu u64_stats_add(&stats->bytes, len); - xdp = buf_to_xdp(vi, rq, buf, len); + xdp = buf_to_xdp(vi, rq, buf, len, true); if (!xdp) return; @@ -1797,7 +1831,8 @@ static unsigned int virtnet_get_headroom(struct virtnet_info *vi) * across multiple buffers (num_buf > 1), and we make sure buffers * have enough headroom. */ -static struct page *xdp_linearize_page(struct receive_queue *rq, +static struct page *xdp_linearize_page(struct net_device *dev, + struct receive_queue *rq, int *num_buf, struct page *p, int offset, @@ -1817,18 +1852,27 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, memcpy(page_address(page) + page_off, page_address(p) + offset, *len); page_off += *len; + /* Only mergeable mode can go inside this while loop. In small mode, + * *num_buf == 1, so it cannot go inside. + */ while (--*num_buf) { unsigned int buflen; void *buf; + void *ctx; int off; - buf = virtnet_rq_get_buf(rq, &buflen, NULL); + buf = virtnet_rq_get_buf(rq, &buflen, &ctx); if (unlikely(!buf)) goto err_buf; p = virt_to_head_page(buf); off = buf - page_address(p); + if (check_mergeable_len(dev, ctx, buflen)) { + put_page(p); + goto err_buf; + } + /* guard against a misconfigured or uncooperative backend that * is sending packet larger than the MTU. */ @@ -1917,7 +1961,7 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev, headroom = vi->hdr_len + header_offset; buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - xdp_page = xdp_linearize_page(rq, &num_buf, page, + xdp_page = xdp_linearize_page(dev, rq, &num_buf, page, offset, header_offset, &tlen); if (!xdp_page) @@ -2126,10 +2170,9 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev, struct virtnet_rq_stats *stats) { struct virtio_net_hdr_mrg_rxbuf *hdr = buf; - unsigned int headroom, tailroom, room; - unsigned int truesize, cur_frag_size; struct skb_shared_info *shinfo; unsigned int xdp_frags_truesz = 0; + unsigned int truesize; struct page *page; skb_frag_t *frag; int offset; @@ -2172,21 +2215,14 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev, page = virt_to_head_page(buf); offset = buf - page_address(page); - truesize = mergeable_ctx_to_truesize(ctx); - headroom = mergeable_ctx_to_headroom(ctx); - tailroom = headroom ? sizeof(struct skb_shared_info) : 0; - room = SKB_DATA_ALIGN(headroom + tailroom); - - cur_frag_size = truesize; - xdp_frags_truesz += cur_frag_size; - if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) { + if (check_mergeable_len(dev, ctx, len)) { put_page(page); - pr_debug("%s: rx error: len %u exceeds truesize %lu\n", - dev->name, len, (unsigned long)(truesize - room)); - DEV_STATS_INC(dev, rx_length_errors); goto err; } + truesize = mergeable_ctx_to_truesize(ctx); + xdp_frags_truesz += truesize; + frag = &shinfo->frags[shinfo->nr_frags++]; skb_frag_fill_page_desc(frag, page, offset, len); if (page_is_pfmemalloc(page)) @@ -2252,7 +2288,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, */ if (!xdp_prog->aux->xdp_has_frags) { /* linearize data for XDP */ - xdp_page = xdp_linearize_page(rq, num_buf, + xdp_page = xdp_linearize_page(vi->dev, rq, num_buf, *page, offset, XDP_PACKET_HEADROOM, len); @@ -2400,18 +2436,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, struct sk_buff *head_skb, *curr_skb; unsigned int truesize = mergeable_ctx_to_truesize(ctx); unsigned int headroom = mergeable_ctx_to_headroom(ctx); - unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; - unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); head_skb = NULL; u64_stats_add(&stats->bytes, len - vi->hdr_len); - if (unlikely(len > truesize - room)) { - pr_debug("%s: rx error: len %u exceeds truesize %lu\n", - dev->name, len, (unsigned long)(truesize - room)); - DEV_STATS_INC(dev, rx_length_errors); + if (check_mergeable_len(dev, ctx, len)) goto err_skb; - } if (unlikely(vi->xdp_enabled)) { struct bpf_prog *xdp_prog; @@ -2446,17 +2476,10 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, u64_stats_add(&stats->bytes, len); page = virt_to_head_page(buf); - truesize = mergeable_ctx_to_truesize(ctx); - headroom = mergeable_ctx_to_headroom(ctx); - tailroom = headroom ? sizeof(struct skb_shared_info) : 0; - room = SKB_DATA_ALIGN(headroom + tailroom); - if (unlikely(len > truesize - room)) { - pr_debug("%s: rx error: len %u exceeds truesize %lu\n", - dev->name, len, (unsigned long)(truesize - room)); - DEV_STATS_INC(dev, rx_length_errors); + if (check_mergeable_len(dev, ctx, len)) goto err_skb; - } + truesize = mergeable_ctx_to_truesize(ctx); curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, buf, len, truesize); if (!curr_skb) @@ -2998,7 +3021,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq, int budget) free_old_xmit(sq, txq, !!budget); } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); - if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { + if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) { if (netif_tx_queue_stopped(txq)) { u64_stats_update_begin(&sq->stats.syncp); u64_stats_inc(&sq->stats.wake); @@ -3195,7 +3218,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) else free_old_xmit(sq, txq, !!budget); - if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { + if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) { if (netif_tx_queue_stopped(txq)) { u64_stats_update_begin(&sq->stats.syncp); u64_stats_inc(&sq->stats.wake); @@ -3481,6 +3504,12 @@ static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq, { int qindex, err; + if (ring_num <= MAX_SKB_FRAGS + 2) { + netdev_err(vi->dev, "tx size (%d) cannot be smaller than %d\n", + ring_num, MAX_SKB_FRAGS + 2); + return -EINVAL; + } + qindex = sq - vi->sq; virtnet_tx_pause(vi, sq); diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index e021f1106bea..cc5d05dc395c 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -473,10 +473,17 @@ struct Scsi_Host *scsi_host_alloc(const struct scsi_host_template *sht, int priv else shost->max_sectors = SCSI_DEFAULT_MAX_SECTORS; - if (sht->max_segment_size) - shost->max_segment_size = sht->max_segment_size; - else - shost->max_segment_size = BLK_MAX_SEGMENT_SIZE; + shost->virt_boundary_mask = sht->virt_boundary_mask; + if (shost->virt_boundary_mask) { + WARN_ON_ONCE(sht->max_segment_size && + sht->max_segment_size != UINT_MAX); + shost->max_segment_size = UINT_MAX; + } else { + if (sht->max_segment_size) + shost->max_segment_size = sht->max_segment_size; + else + shost->max_segment_size = BLK_MAX_SEGMENT_SIZE; + } /* 32-byte (dword) is a common minimum for HBAs. */ if (sht->dma_alignment) @@ -492,9 +499,6 @@ struct Scsi_Host *scsi_host_alloc(const struct scsi_host_template *sht, int priv else shost->dma_boundary = 0xffffffff; - if (sht->virt_boundary_mask) - shost->virt_boundary_mask = sht->virt_boundary_mask; - device_initialize(&shost->shost_gendev); dev_set_name(&shost->shost_gendev, "host%d", shost->host_no); shost->shost_gendev.bus = &scsi_bus_type; diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 0cd6f3e14882..13b6cb1b93ac 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -2147,7 +2147,7 @@ qla24xx_get_port_database(scsi_qla_host_t *vha, u16 nport_handle, pdb_dma = dma_map_single(&vha->hw->pdev->dev, pdb, sizeof(*pdb), DMA_FROM_DEVICE); - if (!pdb_dma) { + if (dma_mapping_error(&vha->hw->pdev->dev, pdb_dma)) { ql_log(ql_log_warn, vha, 0x1116, "Failed to map dma buffer.\n"); return QLA_MEMORY_ALLOC_FAILED; } diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index d4141656b204..a39f1da4ce47 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -3420,6 +3420,8 @@ static int qla4xxx_alloc_pdu(struct iscsi_task *task, uint8_t opcode) task_data->data_dma = dma_map_single(&ha->pdev->dev, task->data, task->data_count, DMA_TO_DEVICE); + if (dma_mapping_error(&ha->pdev->dev, task_data->data_dma)) + return -ENOMEM; } DEBUG2(ql4_printk(KERN_INFO, ha, "%s: MaxRecvLen %u, iscsi hrd %d\n", diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 3f6e87705b62..eeaa6af294b8 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3384,7 +3384,7 @@ static void sd_read_block_limits_ext(struct scsi_disk *sdkp) rcu_read_lock(); vpd = rcu_dereference(sdkp->device->vpd_pgb7); - if (vpd && vpd->len >= 2) + if (vpd && vpd->len >= 6) sdkp->rscs = vpd->data[5] & 1; rcu_read_unlock(); } diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c index de8b6acd4058..fcb4b14a710f 100644 --- a/drivers/ufs/core/ufs-sysfs.c +++ b/drivers/ufs/core/ufs-sysfs.c @@ -1808,7 +1808,7 @@ UFS_UNIT_DESC_PARAM(logical_block_size, _LOGICAL_BLK_SIZE, 1); UFS_UNIT_DESC_PARAM(logical_block_count, _LOGICAL_BLK_COUNT, 8); UFS_UNIT_DESC_PARAM(erase_block_size, _ERASE_BLK_SIZE, 4); UFS_UNIT_DESC_PARAM(provisioning_type, _PROVISIONING_TYPE, 1); -UFS_UNIT_DESC_PARAM(physical_memory_resourse_count, _PHY_MEM_RSRC_CNT, 8); +UFS_UNIT_DESC_PARAM(physical_memory_resource_count, _PHY_MEM_RSRC_CNT, 8); UFS_UNIT_DESC_PARAM(context_capabilities, _CTX_CAPABILITIES, 2); UFS_UNIT_DESC_PARAM(large_unit_granularity, _LARGE_UNIT_SIZE_M1, 1); UFS_UNIT_DESC_PARAM(wb_buf_alloc_units, _WB_BUF_ALLOC_UNITS, 4); @@ -1825,7 +1825,7 @@ static struct attribute *ufs_sysfs_unit_descriptor[] = { &dev_attr_logical_block_count.attr, &dev_attr_erase_block_size.attr, &dev_attr_provisioning_type.attr, - &dev_attr_physical_memory_resourse_count.attr, + &dev_attr_physical_memory_resource_count.attr, &dev_attr_context_capabilities.attr, &dev_attr_large_unit_granularity.attr, &dev_attr_wb_buf_alloc_units.attr, diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index b784aab66867..4397392bfef0 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2797,7 +2797,7 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num, void (*recycle_done)(struct virtqueue *vq)) { struct vring_virtqueue *vq = to_vvq(_vq); - int err; + int err, err_reset; if (num > vq->vq.num_max) return -E2BIG; @@ -2819,7 +2819,11 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num, else err = virtqueue_resize_split(_vq, num); - return virtqueue_enable_after_reset(_vq); + err_reset = virtqueue_enable_after_reset(_vq); + if (err_reset) + return err_reset; + + return err; } EXPORT_SYMBOL_GPL(virtqueue_resize); diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 9de356bcb411..aa176cc9a324 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -83,6 +83,8 @@ enum btrfs_block_group_flags { BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, /* Does the block group need to be added to the free space tree? */ BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, + /* Set after we add a new block group to the free space tree. */ + BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, /* Indicate that the block group is placed on a sequential zone */ BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, /* diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index a3e2a2a81461..a83c268f7f87 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1241,6 +1241,7 @@ static int clear_free_space_tree(struct btrfs_trans_handle *trans, { BTRFS_PATH_AUTO_FREE(path); struct btrfs_key key; + struct rb_node *node; int nr; int ret; @@ -1269,6 +1270,16 @@ static int clear_free_space_tree(struct btrfs_trans_handle *trans, btrfs_release_path(path); } + node = rb_first_cached(&trans->fs_info->block_group_cache_tree); + while (node) { + struct btrfs_block_group *bg; + + bg = rb_entry(node, struct btrfs_block_group, cache_node); + clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags); + node = rb_next(node); + cond_resched(); + } + return 0; } @@ -1358,12 +1369,18 @@ int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info) block_group = rb_entry(node, struct btrfs_block_group, cache_node); + + if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, + &block_group->runtime_flags)) + goto next; + ret = populate_free_space_tree(trans, block_group); if (ret) { btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans); return ret; } +next: if (btrfs_should_end_transaction(trans)) { btrfs_end_transaction(trans); trans = btrfs_start_transaction(free_space_root, 1); @@ -1390,6 +1407,29 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans, clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags); + /* + * While rebuilding the free space tree we may allocate new metadata + * block groups while modifying the free space tree. + * + * Because during the rebuild (at btrfs_rebuild_free_space_tree()) we + * can use multiple transactions, every time btrfs_end_transaction() is + * called at btrfs_rebuild_free_space_tree() we finish the creation of + * new block groups by calling btrfs_create_pending_block_groups(), and + * that in turn calls us, through add_block_group_free_space(), to add + * a free space info item and a free space extent item for the block + * group. + * + * Then later btrfs_rebuild_free_space_tree() may find such new block + * groups and processes them with populate_free_space_tree(), which can + * fail with EEXIST since there are already items for the block group in + * the free space tree. Notice that we say "may find" because a new + * block group may be added to the block groups rbtree in a node before + * or after the block group currently being processed by the rebuild + * process. So signal the rebuild process to skip such new block groups + * if it finds them. + */ + set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags); + ret = add_new_free_space_info(trans, block_group, path); if (ret) return ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 26d6ed170a19..fc66872b4c74 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4710,7 +4710,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; int ret = 0; struct btrfs_trans_handle *trans; - u64 last_unlink_trans; struct fscrypt_name fname; if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) @@ -4736,6 +4735,23 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) goto out_notrans; } + /* + * Propagate the last_unlink_trans value of the deleted dir to its + * parent directory. This is to prevent an unrecoverable log tree in the + * case we do something like this: + * 1) create dir foo + * 2) create snapshot under dir foo + * 3) delete the snapshot + * 4) rmdir foo + * 5) mkdir foo + * 6) fsync foo or some file inside foo + * + * This is because we can't unlink other roots when replaying the dir + * deletes for directory foo. + */ + if (BTRFS_I(inode)->last_unlink_trans >= trans->transid) + btrfs_record_snapshot_destroy(trans, BTRFS_I(dir)); + if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { ret = btrfs_unlink_subvol(trans, BTRFS_I(dir), dentry); goto out; @@ -4745,27 +4761,11 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) if (ret) goto out; - last_unlink_trans = BTRFS_I(inode)->last_unlink_trans; - /* now the directory is empty */ ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), &fname.disk_name); - if (!ret) { + if (!ret) btrfs_i_size_write(BTRFS_I(inode), 0); - /* - * Propagate the last_unlink_trans value of the deleted dir to - * its parent directory. This is to prevent an unrecoverable - * log tree in the case we do something like this: - * 1) create dir foo - * 2) create snapshot under dir foo - * 3) delete the snapshot - * 4) rmdir foo - * 5) mkdir foo - * 6) fsync foo or some file inside foo - */ - if (last_unlink_trans >= trans->transid) - BTRFS_I(dir)->last_unlink_trans = last_unlink_trans; - } out: btrfs_end_transaction(trans); out_notrans: diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4eda35bdba71..8a60983a697c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -666,14 +666,14 @@ static noinline int create_subvol(struct mnt_idmap *idmap, goto out; } + btrfs_record_new_subvolume(trans, BTRFS_I(dir)); + ret = btrfs_create_new_inode(trans, &new_inode_args); if (ret) { btrfs_abort_transaction(trans, ret); goto out; } - btrfs_record_new_subvolume(trans, BTRFS_I(dir)); - d_instantiate_new(dentry, new_inode_args.inode); new_inode_args.inode = NULL; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 858b609e292c..cea8a7e9d6d3 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -143,6 +143,9 @@ static struct btrfs_inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *r unsigned int nofs_flag; struct btrfs_inode *inode; + /* Only meant to be called for subvolume roots and not for log roots. */ + ASSERT(is_fstree(btrfs_root_id(root))); + /* * We're holding a transaction handle whether we are logging or * replaying a log tree, so we must make sure NOFS semantics apply @@ -604,21 +607,6 @@ static int read_alloc_one_name(struct extent_buffer *eb, void *start, int len, return 0; } -/* - * simple helper to read an inode off the disk from a given root - * This can only be called for subvolume roots and not for the log - */ -static noinline struct btrfs_inode *read_one_inode(struct btrfs_root *root, - u64 objectid) -{ - struct btrfs_inode *inode; - - inode = btrfs_iget_logging(objectid, root); - if (IS_ERR(inode)) - return NULL; - return inode; -} - /* replays a single extent in 'eb' at 'slot' with 'key' into the * subvolume 'root'. path is released on entry and should be released * on exit. @@ -674,9 +662,9 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, return -EUCLEAN; } - inode = read_one_inode(root, key->objectid); - if (!inode) - return -EIO; + inode = btrfs_iget_logging(key->objectid, root); + if (IS_ERR(inode)) + return PTR_ERR(inode); /* * first check to see if we already have this extent in the @@ -948,9 +936,10 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, btrfs_release_path(path); - inode = read_one_inode(root, location.objectid); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(location.objectid, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + inode = NULL; goto out; } @@ -1073,7 +1062,9 @@ again: search_key.type = BTRFS_INODE_REF_KEY; search_key.offset = parent_objectid; ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); - if (ret == 0) { + if (ret < 0) { + return ret; + } else if (ret == 0) { struct btrfs_inode_ref *victim_ref; unsigned long ptr; unsigned long ptr_end; @@ -1146,13 +1137,13 @@ again: struct fscrypt_str victim_name; extref = (struct btrfs_inode_extref *)(base + cur_offset); + victim_name.len = btrfs_inode_extref_name_len(leaf, extref); if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid) goto next; ret = read_alloc_one_name(leaf, &extref->name, - btrfs_inode_extref_name_len(leaf, extref), - &victim_name); + victim_name.len, &victim_name); if (ret) return ret; @@ -1167,10 +1158,10 @@ again: kfree(victim_name.name); return ret; } else if (!ret) { - ret = -ENOENT; - victim_parent = read_one_inode(root, - parent_objectid); - if (victim_parent) { + victim_parent = btrfs_iget_logging(parent_objectid, root); + if (IS_ERR(victim_parent)) { + ret = PTR_ERR(victim_parent); + } else { inc_nlink(&inode->vfs_inode); btrfs_release_path(path); @@ -1315,9 +1306,9 @@ again: struct btrfs_inode *dir; btrfs_release_path(path); - dir = read_one_inode(root, parent_id); - if (!dir) { - ret = -ENOENT; + dir = btrfs_iget_logging(parent_id, root); + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); kfree(name.name); goto out; } @@ -1389,15 +1380,17 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, * copy the back ref in. The link count fixup code will take * care of the rest */ - dir = read_one_inode(root, parent_objectid); - if (!dir) { - ret = -ENOENT; + dir = btrfs_iget_logging(parent_objectid, root); + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); + dir = NULL; goto out; } - inode = read_one_inode(root, inode_objectid); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(inode_objectid, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + inode = NULL; goto out; } @@ -1409,11 +1402,13 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, * parent object can change from one array * item to another. */ - if (!dir) - dir = read_one_inode(root, parent_objectid); if (!dir) { - ret = -ENOENT; - goto out; + dir = btrfs_iget_logging(parent_objectid, root); + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); + dir = NULL; + goto out; + } } } else { ret = ref_get_fields(eb, ref_ptr, &name, &ref_index); @@ -1682,9 +1677,9 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, break; btrfs_release_path(path); - inode = read_one_inode(root, key.offset); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(key.offset, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); break; } @@ -1720,9 +1715,9 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, struct btrfs_inode *inode; struct inode *vfs_inode; - inode = read_one_inode(root, objectid); - if (!inode) - return -EIO; + inode = btrfs_iget_logging(objectid, root); + if (IS_ERR(inode)) + return PTR_ERR(inode); vfs_inode = &inode->vfs_inode; key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; @@ -1761,14 +1756,14 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans, struct btrfs_inode *dir; int ret; - inode = read_one_inode(root, location->objectid); - if (!inode) - return -ENOENT; + inode = btrfs_iget_logging(location->objectid, root); + if (IS_ERR(inode)) + return PTR_ERR(inode); - dir = read_one_inode(root, dirid); - if (!dir) { + dir = btrfs_iget_logging(dirid, root); + if (IS_ERR(dir)) { iput(&inode->vfs_inode); - return -EIO; + return PTR_ERR(dir); } ret = btrfs_add_link(trans, dir, inode, name, 1, index); @@ -1845,9 +1840,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, bool update_size = true; bool name_added = false; - dir = read_one_inode(root, key->objectid); - if (!dir) - return -EIO; + dir = btrfs_iget_logging(key->objectid, root); + if (IS_ERR(dir)) + return PTR_ERR(dir); ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name); if (ret) @@ -2147,9 +2142,10 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans, btrfs_dir_item_key_to_cpu(eb, di, &location); btrfs_release_path(path); btrfs_release_path(log_path); - inode = read_one_inode(root, location.objectid); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(location.objectid, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + inode = NULL; goto out; } @@ -2301,14 +2297,17 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, if (!log_path) return -ENOMEM; - dir = read_one_inode(root, dirid); - /* it isn't an error if the inode isn't there, that can happen - * because we replay the deletes before we copy in the inode item - * from the log + dir = btrfs_iget_logging(dirid, root); + /* + * It isn't an error if the inode isn't there, that can happen because + * we replay the deletes before we copy in the inode item from the log. */ - if (!dir) { + if (IS_ERR(dir)) { btrfs_free_path(log_path); - return 0; + ret = PTR_ERR(dir); + if (ret == -ENOENT) + ret = 0; + return ret; } range_start = 0; @@ -2467,9 +2466,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, struct btrfs_inode *inode; u64 from; - inode = read_one_inode(root, key.objectid); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(key.objectid, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); break; } from = ALIGN(i_size_read(&inode->vfs_inode), @@ -7448,6 +7447,8 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, * full log sync. * Also we don't need to worry with renames, since btrfs_rename() marks the log * for full commit when renaming a subvolume. + * + * Must be called before creating the subvolume entry in its parent directory. */ void btrfs_record_new_subvolume(const struct btrfs_trans_handle *trans, struct btrfs_inode *dir) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index df4807460596..4bea008dbebd 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1105,6 +1105,7 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr) } static int ff_layout_async_handle_error_v4(struct rpc_task *task, + u32 op_status, struct nfs4_state *state, struct nfs_client *clp, struct pnfs_layout_segment *lseg, @@ -1115,34 +1116,42 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table; - switch (task->tk_status) { - case -NFS4ERR_BADSESSION: - case -NFS4ERR_BADSLOT: - case -NFS4ERR_BAD_HIGH_SLOT: - case -NFS4ERR_DEADSESSION: - case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: - case -NFS4ERR_SEQ_FALSE_RETRY: - case -NFS4ERR_SEQ_MISORDERED: + switch (op_status) { + case NFS4_OK: + case NFS4ERR_NXIO: + break; + case NFSERR_PERM: + if (!task->tk_xprt) + break; + xprt_force_disconnect(task->tk_xprt); + goto out_retry; + case NFS4ERR_BADSESSION: + case NFS4ERR_BADSLOT: + case NFS4ERR_BAD_HIGH_SLOT: + case NFS4ERR_DEADSESSION: + case NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case NFS4ERR_SEQ_FALSE_RETRY: + case NFS4ERR_SEQ_MISORDERED: dprintk("%s ERROR %d, Reset session. Exchangeid " "flags 0x%x\n", __func__, task->tk_status, clp->cl_exchange_flags); nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); - break; - case -NFS4ERR_DELAY: + goto out_retry; + case NFS4ERR_DELAY: nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY); fallthrough; - case -NFS4ERR_GRACE: + case NFS4ERR_GRACE: rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX); - break; - case -NFS4ERR_RETRY_UNCACHED_REP: - break; + goto out_retry; + case NFS4ERR_RETRY_UNCACHED_REP: + goto out_retry; /* Invalidate Layout errors */ - case -NFS4ERR_PNFS_NO_LAYOUT: - case -ESTALE: /* mapped NFS4ERR_STALE */ - case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */ - case -EISDIR: /* mapped NFS4ERR_ISDIR */ - case -NFS4ERR_FHEXPIRED: - case -NFS4ERR_WRONG_TYPE: + case NFS4ERR_PNFS_NO_LAYOUT: + case NFS4ERR_STALE: + case NFS4ERR_BADHANDLE: + case NFS4ERR_ISDIR: + case NFS4ERR_FHEXPIRED: + case NFS4ERR_WRONG_TYPE: dprintk("%s Invalid layout error %d\n", __func__, task->tk_status); /* @@ -1155,6 +1164,11 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, pnfs_destroy_layout(NFS_I(inode)); rpc_wake_up(&tbl->slot_tbl_waitq); goto reset; + default: + break; + } + + switch (task->tk_status) { /* RPC connection errors */ case -ENETDOWN: case -ENETUNREACH: @@ -1174,27 +1188,56 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid); rpc_wake_up(&tbl->slot_tbl_waitq); - fallthrough; + break; default: - if (ff_layout_avoid_mds_available_ds(lseg)) - return -NFS4ERR_RESET_TO_PNFS; -reset: - dprintk("%s Retry through MDS. Error %d\n", __func__, - task->tk_status); - return -NFS4ERR_RESET_TO_MDS; + break; } + + if (ff_layout_avoid_mds_available_ds(lseg)) + return -NFS4ERR_RESET_TO_PNFS; +reset: + dprintk("%s Retry through MDS. Error %d\n", __func__, + task->tk_status); + return -NFS4ERR_RESET_TO_MDS; + +out_retry: task->tk_status = 0; return -EAGAIN; } /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */ static int ff_layout_async_handle_error_v3(struct rpc_task *task, + u32 op_status, struct nfs_client *clp, struct pnfs_layout_segment *lseg, u32 idx) { struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); + switch (op_status) { + case NFS_OK: + case NFSERR_NXIO: + break; + case NFSERR_PERM: + if (!task->tk_xprt) + break; + xprt_force_disconnect(task->tk_xprt); + goto out_retry; + case NFSERR_ACCES: + case NFSERR_BADHANDLE: + case NFSERR_FBIG: + case NFSERR_IO: + case NFSERR_NOSPC: + case NFSERR_ROFS: + case NFSERR_STALE: + goto out_reset_to_pnfs; + case NFSERR_JUKEBOX: + nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY); + goto out_retry; + default: + break; + } + switch (task->tk_status) { /* File access problems. Don't mark the device as unavailable */ case -EACCES: @@ -1218,6 +1261,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task, nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid); } +out_reset_to_pnfs: /* FIXME: Need to prevent infinite looping here. */ return -NFS4ERR_RESET_TO_PNFS; out_retry: @@ -1228,6 +1272,7 @@ out_retry: } static int ff_layout_async_handle_error(struct rpc_task *task, + u32 op_status, struct nfs4_state *state, struct nfs_client *clp, struct pnfs_layout_segment *lseg, @@ -1246,10 +1291,11 @@ static int ff_layout_async_handle_error(struct rpc_task *task, switch (vers) { case 3: - return ff_layout_async_handle_error_v3(task, clp, lseg, idx); - case 4: - return ff_layout_async_handle_error_v4(task, state, clp, + return ff_layout_async_handle_error_v3(task, op_status, clp, lseg, idx); + case 4: + return ff_layout_async_handle_error_v4(task, op_status, state, + clp, lseg, idx); default: /* should never happen */ WARN_ON_ONCE(1); @@ -1302,6 +1348,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, switch (status) { case NFS4ERR_DELAY: case NFS4ERR_GRACE: + case NFS4ERR_PERM: break; case NFS4ERR_NXIO: ff_layout_mark_ds_unreachable(lseg, idx); @@ -1334,7 +1381,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task, trace_ff_layout_read_error(hdr, task->tk_status); } - err = ff_layout_async_handle_error(task, hdr->args.context->state, + err = ff_layout_async_handle_error(task, hdr->res.op_status, + hdr->args.context->state, hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); @@ -1507,7 +1555,8 @@ static int ff_layout_write_done_cb(struct rpc_task *task, trace_ff_layout_write_error(hdr, task->tk_status); } - err = ff_layout_async_handle_error(task, hdr->args.context->state, + err = ff_layout_async_handle_error(task, hdr->res.op_status, + hdr->args.context->state, hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); @@ -1556,8 +1605,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, trace_ff_layout_commit_error(data, task->tk_status); } - err = ff_layout_async_handle_error(task, NULL, data->ds_clp, - data->lseg, data->ds_commit_index); + err = ff_layout_async_handle_error(task, data->res.op_status, + NULL, data->ds_clp, data->lseg, + data->ds_commit_index); trace_nfs4_pnfs_commit_ds(data, err); switch (err) { diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 8ab7868807a7..a2fa6bc4d74e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2589,15 +2589,26 @@ EXPORT_SYMBOL_GPL(nfs_net_id); static int nfs_net_init(struct net *net) { struct nfs_net *nn = net_generic(net, nfs_net_id); + int err; nfs_clients_init(net); if (!rpc_proc_register(net, &nn->rpcstats)) { - nfs_clients_exit(net); - return -ENOMEM; + err = -ENOMEM; + goto err_proc_rpc; } - return nfs_fs_proc_net_init(net); + err = nfs_fs_proc_net_init(net); + if (err) + goto err_proc_nfs; + + return 0; + +err_proc_nfs: + rpc_proc_unregister(net, "nfs"); +err_proc_rpc: + nfs_clients_exit(net); + return err; } static void nfs_net_exit(struct net *net) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3adb7d0dbec7..1a7ec68bde15 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2059,8 +2059,10 @@ static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo) static void nfs_layoutget_end(struct pnfs_layout_hdr *lo) { if (atomic_dec_and_test(&lo->plh_outstanding) && - test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags)) + test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags)) { + smp_mb__after_atomic(); wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN); + } } static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 7839efe050bf..000cc7f4a3ce 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -3444,16 +3444,41 @@ xfs_alloc_read_agf( set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate); } + #ifdef DEBUG - else if (!xfs_is_shutdown(mp)) { - ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks)); - ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks)); - ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount)); - ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest)); - ASSERT(pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level)); - ASSERT(pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level)); + /* + * It's possible for the AGF to be out of sync if the block device is + * silently dropping writes. This can happen in fstests with dmflakey + * enabled, which allows the buffer to be cleaned and reclaimed by + * memory pressure and then re-read from disk here. We will get a + * stale version of the AGF from disk, and nothing good can happen from + * here. Hence if we detect this situation, immediately shut down the + * filesystem. + * + * This can also happen if we are already in the middle of a forced + * shutdown, so don't bother checking if we are already shut down. + */ + if (!xfs_is_shutdown(pag_mount(pag))) { + bool ok = true; + + ok &= pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks); + ok &= pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks); + ok &= pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks); + ok &= pag->pagf_flcount == be32_to_cpu(agf->agf_flcount); + ok &= pag->pagf_longest == be32_to_cpu(agf->agf_longest); + ok &= pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level); + ok &= pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level); + + if (XFS_IS_CORRUPT(pag_mount(pag), !ok)) { + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF); + xfs_trans_brelse(tp, agfbp); + xfs_force_shutdown(pag_mount(pag), + SHUTDOWN_CORRUPT_ONDISK); + return -EFSCORRUPTED; + } } -#endif +#endif /* DEBUG */ + if (agfbpp) *agfbpp = agfbp; else diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 0c47b5c6ca7d..750111634d9f 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -2801,12 +2801,35 @@ xfs_ialloc_read_agi( set_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate); } +#ifdef DEBUG /* - * It's possible for these to be out of sync if - * we are in the middle of a forced shutdown. + * It's possible for the AGF to be out of sync if the block device is + * silently dropping writes. This can happen in fstests with dmflakey + * enabled, which allows the buffer to be cleaned and reclaimed by + * memory pressure and then re-read from disk here. We will get a + * stale version of the AGF from disk, and nothing good can happen from + * here. Hence if we detect this situation, immediately shut down the + * filesystem. + * + * This can also happen if we are already in the middle of a forced + * shutdown, so don't bother checking if we are already shut down. */ - ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || - xfs_is_shutdown(pag_mount(pag))); + if (!xfs_is_shutdown(pag_mount(pag))) { + bool ok = true; + + ok &= pag->pagi_freecount == be32_to_cpu(agi->agi_freecount); + ok &= pag->pagi_count == be32_to_cpu(agi->agi_count); + + if (XFS_IS_CORRUPT(pag_mount(pag), !ok)) { + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); + xfs_trans_brelse(tp, agibp); + xfs_force_shutdown(pag_mount(pag), + SHUTDOWN_CORRUPT_ONDISK); + return -EFSCORRUPTED; + } + } +#endif /* DEBUG */ + if (agibpp) *agibpp = agibp; else diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 8af83bd161f9..ba5bd6031ece 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -2082,44 +2082,6 @@ xfs_buf_delwri_submit( return error; } -/* - * Push a single buffer on a delwri queue. - * - * The purpose of this function is to submit a single buffer of a delwri queue - * and return with the buffer still on the original queue. - * - * The buffer locking and queue management logic between _delwri_pushbuf() and - * _delwri_queue() guarantee that the buffer cannot be queued to another list - * before returning. - */ -int -xfs_buf_delwri_pushbuf( - struct xfs_buf *bp, - struct list_head *buffer_list) -{ - int error; - - ASSERT(bp->b_flags & _XBF_DELWRI_Q); - - trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_); - - xfs_buf_lock(bp); - bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC); - bp->b_flags |= XBF_WRITE; - xfs_buf_submit(bp); - - /* - * The buffer is now locked, under I/O but still on the original delwri - * queue. Wait for I/O completion, restore the DELWRI_Q flag and - * return with the buffer unlocked and still on the original queue. - */ - error = xfs_buf_iowait(bp); - bp->b_flags |= _XBF_DELWRI_Q; - xfs_buf_unlock(bp); - - return error; -} - void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) { /* diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 9d2ab567cf81..15fc56948346 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -326,7 +326,6 @@ extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *); void xfs_buf_delwri_queue_here(struct xfs_buf *bp, struct list_head *bl); extern int xfs_buf_delwri_submit(struct list_head *); extern int xfs_buf_delwri_submit_nowait(struct list_head *); -extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *); static inline xfs_daddr_t xfs_buf_daddr(struct xfs_buf *bp) { diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 90139e0f3271..7fc54725c5f6 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -32,6 +32,61 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip) return container_of(lip, struct xfs_buf_log_item, bli_item); } +static void +xfs_buf_item_get_format( + struct xfs_buf_log_item *bip, + int count) +{ + ASSERT(bip->bli_formats == NULL); + bip->bli_format_count = count; + + if (count == 1) { + bip->bli_formats = &bip->__bli_format; + return; + } + + bip->bli_formats = kzalloc(count * sizeof(struct xfs_buf_log_format), + GFP_KERNEL | __GFP_NOFAIL); +} + +static void +xfs_buf_item_free_format( + struct xfs_buf_log_item *bip) +{ + if (bip->bli_formats != &bip->__bli_format) { + kfree(bip->bli_formats); + bip->bli_formats = NULL; + } +} + +static void +xfs_buf_item_free( + struct xfs_buf_log_item *bip) +{ + xfs_buf_item_free_format(bip); + kvfree(bip->bli_item.li_lv_shadow); + kmem_cache_free(xfs_buf_item_cache, bip); +} + +/* + * xfs_buf_item_relse() is called when the buf log item is no longer needed. + */ +static void +xfs_buf_item_relse( + struct xfs_buf_log_item *bip) +{ + struct xfs_buf *bp = bip->bli_buf; + + trace_xfs_buf_item_relse(bp, _RET_IP_); + + ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); + ASSERT(atomic_read(&bip->bli_refcount) == 0); + + bp->b_log_item = NULL; + xfs_buf_rele(bp); + xfs_buf_item_free(bip); +} + /* Is this log iovec plausibly large enough to contain the buffer log format? */ bool xfs_buf_log_check_iovec( @@ -390,6 +445,42 @@ xfs_buf_item_pin( } /* + * For a stale BLI, process all the necessary completions that must be + * performed when the final BLI reference goes away. The buffer will be + * referenced and locked here - we return to the caller with the buffer still + * referenced and locked for them to finalise processing of the buffer. + */ +static void +xfs_buf_item_finish_stale( + struct xfs_buf_log_item *bip) +{ + struct xfs_buf *bp = bip->bli_buf; + struct xfs_log_item *lip = &bip->bli_item; + + ASSERT(bip->bli_flags & XFS_BLI_STALE); + ASSERT(xfs_buf_islocked(bp)); + ASSERT(bp->b_flags & XBF_STALE); + ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); + ASSERT(list_empty(&lip->li_trans)); + ASSERT(!bp->b_transp); + + if (bip->bli_flags & XFS_BLI_STALE_INODE) { + xfs_buf_item_done(bp); + xfs_buf_inode_iodone(bp); + ASSERT(list_empty(&bp->b_li_list)); + return; + } + + /* + * We may or may not be on the AIL here, xfs_trans_ail_delete() will do + * the right thing regardless of the situation in which we are called. + */ + xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR); + xfs_buf_item_relse(bip); + ASSERT(bp->b_log_item == NULL); +} + +/* * This is called to unpin the buffer associated with the buf log item which was * previously pinned with a call to xfs_buf_item_pin(). We enter this function * with a buffer pin count, a buffer reference and a BLI reference. @@ -438,13 +529,6 @@ xfs_buf_item_unpin( } if (stale) { - ASSERT(bip->bli_flags & XFS_BLI_STALE); - ASSERT(xfs_buf_islocked(bp)); - ASSERT(bp->b_flags & XBF_STALE); - ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); - ASSERT(list_empty(&lip->li_trans)); - ASSERT(!bp->b_transp); - trace_xfs_buf_item_unpin_stale(bip); /* @@ -455,22 +539,7 @@ xfs_buf_item_unpin( * processing is complete. */ xfs_buf_rele(bp); - - /* - * If we get called here because of an IO error, we may or may - * not have the item on the AIL. xfs_trans_ail_delete() will - * take care of that situation. xfs_trans_ail_delete() drops - * the AIL lock. - */ - if (bip->bli_flags & XFS_BLI_STALE_INODE) { - xfs_buf_item_done(bp); - xfs_buf_inode_iodone(bp); - ASSERT(list_empty(&bp->b_li_list)); - } else { - xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR); - xfs_buf_item_relse(bp); - ASSERT(bp->b_log_item == NULL); - } + xfs_buf_item_finish_stale(bip); xfs_buf_relse(bp); return; } @@ -543,43 +612,42 @@ xfs_buf_item_push( * Drop the buffer log item refcount and take appropriate action. This helper * determines whether the bli must be freed or not, since a decrement to zero * does not necessarily mean the bli is unused. - * - * Return true if the bli is freed, false otherwise. */ -bool +void xfs_buf_item_put( struct xfs_buf_log_item *bip) { - struct xfs_log_item *lip = &bip->bli_item; - bool aborted; - bool dirty; + + ASSERT(xfs_buf_islocked(bip->bli_buf)); /* drop the bli ref and return if it wasn't the last one */ if (!atomic_dec_and_test(&bip->bli_refcount)) - return false; + return; - /* - * We dropped the last ref and must free the item if clean or aborted. - * If the bli is dirty and non-aborted, the buffer was clean in the - * transaction but still awaiting writeback from previous changes. In - * that case, the bli is freed on buffer writeback completion. - */ - aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) || - xlog_is_shutdown(lip->li_log); - dirty = bip->bli_flags & XFS_BLI_DIRTY; - if (dirty && !aborted) - return false; + /* If the BLI is in the AIL, then it is still dirty and in use */ + if (test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)) { + ASSERT(bip->bli_flags & XFS_BLI_DIRTY); + return; + } /* - * The bli is aborted or clean. An aborted item may be in the AIL - * regardless of dirty state. For example, consider an aborted - * transaction that invalidated a dirty bli and cleared the dirty - * state. + * In shutdown conditions, we can be asked to free a dirty BLI that + * isn't in the AIL. This can occur due to a checkpoint aborting a BLI + * instead of inserting it into the AIL at checkpoint IO completion. If + * there's another bli reference (e.g. a btree cursor holds a clean + * reference) and it is released via xfs_trans_brelse(), we can get here + * with that aborted, dirty BLI. In this case, it is safe to free the + * dirty BLI immediately, as it is not in the AIL and there are no + * other references to it. + * + * We should never get here with a stale BLI via that path as + * xfs_trans_brelse() specifically holds onto stale buffers rather than + * releasing them. */ - if (aborted) - xfs_trans_ail_delete(lip, 0); - xfs_buf_item_relse(bip->bli_buf); - return true; + ASSERT(!(bip->bli_flags & XFS_BLI_DIRTY) || + test_bit(XFS_LI_ABORTED, &bip->bli_item.li_flags)); + ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); + xfs_buf_item_relse(bip); } /* @@ -600,6 +668,15 @@ xfs_buf_item_put( * if necessary but do not unlock the buffer. This is for support of * xfs_trans_bhold(). Make sure the XFS_BLI_HOLD field is cleared if we don't * free the item. + * + * If the XFS_BLI_STALE flag is set, the last reference to the BLI *must* + * perform a completion abort of any objects attached to the buffer for IO + * tracking purposes. This generally only happens in shutdown situations, + * normally xfs_buf_item_unpin() will drop the last BLI reference and perform + * completion processing. However, because transaction completion can race with + * checkpoint completion during a shutdown, this release context may end up + * being the last active reference to the BLI and so needs to perform this + * cleanup. */ STATIC void xfs_buf_item_release( @@ -607,18 +684,19 @@ xfs_buf_item_release( { struct xfs_buf_log_item *bip = BUF_ITEM(lip); struct xfs_buf *bp = bip->bli_buf; - bool released; bool hold = bip->bli_flags & XFS_BLI_HOLD; bool stale = bip->bli_flags & XFS_BLI_STALE; -#if defined(DEBUG) || defined(XFS_WARN) - bool ordered = bip->bli_flags & XFS_BLI_ORDERED; - bool dirty = bip->bli_flags & XFS_BLI_DIRTY; bool aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags); + bool dirty = bip->bli_flags & XFS_BLI_DIRTY; +#if defined(DEBUG) || defined(XFS_WARN) + bool ordered = bip->bli_flags & XFS_BLI_ORDERED; #endif trace_xfs_buf_item_release(bip); + ASSERT(xfs_buf_islocked(bp)); + /* * The bli dirty state should match whether the blf has logged segments * except for ordered buffers, where only the bli should be dirty. @@ -634,16 +712,56 @@ xfs_buf_item_release( bp->b_transp = NULL; bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED); + /* If there are other references, then we have nothing to do. */ + if (!atomic_dec_and_test(&bip->bli_refcount)) + goto out_release; + + /* + * Stale buffer completion frees the BLI, unlocks and releases the + * buffer. Neither the BLI or buffer are safe to reference after this + * call, so there's nothing more we need to do here. + * + * If we get here with a stale buffer and references to the BLI remain, + * we must not unlock the buffer as the last BLI reference owns lock + * context, not us. + */ + if (stale) { + xfs_buf_item_finish_stale(bip); + xfs_buf_relse(bp); + ASSERT(!hold); + return; + } + /* - * Unref the item and unlock the buffer unless held or stale. Stale - * buffers remain locked until final unpin unless the bli is freed by - * the unref call. The latter implies shutdown because buffer - * invalidation dirties the bli and transaction. + * Dirty or clean, aborted items are done and need to be removed from + * the AIL and released. This frees the BLI, but leaves the buffer + * locked and referenced. */ - released = xfs_buf_item_put(bip); - if (hold || (stale && !released)) + if (aborted || xlog_is_shutdown(lip->li_log)) { + ASSERT(list_empty(&bip->bli_buf->b_li_list)); + xfs_buf_item_done(bp); + goto out_release; + } + + /* + * Clean, unreferenced BLIs can be immediately freed, leaving the buffer + * locked and referenced. + * + * Dirty, unreferenced BLIs *must* be in the AIL awaiting writeback. + */ + if (!dirty) + xfs_buf_item_relse(bip); + else + ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags)); + + /* Not safe to reference the BLI from here */ +out_release: + /* + * If we get here with a stale buffer, we must not unlock the + * buffer as the last BLI reference owns lock context, not us. + */ + if (stale || hold) return; - ASSERT(!stale || aborted); xfs_buf_relse(bp); } @@ -729,33 +847,6 @@ static const struct xfs_item_ops xfs_buf_item_ops = { .iop_push = xfs_buf_item_push, }; -STATIC void -xfs_buf_item_get_format( - struct xfs_buf_log_item *bip, - int count) -{ - ASSERT(bip->bli_formats == NULL); - bip->bli_format_count = count; - - if (count == 1) { - bip->bli_formats = &bip->__bli_format; - return; - } - - bip->bli_formats = kzalloc(count * sizeof(struct xfs_buf_log_format), - GFP_KERNEL | __GFP_NOFAIL); -} - -STATIC void -xfs_buf_item_free_format( - struct xfs_buf_log_item *bip) -{ - if (bip->bli_formats != &bip->__bli_format) { - kfree(bip->bli_formats); - bip->bli_formats = NULL; - } -} - /* * Allocate a new buf log item to go with the given buffer. * Set the buffer's b_log_item field to point to the new @@ -976,34 +1067,6 @@ xfs_buf_item_dirty_format( return false; } -STATIC void -xfs_buf_item_free( - struct xfs_buf_log_item *bip) -{ - xfs_buf_item_free_format(bip); - kvfree(bip->bli_item.li_lv_shadow); - kmem_cache_free(xfs_buf_item_cache, bip); -} - -/* - * xfs_buf_item_relse() is called when the buf log item is no longer needed. - */ -void -xfs_buf_item_relse( - struct xfs_buf *bp) -{ - struct xfs_buf_log_item *bip = bp->b_log_item; - - trace_xfs_buf_item_relse(bp, _RET_IP_); - ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); - - if (atomic_read(&bip->bli_refcount)) - return; - bp->b_log_item = NULL; - xfs_buf_rele(bp); - xfs_buf_item_free(bip); -} - void xfs_buf_item_done( struct xfs_buf *bp) @@ -1023,5 +1086,5 @@ xfs_buf_item_done( xfs_trans_ail_delete(&bp->b_log_item->bli_item, (bp->b_flags & _XBF_LOGRECOVERY) ? 0 : SHUTDOWN_CORRUPT_INCORE); - xfs_buf_item_relse(bp); + xfs_buf_item_relse(bp->b_log_item); } diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index e10e324cd245..416890b84f8c 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -49,8 +49,7 @@ struct xfs_buf_log_item { int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); void xfs_buf_item_done(struct xfs_buf *bp); -void xfs_buf_item_relse(struct xfs_buf *); -bool xfs_buf_item_put(struct xfs_buf_log_item *); +void xfs_buf_item_put(struct xfs_buf_log_item *bip); void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint); bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *); void xfs_buf_inode_iodone(struct xfs_buf *); diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index b4e32f0860b7..0bd8022e47b4 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -1398,11 +1398,9 @@ xfs_qm_dqflush( ASSERT(XFS_DQ_IS_LOCKED(dqp)); ASSERT(!completion_done(&dqp->q_flush)); + ASSERT(atomic_read(&dqp->q_pincount) == 0); trace_xfs_dqflush(dqp); - - xfs_qm_dqunpin_wait(dqp); - fa = xfs_qm_dqflush_check(dqp); if (fa) { xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS", diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 48254a72071b..0b41b18debf3 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1335,9 +1335,10 @@ xfs_falloc_allocate_range( } #define XFS_FALLOC_FL_SUPPORTED \ - (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ - FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \ - FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE) + (FALLOC_FL_ALLOCATE_RANGE | FALLOC_FL_KEEP_SIZE | \ + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | \ + FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE | \ + FALLOC_FL_UNSHARE_RANGE) STATIC long __xfs_file_fallocate( diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 726e29b837e6..bbc2f2973dcc 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -979,7 +979,15 @@ xfs_reclaim_inode( */ if (xlog_is_shutdown(ip->i_mount->m_log)) { xfs_iunpin_wait(ip); + /* + * Avoid a ABBA deadlock on the inode cluster buffer vs + * concurrent xfs_ifree_cluster() trying to mark the inode + * stale. We don't need the inode locked to run the flush abort + * code, but the flush abort needs to lock the cluster buffer. + */ + xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iflush_shutdown_abort(ip); + xfs_ilock(ip, XFS_ILOCK_EXCL); goto reclaim; } if (xfs_ipincount(ip)) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index ee3e0f284287..761a996a857c 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1635,7 +1635,7 @@ retry: iip = ip->i_itemp; if (__xfs_iflags_test(ip, XFS_IFLUSHING)) { ASSERT(!list_empty(&iip->ili_item.li_bio_list)); - ASSERT(iip->ili_last_fields); + ASSERT(iip->ili_last_fields || xlog_is_shutdown(mp->m_log)); goto out_iunlock; } diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index c6cb0b6b9e46..285e27ff89e2 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -758,11 +758,14 @@ xfs_inode_item_push( * completed and items removed from the AIL before the next push * attempt. */ + trace_xfs_inode_push_stale(ip, _RET_IP_); return XFS_ITEM_PINNED; } - if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp)) + if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp)) { + trace_xfs_inode_push_pinned(ip, _RET_IP_); return XFS_ITEM_PINNED; + } if (xfs_iflags_test(ip, XFS_IFLUSHING)) return XFS_ITEM_FLUSHING; diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index f66d2d430e4f..a80cb6b9969a 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -793,8 +793,10 @@ xlog_cil_ail_insert( struct xfs_log_item *lip = lv->lv_item; xfs_lsn_t item_lsn; - if (aborted) + if (aborted) { + trace_xlog_ail_insert_abort(lip); set_bit(XFS_LI_ABORTED, &lip->li_flags); + } if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) { lip->li_ops->iop_release(lip); diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 08443ceec329..866c71d9fbae 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -320,7 +320,7 @@ xfs_mru_cache_create( xfs_mru_cache_free_func_t free_func) { struct xfs_mru_cache *mru = NULL; - int err = 0, grp; + int grp; unsigned int grp_time; if (mrup) @@ -341,8 +341,8 @@ xfs_mru_cache_create( mru->lists = kzalloc(mru->grp_count * sizeof(*mru->lists), GFP_KERNEL | __GFP_NOFAIL); if (!mru->lists) { - err = -ENOMEM; - goto exit; + kfree(mru); + return -ENOMEM; } for (grp = 0; grp < mru->grp_count; grp++) @@ -361,14 +361,7 @@ xfs_mru_cache_create( mru->free_func = free_func; mru->data = data; *mrup = mru; - -exit: - if (err && mru && mru->lists) - kfree(mru->lists); - if (err && mru) - kfree(mru); - - return err; + return 0; } /* @@ -425,10 +418,6 @@ xfs_mru_cache_insert( { int error = -EINVAL; - ASSERT(mru && mru->lists); - if (!mru || !mru->lists) - goto out_free; - error = -ENOMEM; if (radix_tree_preload(GFP_KERNEL)) goto out_free; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 417439b58785..fa135ac26471 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -134,6 +134,7 @@ xfs_qm_dqpurge( dqp->q_flags |= XFS_DQFLAG_FREEING; + xfs_qm_dqunpin_wait(dqp); xfs_dqflock(dqp); /* @@ -465,6 +466,7 @@ xfs_qm_dquot_isolate( struct xfs_dquot *dqp = container_of(item, struct xfs_dquot, q_lru); struct xfs_qm_isolate *isol = arg; + enum lru_status ret = LRU_SKIP; if (!xfs_dqlock_nowait(dqp)) goto out_miss_busy; @@ -478,6 +480,16 @@ xfs_qm_dquot_isolate( goto out_miss_unlock; /* + * If the dquot is pinned or dirty, rotate it to the end of the LRU to + * give some time for it to be cleaned before we try to isolate it + * again. + */ + ret = LRU_ROTATE; + if (XFS_DQ_IS_DIRTY(dqp) || atomic_read(&dqp->q_pincount) > 0) { + goto out_miss_unlock; + } + + /* * This dquot has acquired a reference in the meantime remove it from * the freelist and try again. */ @@ -492,41 +504,14 @@ xfs_qm_dquot_isolate( } /* - * If the dquot is dirty, flush it. If it's already being flushed, just - * skip it so there is time for the IO to complete before we try to - * reclaim it again on the next LRU pass. + * The dquot may still be under IO, in which case the flush lock will be + * held. If we can't get the flush lock now, just skip over the dquot as + * if it was dirty. */ if (!xfs_dqflock_nowait(dqp)) goto out_miss_unlock; - if (XFS_DQ_IS_DIRTY(dqp)) { - struct xfs_buf *bp = NULL; - int error; - - trace_xfs_dqreclaim_dirty(dqp); - - /* we have to drop the LRU lock to flush the dquot */ - spin_unlock(&lru->lock); - - error = xfs_dquot_use_attached_buf(dqp, &bp); - if (!bp || error == -EAGAIN) { - xfs_dqfunlock(dqp); - goto out_unlock_dirty; - } - - /* - * dqflush completes dqflock on error, and the delwri ioend - * does it on success. - */ - error = xfs_qm_dqflush(dqp, bp); - if (error) - goto out_unlock_dirty; - - xfs_buf_delwri_queue(bp, &isol->buffers); - xfs_buf_relse(bp); - goto out_unlock_dirty; - } - + ASSERT(!XFS_DQ_IS_DIRTY(dqp)); xfs_dquot_detach_buf(dqp); xfs_dqfunlock(dqp); @@ -548,13 +533,7 @@ out_miss_unlock: out_miss_busy: trace_xfs_dqreclaim_busy(dqp); XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses); - return LRU_SKIP; - -out_unlock_dirty: - trace_xfs_dqreclaim_busy(dqp); - XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses); - xfs_dqunlock(dqp); - return LRU_RETRY; + return ret; } static unsigned long @@ -1486,7 +1465,6 @@ xfs_qm_flush_one( struct xfs_dquot *dqp, void *data) { - struct xfs_mount *mp = dqp->q_mount; struct list_head *buffer_list = data; struct xfs_buf *bp = NULL; int error = 0; @@ -1497,34 +1475,8 @@ xfs_qm_flush_one( if (!XFS_DQ_IS_DIRTY(dqp)) goto out_unlock; - /* - * The only way the dquot is already flush locked by the time quotacheck - * gets here is if reclaim flushed it before the dqadjust walk dirtied - * it for the final time. Quotacheck collects all dquot bufs in the - * local delwri queue before dquots are dirtied, so reclaim can't have - * possibly queued it for I/O. The only way out is to push the buffer to - * cycle the flush lock. - */ - if (!xfs_dqflock_nowait(dqp)) { - /* buf is pinned in-core by delwri list */ - error = xfs_buf_incore(mp->m_ddev_targp, dqp->q_blkno, - mp->m_quotainfo->qi_dqchunklen, 0, &bp); - if (error) - goto out_unlock; - - if (!(bp->b_flags & _XBF_DELWRI_Q)) { - error = -EAGAIN; - xfs_buf_relse(bp); - goto out_unlock; - } - xfs_buf_unlock(bp); - - xfs_buf_delwri_pushbuf(bp, buffer_list); - xfs_buf_rele(bp); - - error = -EAGAIN; - goto out_unlock; - } + xfs_qm_dqunpin_wait(dqp); + xfs_dqflock(dqp); error = xfs_dquot_use_attached_buf(dqp, &bp); if (error) diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 6484c596ecea..736eb0924573 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1259,6 +1259,8 @@ xfs_growfs_check_rtgeom( kfree(nmp); + trace_xfs_growfs_check_rtgeom(mp, min_logfsbs); + if (min_logfsbs > mp->m_sb.sb_logblocks) return -EINVAL; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 0bc4b5489078..bb0a82635a77 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -2020,14 +2020,13 @@ xfs_remount_rw( int error; if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp && - bdev_read_only(mp->m_logdev_targp->bt_bdev)) { + xfs_readonly_buftarg(mp->m_logdev_targp)) { xfs_warn(mp, "ro->rw transition prohibited by read-only logdev"); return -EACCES; } - if (mp->m_rtdev_targp && - bdev_read_only(mp->m_rtdev_targp->bt_bdev)) { + if (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp)) { xfs_warn(mp, "ro->rw transition prohibited by read-only rtdev"); return -EACCES; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 01d284a1c759..ba45d801df1c 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -778,7 +778,6 @@ DEFINE_BUF_EVENT(xfs_buf_iowait_done); DEFINE_BUF_EVENT(xfs_buf_delwri_queue); DEFINE_BUF_EVENT(xfs_buf_delwri_queued); DEFINE_BUF_EVENT(xfs_buf_delwri_split); -DEFINE_BUF_EVENT(xfs_buf_delwri_pushbuf); DEFINE_BUF_EVENT(xfs_buf_get_uncached); DEFINE_BUF_EVENT(xfs_buf_item_relse); DEFINE_BUF_EVENT(xfs_buf_iodone_async); @@ -1147,6 +1146,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class, __field(xfs_ino_t, ino) __field(int, count) __field(int, pincount) + __field(unsigned long, iflags) __field(unsigned long, caller_ip) ), TP_fast_assign( @@ -1154,13 +1154,15 @@ DECLARE_EVENT_CLASS(xfs_iref_class, __entry->ino = ip->i_ino; __entry->count = atomic_read(&VFS_I(ip)->i_count); __entry->pincount = atomic_read(&ip->i_pincount); + __entry->iflags = ip->i_flags; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pS", + TP_printk("dev %d:%d ino 0x%llx count %d pincount %d iflags 0x%lx caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->count, __entry->pincount, + __entry->iflags, (char *)__entry->caller_ip) ) @@ -1250,6 +1252,8 @@ DEFINE_IREF_EVENT(xfs_irele); DEFINE_IREF_EVENT(xfs_inode_pin); DEFINE_IREF_EVENT(xfs_inode_unpin); DEFINE_IREF_EVENT(xfs_inode_unpin_nowait); +DEFINE_IREF_EVENT(xfs_inode_push_pinned); +DEFINE_IREF_EVENT(xfs_inode_push_stale); DECLARE_EVENT_CLASS(xfs_namespace_class, TP_PROTO(struct xfs_inode *dp, const struct xfs_name *name), @@ -1654,6 +1658,8 @@ DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_mark); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_skip); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_unpin); +DEFINE_LOG_ITEM_EVENT(xlog_ail_insert_abort); +DEFINE_LOG_ITEM_EVENT(xfs_trans_free_abort); DECLARE_EVENT_CLASS(xfs_ail_class, TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn), diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index c6657072361a..b4a07af513ba 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -742,8 +742,10 @@ xfs_trans_free_items( list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { xfs_trans_del_item(lip); - if (abort) + if (abort) { + trace_xfs_trans_free_abort(lip); set_bit(XFS_LI_ABORTED, &lip->li_flags); + } if (lip->li_ops->iop_release) lip->li_ops->iop_release(lip); } diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 80add26c0111..01315ed75502 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -727,7 +727,7 @@ xfs_select_zone( for (;;) { prepare_to_wait(&zi->zi_zone_wait, &wait, TASK_UNINTERRUPTIBLE); oz = xfs_select_zone_nowait(mp, write_hint, pack_tight); - if (oz) + if (oz || xfs_is_shutdown(mp)) break; schedule(); } @@ -777,26 +777,6 @@ xfs_mark_rtg_boundary( ioend->io_flags |= IOMAP_IOEND_BOUNDARY; } -static void -xfs_submit_zoned_bio( - struct iomap_ioend *ioend, - struct xfs_open_zone *oz, - bool is_seq) -{ - ioend->io_bio.bi_iter.bi_sector = ioend->io_sector; - ioend->io_private = oz; - atomic_inc(&oz->oz_ref); /* for xfs_zoned_end_io */ - - if (is_seq) { - ioend->io_bio.bi_opf &= ~REQ_OP_WRITE; - ioend->io_bio.bi_opf |= REQ_OP_ZONE_APPEND; - } else { - xfs_mark_rtg_boundary(ioend); - } - - submit_bio(&ioend->io_bio); -} - /* * Cache the last zone written to for an inode so that it is considered first * for subsequent writes. @@ -891,6 +871,26 @@ xfs_zone_cache_create_association( xfs_mru_cache_insert(mp->m_zone_cache, ip->i_ino, &item->mru); } +static void +xfs_submit_zoned_bio( + struct iomap_ioend *ioend, + struct xfs_open_zone *oz, + bool is_seq) +{ + ioend->io_bio.bi_iter.bi_sector = ioend->io_sector; + ioend->io_private = oz; + atomic_inc(&oz->oz_ref); /* for xfs_zoned_end_io */ + + if (is_seq) { + ioend->io_bio.bi_opf &= ~REQ_OP_WRITE; + ioend->io_bio.bi_opf |= REQ_OP_ZONE_APPEND; + } else { + xfs_mark_rtg_boundary(ioend); + } + + submit_bio(&ioend->io_bio); +} + void xfs_zone_alloc_and_submit( struct iomap_ioend *ioend, diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 5111ec040c53..73648d26a622 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1666,11 +1666,12 @@ static void io_iopoll_req_issued(struct io_kiocb *req, unsigned int issue_flags) io_req_flags_t io_file_get_flags(struct file *file) { + struct inode *inode = file_inode(file); io_req_flags_t res = 0; BUILD_BUG_ON(REQ_F_ISREG_BIT != REQ_F_SUPPORT_NOWAIT_BIT + 1); - if (S_ISREG(file_inode(file)->i_mode)) + if (S_ISREG(inode->i_mode) && !(inode->i_flags & S_ANON_INODE)) res |= REQ_F_ISREG; if ((file->f_flags & O_NONBLOCK) || (file->f_mode & FMODE_NOWAIT)) res |= REQ_F_SUPPORT_NOWAIT; diff --git a/lib/test_objagg.c b/lib/test_objagg.c index d34df4306b87..222b39fc2629 100644 --- a/lib/test_objagg.c +++ b/lib/test_objagg.c @@ -899,8 +899,10 @@ static int check_expect_hints_stats(struct objagg_hints *objagg_hints, int err; stats = objagg_hints_stats_get(objagg_hints); - if (IS_ERR(stats)) + if (IS_ERR(stats)) { + *errmsg = "objagg_hints_stats_get() failed."; return PTR_ERR(stats); + } err = __check_expect_stats(stats, expect_stats, errmsg); objagg_stats_put(stats); return err; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 66052d6aaa1d..4d5ace9d245d 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2150,40 +2150,6 @@ static u8 hci_cc_set_adv_param(struct hci_dev *hdev, void *data, return rp->status; } -static u8 hci_cc_set_ext_adv_param(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_rp_le_set_ext_adv_params *rp = data; - struct hci_cp_le_set_ext_adv_params *cp; - struct adv_info *adv_instance; - - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); - - if (rp->status) - return rp->status; - - cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS); - if (!cp) - return rp->status; - - hci_dev_lock(hdev); - hdev->adv_addr_type = cp->own_addr_type; - if (!cp->handle) { - /* Store in hdev for instance 0 */ - hdev->adv_tx_power = rp->tx_power; - } else { - adv_instance = hci_find_adv_instance(hdev, cp->handle); - if (adv_instance) - adv_instance->tx_power = rp->tx_power; - } - /* Update adv data as tx power is known now */ - hci_update_adv_data(hdev, cp->handle); - - hci_dev_unlock(hdev); - - return rp->status; -} - static u8 hci_cc_read_rssi(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -4164,8 +4130,6 @@ static const struct hci_cc { HCI_CC(HCI_OP_LE_READ_NUM_SUPPORTED_ADV_SETS, hci_cc_le_read_num_adv_sets, sizeof(struct hci_rp_le_read_num_supported_adv_sets)), - HCI_CC(HCI_OP_LE_SET_EXT_ADV_PARAMS, hci_cc_set_ext_adv_param, - sizeof(struct hci_rp_le_set_ext_adv_params)), HCI_CC_STATUS(HCI_OP_LE_SET_EXT_ADV_ENABLE, hci_cc_le_set_ext_adv_enable), HCI_CC_STATUS(HCI_OP_LE_SET_ADV_SET_RAND_ADDR, diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 6687f2a4d1eb..77b3691f3423 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1205,9 +1205,126 @@ static int hci_set_adv_set_random_addr_sync(struct hci_dev *hdev, u8 instance, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } +static int +hci_set_ext_adv_params_sync(struct hci_dev *hdev, struct adv_info *adv, + const struct hci_cp_le_set_ext_adv_params *cp, + struct hci_rp_le_set_ext_adv_params *rp) +{ + struct sk_buff *skb; + + skb = __hci_cmd_sync(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(*cp), + cp, HCI_CMD_TIMEOUT); + + /* If command return a status event, skb will be set to -ENODATA */ + if (skb == ERR_PTR(-ENODATA)) + return 0; + + if (IS_ERR(skb)) { + bt_dev_err(hdev, "Opcode 0x%4.4x failed: %ld", + HCI_OP_LE_SET_EXT_ADV_PARAMS, PTR_ERR(skb)); + return PTR_ERR(skb); + } + + if (skb->len != sizeof(*rp)) { + bt_dev_err(hdev, "Invalid response length for 0x%4.4x: %u", + HCI_OP_LE_SET_EXT_ADV_PARAMS, skb->len); + kfree_skb(skb); + return -EIO; + } + + memcpy(rp, skb->data, sizeof(*rp)); + kfree_skb(skb); + + if (!rp->status) { + hdev->adv_addr_type = cp->own_addr_type; + if (!cp->handle) { + /* Store in hdev for instance 0 */ + hdev->adv_tx_power = rp->tx_power; + } else if (adv) { + adv->tx_power = rp->tx_power; + } + } + + return rp->status; +} + +static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance) +{ + DEFINE_FLEX(struct hci_cp_le_set_ext_adv_data, pdu, data, length, + HCI_MAX_EXT_AD_LENGTH); + u8 len; + struct adv_info *adv = NULL; + int err; + + if (instance) { + adv = hci_find_adv_instance(hdev, instance); + if (!adv || !adv->adv_data_changed) + return 0; + } + + len = eir_create_adv_data(hdev, instance, pdu->data, + HCI_MAX_EXT_AD_LENGTH); + + pdu->length = len; + pdu->handle = adv ? adv->handle : instance; + pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE; + pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG; + + err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA, + struct_size(pdu, data, len), pdu, + HCI_CMD_TIMEOUT); + if (err) + return err; + + /* Update data if the command succeed */ + if (adv) { + adv->adv_data_changed = false; + } else { + memcpy(hdev->adv_data, pdu->data, len); + hdev->adv_data_len = len; + } + + return 0; +} + +static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance) +{ + struct hci_cp_le_set_adv_data cp; + u8 len; + + memset(&cp, 0, sizeof(cp)); + + len = eir_create_adv_data(hdev, instance, cp.data, sizeof(cp.data)); + + /* There's nothing to do if the data hasn't changed */ + if (hdev->adv_data_len == len && + memcmp(cp.data, hdev->adv_data, len) == 0) + return 0; + + memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); + hdev->adv_data_len = len; + + cp.length = len; + + return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_DATA, + sizeof(cp), &cp, HCI_CMD_TIMEOUT); +} + +int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance) +{ + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + return 0; + + if (ext_adv_capable(hdev)) + return hci_set_ext_adv_data_sync(hdev, instance); + + return hci_set_adv_data_sync(hdev, instance); +} + int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) { struct hci_cp_le_set_ext_adv_params cp; + struct hci_rp_le_set_ext_adv_params rp; bool connectable; u32 flags; bdaddr_t random_addr; @@ -1316,8 +1433,12 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) cp.secondary_phy = HCI_ADV_PHY_1M; } - err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS, - sizeof(cp), &cp, HCI_CMD_TIMEOUT); + err = hci_set_ext_adv_params_sync(hdev, adv, &cp, &rp); + if (err) + return err; + + /* Update adv data as tx power is known now */ + err = hci_set_ext_adv_data_sync(hdev, cp.handle); if (err) return err; @@ -1822,79 +1943,6 @@ int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason) sizeof(cp), &cp, HCI_CMD_TIMEOUT); } -static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance) -{ - DEFINE_FLEX(struct hci_cp_le_set_ext_adv_data, pdu, data, length, - HCI_MAX_EXT_AD_LENGTH); - u8 len; - struct adv_info *adv = NULL; - int err; - - if (instance) { - adv = hci_find_adv_instance(hdev, instance); - if (!adv || !adv->adv_data_changed) - return 0; - } - - len = eir_create_adv_data(hdev, instance, pdu->data, - HCI_MAX_EXT_AD_LENGTH); - - pdu->length = len; - pdu->handle = adv ? adv->handle : instance; - pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE; - pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG; - - err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA, - struct_size(pdu, data, len), pdu, - HCI_CMD_TIMEOUT); - if (err) - return err; - - /* Update data if the command succeed */ - if (adv) { - adv->adv_data_changed = false; - } else { - memcpy(hdev->adv_data, pdu->data, len); - hdev->adv_data_len = len; - } - - return 0; -} - -static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance) -{ - struct hci_cp_le_set_adv_data cp; - u8 len; - - memset(&cp, 0, sizeof(cp)); - - len = eir_create_adv_data(hdev, instance, cp.data, sizeof(cp.data)); - - /* There's nothing to do if the data hasn't changed */ - if (hdev->adv_data_len == len && - memcmp(cp.data, hdev->adv_data, len) == 0) - return 0; - - memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); - hdev->adv_data_len = len; - - cp.length = len; - - return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_DATA, - sizeof(cp), &cp, HCI_CMD_TIMEOUT); -} - -int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance) -{ - if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) - return 0; - - if (ext_adv_capable(hdev)) - return hci_set_ext_adv_data_sync(hdev, instance); - - return hci_set_adv_data_sync(hdev, instance); -} - int hci_schedule_adv_instance_sync(struct hci_dev *hdev, u8 instance, bool force) { @@ -1970,13 +2018,10 @@ static int hci_clear_adv_sets_sync(struct hci_dev *hdev, struct sock *sk) static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force) { struct adv_info *adv, *n; - int err = 0; if (ext_adv_capable(hdev)) /* Remove all existing sets */ - err = hci_clear_adv_sets_sync(hdev, sk); - if (ext_adv_capable(hdev)) - return err; + return hci_clear_adv_sets_sync(hdev, sk); /* This is safe as long as there is no command send while the lock is * held. @@ -2004,13 +2049,11 @@ static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force) static int hci_remove_adv_sync(struct hci_dev *hdev, u8 instance, struct sock *sk) { - int err = 0; + int err; /* If we use extended advertising, instance has to be removed first. */ if (ext_adv_capable(hdev)) - err = hci_remove_ext_adv_instance_sync(hdev, instance, sk); - if (ext_adv_capable(hdev)) - return err; + return hci_remove_ext_adv_instance_sync(hdev, instance, sk); /* This is safe as long as there is no command send while the lock is * held. @@ -2109,16 +2152,13 @@ int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type) int hci_disable_advertising_sync(struct hci_dev *hdev) { u8 enable = 0x00; - int err = 0; /* If controller is not advertising we are done. */ if (!hci_dev_test_flag(hdev, HCI_LE_ADV)) return 0; if (ext_adv_capable(hdev)) - err = hci_disable_ext_adv_instance_sync(hdev, 0x00); - if (ext_adv_capable(hdev)) - return err; + return hci_disable_ext_adv_instance_sync(hdev, 0x00); return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable, HCI_CMD_TIMEOUT); @@ -2481,6 +2521,10 @@ static int hci_pause_advertising_sync(struct hci_dev *hdev) int err; int old_state; + /* If controller is not advertising we are done. */ + if (!hci_dev_test_flag(hdev, HCI_LE_ADV)) + return 0; + /* If already been paused there is nothing to do. */ if (hdev->advertising_paused) return 0; @@ -6277,6 +6321,7 @@ static int hci_le_ext_directed_advertising_sync(struct hci_dev *hdev, struct hci_conn *conn) { struct hci_cp_le_set_ext_adv_params cp; + struct hci_rp_le_set_ext_adv_params rp; int err; bdaddr_t random_addr; u8 own_addr_type; @@ -6318,8 +6363,12 @@ static int hci_le_ext_directed_advertising_sync(struct hci_dev *hdev, if (err) return err; - err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS, - sizeof(cp), &cp, HCI_CMD_TIMEOUT); + err = hci_set_ext_adv_params_sync(hdev, NULL, &cp, &rp); + if (err) + return err; + + /* Update adv data as tx power is known now */ + err = hci_set_ext_adv_data_sync(hdev, cp.handle); if (err) return err; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index d540f7b4f75f..1485b455ade4 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1080,7 +1080,8 @@ static int mesh_send_done_sync(struct hci_dev *hdev, void *data) struct mgmt_mesh_tx *mesh_tx; hci_dev_clear_flag(hdev, HCI_MESH_SENDING); - hci_disable_advertising_sync(hdev); + if (list_empty(&hdev->adv_instances)) + hci_disable_advertising_sync(hdev); mesh_tx = mgmt_mesh_next(hdev, NULL); if (mesh_tx) @@ -2153,6 +2154,9 @@ static int set_mesh_sync(struct hci_dev *hdev, void *data) else hci_dev_clear_flag(hdev, HCI_MESH); + hdev->le_scan_interval = __le16_to_cpu(cp->period); + hdev->le_scan_window = __le16_to_cpu(cp->window); + len -= sizeof(*cp); /* If filters don't fit, forward all adv pkts */ @@ -2167,6 +2171,7 @@ static int set_mesh(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_mesh *cp = data; struct mgmt_pending_cmd *cmd; + __u16 period, window; int err = 0; bt_dev_dbg(hdev, "sock %p", sk); @@ -2180,6 +2185,23 @@ static int set_mesh(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, MGMT_STATUS_INVALID_PARAMS); + /* Keep allowed ranges in sync with set_scan_params() */ + period = __le16_to_cpu(cp->period); + + if (period < 0x0004 || period > 0x4000) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + MGMT_STATUS_INVALID_PARAMS); + + window = __le16_to_cpu(cp->window); + + if (window < 0x0004 || window > 0x4000) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + MGMT_STATUS_INVALID_PARAMS); + + if (window > period) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); cmd = mgmt_pending_add(sk, MGMT_OP_SET_MESH_RECEIVER, hdev, data, len); @@ -6432,6 +6454,7 @@ static int set_scan_params(struct sock *sk, struct hci_dev *hdev, return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, MGMT_STATUS_NOT_SUPPORTED); + /* Keep allowed ranges in sync with set_mesh() */ interval = __le16_to_cpu(cp->interval); if (interval < 0x0004 || interval > 0x4000) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 30a5e9460d00..5a49eb99e5c4 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -319,8 +319,8 @@ static int ip_rcv_finish_core(struct net *net, const struct sk_buff *hint) { const struct iphdr *iph = ip_hdr(skb); - int err, drop_reason; struct rtable *rt; + int drop_reason; if (ip_can_use_hint(skb, iph, hint)) { drop_reason = ip_route_use_hint(skb, iph->daddr, iph->saddr, @@ -345,9 +345,10 @@ static int ip_rcv_finish_core(struct net *net, break; case IPPROTO_UDP: if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) { - err = udp_v4_early_demux(skb); - if (unlikely(err)) + drop_reason = udp_v4_early_demux(skb); + if (unlikely(drop_reason)) goto drop_error; + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; /* must reload iph, skb->head might have changed */ iph = ip_hdr(skb); diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 2dd6bd3a3011..b72bf8a08d48 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -497,22 +497,15 @@ void rose_rt_device_down(struct net_device *dev) t = rose_node; rose_node = rose_node->next; - for (i = 0; i < t->count; i++) { + for (i = t->count - 1; i >= 0; i--) { if (t->neighbour[i] != s) continue; t->count--; - switch (i) { - case 0: - t->neighbour[0] = t->neighbour[1]; - fallthrough; - case 1: - t->neighbour[1] = t->neighbour[2]; - break; - case 2: - break; - } + memmove(&t->neighbour[i], &t->neighbour[i + 1], + sizeof(t->neighbour[0]) * + (t->count - i)); } if (t->count <= 0) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c5e3673aadbe..d8a33486c511 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -780,15 +780,12 @@ static u32 qdisc_alloc_handle(struct net_device *dev) void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) { - bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED; const struct Qdisc_class_ops *cops; unsigned long cl; u32 parentid; bool notify; int drops; - if (n == 0 && len == 0) - return; drops = max_t(int, n, 0); rcu_read_lock(); while ((parentid = sch->parent)) { @@ -797,17 +794,8 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) if (sch->flags & TCQ_F_NOPARENT) break; - /* Notify parent qdisc only if child qdisc becomes empty. - * - * If child was empty even before update then backlog - * counter is screwed and we skip notification because - * parent class is already passive. - * - * If the original child was offloaded then it is allowed - * to be seem as empty, so the parent is notified anyway. - */ - notify = !sch->q.qlen && !WARN_ON_ONCE(!n && - !qdisc_is_offloaded); + /* Notify parent qdisc only if child qdisc becomes empty. */ + notify = !sch->q.qlen; /* TODO: perform the search on a per txq basis */ sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { @@ -816,6 +804,9 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) } cops = sch->ops->cl_ops; if (notify && cops->qlen_notify) { + /* Note that qlen_notify must be idempotent as it may get called + * multiple times. + */ cl = cops->find(sch, parentid); cops->qlen_notify(sch, cl); } diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 0fa244f16876..7b943fbafcc3 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1724,7 +1724,7 @@ gss_validate(struct rpc_task *task, struct xdr_stream *xdr) maj_stat = gss_validate_seqno_mic(ctx, task->tk_rqstp->rq_seqnos[0], seq, p, len); /* RFC 2203 5.3.3.1 - compute the checksum of each sequence number in the cache */ while (unlikely(maj_stat == GSS_S_BAD_SIG && i < task->tk_rqstp->rq_seqno_count)) - maj_stat = gss_validate_seqno_mic(ctx, task->tk_rqstp->rq_seqnos[i], seq, p, len); + maj_stat = gss_validate_seqno_mic(ctx, task->tk_rqstp->rq_seqnos[i++], seq, p, len); if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat) diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index b370070194fa..7eccd6708d66 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -119,6 +119,8 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt, u16 proto, struct vmci_handle handle) { + memset(pkt, 0, sizeof(*pkt)); + /* We register the stream control handler as an any cid handle so we * must always send from a source address of VMADDR_CID_ANY */ @@ -131,8 +133,6 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt, pkt->type = type; pkt->src_port = src->svm_port; pkt->dst_port = dst->svm_port; - memset(&pkt->proto, 0, sizeof(pkt->proto)); - memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2)); switch (pkt->type) { case VMCI_TRANSPORT_PACKET_TYPE_INVALID: diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 1a8e85afe9aa..1926ef6b40ab 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -54,6 +54,8 @@ static __attribute__((constructor)) void setup_sizes(void) mfd_buffer = memfd_mmap(BUFFER_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, &mfd); + assert(mfd_buffer != MAP_FAILED); + assert(mfd > 0); } FIXTURE(iommufd) @@ -1746,13 +1748,15 @@ TEST_F(iommufd_mock_domain, all_aligns) unsigned int end; uint8_t *buf; int prot = PROT_READ | PROT_WRITE; - int mfd; + int mfd = -1; if (variant->file) buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd); else buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0); ASSERT_NE(MAP_FAILED, buf); + if (variant->file) + ASSERT_GT(mfd, 0); check_refs(buf, buf_size, 0); /* @@ -1798,13 +1802,15 @@ TEST_F(iommufd_mock_domain, all_aligns_copy) unsigned int end; uint8_t *buf; int prot = PROT_READ | PROT_WRITE; - int mfd; + int mfd = -1; if (variant->file) buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd); else buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0); ASSERT_NE(MAP_FAILED, buf); + if (variant->file) + ASSERT_GT(mfd, 0); check_refs(buf, buf_size, 0); /* @@ -2008,6 +2014,7 @@ FIXTURE_VARIANT(iommufd_dirty_tracking) FIXTURE_SETUP(iommufd_dirty_tracking) { + size_t mmap_buffer_size; unsigned long size; int mmap_flags; void *vrc; @@ -2022,22 +2029,33 @@ FIXTURE_SETUP(iommufd_dirty_tracking) self->fd = open("/dev/iommu", O_RDWR); ASSERT_NE(-1, self->fd); - rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, variant->buffer_size); - if (rc || !self->buffer) { - SKIP(return, "Skipping buffer_size=%lu due to errno=%d", - variant->buffer_size, rc); - } - mmap_flags = MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED; + mmap_buffer_size = variant->buffer_size; if (variant->hugepages) { /* * MAP_POPULATE will cause the kernel to fail mmap if THPs are * not available. */ mmap_flags |= MAP_HUGETLB | MAP_POPULATE; + + /* + * Allocation must be aligned to the HUGEPAGE_SIZE, because the + * following mmap() will automatically align the length to be a + * multiple of the underlying huge page size. Failing to do the + * same at this allocation will result in a memory overwrite by + * the mmap(). + */ + if (mmap_buffer_size < HUGEPAGE_SIZE) + mmap_buffer_size = HUGEPAGE_SIZE; + } + + rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, mmap_buffer_size); + if (rc || !self->buffer) { + SKIP(return, "Skipping buffer_size=%lu due to errno=%d", + mmap_buffer_size, rc); } assert((uintptr_t)self->buffer % HUGEPAGE_SIZE == 0); - vrc = mmap(self->buffer, variant->buffer_size, PROT_READ | PROT_WRITE, + vrc = mmap(self->buffer, mmap_buffer_size, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); assert(vrc == self->buffer); @@ -2066,8 +2084,8 @@ FIXTURE_SETUP(iommufd_dirty_tracking) FIXTURE_TEARDOWN(iommufd_dirty_tracking) { - munmap(self->buffer, variant->buffer_size); - munmap(self->bitmap, DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE)); + free(self->buffer); + free(self->bitmap); teardown_iommufd(self->fd, _metadata); } diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 72f6636e5d90..6e967b58acfd 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -60,13 +60,18 @@ static inline void *memfd_mmap(size_t length, int prot, int flags, int *mfd_p) { int mfd_flags = (flags & MAP_HUGETLB) ? MFD_HUGETLB : 0; int mfd = memfd_create("buffer", mfd_flags); + void *buf = MAP_FAILED; if (mfd <= 0) return MAP_FAILED; if (ftruncate(mfd, length)) - return MAP_FAILED; + goto out; *mfd_p = mfd; - return mmap(0, length, prot, flags, mfd, 0); + buf = mmap(0, length, prot, flags, mfd, 0); +out: + if (buf == MAP_FAILED) + close(mfd); + return buf; } /* |