diff options
106 files changed, 4291 insertions, 1527 deletions
diff --git a/Documentation/filesystems/afs.txt b/Documentation/filesystems/afs.txt index c5254f6d234d..8c6ea7b41048 100644 --- a/Documentation/filesystems/afs.txt +++ b/Documentation/filesystems/afs.txt @@ -11,7 +11,7 @@ Contents: - Proc filesystem. - The cell database. - Security. - - Examples. + - The @sys substitution. ======== @@ -230,3 +230,29 @@ If a file is opened with a particular key and then the file descriptor is passed to a process that doesn't have that key (perhaps over an AF_UNIX socket), then the operations on the file will be made with key that was used to open the file. + + +===================== +THE @SYS SUBSTITUTION +===================== + +The list of up to 16 @sys substitutions for the current network namespace can +be configured by writing a list to /proc/fs/afs/sysname: + + [root@andromeda ~]# echo foo amd64_linux_26 >/proc/fs/afs/sysname + +or cleared entirely by writing an empty list: + + [root@andromeda ~]# echo >/proc/fs/afs/sysname + +The current list for current network namespace can be retrieved by: + + [root@andromeda ~]# cat /proc/fs/afs/sysname + foo + amd64_linux_26 + +When @sys is being substituted for, each element of the list is tried in the +order given. + +By default, the list will contain one item that conforms to the pattern +"<arch>_linux_26", amd64 being the name for x86_64. diff --git a/arch/microblaze/include/asm/pci.h b/arch/microblaze/include/asm/pci.h index 114b93488193..5de871eb4a59 100644 --- a/arch/microblaze/include/asm/pci.h +++ b/arch/microblaze/include/asm/pci.h @@ -47,9 +47,10 @@ extern int pci_proc_domain(struct pci_bus *bus); struct vm_area_struct; -/* Tell drivers/pci/proc.c that we have pci_mmap_page_range() */ -#define HAVE_PCI_MMAP 1 -#define arch_can_pci_mmap_io() 1 +/* Tell PCI code what kind of PCI resource mappings we support */ +#define HAVE_PCI_MMAP 1 +#define ARCH_GENERIC_PCI_MMAP_RESOURCE 1 +#define arch_can_pci_mmap_io() 1 extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t count); diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index e53b8532353c..db8b1fa83452 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -33,6 +33,8 @@ extern int mem_init_done; #define PAGE_KERNEL __pgprot(0) /* these mean nothing to non MMU */ #define pgprot_noncached(x) (x) +#define pgprot_writecombine pgprot_noncached +#define pgprot_device pgprot_noncached #define __swp_type(x) (0) #define __swp_offset(x) (0) diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c index ae79e8638d50..161f9758c631 100644 --- a/arch/microblaze/pci/pci-common.c +++ b/arch/microblaze/pci/pci-common.c @@ -151,72 +151,22 @@ void pcibios_set_master(struct pci_dev *dev) } /* - * Platform support for /proc/bus/pci/X/Y mmap()s, - * modelled on the sparc64 implementation by Dave Miller. - * -- paulus. + * Platform support for /proc/bus/pci/X/Y mmap()s. */ -/* - * Adjust vm_pgoff of VMA such that it is the physical page offset - * corresponding to the 32-bit pci bus offset for DEV requested by the user. - * - * Basically, the user finds the base address for his device which he wishes - * to mmap. They read the 32-bit value from the config space base register, - * add whatever PAGE_SIZE multiple offset they wish, and feed this into the - * offset parameter of mmap on /proc/bus/pci/XXX for that device. - * - * Returns negative error code on failure, zero on success. - */ -static struct resource *__pci_mmap_make_offset(struct pci_dev *dev, - resource_size_t *offset, - enum pci_mmap_state mmap_state) +int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma) { - struct pci_controller *hose = pci_bus_to_host(dev->bus); - unsigned long io_offset = 0; - int i, res_bit; + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + resource_size_t ioaddr = pci_resource_start(pdev, bar); if (!hose) - return NULL; /* should never happen */ - - /* If memory, add on the PCI bridge address offset */ - if (mmap_state == pci_mmap_mem) { -#if 0 /* See comment in pci_resource_to_user() for why this is disabled */ - *offset += hose->pci_mem_offset; -#endif - res_bit = IORESOURCE_MEM; - } else { - io_offset = (unsigned long)hose->io_base_virt - _IO_BASE; - *offset += io_offset; - res_bit = IORESOURCE_IO; - } - - /* - * Check that the offset requested corresponds to one of the - * resources of the device. - */ - for (i = 0; i <= PCI_ROM_RESOURCE; i++) { - struct resource *rp = &dev->resource[i]; - int flags = rp->flags; + return -EINVAL; /* should never happen */ - /* treat ROM as memory (should be already) */ - if (i == PCI_ROM_RESOURCE) - flags |= IORESOURCE_MEM; - - /* Active and same type? */ - if ((flags & res_bit) == 0) - continue; - - /* In the range of this resource? */ - if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end) - continue; - - /* found it! construct the final physical address */ - if (mmap_state == pci_mmap_io) - *offset += hose->io_base_phys - io_offset; - return rp; - } + /* Convert to an offset within this PCI controller */ + ioaddr -= (unsigned long)hose->io_base_virt - _IO_BASE; - return NULL; + vma->vm_pgoff += (ioaddr + hose->io_base_phys) >> PAGE_SHIFT; + return 0; } /* @@ -268,37 +218,6 @@ pgprot_t pci_phys_mem_access_prot(struct file *file, return prot; } -/* - * Perform the actual remap of the pages for a PCI device mapping, as - * appropriate for this architecture. The region in the process to map - * is described by vm_start and vm_end members of VMA, the base physical - * address is found in vm_pgoff. - * The pci device structure is provided so that architectures may make mapping - * decisions on a per-device or per-bus basis. - * - * Returns a negative error code on failure, zero on success. - */ -int pci_mmap_page_range(struct pci_dev *dev, int bar, struct vm_area_struct *vma, - enum pci_mmap_state mmap_state, int write_combine) -{ - resource_size_t offset = - ((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT; - struct resource *rp; - int ret; - - rp = __pci_mmap_make_offset(dev, &offset, mmap_state); - if (rp == NULL) - return -EINVAL; - - vma->vm_pgoff = offset >> PAGE_SHIFT; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - vma->vm_end - vma->vm_start, vma->vm_page_prot); - - return ret; -} - /* This provides legacy IO read access on a bus */ int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size) { diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 3c2c2530737e..c36d23aa6c35 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1259,10 +1259,6 @@ asmlinkage __visible void __init xen_start_kernel(void) */ __userpte_alloc_gfp &= ~__GFP_HIGHMEM; - /* Work out if we support NX */ - get_cpu_cap(&boot_cpu_data); - x86_configure_nx(); - /* Get mfn list */ xen_build_dynamic_phys_to_machine(); @@ -1272,6 +1268,10 @@ asmlinkage __visible void __init xen_start_kernel(void) */ xen_setup_gdt(0); + /* Work out if we support NX */ + get_cpu_cap(&boot_cpu_data); + x86_configure_nx(); + xen_init_irq_ops(); /* Let's presume PV guests always boot on vCPU with id 0. */ diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 96f26e026783..5077ead5e59c 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -89,7 +89,9 @@ END(hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .ascii "!writable_page_tables|pae_pgdir_above_4gb") ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES, - .long (1 << XENFEAT_writable_page_tables) | (1 << XENFEAT_dom0)) + .long (1 << XENFEAT_writable_page_tables) | \ + (1 << XENFEAT_dom0) | \ + (1 << XENFEAT_linux_rsdp_unrestricted)) ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index c7cf48ad5cb9..a651ab3490d8 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -533,7 +533,7 @@ int acpi_processor_notify_smm(struct module *calling_module) EXPORT_SYMBOL(acpi_processor_notify_smm); -static int acpi_processor_get_psd(struct acpi_processor *pr) +int acpi_processor_get_psd(acpi_handle handle, struct acpi_psd_package *pdomain) { int result = 0; acpi_status status = AE_OK; @@ -541,9 +541,8 @@ static int acpi_processor_get_psd(struct acpi_processor *pr) struct acpi_buffer format = {sizeof("NNNNN"), "NNNNN"}; struct acpi_buffer state = {0, NULL}; union acpi_object *psd = NULL; - struct acpi_psd_package *pdomain; - status = acpi_evaluate_object(pr->handle, "_PSD", NULL, &buffer); + status = acpi_evaluate_object(handle, "_PSD", NULL, &buffer); if (ACPI_FAILURE(status)) { return -ENODEV; } @@ -561,8 +560,6 @@ static int acpi_processor_get_psd(struct acpi_processor *pr) goto end; } - pdomain = &(pr->performance->domain_info); - state.length = sizeof(struct acpi_psd_package); state.pointer = pdomain; @@ -597,6 +594,7 @@ end: kfree(buffer.pointer); return result; } +EXPORT_SYMBOL(acpi_processor_get_psd); int acpi_processor_preregister_performance( struct acpi_processor_performance __percpu *performance) @@ -645,7 +643,8 @@ int acpi_processor_preregister_performance( pr->performance = per_cpu_ptr(performance, i); cpumask_set_cpu(i, pr->performance->shared_cpu_map); - if (acpi_processor_get_psd(pr)) { + pdomain = &(pr->performance->domain_info); + if (acpi_processor_get_psd(pr->handle, pdomain)) { retval = -EINVAL; continue; } diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index 24108bfad889..6193270e7b3d 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -400,10 +400,14 @@ static void skip_back_repeat_test(char *arg) int go_back = simple_strtol(arg, NULL, 10); repeat_test--; - if (repeat_test <= 0) + if (repeat_test <= 0) { ts.idx++; - else + } else { + if (repeat_test % 100 == 0) + v1printk("kgdbts:RUN ... %d remaining\n", repeat_test); + ts.idx -= go_back; + } fill_get_buf(ts.tst[ts.idx].get); } diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 02485e310c81..9e923cd1d80e 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -3080,6 +3080,7 @@ static void __exit mmc_blk_exit(void) mmc_unregister_driver(&mmc_driver); unregister_blkdev(MMC_BLOCK_MAJOR, "mmc"); unregister_chrdev_region(mmc_rpmb_devt, MAX_DEVICES); + bus_unregister(&mmc_rpmb_bus_type); } module_init(mmc_blk_init); diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c index 712e08d9a45e..a0168e9e4fce 100644 --- a/drivers/mmc/host/jz4740_mmc.c +++ b/drivers/mmc/host/jz4740_mmc.c @@ -362,9 +362,9 @@ static void jz4740_mmc_set_irq_enabled(struct jz4740_mmc_host *host, host->irq_mask &= ~irq; else host->irq_mask |= irq; - spin_unlock_irqrestore(&host->lock, flags); writew(host->irq_mask, host->base + JZ_REG_MMC_IMASK); + spin_unlock_irqrestore(&host->lock, flags); } static void jz4740_mmc_clock_enable(struct jz4740_mmc_host *host, diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index e30df9ad8197..308029930304 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -913,7 +913,7 @@ static void tmio_mmc_finish_request(struct tmio_mmc_host *host) host->check_scc_error(host); /* If SET_BLOCK_COUNT, continue with main command */ - if (host->mrq) { + if (host->mrq && !mrq->cmd->error) { tmio_process_mrq(host, mrq); return; } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index c96a92118b8b..32f6d2e24d66 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -951,9 +951,11 @@ void aq_nic_shutdown(struct aq_nic_s *self) netif_device_detach(self->ndev); - err = aq_nic_stop(self); - if (err < 0) - goto err_exit; + if (netif_running(self->ndev)) { + err = aq_nic_stop(self); + if (err < 0) + goto err_exit; + } aq_nic_deinit(self); err_exit: diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index 84d7f4dd4ce1..e652d86b87d4 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -48,6 +48,8 @@ #define FORCE_FLASHLESS 0 static int hw_atl_utils_ver_match(u32 ver_expected, u32 ver_actual); +static int hw_atl_utils_mpi_set_state(struct aq_hw_s *self, + enum hal_atl_utils_fw_state_e state); int hw_atl_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops) { @@ -247,6 +249,20 @@ int hw_atl_utils_soft_reset(struct aq_hw_s *self) self->rbl_enabled = (boot_exit_code != 0); + /* FW 1.x may bootup in an invalid POWER state (WOL feature). + * We should work around this by forcing its state back to DEINIT + */ + if (!hw_atl_utils_ver_match(HW_ATL_FW_VER_1X, + aq_hw_read_reg(self, + HW_ATL_MPI_FW_VERSION))) { + int err = 0; + + hw_atl_utils_mpi_set_state(self, MPI_DEINIT); + AQ_HW_WAIT_FOR((aq_hw_read_reg(self, HW_ATL_MPI_STATE_ADR) & + HW_ATL_MPI_STATE_MSK) == MPI_DEINIT, + 10, 1000U); + } + if (self->rbl_enabled) return hw_atl_utils_soft_reset_rbl(self); else diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1991f0c7bc0e..f83769d8047b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6090,7 +6090,7 @@ static void bnxt_free_irq(struct bnxt *bp) free_irq_cpu_rmap(bp->dev->rx_cpu_rmap); bp->dev->rx_cpu_rmap = NULL; #endif - if (!bp->irq_tbl) + if (!bp->irq_tbl || !bp->bnapi) return; for (i = 0; i < bp->cp_nr_rings; i++) { @@ -7686,6 +7686,8 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, if (bp->flags & BNXT_FLAG_AGG_RINGS) rx_rings <<= 1; cp = sh ? max_t(int, tx_rings_needed, rx) : tx_rings_needed + rx; + if (bp->flags & BNXT_FLAG_NEW_RM) + cp += bnxt_get_ulp_msix_num(bp); return bnxt_hwrm_check_rings(bp, tx_rings_needed, rx_rings, rx, cp, vnics); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 8d8ccd67e0e2..1f622ca2a64f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -870,17 +870,22 @@ static int bnxt_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, u8 *hfunc) { struct bnxt *bp = netdev_priv(dev); - struct bnxt_vnic_info *vnic = &bp->vnic_info[0]; + struct bnxt_vnic_info *vnic; int i = 0; if (hfunc) *hfunc = ETH_RSS_HASH_TOP; - if (indir) + if (!bp->vnic_info) + return 0; + + vnic = &bp->vnic_info[0]; + if (indir && vnic->rss_table) { for (i = 0; i < HW_HASH_INDEX_SIZE; i++) indir[i] = le16_to_cpu(vnic->rss_table[i]); + } - if (key) + if (key && vnic->rss_hash_key) memcpy(key, vnic->rss_hash_key, HW_HASH_KEY_SIZE); return 0; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 65c2cee35766..795f45024c20 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -377,6 +377,30 @@ static bool is_wildcard(void *mask, int len) return true; } +static bool is_exactmatch(void *mask, int len) +{ + const u8 *p = mask; + int i; + + for (i = 0; i < len; i++) + if (p[i] != 0xff) + return false; + + return true; +} + +static bool bits_set(void *key, int len) +{ + const u8 *p = key; + int i; + + for (i = 0; i < len; i++) + if (p[i] != 0) + return true; + + return false; +} + static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow, __le16 ref_flow_handle, __le32 tunnel_handle, __le16 *flow_handle) @@ -764,6 +788,41 @@ static bool bnxt_tc_can_offload(struct bnxt *bp, struct bnxt_tc_flow *flow) return false; } + /* Currently source/dest MAC cannot be partial wildcard */ + if (bits_set(&flow->l2_key.smac, sizeof(flow->l2_key.smac)) && + !is_exactmatch(flow->l2_mask.smac, sizeof(flow->l2_mask.smac))) { + netdev_info(bp->dev, "Wildcard match unsupported for Source MAC\n"); + return false; + } + if (bits_set(&flow->l2_key.dmac, sizeof(flow->l2_key.dmac)) && + !is_exactmatch(&flow->l2_mask.dmac, sizeof(flow->l2_mask.dmac))) { + netdev_info(bp->dev, "Wildcard match unsupported for Dest MAC\n"); + return false; + } + + /* Currently VLAN fields cannot be partial wildcard */ + if (bits_set(&flow->l2_key.inner_vlan_tci, + sizeof(flow->l2_key.inner_vlan_tci)) && + !is_exactmatch(&flow->l2_mask.inner_vlan_tci, + sizeof(flow->l2_mask.inner_vlan_tci))) { + netdev_info(bp->dev, "Wildcard match unsupported for VLAN TCI\n"); + return false; + } + if (bits_set(&flow->l2_key.inner_vlan_tpid, + sizeof(flow->l2_key.inner_vlan_tpid)) && + !is_exactmatch(&flow->l2_mask.inner_vlan_tpid, + sizeof(flow->l2_mask.inner_vlan_tpid))) { + netdev_info(bp->dev, "Wildcard match unsupported for VLAN TPID\n"); + return false; + } + + /* Currently Ethertype must be set */ + if (!is_exactmatch(&flow->l2_mask.ether_type, + sizeof(flow->l2_mask.ether_type))) { + netdev_info(bp->dev, "Wildcard match unsupported for Ethertype\n"); + return false; + } + return true; } @@ -992,8 +1051,10 @@ static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow, /* Check if there's another flow using the same tunnel decap. * If not, add this tunnel to the table and resolve the other - * tunnel header fileds + * tunnel header fileds. Ignore src_port in the tunnel_key, + * since it is not required for decap filters. */ + decap_key->tp_src = 0; decap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->decap_table, &tc_info->decap_ht_params, decap_key); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c index 26290403f38f..38f635cf8408 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c @@ -64,6 +64,31 @@ static int hwrm_cfa_vfr_free(struct bnxt *bp, u16 vf_idx) return rc; } +static int bnxt_hwrm_vfr_qcfg(struct bnxt *bp, struct bnxt_vf_rep *vf_rep, + u16 *max_mtu) +{ + struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr; + struct hwrm_func_qcfg_input req = {0}; + u16 mtu; + int rc; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCFG, -1, -1); + req.fid = cpu_to_le16(bp->pf.vf[vf_rep->vf_idx].fw_fid); + + mutex_lock(&bp->hwrm_cmd_lock); + + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (!rc) { + mtu = le16_to_cpu(resp->max_mtu_configured); + if (!mtu) + *max_mtu = BNXT_MAX_MTU; + else + *max_mtu = mtu; + } + mutex_unlock(&bp->hwrm_cmd_lock); + return rc; +} + static int bnxt_vf_rep_open(struct net_device *dev) { struct bnxt_vf_rep *vf_rep = netdev_priv(dev); @@ -365,6 +390,7 @@ static void bnxt_vf_rep_netdev_init(struct bnxt *bp, struct bnxt_vf_rep *vf_rep, struct net_device *dev) { struct net_device *pf_dev = bp->dev; + u16 max_mtu; dev->netdev_ops = &bnxt_vf_rep_netdev_ops; dev->ethtool_ops = &bnxt_vf_rep_ethtool_ops; @@ -380,6 +406,10 @@ static void bnxt_vf_rep_netdev_init(struct bnxt *bp, struct bnxt_vf_rep *vf_rep, bnxt_vf_rep_eth_addr_gen(bp->pf.mac_addr, vf_rep->vf_idx, dev->perm_addr); ether_addr_copy(dev->dev_addr, dev->perm_addr); + /* Set VF-Rep's max-mtu to the corresponding VF's max-mtu */ + if (!bnxt_hwrm_vfr_qcfg(bp, vf_rep, &max_mtu)) + dev->max_mtu = max_mtu; + dev->min_mtu = ETH_ZLEN; } static int bnxt_pcie_dsn_get(struct bnxt *bp, u8 dsn[]) diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c index 5782733959f0..f4e93f5fc204 100644 --- a/drivers/net/slip/slhc.c +++ b/drivers/net/slip/slhc.c @@ -509,6 +509,10 @@ slhc_uncompress(struct slcompress *comp, unsigned char *icp, int isize) if(x < 0 || x > comp->rslot_limit) goto bad; + /* Check if the cstate is initialized */ + if (!comp->rstate[x].initialized) + goto bad; + comp->flags &=~ SLF_TOSS; comp->recv_current = x; } else { @@ -673,6 +677,7 @@ slhc_remember(struct slcompress *comp, unsigned char *icp, int isize) if (cs->cs_tcp.doff > 5) memcpy(cs->cs_tcpopt, icp + ihl*4 + sizeof(struct tcphdr), (cs->cs_tcp.doff - 5) * 4); cs->cs_hsize = ihl*2 + cs->cs_tcp.doff*2; + cs->initialized = true; /* Put headers back on packet * Neither header checksum is recalculated */ diff --git a/drivers/net/tun.c b/drivers/net/tun.c index a1ba262f40ad..28583aa0c17d 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -743,8 +743,15 @@ static void __tun_detach(struct tun_file *tfile, bool clean) static void tun_detach(struct tun_file *tfile, bool clean) { + struct tun_struct *tun; + struct net_device *dev; + rtnl_lock(); + tun = rtnl_dereference(tfile->tun); + dev = tun ? tun->dev : NULL; __tun_detach(tfile, clean); + if (dev) + netdev_state_change(dev); rtnl_unlock(); } @@ -2562,10 +2569,15 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) /* One or more queue has already been attached, no need * to initialize the device again. */ + netdev_state_change(dev); return 0; } - } - else { + + tun->flags = (tun->flags & ~TUN_FEATURES) | + (ifr->ifr_flags & TUN_FEATURES); + + netdev_state_change(dev); + } else { char *name; unsigned long flags = 0; int queues = ifr->ifr_flags & IFF_MULTI_QUEUE ? @@ -2642,6 +2654,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); + tun->flags = (tun->flags & ~TUN_FEATURES) | + (ifr->ifr_flags & TUN_FEATURES); + INIT_LIST_HEAD(&tun->disabled); err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI); if (err < 0) @@ -2656,9 +2671,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) tun_debug(KERN_INFO, tun, "tun_set_iff\n"); - tun->flags = (tun->flags & ~TUN_FEATURES) | - (ifr->ifr_flags & TUN_FEATURES); - /* Make sure persistent devices do not get stuck in * xoff state. */ @@ -2805,6 +2817,9 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr) } else ret = -EINVAL; + if (ret >= 0) + netdev_state_change(tun->dev); + unlock: rtnl_unlock(); return ret; @@ -2845,6 +2860,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, unsigned int ifindex; int le; int ret; + bool do_notify = false; if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || (_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) { @@ -2941,10 +2957,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, if (arg && !(tun->flags & IFF_PERSIST)) { tun->flags |= IFF_PERSIST; __module_get(THIS_MODULE); + do_notify = true; } if (!arg && (tun->flags & IFF_PERSIST)) { tun->flags &= ~IFF_PERSIST; module_put(THIS_MODULE); + do_notify = true; } tun_debug(KERN_INFO, tun, "persist %s\n", @@ -2959,6 +2977,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, break; } tun->owner = owner; + do_notify = true; tun_debug(KERN_INFO, tun, "owner set to %u\n", from_kuid(&init_user_ns, tun->owner)); break; @@ -2971,6 +2990,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, break; } tun->group = group; + do_notify = true; tun_debug(KERN_INFO, tun, "group set to %u\n", from_kgid(&init_user_ns, tun->group)); break; @@ -3130,6 +3150,9 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, break; } + if (do_notify) + netdev_state_change(tun->dev); + unlock: rtnl_unlock(); if (tun) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index fff4b13eece2..5c42cf81a08b 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -902,6 +902,12 @@ static const struct usb_device_id products[] = { USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { + /* Cinterion AHS3 modem by GEMALTO */ + USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x0055, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&wwan_info, +}, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long) &cdc_info, diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index aff105f5f58c..0867f7275852 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -928,7 +928,8 @@ static int lan78xx_read_otp(struct lan78xx_net *dev, u32 offset, offset += 0x100; else ret = -EINVAL; - ret = lan78xx_read_raw_otp(dev, offset, length, data); + if (!ret) + ret = lan78xx_read_raw_otp(dev, offset, length, data); } return ret; @@ -2502,7 +2503,7 @@ static void lan78xx_init_stats(struct lan78xx_net *dev) dev->stats.rollover_max.eee_tx_lpi_transitions = 0xFFFFFFFF; dev->stats.rollover_max.eee_tx_lpi_time = 0xFFFFFFFF; - lan78xx_defer_kevent(dev, EVENT_STAT_UPDATE); + set_bit(EVENT_STAT_UPDATE, &dev->flags); } static int lan78xx_open(struct net_device *net) @@ -2514,10 +2515,6 @@ static int lan78xx_open(struct net_device *net) if (ret < 0) goto out; - ret = lan78xx_reset(dev); - if (ret < 0) - goto done; - phy_start(net->phydev); netif_dbg(dev, ifup, dev->net, "phy initialised successfully"); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index bec722e41f58..f3bd8e941224 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -641,14 +641,14 @@ void vhost_dev_cleanup(struct vhost_dev *dev) } EXPORT_SYMBOL_GPL(vhost_dev_cleanup); -static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz) +static bool log_access_ok(void __user *log_base, u64 addr, unsigned long sz) { u64 a = addr / VHOST_PAGE_SIZE / 8; /* Make sure 64 bit math will not overflow. */ if (a > ULONG_MAX - (unsigned long)log_base || a + (unsigned long)log_base > ULONG_MAX) - return 0; + return false; return access_ok(VERIFY_WRITE, log_base + a, (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8); @@ -661,30 +661,30 @@ static bool vhost_overflow(u64 uaddr, u64 size) } /* Caller should have vq mutex and device mutex. */ -static int vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem, - int log_all) +static bool vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem, + int log_all) { struct vhost_umem_node *node; if (!umem) - return 0; + return false; list_for_each_entry(node, &umem->umem_list, link) { unsigned long a = node->userspace_addr; if (vhost_overflow(node->userspace_addr, node->size)) - return 0; + return false; if (!access_ok(VERIFY_WRITE, (void __user *)a, node->size)) - return 0; + return false; else if (log_all && !log_access_ok(log_base, node->start, node->size)) - return 0; + return false; } - return 1; + return true; } static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq, @@ -701,13 +701,13 @@ static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq, /* Can we switch to this memory table? */ /* Caller should have device mutex but not vq mutex */ -static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem, - int log_all) +static bool memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem, + int log_all) { int i; for (i = 0; i < d->nvqs; ++i) { - int ok; + bool ok; bool log; mutex_lock(&d->vqs[i]->mutex); @@ -717,12 +717,12 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem, ok = vq_memory_access_ok(d->vqs[i]->log_base, umem, log); else - ok = 1; + ok = true; mutex_unlock(&d->vqs[i]->mutex); if (!ok) - return 0; + return false; } - return 1; + return true; } static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, @@ -744,7 +744,7 @@ static int vhost_copy_to_user(struct vhost_virtqueue *vq, void __user *to, struct iov_iter t; void __user *uaddr = vhost_vq_meta_fetch(vq, (u64)(uintptr_t)to, size, - VHOST_ADDR_DESC); + VHOST_ADDR_USED); if (uaddr) return __copy_to_user(uaddr, from, size); @@ -959,21 +959,21 @@ static void vhost_iotlb_notify_vq(struct vhost_dev *d, spin_unlock(&d->iotlb_lock); } -static int umem_access_ok(u64 uaddr, u64 size, int access) +static bool umem_access_ok(u64 uaddr, u64 size, int access) { unsigned long a = uaddr; /* Make sure 64 bit math will not overflow. */ if (vhost_overflow(uaddr, size)) - return -EFAULT; + return false; if ((access & VHOST_ACCESS_RO) && !access_ok(VERIFY_READ, (void __user *)a, size)) - return -EFAULT; + return false; if ((access & VHOST_ACCESS_WO) && !access_ok(VERIFY_WRITE, (void __user *)a, size)) - return -EFAULT; - return 0; + return false; + return true; } static int vhost_process_iotlb_msg(struct vhost_dev *dev, @@ -988,7 +988,7 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev, ret = -EFAULT; break; } - if (umem_access_ok(msg->uaddr, msg->size, msg->perm)) { + if (!umem_access_ok(msg->uaddr, msg->size, msg->perm)) { ret = -EFAULT; break; } @@ -1135,10 +1135,10 @@ static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access) return 0; } -static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, - struct vring_desc __user *desc, - struct vring_avail __user *avail, - struct vring_used __user *used) +static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, + struct vring_desc __user *desc, + struct vring_avail __user *avail, + struct vring_used __user *used) { size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; @@ -1161,8 +1161,8 @@ static void vhost_vq_meta_update(struct vhost_virtqueue *vq, vq->meta_iotlb[type] = node; } -static int iotlb_access_ok(struct vhost_virtqueue *vq, - int access, u64 addr, u64 len, int type) +static bool iotlb_access_ok(struct vhost_virtqueue *vq, + int access, u64 addr, u64 len, int type) { const struct vhost_umem_node *node; struct vhost_umem *umem = vq->iotlb; @@ -1220,7 +1220,7 @@ EXPORT_SYMBOL_GPL(vq_iotlb_prefetch); /* Can we log writes? */ /* Caller should have device mutex but not vq mutex */ -int vhost_log_access_ok(struct vhost_dev *dev) +bool vhost_log_access_ok(struct vhost_dev *dev) { return memory_access_ok(dev, dev->umem, 1); } @@ -1228,8 +1228,8 @@ EXPORT_SYMBOL_GPL(vhost_log_access_ok); /* Verify access for write logging. */ /* Caller should have vq mutex and device mutex */ -static int vq_log_access_ok(struct vhost_virtqueue *vq, - void __user *log_base) +static bool vq_log_access_ok(struct vhost_virtqueue *vq, + void __user *log_base) { size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; @@ -1242,12 +1242,14 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq, /* Can we start vq? */ /* Caller should have vq mutex and device mutex */ -int vhost_vq_access_ok(struct vhost_virtqueue *vq) +bool vhost_vq_access_ok(struct vhost_virtqueue *vq) { - int ret = vq_log_access_ok(vq, vq->log_base); + if (!vq_log_access_ok(vq, vq->log_base)) + return false; - if (ret || vq->iotlb) - return ret; + /* Access validation occurs at prefetch time with IOTLB */ + if (vq->iotlb) + return true; return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used); } diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index d8ee85ae8fdc..6c844b90a168 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -178,8 +178,8 @@ void vhost_dev_cleanup(struct vhost_dev *); void vhost_dev_stop(struct vhost_dev *); long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp); long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp); -int vhost_vq_access_ok(struct vhost_virtqueue *vq); -int vhost_log_access_ok(struct vhost_dev *); +bool vhost_vq_access_ok(struct vhost_virtqueue *vq); +bool vhost_log_access_ok(struct vhost_dev *); int vhost_get_vq_desc(struct vhost_virtqueue *, struct iovec iov[], unsigned int iov_count, diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index 23e391d3ec01..b29f4e40851f 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -53,6 +53,8 @@ static unsigned long *acpi_ids_done; static unsigned long *acpi_id_present; /* And if there is an _CST definition (or a PBLK) for the ACPI IDs */ static unsigned long *acpi_id_cst_present; +/* Which ACPI P-State dependencies for a enumerated processor */ +static struct acpi_psd_package *acpi_psd; static int push_cxx_to_hypervisor(struct acpi_processor *_pr) { @@ -362,9 +364,9 @@ read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv) } /* There are more ACPI Processor objects than in x2APIC or MADT. * This can happen with incorrect ACPI SSDT declerations. */ - if (acpi_id > nr_acpi_bits) { - pr_debug("We only have %u, trying to set %u\n", - nr_acpi_bits, acpi_id); + if (acpi_id >= nr_acpi_bits) { + pr_debug("max acpi id %u, trying to set %u\n", + nr_acpi_bits - 1, acpi_id); return AE_OK; } /* OK, There is a ACPI Processor object */ @@ -372,6 +374,13 @@ read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv) pr_debug("ACPI CPU%u w/ PBLK:0x%lx\n", acpi_id, (unsigned long)pblk); + /* It has P-state dependencies */ + if (!acpi_processor_get_psd(handle, &acpi_psd[acpi_id])) { + pr_debug("ACPI CPU%u w/ PST:coord_type = %llu domain = %llu\n", + acpi_id, acpi_psd[acpi_id].coord_type, + acpi_psd[acpi_id].domain); + } + status = acpi_evaluate_object(handle, "_CST", NULL, &buffer); if (ACPI_FAILURE(status)) { if (!pblk) @@ -405,6 +414,14 @@ static int check_acpi_ids(struct acpi_processor *pr_backup) return -ENOMEM; } + acpi_psd = kcalloc(nr_acpi_bits, sizeof(struct acpi_psd_package), + GFP_KERNEL); + if (!acpi_psd) { + kfree(acpi_id_present); + kfree(acpi_id_cst_present); + return -ENOMEM; + } + acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, read_acpi_id, NULL, NULL, NULL); @@ -417,6 +434,12 @@ upload: pr_backup->acpi_id = i; /* Mask out C-states if there are no _CST or PBLK */ pr_backup->flags.power = test_bit(i, acpi_id_cst_present); + /* num_entries is non-zero if we evaluated _PSD */ + if (acpi_psd[i].num_entries) { + memcpy(&pr_backup->performance->domain_info, + &acpi_psd[i], + sizeof(struct acpi_psd_package)); + } (void)upload_pm_data(pr_backup); } } @@ -566,6 +589,7 @@ static void __exit xen_acpi_processor_exit(void) kfree(acpi_ids_done); kfree(acpi_id_present); kfree(acpi_id_cst_present); + kfree(acpi_psd); for_each_possible_cpu(i) acpi_processor_unregister_performance(i); diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index a493e99bed21..0d6d9264d6a9 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -365,7 +365,7 @@ void xenbus_dev_queue_reply(struct xb_req_data *req) if (WARN_ON(rc)) goto out; } - } else if (req->msg.type == XS_TRANSACTION_END) { + } else if (req->type == XS_TRANSACTION_END) { trans = xenbus_get_transaction(u, req->msg.tx_id); if (WARN_ON(!trans)) goto out; @@ -429,6 +429,10 @@ static int xenbus_write_transaction(unsigned msg_type, { int rc; struct xenbus_transaction_holder *trans = NULL; + struct { + struct xsd_sockmsg hdr; + char body[]; + } *msg = (void *)u->u.buffer; if (msg_type == XS_TRANSACTION_START) { trans = kzalloc(sizeof(*trans), GFP_KERNEL); @@ -437,11 +441,15 @@ static int xenbus_write_transaction(unsigned msg_type, goto out; } list_add(&trans->list, &u->transactions); - } else if (u->u.msg.tx_id != 0 && - !xenbus_get_transaction(u, u->u.msg.tx_id)) + } else if (msg->hdr.tx_id != 0 && + !xenbus_get_transaction(u, msg->hdr.tx_id)) return xenbus_command_reply(u, XS_ERROR, "ENOENT"); + else if (msg_type == XS_TRANSACTION_END && + !(msg->hdr.len == 2 && + (!strcmp(msg->body, "T") || !strcmp(msg->body, "F")))) + return xenbus_command_reply(u, XS_ERROR, "EINVAL"); - rc = xenbus_dev_request_and_reply(&u->u.msg, u); + rc = xenbus_dev_request_and_reply(&msg->hdr, u); if (rc && trans) { list_del(&trans->list); kfree(trans); diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 3f3b29398ab8..49a3874ae6bb 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -140,7 +140,9 @@ void xs_request_exit(struct xb_req_data *req) spin_lock(&xs_state_lock); xs_state_users--; if ((req->type == XS_TRANSACTION_START && req->msg.type == XS_ERROR) || - req->type == XS_TRANSACTION_END) + (req->type == XS_TRANSACTION_END && + !WARN_ON_ONCE(req->msg.type == XS_ERROR && + !strcmp(req->body, "ENOENT")))) xs_state_users--; spin_unlock(&xs_state_lock); diff --git a/fs/afs/Makefile b/fs/afs/Makefile index 45b7fc405fa6..532acae25453 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile @@ -12,6 +12,8 @@ kafs-objs := \ cell.o \ cmservice.o \ dir.o \ + dir_edit.o \ + dynroot.o \ file.o \ flock.o \ fsclient.o \ diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index fd9f28b8a933..3bedfed608a2 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -243,9 +243,9 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port) xport == a->sin6_port) return; if (xdr == a->sin6_addr.s6_addr32[3] && - xport < a->sin6_port) + (u16 __force)xport < (u16 __force)a->sin6_port) break; - if (xdr < a->sin6_addr.s6_addr32[3]) + if ((u32 __force)xdr < (u32 __force)a->sin6_addr.s6_addr32[3]) break; } @@ -280,7 +280,7 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port) xport == a->sin6_port) return; if (diff == 0 && - xport < a->sin6_port) + (u16 __force)xport < (u16 __force)a->sin6_port) break; if (diff < 0) break; diff --git a/fs/afs/afs.h b/fs/afs/afs.h index b94d0edc2b78..b4ff1f7ae4ab 100644 --- a/fs/afs/afs.h +++ b/fs/afs/afs.h @@ -67,10 +67,14 @@ typedef enum { } afs_callback_type_t; struct afs_callback { - struct afs_fid fid; /* file identifier */ - unsigned version; /* callback version */ - unsigned expiry; /* time at which expires */ - afs_callback_type_t type; /* type of callback */ + unsigned version; /* Callback version */ + unsigned expiry; /* Time at which expires */ + afs_callback_type_t type; /* Type of callback */ +}; + +struct afs_callback_break { + struct afs_fid fid; /* File identifier */ + struct afs_callback cb; /* Callback details */ }; #define AFSCBMAX 50 /* maximum callbacks transferred per bulk op */ @@ -123,21 +127,20 @@ typedef u32 afs_access_t; * AFS file status information */ struct afs_file_status { - unsigned if_version; /* interface version */ -#define AFS_FSTATUS_VERSION 1 + u64 size; /* file size */ + afs_dataversion_t data_version; /* current data version */ + time_t mtime_client; /* last time client changed data */ + time_t mtime_server; /* last time server changed data */ + unsigned abort_code; /* Abort if bulk-fetching this failed */ afs_file_type_t type; /* file type */ unsigned nlink; /* link count */ - u64 size; /* file size */ - afs_dataversion_t data_version; /* current data version */ u32 author; /* author ID */ - kuid_t owner; /* owner ID */ - kgid_t group; /* group ID */ + u32 owner; /* owner ID */ + u32 group; /* group ID */ afs_access_t caller_access; /* access rights for authenticated caller */ afs_access_t anon_access; /* access rights for unauthenticated caller */ umode_t mode; /* UNIX mode */ - time_t mtime_client; /* last time client changed data */ - time_t mtime_server; /* last time server changed data */ s32 lock_count; /* file lock count (0=UNLK -1=WRLCK +ve=#RDLCK */ }; diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h index d47b6d01e4c0..ddfa88a7a9c0 100644 --- a/fs/afs/afs_fs.h +++ b/fs/afs/afs_fs.h @@ -31,10 +31,12 @@ enum AFS_FS_Operations { FSGETVOLUMEINFO = 148, /* AFS Get information about a volume */ FSGETVOLUMESTATUS = 149, /* AFS Get volume status information */ FSGETROOTVOLUME = 151, /* AFS Get root volume name */ + FSBULKSTATUS = 155, /* AFS Fetch multiple file statuses */ FSSETLOCK = 156, /* AFS Request a file lock */ FSEXTENDLOCK = 157, /* AFS Extend a file lock */ FSRELEASELOCK = 158, /* AFS Release a file lock */ FSLOOKUP = 161, /* AFS lookup file in directory */ + FSINLINEBULKSTATUS = 65536, /* AFS Fetch multiple file statuses with inline errors */ FSFETCHDATA64 = 65537, /* AFS Fetch file data */ FSSTOREDATA64 = 65538, /* AFS Store file data */ FSGIVEUPALLCALLBACKS = 65539, /* AFS Give up all outstanding callbacks on a server */ diff --git a/fs/afs/callback.c b/fs/afs/callback.c index f4291b576054..abd9a84f4e88 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -97,26 +97,6 @@ again: } /* - * Set a vnode's interest on a server. - */ -void afs_set_cb_interest(struct afs_vnode *vnode, struct afs_cb_interest *cbi) -{ - struct afs_cb_interest *old_cbi = NULL; - - if (vnode->cb_interest == cbi) - return; - - write_seqlock(&vnode->cb_lock); - if (vnode->cb_interest != cbi) { - afs_get_cb_interest(cbi); - old_cbi = vnode->cb_interest; - vnode->cb_interest = cbi; - } - write_sequnlock(&vnode->cb_lock); - afs_put_cb_interest(afs_v2net(vnode), cbi); -} - -/* * Remove an interest on a server. */ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi) @@ -150,6 +130,7 @@ void afs_break_callback(struct afs_vnode *vnode) write_seqlock(&vnode->cb_lock); + clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { vnode->cb_break++; afs_clear_permits(vnode); @@ -207,7 +188,7 @@ static void afs_break_one_callback(struct afs_server *server, * allow the fileserver to break callback promises */ void afs_break_callbacks(struct afs_server *server, size_t count, - struct afs_callback callbacks[]) + struct afs_callback_break *callbacks) { _enter("%p,%zu,", server, count); @@ -219,9 +200,9 @@ void afs_break_callbacks(struct afs_server *server, size_t count, callbacks->fid.vid, callbacks->fid.vnode, callbacks->fid.unique, - callbacks->version, - callbacks->expiry, - callbacks->type + callbacks->cb.version, + callbacks->cb.expiry, + callbacks->cb.type ); afs_break_one_callback(server, &callbacks->fid); } diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 4235a05afc76..fdf4c36cff79 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -18,7 +18,7 @@ #include <keys/rxrpc-type.h> #include "internal.h" -unsigned __read_mostly afs_cell_gc_delay = 10; +static unsigned __read_mostly afs_cell_gc_delay = 10; static void afs_manage_cell(struct work_struct *); @@ -75,7 +75,7 @@ struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net, cell = rcu_dereference_raw(net->ws_cell); if (cell) { afs_get_cell(cell); - continue; + break; } ret = -EDESTADDRREQ; continue; @@ -130,6 +130,8 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, _leave(" = -ENAMETOOLONG"); return ERR_PTR(-ENAMETOOLONG); } + if (namelen == 5 && memcmp(name, "@cell", 5) == 0) + return ERR_PTR(-EINVAL); _enter("%*.*s,%s", namelen, namelen, name, vllist); @@ -334,8 +336,8 @@ int afs_cell_init(struct afs_net *net, const char *rootcell) return PTR_ERR(new_root); } - set_bit(AFS_CELL_FL_NO_GC, &new_root->flags); - afs_get_cell(new_root); + if (!test_and_set_bit(AFS_CELL_FL_NO_GC, &new_root->flags)) + afs_get_cell(new_root); /* install the new cell */ write_seqlock(&net->cells_lock); @@ -411,7 +413,7 @@ static void afs_cell_destroy(struct rcu_head *rcu) ASSERTCMP(atomic_read(&cell->usage), ==, 0); - afs_put_addrlist(cell->vl_addrs); + afs_put_addrlist(rcu_access_pointer(cell->vl_addrs)); key_put(cell->anonymous_key); kfree(cell); diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 41e277f57b20..357de908df3a 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -178,8 +178,8 @@ static void SRXAFSCB_CallBack(struct work_struct *work) */ static int afs_deliver_cb_callback(struct afs_call *call) { + struct afs_callback_break *cb; struct sockaddr_rxrpc srx; - struct afs_callback *cb; struct afs_server *server; __be32 *bp; int ret, loop; @@ -201,7 +201,7 @@ static int afs_deliver_cb_callback(struct afs_call *call) call->count = ntohl(call->tmp); _debug("FID count: %u", call->count); if (call->count > AFSCBMAX) - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); call->buffer = kmalloc(call->count * 3 * 4, GFP_KERNEL); if (!call->buffer) @@ -218,7 +218,7 @@ static int afs_deliver_cb_callback(struct afs_call *call) _debug("unmarshall FID array"); call->request = kcalloc(call->count, - sizeof(struct afs_callback), + sizeof(struct afs_callback_break), GFP_KERNEL); if (!call->request) return -ENOMEM; @@ -229,7 +229,7 @@ static int afs_deliver_cb_callback(struct afs_call *call) cb->fid.vid = ntohl(*bp++); cb->fid.vnode = ntohl(*bp++); cb->fid.unique = ntohl(*bp++); - cb->type = AFSCM_CB_UNTYPED; + cb->cb.type = AFSCM_CB_UNTYPED; } call->offset = 0; @@ -245,7 +245,7 @@ static int afs_deliver_cb_callback(struct afs_call *call) call->count2 = ntohl(call->tmp); _debug("CB count: %u", call->count2); if (call->count2 != call->count && call->count2 != 0) - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); call->offset = 0; call->unmarshall++; @@ -260,9 +260,9 @@ static int afs_deliver_cb_callback(struct afs_call *call) cb = call->request; bp = call->buffer; for (loop = call->count2; loop > 0; loop--, cb++) { - cb->version = ntohl(*bp++); - cb->expiry = ntohl(*bp++); - cb->type = ntohl(*bp++); + cb->cb.version = ntohl(*bp++); + cb->cb.expiry = ntohl(*bp++); + cb->cb.type = ntohl(*bp++); } call->offset = 0; @@ -500,9 +500,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call) b = call->buffer; r = call->request; - r->time_low = ntohl(b[0]); - r->time_mid = ntohl(b[1]); - r->time_hi_and_version = ntohl(b[2]); + r->time_low = b[0]; + r->time_mid = htons(ntohl(b[1])); + r->time_hi_and_version = htons(ntohl(b[2])); r->clock_seq_hi_and_reserved = ntohl(b[3]); r->clock_seq_low = ntohl(b[4]); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index ba2b458b36d1..5889f70d4d27 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1,6 +1,6 @@ /* dir.c: AFS filesystem directory handling * - * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2002, 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -10,27 +10,26 @@ */ #include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> #include <linux/fs.h> #include <linux/namei.h> #include <linux/pagemap.h> +#include <linux/swap.h> #include <linux/ctype.h> #include <linux/sched.h> -#include <linux/dns_resolver.h> +#include <linux/task_io_accounting_ops.h> #include "internal.h" +#include "xdr_fs.h" static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); -static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry, - unsigned int flags); static int afs_dir_open(struct inode *inode, struct file *file); static int afs_readdir(struct file *file, struct dir_context *ctx); static int afs_d_revalidate(struct dentry *dentry, unsigned int flags); static int afs_d_delete(const struct dentry *dentry); -static void afs_d_release(struct dentry *dentry); -static int afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen, +static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen, loff_t fpos, u64 ino, unsigned dtype); +static int afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen, + loff_t fpos, u64 ino, unsigned dtype); static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl); static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); @@ -43,6 +42,14 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); +static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags); +static void afs_dir_invalidatepage(struct page *page, unsigned int offset, + unsigned int length); + +static int afs_dir_set_page_dirty(struct page *page) +{ + BUG(); /* This should never happen. */ +} const struct file_operations afs_dir_file_operations = { .open = afs_dir_open, @@ -67,15 +74,10 @@ const struct inode_operations afs_dir_inode_operations = { .listxattr = afs_listxattr, }; -const struct file_operations afs_dynroot_file_operations = { - .open = dcache_dir_open, - .release = dcache_dir_close, - .iterate_shared = dcache_readdir, - .llseek = dcache_dir_lseek, -}; - -const struct inode_operations afs_dynroot_inode_operations = { - .lookup = afs_dynroot_lookup, +const struct address_space_operations afs_dir_aops = { + .set_page_dirty = afs_dir_set_page_dirty, + .releasepage = afs_dir_releasepage, + .invalidatepage = afs_dir_invalidatepage, }; const struct dentry_operations afs_fs_dentry_operations = { @@ -85,91 +87,38 @@ const struct dentry_operations afs_fs_dentry_operations = { .d_automount = afs_d_automount, }; -#define AFS_DIR_HASHTBL_SIZE 128 -#define AFS_DIR_DIRENT_SIZE 32 -#define AFS_DIRENT_PER_BLOCK 64 - -union afs_dirent { - struct { - uint8_t valid; - uint8_t unused[1]; - __be16 hash_next; - __be32 vnode; - __be32 unique; - uint8_t name[16]; - uint8_t overflow[4]; /* if any char of the name (inc - * NUL) reaches here, consume - * the next dirent too */ - } u; - uint8_t extended_name[32]; -}; - -/* AFS directory page header (one at the beginning of every 2048-byte chunk) */ -struct afs_dir_pagehdr { - __be16 npages; - __be16 magic; -#define AFS_DIR_MAGIC htons(1234) - uint8_t nentries; - uint8_t bitmap[8]; - uint8_t pad[19]; -}; - -/* directory block layout */ -union afs_dir_block { - - struct afs_dir_pagehdr pagehdr; - - struct { - struct afs_dir_pagehdr pagehdr; - uint8_t alloc_ctrs[128]; - /* dir hash table */ - uint16_t hashtable[AFS_DIR_HASHTBL_SIZE]; - } hdr; - - union afs_dirent dirents[AFS_DIRENT_PER_BLOCK]; -}; - -/* layout on a linux VM page */ -struct afs_dir_page { - union afs_dir_block blocks[PAGE_SIZE / sizeof(union afs_dir_block)]; +struct afs_lookup_one_cookie { + struct dir_context ctx; + struct qstr name; + bool found; + struct afs_fid fid; }; struct afs_lookup_cookie { - struct dir_context ctx; - struct afs_fid fid; - struct qstr name; - int found; + struct dir_context ctx; + struct qstr name; + bool found; + bool one_only; + unsigned short nr_fids; + struct afs_file_status *statuses; + struct afs_callback *callbacks; + struct afs_fid fids[50]; }; /* * check that a directory page is valid */ -bool afs_dir_check_page(struct inode *dir, struct page *page) +static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page, + loff_t i_size) { - struct afs_dir_page *dbuf; - struct afs_vnode *vnode = AFS_FS_I(dir); - loff_t latter, i_size, off; + struct afs_xdr_dir_page *dbuf; + loff_t latter, off; int tmp, qty; -#if 0 - /* check the page count */ - qty = desc.size / sizeof(dbuf->blocks[0]); - if (qty == 0) - goto error; - - if (page->index == 0 && qty != ntohs(dbuf->blocks[0].pagehdr.npages)) { - printk("kAFS: %s(%lu): wrong number of dir blocks %d!=%hu\n", - __func__, dir->i_ino, qty, - ntohs(dbuf->blocks[0].pagehdr.npages)); - goto error; - } -#endif - /* Determine how many magic numbers there should be in this page, but * we must take care because the directory may change size under us. */ off = page_offset(page); - i_size = i_size_read(dir); if (i_size <= off) goto checked; @@ -178,112 +127,225 @@ bool afs_dir_check_page(struct inode *dir, struct page *page) qty = PAGE_SIZE; else qty = latter; - qty /= sizeof(union afs_dir_block); + qty /= sizeof(union afs_xdr_dir_block); /* check them */ - dbuf = page_address(page); + dbuf = kmap(page); for (tmp = 0; tmp < qty; tmp++) { - if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) { + if (dbuf->blocks[tmp].hdr.magic != AFS_DIR_MAGIC) { printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n", - __func__, dir->i_ino, tmp, qty, - ntohs(dbuf->blocks[tmp].pagehdr.magic)); - trace_afs_dir_check_failed(vnode, off, i_size); + __func__, dvnode->vfs_inode.i_ino, tmp, qty, + ntohs(dbuf->blocks[tmp].hdr.magic)); + trace_afs_dir_check_failed(dvnode, off, i_size); + kunmap(page); goto error; } + + /* Make sure each block is NUL terminated so we can reasonably + * use string functions on it. The filenames in the page + * *should* be NUL-terminated anyway. + */ + ((u8 *)&dbuf->blocks[tmp])[AFS_DIR_BLOCK_SIZE - 1] = 0; } + kunmap(page); + checked: - SetPageChecked(page); + afs_stat_v(dvnode, n_read_dir); return true; error: - SetPageError(page); return false; } /* - * discard a page cached in the pagecache + * open an AFS directory file */ -static inline void afs_dir_put_page(struct page *page) +static int afs_dir_open(struct inode *inode, struct file *file) { - kunmap(page); - unlock_page(page); - put_page(page); + _enter("{%lu}", inode->i_ino); + + BUILD_BUG_ON(sizeof(union afs_xdr_dir_block) != 2048); + BUILD_BUG_ON(sizeof(union afs_xdr_dirent) != 32); + + if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(inode)->flags)) + return -ENOENT; + + return afs_open(inode, file); } /* - * get a page into the pagecache + * Read the directory into the pagecache in one go, scrubbing the previous + * contents. The list of pages is returned, pinning them so that they don't + * get reclaimed during the iteration. */ -static struct page *afs_dir_get_page(struct inode *dir, unsigned long index, - struct key *key) +static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key) { - struct page *page; - _enter("{%lu},%lu", dir->i_ino, index); - - page = read_cache_page(dir->i_mapping, index, afs_page_filler, key); - if (!IS_ERR(page)) { - lock_page(page); - kmap(page); - if (unlikely(!PageChecked(page))) { - if (PageError(page)) - goto fail; - } + struct afs_read *req; + loff_t i_size; + int nr_pages, nr_inline, i, n; + int ret = -ENOMEM; + +retry: + i_size = i_size_read(&dvnode->vfs_inode); + if (i_size < 2048) + return ERR_PTR(-EIO); + if (i_size > 2048 * 1024) + return ERR_PTR(-EFBIG); + + _enter("%llu", i_size); + + /* Get a request record to hold the page list. We want to hold it + * inline if we can, but we don't want to make an order 1 allocation. + */ + nr_pages = (i_size + PAGE_SIZE - 1) / PAGE_SIZE; + nr_inline = nr_pages; + if (nr_inline > (PAGE_SIZE - sizeof(*req)) / sizeof(struct page *)) + nr_inline = 0; + + req = kzalloc(sizeof(*req) + sizeof(struct page *) * nr_inline, + GFP_KERNEL); + if (!req) + return ERR_PTR(-ENOMEM); + + refcount_set(&req->usage, 1); + req->nr_pages = nr_pages; + req->actual_len = i_size; /* May change */ + req->len = nr_pages * PAGE_SIZE; /* We can ask for more than there is */ + req->data_version = dvnode->status.data_version; /* May change */ + if (nr_inline > 0) { + req->pages = req->array; + } else { + req->pages = kcalloc(nr_pages, sizeof(struct page *), + GFP_KERNEL); + if (!req->pages) + goto error; } - return page; -fail: - afs_dir_put_page(page); - _leave(" = -EIO"); - return ERR_PTR(-EIO); -} + /* Get a list of all the pages that hold or will hold the directory + * content. We need to fill in any gaps that we might find where the + * memory reclaimer has been at work. If there are any gaps, we will + * need to reread the entire directory contents. + */ + i = 0; + do { + n = find_get_pages_contig(dvnode->vfs_inode.i_mapping, i, + req->nr_pages - i, + req->pages + i); + _debug("find %u at %u/%u", n, i, req->nr_pages); + if (n == 0) { + gfp_t gfp = dvnode->vfs_inode.i_mapping->gfp_mask; + + if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_stat_v(dvnode, n_inval); + + ret = -ENOMEM; + req->pages[i] = __page_cache_alloc(gfp); + if (!req->pages[i]) + goto error; + ret = add_to_page_cache_lru(req->pages[i], + dvnode->vfs_inode.i_mapping, + i, gfp); + if (ret < 0) + goto error; + + set_page_private(req->pages[i], 1); + SetPagePrivate(req->pages[i]); + unlock_page(req->pages[i]); + i++; + } else { + i += n; + } + } while (i < req->nr_pages); -/* - * open an AFS directory file - */ -static int afs_dir_open(struct inode *inode, struct file *file) -{ - _enter("{%lu}", inode->i_ino); + /* If we're going to reload, we need to lock all the pages to prevent + * races. + */ + if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) { + ret = -ERESTARTSYS; + for (i = 0; i < req->nr_pages; i++) + if (lock_page_killable(req->pages[i]) < 0) + goto error_unlock; - BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048); - BUILD_BUG_ON(sizeof(union afs_dirent) != 32); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + goto success; - if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(inode)->flags)) - return -ENOENT; + ret = afs_fetch_data(dvnode, key, req); + if (ret < 0) + goto error_unlock_all; - return afs_open(inode, file); + task_io_account_read(PAGE_SIZE * req->nr_pages); + + if (req->len < req->file_size) + goto content_has_grown; + + /* Validate the data we just read. */ + ret = -EIO; + for (i = 0; i < req->nr_pages; i++) + if (!afs_dir_check_page(dvnode, req->pages[i], + req->actual_len)) + goto error_unlock_all; + + // TODO: Trim excess pages + + set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags); + } + +success: + i = req->nr_pages; + while (i > 0) + unlock_page(req->pages[--i]); + return req; + +error_unlock_all: + i = req->nr_pages; +error_unlock: + while (i > 0) + unlock_page(req->pages[--i]); +error: + afs_put_read(req); + _leave(" = %d", ret); + return ERR_PTR(ret); + +content_has_grown: + i = req->nr_pages; + while (i > 0) + unlock_page(req->pages[--i]); + afs_put_read(req); + goto retry; } /* * deal with one block in an AFS directory */ static int afs_dir_iterate_block(struct dir_context *ctx, - union afs_dir_block *block, + union afs_xdr_dir_block *block, unsigned blkoff) { - union afs_dirent *dire; + union afs_xdr_dirent *dire; unsigned offset, next, curr; size_t nlen; int tmp; _enter("%u,%x,%p,,",(unsigned)ctx->pos,blkoff,block); - curr = (ctx->pos - blkoff) / sizeof(union afs_dirent); + curr = (ctx->pos - blkoff) / sizeof(union afs_xdr_dirent); /* walk through the block, an entry at a time */ - for (offset = AFS_DIRENT_PER_BLOCK - block->pagehdr.nentries; - offset < AFS_DIRENT_PER_BLOCK; + for (offset = (blkoff == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS); + offset < AFS_DIR_SLOTS_PER_BLOCK; offset = next ) { next = offset + 1; /* skip entries marked unused in the bitmap */ - if (!(block->pagehdr.bitmap[offset / 8] & + if (!(block->hdr.bitmap[offset / 8] & (1 << (offset % 8)))) { _debug("ENT[%zu.%u]: unused", - blkoff / sizeof(union afs_dir_block), offset); + blkoff / sizeof(union afs_xdr_dir_block), offset); if (offset >= curr) ctx->pos = blkoff + - next * sizeof(union afs_dirent); + next * sizeof(union afs_xdr_dirent); continue; } @@ -291,34 +353,34 @@ static int afs_dir_iterate_block(struct dir_context *ctx, dire = &block->dirents[offset]; nlen = strnlen(dire->u.name, sizeof(*block) - - offset * sizeof(union afs_dirent)); + offset * sizeof(union afs_xdr_dirent)); _debug("ENT[%zu.%u]: %s %zu \"%s\"", - blkoff / sizeof(union afs_dir_block), offset, + blkoff / sizeof(union afs_xdr_dir_block), offset, (offset < curr ? "skip" : "fill"), nlen, dire->u.name); /* work out where the next possible entry is */ - for (tmp = nlen; tmp > 15; tmp -= sizeof(union afs_dirent)) { - if (next >= AFS_DIRENT_PER_BLOCK) { + for (tmp = nlen; tmp > 15; tmp -= sizeof(union afs_xdr_dirent)) { + if (next >= AFS_DIR_SLOTS_PER_BLOCK) { _debug("ENT[%zu.%u]:" " %u travelled beyond end dir block" " (len %u/%zu)", - blkoff / sizeof(union afs_dir_block), + blkoff / sizeof(union afs_xdr_dir_block), offset, next, tmp, nlen); return -EIO; } - if (!(block->pagehdr.bitmap[next / 8] & + if (!(block->hdr.bitmap[next / 8] & (1 << (next % 8)))) { _debug("ENT[%zu.%u]:" " %u unmarked extension (len %u/%zu)", - blkoff / sizeof(union afs_dir_block), + blkoff / sizeof(union afs_xdr_dir_block), offset, next, tmp, nlen); return -EIO; } _debug("ENT[%zu.%u]: ext %u/%zu", - blkoff / sizeof(union afs_dir_block), + blkoff / sizeof(union afs_xdr_dir_block), next, tmp, nlen); next++; } @@ -330,13 +392,14 @@ static int afs_dir_iterate_block(struct dir_context *ctx, /* found the next entry */ if (!dir_emit(ctx, dire->u.name, nlen, ntohl(dire->u.vnode), - ctx->actor == afs_lookup_filldir ? + (ctx->actor == afs_lookup_filldir || + ctx->actor == afs_lookup_one_filldir)? ntohl(dire->u.unique) : DT_UNKNOWN)) { _leave(" = 0 [full]"); return 0; } - ctx->pos = blkoff + next * sizeof(union afs_dirent); + ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent); } _leave(" = 1 [more]"); @@ -349,8 +412,10 @@ static int afs_dir_iterate_block(struct dir_context *ctx, static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, struct key *key) { - union afs_dir_block *dblock; - struct afs_dir_page *dbuf; + struct afs_vnode *dvnode = AFS_FS_I(dir); + struct afs_xdr_dir_page *dbuf; + union afs_xdr_dir_block *dblock; + struct afs_read *req; struct page *page; unsigned blkoff, limit; int ret; @@ -362,45 +427,53 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, return -ESTALE; } + req = afs_read_dir(dvnode, key); + if (IS_ERR(req)) + return PTR_ERR(req); + /* round the file position up to the next entry boundary */ - ctx->pos += sizeof(union afs_dirent) - 1; - ctx->pos &= ~(sizeof(union afs_dirent) - 1); + ctx->pos += sizeof(union afs_xdr_dirent) - 1; + ctx->pos &= ~(sizeof(union afs_xdr_dirent) - 1); /* walk through the blocks in sequence */ ret = 0; - while (ctx->pos < dir->i_size) { - blkoff = ctx->pos & ~(sizeof(union afs_dir_block) - 1); + while (ctx->pos < req->actual_len) { + blkoff = ctx->pos & ~(sizeof(union afs_xdr_dir_block) - 1); - /* fetch the appropriate page from the directory */ - page = afs_dir_get_page(dir, blkoff / PAGE_SIZE, key); - if (IS_ERR(page)) { - ret = PTR_ERR(page); + /* Fetch the appropriate page from the directory and re-add it + * to the LRU. + */ + page = req->pages[blkoff / PAGE_SIZE]; + if (!page) { + ret = -EIO; break; } + mark_page_accessed(page); limit = blkoff & ~(PAGE_SIZE - 1); - dbuf = page_address(page); + dbuf = kmap(page); /* deal with the individual blocks stashed on this page */ do { dblock = &dbuf->blocks[(blkoff % PAGE_SIZE) / - sizeof(union afs_dir_block)]; + sizeof(union afs_xdr_dir_block)]; ret = afs_dir_iterate_block(ctx, dblock, blkoff); if (ret != 1) { - afs_dir_put_page(page); + kunmap(page); goto out; } - blkoff += sizeof(union afs_dir_block); + blkoff += sizeof(union afs_xdr_dir_block); } while (ctx->pos < dir->i_size && blkoff < limit); - afs_dir_put_page(page); + kunmap(page); ret = 0; } out: + afs_put_read(req); _leave(" = %d", ret); return ret; } @@ -414,23 +487,23 @@ static int afs_readdir(struct file *file, struct dir_context *ctx) } /* - * search the directory for a name + * Search the directory for a single name * - if afs_dir_iterate_block() spots this function, it'll pass the FID * uniquifier through dtype */ -static int afs_lookup_filldir(struct dir_context *ctx, const char *name, - int nlen, loff_t fpos, u64 ino, unsigned dtype) +static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name, + int nlen, loff_t fpos, u64 ino, unsigned dtype) { - struct afs_lookup_cookie *cookie = - container_of(ctx, struct afs_lookup_cookie, ctx); + struct afs_lookup_one_cookie *cookie = + container_of(ctx, struct afs_lookup_one_cookie, ctx); _enter("{%s,%u},%s,%u,,%llu,%u", cookie->name.name, cookie->name.len, name, nlen, (unsigned long long) ino, dtype); /* insanity checks first */ - BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048); - BUILD_BUG_ON(sizeof(union afs_dirent) != 32); + BUILD_BUG_ON(sizeof(union afs_xdr_dir_block) != 2048); + BUILD_BUG_ON(sizeof(union afs_xdr_dirent) != 32); if (cookie->name.len != nlen || memcmp(cookie->name.name, name, nlen) != 0) { @@ -447,15 +520,15 @@ static int afs_lookup_filldir(struct dir_context *ctx, const char *name, } /* - * do a lookup in a directory + * Do a lookup of a single name in a directory * - just returns the FID the dentry name maps to if found */ -static int afs_do_lookup(struct inode *dir, struct dentry *dentry, - struct afs_fid *fid, struct key *key) +static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry, + struct afs_fid *fid, struct key *key) { struct afs_super_info *as = dir->i_sb->s_fs_info; - struct afs_lookup_cookie cookie = { - .ctx.actor = afs_lookup_filldir, + struct afs_lookup_one_cookie cookie = { + .ctx.actor = afs_lookup_one_filldir, .name = dentry->d_name, .fid.vid = as->volume->vid }; @@ -482,70 +555,265 @@ static int afs_do_lookup(struct inode *dir, struct dentry *dentry, } /* - * Probe to see if a cell may exist. This prevents positive dentries from - * being created unnecessarily. + * search the directory for a name + * - if afs_dir_iterate_block() spots this function, it'll pass the FID + * uniquifier through dtype */ -static int afs_probe_cell_name(struct dentry *dentry) +static int afs_lookup_filldir(struct dir_context *ctx, const char *name, + int nlen, loff_t fpos, u64 ino, unsigned dtype) { - struct afs_cell *cell; - const char *name = dentry->d_name.name; - size_t len = dentry->d_name.len; + struct afs_lookup_cookie *cookie = + container_of(ctx, struct afs_lookup_cookie, ctx); int ret; - /* Names prefixed with a dot are R/W mounts. */ - if (name[0] == '.') { - if (len == 1) - return -EINVAL; - name++; - len--; - } + _enter("{%s,%u},%s,%u,,%llu,%u", + cookie->name.name, cookie->name.len, name, nlen, + (unsigned long long) ino, dtype); - cell = afs_lookup_cell_rcu(afs_d2net(dentry), name, len); - if (!IS_ERR(cell)) { - afs_put_cell(afs_d2net(dentry), cell); - return 0; + /* insanity checks first */ + BUILD_BUG_ON(sizeof(union afs_xdr_dir_block) != 2048); + BUILD_BUG_ON(sizeof(union afs_xdr_dirent) != 32); + + if (cookie->found) { + if (cookie->nr_fids < 50) { + cookie->fids[cookie->nr_fids].vnode = ino; + cookie->fids[cookie->nr_fids].unique = dtype; + cookie->nr_fids++; + } + } else if (cookie->name.len == nlen && + memcmp(cookie->name.name, name, nlen) == 0) { + cookie->fids[0].vnode = ino; + cookie->fids[0].unique = dtype; + cookie->found = 1; + if (cookie->one_only) + return -1; } - ret = dns_query("afsdb", name, len, "ipv4", NULL, NULL); - if (ret == -ENODATA) - ret = -EDESTADDRREQ; + ret = cookie->nr_fids >= 50 ? -1 : 0; + _leave(" = %d", ret); return ret; } /* - * Try to auto mount the mountpoint with pseudo directory, if the autocell - * operation is setted. + * Do a lookup in a directory. We make use of bulk lookup to query a slew of + * files in one go and create inodes for them. The inode of the file we were + * asked for is returned. */ -static struct inode *afs_try_auto_mntpt(struct dentry *dentry, - struct inode *dir, struct afs_fid *fid) +static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, + struct key *key) { - struct afs_vnode *vnode = AFS_FS_I(dir); - struct inode *inode; - int ret = -ENOENT; + struct afs_lookup_cookie *cookie; + struct afs_cb_interest *cbi = NULL; + struct afs_super_info *as = dir->i_sb->s_fs_info; + struct afs_iget_data data; + struct afs_fs_cursor fc; + struct afs_vnode *dvnode = AFS_FS_I(dir); + struct inode *inode = NULL; + int ret, i; - _enter("%p{%pd}, {%x:%u}", - dentry, dentry, vnode->fid.vid, vnode->fid.vnode); + _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry); + + cookie = kzalloc(sizeof(struct afs_lookup_cookie), GFP_KERNEL); + if (!cookie) + return ERR_PTR(-ENOMEM); + + cookie->ctx.actor = afs_lookup_filldir; + cookie->name = dentry->d_name; + cookie->nr_fids = 1; /* slot 0 is saved for the fid we actually want */ + + read_seqlock_excl(&dvnode->cb_lock); + if (dvnode->cb_interest && + dvnode->cb_interest->server && + test_bit(AFS_SERVER_FL_NO_IBULK, &dvnode->cb_interest->server->flags)) + cookie->one_only = true; + read_sequnlock_excl(&dvnode->cb_lock); + + for (i = 0; i < 50; i++) + cookie->fids[i].vid = as->volume->vid; + + /* search the directory */ + ret = afs_dir_iterate(dir, &cookie->ctx, key); + if (ret < 0) { + inode = ERR_PTR(ret); + goto out; + } - if (!test_bit(AFS_VNODE_AUTOCELL, &vnode->flags)) + inode = ERR_PTR(-ENOENT); + if (!cookie->found) goto out; - ret = afs_probe_cell_name(dentry); - if (ret < 0) + /* Check to see if we already have an inode for the primary fid. */ + data.volume = dvnode->volume; + data.fid = cookie->fids[0]; + inode = ilookup5(dir->i_sb, cookie->fids[0].vnode, afs_iget5_test, &data); + if (inode) goto out; - inode = afs_iget_pseudo_dir(dir->i_sb, false); - if (IS_ERR(inode)) { - ret = PTR_ERR(inode); + /* Need space for examining all the selected files */ + inode = ERR_PTR(-ENOMEM); + cookie->statuses = kcalloc(cookie->nr_fids, sizeof(struct afs_file_status), + GFP_KERNEL); + if (!cookie->statuses) goto out; + + cookie->callbacks = kcalloc(cookie->nr_fids, sizeof(struct afs_callback), + GFP_KERNEL); + if (!cookie->callbacks) + goto out_s; + + /* Try FS.InlineBulkStatus first. Abort codes for the individual + * lookups contained therein are stored in the reply without aborting + * the whole operation. + */ + if (cookie->one_only) + goto no_inline_bulk_status; + + inode = ERR_PTR(-ERESTARTSYS); + if (afs_begin_vnode_operation(&fc, dvnode, key)) { + while (afs_select_fileserver(&fc)) { + if (test_bit(AFS_SERVER_FL_NO_IBULK, + &fc.cbi->server->flags)) { + fc.ac.abort_code = RX_INVALID_OPERATION; + fc.ac.error = -ECONNABORTED; + break; + } + afs_fs_inline_bulk_status(&fc, + afs_v2net(dvnode), + cookie->fids, + cookie->statuses, + cookie->callbacks, + cookie->nr_fids, NULL); + } + + if (fc.ac.error == 0) + cbi = afs_get_cb_interest(fc.cbi); + if (fc.ac.abort_code == RX_INVALID_OPERATION) + set_bit(AFS_SERVER_FL_NO_IBULK, &fc.cbi->server->flags); + inode = ERR_PTR(afs_end_vnode_operation(&fc)); } - *fid = AFS_FS_I(inode)->fid; - _leave("= %p", inode); - return inode; + if (!IS_ERR(inode)) + goto success; + if (fc.ac.abort_code != RX_INVALID_OPERATION) + goto out_c; + +no_inline_bulk_status: + /* We could try FS.BulkStatus next, but this aborts the entire op if + * any of the lookups fails - so, for the moment, revert to + * FS.FetchStatus for just the primary fid. + */ + cookie->nr_fids = 1; + inode = ERR_PTR(-ERESTARTSYS); + if (afs_begin_vnode_operation(&fc, dvnode, key)) { + while (afs_select_fileserver(&fc)) { + afs_fs_fetch_status(&fc, + afs_v2net(dvnode), + cookie->fids, + cookie->statuses, + cookie->callbacks, + NULL); + } + + if (fc.ac.error == 0) + cbi = afs_get_cb_interest(fc.cbi); + inode = ERR_PTR(afs_end_vnode_operation(&fc)); + } + if (IS_ERR(inode)) + goto out_c; + + for (i = 0; i < cookie->nr_fids; i++) + cookie->statuses[i].abort_code = 0; + +success: + /* Turn all the files into inodes and save the first one - which is the + * one we actually want. + */ + if (cookie->statuses[0].abort_code != 0) + inode = ERR_PTR(afs_abort_to_error(cookie->statuses[0].abort_code)); + + for (i = 0; i < cookie->nr_fids; i++) { + struct inode *ti; + + if (cookie->statuses[i].abort_code != 0) + continue; + + ti = afs_iget(dir->i_sb, key, &cookie->fids[i], + &cookie->statuses[i], + &cookie->callbacks[i], + cbi); + if (i == 0) { + inode = ti; + } else { + if (!IS_ERR(ti)) + iput(ti); + } + } + +out_c: + afs_put_cb_interest(afs_v2net(dvnode), cbi); + kfree(cookie->callbacks); +out_s: + kfree(cookie->statuses); out: - _leave("= %d", ret); - return ERR_PTR(ret); + kfree(cookie); + return inode; +} + +/* + * Look up an entry in a directory with @sys substitution. + */ +static struct dentry *afs_lookup_atsys(struct inode *dir, struct dentry *dentry, + struct key *key) +{ + struct afs_sysnames *subs; + struct afs_net *net = afs_i2net(dir); + struct dentry *ret; + char *buf, *p, *name; + int len, i; + + _enter(""); + + ret = ERR_PTR(-ENOMEM); + p = buf = kmalloc(AFSNAMEMAX, GFP_KERNEL); + if (!buf) + goto out_p; + if (dentry->d_name.len > 4) { + memcpy(p, dentry->d_name.name, dentry->d_name.len - 4); + p += dentry->d_name.len - 4; + } + + /* There is an ordered list of substitutes that we have to try. */ + read_lock(&net->sysnames_lock); + subs = net->sysnames; + refcount_inc(&subs->usage); + read_unlock(&net->sysnames_lock); + + for (i = 0; i < subs->nr; i++) { + name = subs->subs[i]; + len = dentry->d_name.len - 4 + strlen(name); + if (len >= AFSNAMEMAX) { + ret = ERR_PTR(-ENAMETOOLONG); + goto out_s; + } + + strcpy(p, name); + ret = lookup_one_len(buf, dentry->d_parent, len); + if (IS_ERR(ret) || d_is_positive(ret)) + goto out_s; + dput(ret); + } + + /* We don't want to d_add() the @sys dentry here as we don't want to + * the cached dentry to hide changes to the sysnames list. + */ + ret = NULL; +out_s: + afs_put_sysnames(subs); + kfree(buf); +out_p: + key_put(key); + return ret; } /* @@ -554,16 +822,13 @@ out: static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { - struct afs_vnode *vnode; - struct afs_fid fid; + struct afs_vnode *dvnode = AFS_FS_I(dir); struct inode *inode; struct key *key; int ret; - vnode = AFS_FS_I(dir); - _enter("{%x:%u},%p{%pd},", - vnode->fid.vid, vnode->fid.vnode, dentry, dentry); + dvnode->fid.vid, dvnode->fid.vnode, dentry, dentry); ASSERTCMP(d_inode(dentry), ==, NULL); @@ -572,28 +837,37 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(-ENAMETOOLONG); } - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { + if (test_bit(AFS_VNODE_DELETED, &dvnode->flags)) { _leave(" = -ESTALE"); return ERR_PTR(-ESTALE); } - key = afs_request_key(vnode->volume->cell); + key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { _leave(" = %ld [key]", PTR_ERR(key)); return ERR_CAST(key); } - ret = afs_validate(vnode, key); + ret = afs_validate(dvnode, key); if (ret < 0) { key_put(key); _leave(" = %d [val]", ret); return ERR_PTR(ret); } - ret = afs_do_lookup(dir, dentry, &fid, key); - if (ret < 0) { + if (dentry->d_name.len >= 4 && + dentry->d_name.name[dentry->d_name.len - 4] == '@' && + dentry->d_name.name[dentry->d_name.len - 3] == 's' && + dentry->d_name.name[dentry->d_name.len - 2] == 'y' && + dentry->d_name.name[dentry->d_name.len - 1] == 's') + return afs_lookup_atsys(dir, dentry, key); + + afs_stat_v(dvnode, n_lookup); + inode = afs_do_lookup(dir, dentry, key); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); if (ret == -ENOENT) { - inode = afs_try_auto_mntpt(dentry, dir, &fid); + inode = afs_try_auto_mntpt(dentry, dir); if (!IS_ERR(inode)) { key_put(key); goto success; @@ -611,10 +885,9 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, _leave(" = %d [do]", ret); return ERR_PTR(ret); } - dentry->d_fsdata = (void *)(unsigned long) vnode->status.data_version; + dentry->d_fsdata = (void *)(unsigned long)dvnode->status.data_version; /* instantiate the dentry */ - inode = afs_iget(dir->i_sb, key, &fid, NULL, NULL, NULL); key_put(key); if (IS_ERR(inode)) { _leave(" = %ld", PTR_ERR(inode)); @@ -623,9 +896,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, success: d_add(dentry, inode); - _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%u }", - fid.vnode, - fid.unique, + _leave(" = 0 { ino=%lu v=%u }", d_inode(dentry)->i_ino, d_inode(dentry)->i_generation); @@ -633,67 +904,23 @@ success: } /* - * Look up an entry in a dynroot directory. - */ -static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry, - unsigned int flags) -{ - struct afs_vnode *vnode; - struct afs_fid fid; - struct inode *inode; - int ret; - - vnode = AFS_FS_I(dir); - - _enter("%pd", dentry); - - ASSERTCMP(d_inode(dentry), ==, NULL); - - if (dentry->d_name.len >= AFSNAMEMAX) { - _leave(" = -ENAMETOOLONG"); - return ERR_PTR(-ENAMETOOLONG); - } - - inode = afs_try_auto_mntpt(dentry, dir, &fid); - if (IS_ERR(inode)) { - ret = PTR_ERR(inode); - if (ret == -ENOENT) { - d_add(dentry, NULL); - _leave(" = NULL [negative]"); - return NULL; - } - _leave(" = %d [do]", ret); - return ERR_PTR(ret); - } - - d_add(dentry, inode); - _leave(" = 0 { ino=%lu v=%u }", - d_inode(dentry)->i_ino, d_inode(dentry)->i_generation); - return NULL; -} - -/* * check that a dentry lookup hit has found a valid entry * - NOTE! the hit can be a negative hit too, so we can't assume we have an * inode */ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) { - struct afs_super_info *as = dentry->d_sb->s_fs_info; struct afs_vnode *vnode, *dir; struct afs_fid uninitialized_var(fid); struct dentry *parent; struct inode *inode; struct key *key; - void *dir_version; + long dir_version, de_version; int ret; if (flags & LOOKUP_RCU) return -ECHILD; - if (as->dyn_root) - return 1; - if (d_really_is_positive(dentry)) { vnode = AFS_FS_I(d_inode(dentry)); _enter("{v={%x:%u} n=%pd fl=%lx},", @@ -729,14 +956,25 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) goto out_bad_parent; } - dir_version = (void *) (unsigned long) dir->status.data_version; - if (dentry->d_fsdata == dir_version) - goto out_valid; /* the dir contents are unchanged */ + /* We only need to invalidate a dentry if the server's copy changed + * behind our back. If we made the change, it's no problem. Note that + * on a 32-bit system, we only have 32 bits in the dentry to store the + * version. + */ + dir_version = (long)dir->status.data_version; + de_version = (long)dentry->d_fsdata; + if (de_version == dir_version) + goto out_valid; + + dir_version = (long)dir->invalid_before; + if (de_version - dir_version >= 0) + goto out_valid; _debug("dir modified"); + afs_stat_v(dir, n_reval); /* search the directory for this vnode */ - ret = afs_do_lookup(&dir->vfs_inode, dentry, &fid, key); + ret = afs_do_lookup_one(&dir->vfs_inode, dentry, &fid, key); switch (ret) { case 0: /* the filename maps to something */ @@ -789,7 +1027,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) } out_valid: - dentry->d_fsdata = dir_version; + dentry->d_fsdata = (void *)dir_version; dput(parent); key_put(key); _leave(" = 1 [valid]"); @@ -840,7 +1078,7 @@ zap: /* * handle dentry release */ -static void afs_d_release(struct dentry *dentry) +void afs_d_release(struct dentry *dentry) { _enter("%pd", dentry); } @@ -854,6 +1092,7 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc, struct afs_file_status *newstatus, struct afs_callback *newcb) { + struct afs_vnode *vnode; struct inode *inode; if (fc->ac.error < 0) @@ -871,6 +1110,8 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc, return; } + vnode = AFS_FS_I(inode); + set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); d_add(new_dentry, inode); } @@ -885,6 +1126,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct afs_vnode *dvnode = AFS_FS_I(dir); struct afs_fid newfid; struct key *key; + u64 data_version = dvnode->status.data_version; int ret; mode |= S_IFDIR; @@ -902,7 +1144,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; - afs_fs_create(&fc, dentry->d_name.name, mode, + afs_fs_create(&fc, dentry->d_name.name, mode, data_version, &newfid, &newstatus, &newcb); } @@ -916,6 +1158,11 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto error_key; } + if (ret == 0 && + test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_edit_dir_add(dvnode, &dentry->d_name, &newfid, + afs_edit_dir_for_create); + key_put(key); _leave(" = 0"); return 0; @@ -939,6 +1186,7 @@ static void afs_dir_remove_subdir(struct dentry *dentry) clear_nlink(&vnode->vfs_inode); set_bit(AFS_VNODE_DELETED, &vnode->flags); clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); } } @@ -950,6 +1198,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) struct afs_fs_cursor fc; struct afs_vnode *dvnode = AFS_FS_I(dir); struct key *key; + u64 data_version = dvnode->status.data_version; int ret; _enter("{%x:%u},{%pd}", @@ -965,13 +1214,18 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; - afs_fs_remove(&fc, dentry->d_name.name, true); + afs_fs_remove(&fc, dentry->d_name.name, true, + data_version); } afs_vnode_commit_status(&fc, dvnode, fc.cb_break); ret = afs_end_vnode_operation(&fc); - if (ret == 0) + if (ret == 0) { afs_dir_remove_subdir(dentry); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_edit_dir_remove(dvnode, &dentry->d_name, + afs_edit_dir_for_rmdir); + } } key_put(key); @@ -1036,6 +1290,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode; struct key *key; unsigned long d_version = (unsigned long)dentry->d_fsdata; + u64 data_version = dvnode->status.data_version; int ret; _enter("{%x:%u},{%pd}", @@ -1062,7 +1317,8 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; - afs_fs_remove(&fc, dentry->d_name.name, false); + afs_fs_remove(&fc, dentry->d_name.name, false, + data_version); } afs_vnode_commit_status(&fc, dvnode, fc.cb_break); @@ -1071,6 +1327,10 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) ret = afs_dir_remove_link( dentry, key, d_version, (unsigned long)dvnode->status.data_version); + if (ret == 0 && + test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_edit_dir_remove(dvnode, &dentry->d_name, + afs_edit_dir_for_unlink); } error_key: @@ -1092,6 +1352,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct afs_vnode *dvnode = AFS_FS_I(dir); struct afs_fid newfid; struct key *key; + u64 data_version = dvnode->status.data_version; int ret; mode |= S_IFREG; @@ -1113,7 +1374,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; - afs_fs_create(&fc, dentry->d_name.name, mode, + afs_fs_create(&fc, dentry->d_name.name, mode, data_version, &newfid, &newstatus, &newcb); } @@ -1127,6 +1388,10 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, goto error_key; } + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_edit_dir_add(dvnode, &dentry->d_name, &newfid, + afs_edit_dir_for_create); + key_put(key); _leave(" = 0"); return 0; @@ -1148,10 +1413,12 @@ static int afs_link(struct dentry *from, struct inode *dir, struct afs_fs_cursor fc; struct afs_vnode *dvnode, *vnode; struct key *key; + u64 data_version; int ret; vnode = AFS_FS_I(d_inode(from)); dvnode = AFS_FS_I(dir); + data_version = dvnode->status.data_version; _enter("{%x:%u},{%x:%u},{%pd}", vnode->fid.vid, vnode->fid.vnode, @@ -1178,7 +1445,7 @@ static int afs_link(struct dentry *from, struct inode *dir, while (afs_select_fileserver(&fc)) { fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; fc.cb_break_2 = vnode->cb_break + vnode->cb_s_break; - afs_fs_link(&fc, vnode, dentry->d_name.name); + afs_fs_link(&fc, vnode, dentry->d_name.name, data_version); } afs_vnode_commit_status(&fc, dvnode, fc.cb_break); @@ -1194,6 +1461,10 @@ static int afs_link(struct dentry *from, struct inode *dir, goto error_key; } + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_edit_dir_add(dvnode, &dentry->d_name, &vnode->fid, + afs_edit_dir_for_link); + key_put(key); _leave(" = 0"); return 0; @@ -1217,6 +1488,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, struct afs_vnode *dvnode = AFS_FS_I(dir); struct afs_fid newfid; struct key *key; + u64 data_version = dvnode->status.data_version; int ret; _enter("{%x:%u},{%pd},%s", @@ -1241,7 +1513,8 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; - afs_fs_symlink(&fc, dentry->d_name.name, content, + afs_fs_symlink(&fc, dentry->d_name.name, + content, data_version, &newfid, &newstatus); } @@ -1255,6 +1528,10 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, goto error_key; } + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_edit_dir_add(dvnode, &dentry->d_name, &newfid, + afs_edit_dir_for_symlink); + key_put(key); _leave(" = 0"); return 0; @@ -1277,6 +1554,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, struct afs_fs_cursor fc; struct afs_vnode *orig_dvnode, *new_dvnode, *vnode; struct key *key; + u64 orig_data_version, new_data_version; + bool new_negative = d_is_negative(new_dentry); int ret; if (flags) @@ -1285,6 +1564,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, vnode = AFS_FS_I(d_inode(old_dentry)); orig_dvnode = AFS_FS_I(old_dir); new_dvnode = AFS_FS_I(new_dir); + orig_data_version = orig_dvnode->status.data_version; + new_data_version = new_dvnode->status.data_version; _enter("{%x:%u},{%x:%u},{%x:%u},{%pd}", orig_dvnode->fid.vid, orig_dvnode->fid.vnode, @@ -1310,7 +1591,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, fc.cb_break = orig_dvnode->cb_break + orig_dvnode->cb_s_break; fc.cb_break_2 = new_dvnode->cb_break + new_dvnode->cb_s_break; afs_fs_rename(&fc, old_dentry->d_name.name, - new_dvnode, new_dentry->d_name.name); + new_dvnode, new_dentry->d_name.name, + orig_data_version, new_data_version); } afs_vnode_commit_status(&fc, orig_dvnode, fc.cb_break); @@ -1322,9 +1604,68 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, goto error_key; } + if (ret == 0) { + if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags)) + afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name, + afs_edit_dir_for_rename); + + if (!new_negative && + test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags)) + afs_edit_dir_remove(new_dvnode, &new_dentry->d_name, + afs_edit_dir_for_rename); + + if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags)) + afs_edit_dir_add(new_dvnode, &new_dentry->d_name, + &vnode->fid, afs_edit_dir_for_rename); + } + error_key: key_put(key); error: _leave(" = %d", ret); return ret; } + +/* + * Release a directory page and clean up its private state if it's not busy + * - return true if the page can now be released, false if not + */ +static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags) +{ + struct afs_vnode *dvnode = AFS_FS_I(page->mapping->host); + + _enter("{{%x:%u}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, page->index); + + set_page_private(page, 0); + ClearPagePrivate(page); + + /* The directory will need reloading. */ + if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_stat_v(dvnode, n_relpg); + return 1; +} + +/* + * invalidate part or all of a page + * - release a page and clean up its private data if offset is 0 (indicating + * the entire page) + */ +static void afs_dir_invalidatepage(struct page *page, unsigned int offset, + unsigned int length) +{ + struct afs_vnode *dvnode = AFS_FS_I(page->mapping->host); + + _enter("{%lu},%u,%u", page->index, offset, length); + + BUG_ON(!PageLocked(page)); + + /* The directory will need reloading. */ + if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_stat_v(dvnode, n_inval); + + /* we clean up only if the entire page is being invalidated */ + if (offset == 0 && length == PAGE_SIZE) { + set_page_private(page, 0); + ClearPagePrivate(page); + } +} diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c new file mode 100644 index 000000000000..8b400f5aead5 --- /dev/null +++ b/fs/afs/dir_edit.c @@ -0,0 +1,505 @@ +/* AFS filesystem directory editing + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/namei.h> +#include <linux/pagemap.h> +#include <linux/iversion.h> +#include "internal.h" +#include "xdr_fs.h" + +/* + * Find a number of contiguous clear bits in a directory block bitmask. + * + * There are 64 slots, which means we can load the entire bitmap into a + * variable. The first bit doesn't count as it corresponds to the block header + * slot. nr_slots is between 1 and 9. + */ +static int afs_find_contig_bits(union afs_xdr_dir_block *block, unsigned int nr_slots) +{ + u64 bitmap; + u32 mask; + int bit, n; + + bitmap = (u64)block->hdr.bitmap[0] << 0 * 8; + bitmap |= (u64)block->hdr.bitmap[1] << 1 * 8; + bitmap |= (u64)block->hdr.bitmap[2] << 2 * 8; + bitmap |= (u64)block->hdr.bitmap[3] << 3 * 8; + bitmap |= (u64)block->hdr.bitmap[4] << 4 * 8; + bitmap |= (u64)block->hdr.bitmap[5] << 5 * 8; + bitmap |= (u64)block->hdr.bitmap[6] << 6 * 8; + bitmap |= (u64)block->hdr.bitmap[7] << 7 * 8; + bitmap >>= 1; /* The first entry is metadata */ + bit = 1; + mask = (1 << nr_slots) - 1; + + do { + if (sizeof(unsigned long) == 8) + n = ffz(bitmap); + else + n = ((u32)bitmap) != 0 ? + ffz((u32)bitmap) : + ffz((u32)(bitmap >> 32)) + 32; + bitmap >>= n; + bit += n; + + if ((bitmap & mask) == 0) { + if (bit > 64 - nr_slots) + return -1; + return bit; + } + + n = __ffs(bitmap); + bitmap >>= n; + bit += n; + } while (bitmap); + + return -1; +} + +/* + * Set a number of contiguous bits in the directory block bitmap. + */ +static void afs_set_contig_bits(union afs_xdr_dir_block *block, + int bit, unsigned int nr_slots) +{ + u64 mask, before, after; + + mask = (1 << nr_slots) - 1; + mask <<= bit; + + before = *(u64 *)block->hdr.bitmap; + + block->hdr.bitmap[0] |= (u8)(mask >> 0 * 8); + block->hdr.bitmap[1] |= (u8)(mask >> 1 * 8); + block->hdr.bitmap[2] |= (u8)(mask >> 2 * 8); + block->hdr.bitmap[3] |= (u8)(mask >> 3 * 8); + block->hdr.bitmap[4] |= (u8)(mask >> 4 * 8); + block->hdr.bitmap[5] |= (u8)(mask >> 5 * 8); + block->hdr.bitmap[6] |= (u8)(mask >> 6 * 8); + block->hdr.bitmap[7] |= (u8)(mask >> 7 * 8); + + after = *(u64 *)block->hdr.bitmap; +} + +/* + * Clear a number of contiguous bits in the directory block bitmap. + */ +static void afs_clear_contig_bits(union afs_xdr_dir_block *block, + int bit, unsigned int nr_slots) +{ + u64 mask, before, after; + + mask = (1 << nr_slots) - 1; + mask <<= bit; + + before = *(u64 *)block->hdr.bitmap; + + block->hdr.bitmap[0] &= ~(u8)(mask >> 0 * 8); + block->hdr.bitmap[1] &= ~(u8)(mask >> 1 * 8); + block->hdr.bitmap[2] &= ~(u8)(mask >> 2 * 8); + block->hdr.bitmap[3] &= ~(u8)(mask >> 3 * 8); + block->hdr.bitmap[4] &= ~(u8)(mask >> 4 * 8); + block->hdr.bitmap[5] &= ~(u8)(mask >> 5 * 8); + block->hdr.bitmap[6] &= ~(u8)(mask >> 6 * 8); + block->hdr.bitmap[7] &= ~(u8)(mask >> 7 * 8); + + after = *(u64 *)block->hdr.bitmap; +} + +/* + * Scan a directory block looking for a dirent of the right name. + */ +static int afs_dir_scan_block(union afs_xdr_dir_block *block, struct qstr *name, + unsigned int blocknum) +{ + union afs_xdr_dirent *de; + u64 bitmap; + int d, len, n; + + _enter(""); + + bitmap = (u64)block->hdr.bitmap[0] << 0 * 8; + bitmap |= (u64)block->hdr.bitmap[1] << 1 * 8; + bitmap |= (u64)block->hdr.bitmap[2] << 2 * 8; + bitmap |= (u64)block->hdr.bitmap[3] << 3 * 8; + bitmap |= (u64)block->hdr.bitmap[4] << 4 * 8; + bitmap |= (u64)block->hdr.bitmap[5] << 5 * 8; + bitmap |= (u64)block->hdr.bitmap[6] << 6 * 8; + bitmap |= (u64)block->hdr.bitmap[7] << 7 * 8; + + for (d = (blocknum == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS); + d < AFS_DIR_SLOTS_PER_BLOCK; + d++) { + if (!((bitmap >> d) & 1)) + continue; + de = &block->dirents[d]; + if (de->u.valid != 1) + continue; + + /* The block was NUL-terminated by afs_dir_check_page(). */ + len = strlen(de->u.name); + if (len == name->len && + memcmp(de->u.name, name->name, name->len) == 0) + return d; + + n = round_up(12 + len + 1 + 4, AFS_DIR_DIRENT_SIZE); + n /= AFS_DIR_DIRENT_SIZE; + d += n - 1; + } + + return -1; +} + +/* + * Initialise a new directory block. Note that block 0 is special and contains + * some extra metadata. + */ +static void afs_edit_init_block(union afs_xdr_dir_block *meta, + union afs_xdr_dir_block *block, int block_num) +{ + memset(block, 0, sizeof(*block)); + block->hdr.npages = htons(1); + block->hdr.magic = AFS_DIR_MAGIC; + block->hdr.bitmap[0] = 1; + + if (block_num == 0) { + block->hdr.bitmap[0] = 0xff; + block->hdr.bitmap[1] = 0x1f; + memset(block->meta.alloc_ctrs, + AFS_DIR_SLOTS_PER_BLOCK, + sizeof(block->meta.alloc_ctrs)); + meta->meta.alloc_ctrs[0] = + AFS_DIR_SLOTS_PER_BLOCK - AFS_DIR_RESV_BLOCKS0; + } + + if (block_num < AFS_DIR_BLOCKS_WITH_CTR) + meta->meta.alloc_ctrs[block_num] = + AFS_DIR_SLOTS_PER_BLOCK - AFS_DIR_RESV_BLOCKS; +} + +/* + * Edit a directory's file data to add a new directory entry. Doing this after + * create, mkdir, symlink, link or rename if the data version number is + * incremented by exactly one avoids the need to re-download the entire + * directory contents. + * + * The caller must hold the inode locked. + */ +void afs_edit_dir_add(struct afs_vnode *vnode, + struct qstr *name, struct afs_fid *new_fid, + enum afs_edit_dir_reason why) +{ + union afs_xdr_dir_block *meta, *block; + struct afs_xdr_dir_page *meta_page, *dir_page; + union afs_xdr_dirent *de; + struct page *page0, *page; + unsigned int need_slots, nr_blocks, b; + pgoff_t index; + loff_t i_size; + gfp_t gfp; + int slot; + + _enter(",,{%d,%s},", name->len, name->name); + + i_size = i_size_read(&vnode->vfs_inode); + if (i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS || + (i_size & (AFS_DIR_BLOCK_SIZE - 1))) { + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + return; + } + + gfp = vnode->vfs_inode.i_mapping->gfp_mask; + page0 = find_or_create_page(vnode->vfs_inode.i_mapping, 0, gfp); + if (!page0) { + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + _leave(" [fgp]"); + return; + } + + /* Work out how many slots we're going to need. */ + need_slots = round_up(12 + name->len + 1 + 4, AFS_DIR_DIRENT_SIZE); + need_slots /= AFS_DIR_DIRENT_SIZE; + + meta_page = kmap(page0); + meta = &meta_page->blocks[0]; + if (i_size == 0) + goto new_directory; + nr_blocks = i_size / AFS_DIR_BLOCK_SIZE; + + /* Find a block that has sufficient slots available. Each VM page + * contains two or more directory blocks. + */ + for (b = 0; b < nr_blocks + 1; b++) { + /* If the directory extended into a new page, then we need to + * tack a new page on the end. + */ + index = b / AFS_DIR_BLOCKS_PER_PAGE; + if (index == 0) { + page = page0; + dir_page = meta_page; + } else { + if (nr_blocks >= AFS_DIR_MAX_BLOCKS) + goto error; + gfp = vnode->vfs_inode.i_mapping->gfp_mask; + page = find_or_create_page(vnode->vfs_inode.i_mapping, + index, gfp); + if (!page) + goto error; + if (!PagePrivate(page)) { + set_page_private(page, 1); + SetPagePrivate(page); + } + dir_page = kmap(page); + } + + /* Abandon the edit if we got a callback break. */ + if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) + goto invalidated; + + block = &dir_page->blocks[b % AFS_DIR_BLOCKS_PER_PAGE]; + + _debug("block %u: %2u %3u %u", + b, + (b < AFS_DIR_BLOCKS_WITH_CTR) ? meta->meta.alloc_ctrs[b] : 99, + ntohs(block->hdr.npages), + ntohs(block->hdr.magic)); + + /* Initialise the block if necessary. */ + if (b == nr_blocks) { + _debug("init %u", b); + afs_edit_init_block(meta, block, b); + i_size_write(&vnode->vfs_inode, (b + 1) * AFS_DIR_BLOCK_SIZE); + } + + /* Only lower dir pages have a counter in the header. */ + if (b >= AFS_DIR_BLOCKS_WITH_CTR || + meta->meta.alloc_ctrs[b] >= need_slots) { + /* We need to try and find one or more consecutive + * slots to hold the entry. + */ + slot = afs_find_contig_bits(block, need_slots); + if (slot >= 0) { + _debug("slot %u", slot); + goto found_space; + } + } + + if (page != page0) { + unlock_page(page); + kunmap(page); + put_page(page); + } + } + + /* There are no spare slots of sufficient size, yet the operation + * succeeded. Download the directory again. + */ + trace_afs_edit_dir(vnode, why, afs_edit_dir_create_nospc, 0, 0, 0, 0, name->name); + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + goto out_unmap; + +new_directory: + afs_edit_init_block(meta, meta, 0); + i_size = AFS_DIR_BLOCK_SIZE; + i_size_write(&vnode->vfs_inode, i_size); + slot = AFS_DIR_RESV_BLOCKS0; + page = page0; + block = meta; + nr_blocks = 1; + b = 0; + +found_space: + /* Set the dirent slot. */ + trace_afs_edit_dir(vnode, why, afs_edit_dir_create, b, slot, + new_fid->vnode, new_fid->unique, name->name); + de = &block->dirents[slot]; + de->u.valid = 1; + de->u.unused[0] = 0; + de->u.hash_next = 0; // TODO: Really need to maintain this + de->u.vnode = htonl(new_fid->vnode); + de->u.unique = htonl(new_fid->unique); + memcpy(de->u.name, name->name, name->len + 1); + de->u.name[name->len] = 0; + + /* Adjust the bitmap. */ + afs_set_contig_bits(block, slot, need_slots); + if (page != page0) { + unlock_page(page); + kunmap(page); + put_page(page); + } + + /* Adjust the allocation counter. */ + if (b < AFS_DIR_BLOCKS_WITH_CTR) + meta->meta.alloc_ctrs[b] -= need_slots; + + inode_inc_iversion_raw(&vnode->vfs_inode); + afs_stat_v(vnode, n_dir_cr); + _debug("Insert %s in %u[%u]", name->name, b, slot); + +out_unmap: + unlock_page(page0); + kunmap(page0); + put_page(page0); + _leave(""); + return; + +invalidated: + trace_afs_edit_dir(vnode, why, afs_edit_dir_create_inval, 0, 0, 0, 0, name->name); + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + if (page != page0) { + kunmap(page); + put_page(page); + } + goto out_unmap; + +error: + trace_afs_edit_dir(vnode, why, afs_edit_dir_create_error, 0, 0, 0, 0, name->name); + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + goto out_unmap; +} + +/* + * Edit a directory's file data to remove a new directory entry. Doing this + * after unlink, rmdir or rename if the data version number is incremented by + * exactly one avoids the need to re-download the entire directory contents. + * + * The caller must hold the inode locked. + */ +void afs_edit_dir_remove(struct afs_vnode *vnode, + struct qstr *name, enum afs_edit_dir_reason why) +{ + struct afs_xdr_dir_page *meta_page, *dir_page; + union afs_xdr_dir_block *meta, *block; + union afs_xdr_dirent *de; + struct page *page0, *page; + unsigned int need_slots, nr_blocks, b; + pgoff_t index; + loff_t i_size; + int slot; + + _enter(",,{%d,%s},", name->len, name->name); + + i_size = i_size_read(&vnode->vfs_inode); + if (i_size < AFS_DIR_BLOCK_SIZE || + i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS || + (i_size & (AFS_DIR_BLOCK_SIZE - 1))) { + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + return; + } + nr_blocks = i_size / AFS_DIR_BLOCK_SIZE; + + page0 = find_lock_page(vnode->vfs_inode.i_mapping, 0); + if (!page0) { + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + _leave(" [fgp]"); + return; + } + + /* Work out how many slots we're going to discard. */ + need_slots = round_up(12 + name->len + 1 + 4, AFS_DIR_DIRENT_SIZE); + need_slots /= AFS_DIR_DIRENT_SIZE; + + meta_page = kmap(page0); + meta = &meta_page->blocks[0]; + + /* Find a page that has sufficient slots available. Each VM page + * contains two or more directory blocks. + */ + for (b = 0; b < nr_blocks; b++) { + index = b / AFS_DIR_BLOCKS_PER_PAGE; + if (index != 0) { + page = find_lock_page(vnode->vfs_inode.i_mapping, index); + if (!page) + goto error; + dir_page = kmap(page); + } else { + page = page0; + dir_page = meta_page; + } + + /* Abandon the edit if we got a callback break. */ + if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) + goto invalidated; + + block = &dir_page->blocks[b % AFS_DIR_BLOCKS_PER_PAGE]; + + if (b > AFS_DIR_BLOCKS_WITH_CTR || + meta->meta.alloc_ctrs[b] <= AFS_DIR_SLOTS_PER_BLOCK - 1 - need_slots) { + slot = afs_dir_scan_block(block, name, b); + if (slot >= 0) + goto found_dirent; + } + + if (page != page0) { + unlock_page(page); + kunmap(page); + put_page(page); + } + } + + /* Didn't find the dirent to clobber. Download the directory again. */ + trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_noent, + 0, 0, 0, 0, name->name); + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + goto out_unmap; + +found_dirent: + de = &block->dirents[slot]; + + trace_afs_edit_dir(vnode, why, afs_edit_dir_delete, b, slot, + ntohl(de->u.vnode), ntohl(de->u.unique), + name->name); + + memset(de, 0, sizeof(*de) * need_slots); + + /* Adjust the bitmap. */ + afs_clear_contig_bits(block, slot, need_slots); + if (page != page0) { + unlock_page(page); + kunmap(page); + put_page(page); + } + + /* Adjust the allocation counter. */ + if (b < AFS_DIR_BLOCKS_WITH_CTR) + meta->meta.alloc_ctrs[b] += need_slots; + + inode_set_iversion_raw(&vnode->vfs_inode, vnode->status.data_version); + afs_stat_v(vnode, n_dir_rm); + _debug("Remove %s from %u[%u]", name->name, b, slot); + +out_unmap: + unlock_page(page0); + kunmap(page0); + put_page(page0); + _leave(""); + return; + +invalidated: + trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_inval, + 0, 0, 0, 0, name->name); + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + if (page != page0) { + unlock_page(page); + kunmap(page); + put_page(page); + } + goto out_unmap; + +error: + trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_error, + 0, 0, 0, 0, name->name); + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + goto out_unmap; +} diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c new file mode 100644 index 000000000000..983f3946ab57 --- /dev/null +++ b/fs/afs/dynroot.c @@ -0,0 +1,209 @@ +/* dir.c: AFS dynamic root handling + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/fs.h> +#include <linux/namei.h> +#include <linux/dns_resolver.h> +#include "internal.h" + +const struct file_operations afs_dynroot_file_operations = { + .open = dcache_dir_open, + .release = dcache_dir_close, + .iterate_shared = dcache_readdir, + .llseek = dcache_dir_lseek, +}; + +/* + * Probe to see if a cell may exist. This prevents positive dentries from + * being created unnecessarily. + */ +static int afs_probe_cell_name(struct dentry *dentry) +{ + struct afs_cell *cell; + const char *name = dentry->d_name.name; + size_t len = dentry->d_name.len; + int ret; + + /* Names prefixed with a dot are R/W mounts. */ + if (name[0] == '.') { + if (len == 1) + return -EINVAL; + name++; + len--; + } + + cell = afs_lookup_cell_rcu(afs_d2net(dentry), name, len); + if (!IS_ERR(cell)) { + afs_put_cell(afs_d2net(dentry), cell); + return 0; + } + + ret = dns_query("afsdb", name, len, "ipv4", NULL, NULL); + if (ret == -ENODATA) + ret = -EDESTADDRREQ; + return ret; +} + +/* + * Try to auto mount the mountpoint with pseudo directory, if the autocell + * operation is setted. + */ +struct inode *afs_try_auto_mntpt(struct dentry *dentry, struct inode *dir) +{ + struct afs_vnode *vnode = AFS_FS_I(dir); + struct inode *inode; + int ret = -ENOENT; + + _enter("%p{%pd}, {%x:%u}", + dentry, dentry, vnode->fid.vid, vnode->fid.vnode); + + if (!test_bit(AFS_VNODE_AUTOCELL, &vnode->flags)) + goto out; + + ret = afs_probe_cell_name(dentry); + if (ret < 0) + goto out; + + inode = afs_iget_pseudo_dir(dir->i_sb, false); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + goto out; + } + + _leave("= %p", inode); + return inode; + +out: + _leave("= %d", ret); + return ERR_PTR(ret); +} + +/* + * Look up @cell in a dynroot directory. This is a substitution for the + * local cell name for the net namespace. + */ +static struct dentry *afs_lookup_atcell(struct dentry *dentry) +{ + struct afs_cell *cell; + struct afs_net *net = afs_d2net(dentry); + struct dentry *ret; + unsigned int seq = 0; + char *name; + int len; + + if (!net->ws_cell) + return ERR_PTR(-ENOENT); + + ret = ERR_PTR(-ENOMEM); + name = kmalloc(AFS_MAXCELLNAME + 1, GFP_KERNEL); + if (!name) + goto out_p; + + rcu_read_lock(); + do { + read_seqbegin_or_lock(&net->cells_lock, &seq); + cell = rcu_dereference_raw(net->ws_cell); + if (cell) { + len = cell->name_len; + memcpy(name, cell->name, len + 1); + } + } while (need_seqretry(&net->cells_lock, seq)); + done_seqretry(&net->cells_lock, seq); + rcu_read_unlock(); + + ret = ERR_PTR(-ENOENT); + if (!cell) + goto out_n; + + ret = lookup_one_len(name, dentry->d_parent, len); + + /* We don't want to d_add() the @cell dentry here as we don't want to + * the cached dentry to hide changes to the local cell name. + */ + +out_n: + kfree(name); +out_p: + return ret; +} + +/* + * Look up an entry in a dynroot directory. + */ +static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + struct afs_vnode *vnode; + struct inode *inode; + int ret; + + vnode = AFS_FS_I(dir); + + _enter("%pd", dentry); + + ASSERTCMP(d_inode(dentry), ==, NULL); + + if (dentry->d_name.len >= AFSNAMEMAX) { + _leave(" = -ENAMETOOLONG"); + return ERR_PTR(-ENAMETOOLONG); + } + + if (dentry->d_name.len == 5 && + memcmp(dentry->d_name.name, "@cell", 5) == 0) + return afs_lookup_atcell(dentry); + + inode = afs_try_auto_mntpt(dentry, dir); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + if (ret == -ENOENT) { + d_add(dentry, NULL); + _leave(" = NULL [negative]"); + return NULL; + } + _leave(" = %d [do]", ret); + return ERR_PTR(ret); + } + + d_add(dentry, inode); + _leave(" = 0 { ino=%lu v=%u }", + d_inode(dentry)->i_ino, d_inode(dentry)->i_generation); + return NULL; +} + +const struct inode_operations afs_dynroot_inode_operations = { + .lookup = afs_dynroot_lookup, +}; + +/* + * Dirs in the dynamic root don't need revalidation. + */ +static int afs_dynroot_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + return 1; +} + +/* + * Allow the VFS to enquire as to whether a dentry should be unhashed (mustn't + * sleep) + * - called from dput() when d_count is going to 0. + * - return 1 to request dentry be unhashed, 0 otherwise + */ +static int afs_dynroot_d_delete(const struct dentry *dentry) +{ + return d_really_is_positive(dentry); +} + +const struct dentry_operations afs_dynroot_dentry_operations = { + .d_revalidate = afs_dynroot_d_revalidate, + .d_delete = afs_dynroot_d_delete, + .d_release = afs_d_release, + .d_automount = afs_d_automount, +}; diff --git a/fs/afs/file.c b/fs/afs/file.c index 79e665a35fea..c24c08016dd9 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -30,7 +30,6 @@ static int afs_readpages(struct file *filp, struct address_space *mapping, const struct file_operations afs_file_operations = { .open = afs_open, - .flush = afs_flush, .release = afs_release, .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, @@ -146,6 +145,9 @@ int afs_open(struct inode *inode, struct file *file) if (ret < 0) goto error_af; } + + if (file->f_flags & O_TRUNC) + set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); file->private_data = af; _leave(" = 0"); @@ -170,6 +172,9 @@ int afs_release(struct inode *inode, struct file *file) _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode); + if ((file->f_mode & FMODE_WRITE)) + return vfs_fsync(file, 0); + file->private_data = NULL; if (af->wb) afs_put_wb_key(af->wb); @@ -187,10 +192,12 @@ void afs_put_read(struct afs_read *req) { int i; - if (atomic_dec_and_test(&req->usage)) { + if (refcount_dec_and_test(&req->usage)) { for (i = 0; i < req->nr_pages; i++) if (req->pages[i]) put_page(req->pages[i]); + if (req->pages != req->array) + kfree(req->pages); kfree(req); } } @@ -240,6 +247,12 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de ret = afs_end_vnode_operation(&fc); } + if (ret == 0) { + afs_stat_v(vnode, n_fetches); + atomic_long_add(desc->actual_len, + &afs_v2net(vnode)->n_fetch_bytes); + } + _leave(" = %d", ret); return ret; } @@ -297,10 +310,11 @@ int afs_page_filler(void *data, struct page *page) * end of the file, the server will return a short read and the * unmarshalling code will clear the unfilled space. */ - atomic_set(&req->usage, 1); + refcount_set(&req->usage, 1); req->pos = (loff_t)page->index << PAGE_SHIFT; req->len = PAGE_SIZE; req->nr_pages = 1; + req->pages = req->array; req->pages[0] = page; get_page(page); @@ -309,10 +323,6 @@ int afs_page_filler(void *data, struct page *page) ret = afs_fetch_data(vnode, key, req); afs_put_read(req); - if (ret >= 0 && S_ISDIR(inode->i_mode) && - !afs_dir_check_page(inode, page)) - ret = -EIO; - if (ret < 0) { if (ret == -ENOENT) { _debug("got NOENT from server" @@ -447,10 +457,11 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping, if (!req) return -ENOMEM; - atomic_set(&req->usage, 1); + refcount_set(&req->usage, 1); req->page_done = afs_readpages_page_done; req->pos = first->index; req->pos <<= PAGE_SHIFT; + req->pages = req->array; /* Transfer the pages to the request. We add them in until one fails * to add to the LRU and then we stop (as that'll make a hole in the diff --git a/fs/afs/flock.c b/fs/afs/flock.c index c40ba2fe3cbe..7a0e017070ec 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -613,7 +613,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl) posix_test_lock(file, fl); if (fl->fl_type == F_UNLCK) { /* no local locks; consult the server */ - ret = afs_fetch_status(vnode, key); + ret = afs_fetch_status(vnode, key, false); if (ret < 0) goto error; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 88ec38c2d83c..efacdb7c1dee 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -16,6 +16,7 @@ #include <linux/iversion.h> #include "internal.h" #include "afs_fs.h" +#include "xdr_fs.h" static const struct afs_fid afs_zero_fid; @@ -44,109 +45,194 @@ static void xdr_decode_AFSFid(const __be32 **_bp, struct afs_fid *fid) } /* - * decode an AFSFetchStatus block + * Dump a bad file status record. */ -static void xdr_decode_AFSFetchStatus(const __be32 **_bp, - struct afs_file_status *status, - struct afs_vnode *vnode, - afs_dataversion_t *store_version) +static void xdr_dump_bad(const __be32 *bp) { - afs_dataversion_t expected_version; - const __be32 *bp = *_bp; + __be32 x[4]; + int i; + + pr_notice("AFS XDR: Bad status record\n"); + for (i = 0; i < 5 * 4 * 4; i += 16) { + memcpy(x, bp, 16); + bp += 4; + pr_notice("%03x: %08x %08x %08x %08x\n", + i, ntohl(x[0]), ntohl(x[1]), ntohl(x[2]), ntohl(x[3])); + } + + memcpy(x, bp, 4); + pr_notice("0x50: %08x\n", ntohl(x[0])); +} + +/* + * Update the core inode struct from a returned status record. + */ +void afs_update_inode_from_status(struct afs_vnode *vnode, + struct afs_file_status *status, + const afs_dataversion_t *expected_version, + u8 flags) +{ + struct timespec t; umode_t mode; + + t.tv_sec = status->mtime_client; + t.tv_nsec = 0; + vnode->vfs_inode.i_ctime = t; + vnode->vfs_inode.i_mtime = t; + vnode->vfs_inode.i_atime = t; + + if (flags & (AFS_VNODE_META_CHANGED | AFS_VNODE_NOT_YET_SET)) { + vnode->vfs_inode.i_uid = make_kuid(&init_user_ns, status->owner); + vnode->vfs_inode.i_gid = make_kgid(&init_user_ns, status->group); + set_nlink(&vnode->vfs_inode, status->nlink); + + mode = vnode->vfs_inode.i_mode; + mode &= ~S_IALLUGO; + mode |= status->mode; + barrier(); + vnode->vfs_inode.i_mode = mode; + } + + if (!(flags & AFS_VNODE_NOT_YET_SET)) { + if (expected_version && + *expected_version != status->data_version) { + _debug("vnode modified %llx on {%x:%u} [exp %llx]", + (unsigned long long) status->data_version, + vnode->fid.vid, vnode->fid.vnode, + (unsigned long long) *expected_version); + vnode->invalid_before = status->data_version; + if (vnode->status.type == AFS_FTYPE_DIR) { + if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) + afs_stat_v(vnode, n_inval); + } else { + set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags); + } + } else if (vnode->status.type == AFS_FTYPE_DIR) { + /* Expected directory change is handled elsewhere so + * that we can locally edit the directory and save on a + * download. + */ + if (test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) + flags &= ~AFS_VNODE_DATA_CHANGED; + } + } + + if (flags & (AFS_VNODE_DATA_CHANGED | AFS_VNODE_NOT_YET_SET)) { + inode_set_iversion_raw(&vnode->vfs_inode, status->data_version); + i_size_write(&vnode->vfs_inode, status->size); + } +} + +/* + * decode an AFSFetchStatus block + */ +static int xdr_decode_AFSFetchStatus(struct afs_call *call, + const __be32 **_bp, + struct afs_file_status *status, + struct afs_vnode *vnode, + const afs_dataversion_t *expected_version, + struct afs_read *read_req) +{ + const struct afs_xdr_AFSFetchStatus *xdr = (const void *)*_bp; u64 data_version, size; - bool changed = false; - kuid_t owner; - kgid_t group; + u32 type, abort_code; + u8 flags = 0; + int ret; if (vnode) write_seqlock(&vnode->cb_lock); -#define EXTRACT(DST) \ - do { \ - u32 x = ntohl(*bp++); \ - if (DST != x) \ - changed |= true; \ - DST = x; \ - } while (0) - - status->if_version = ntohl(*bp++); - EXTRACT(status->type); - EXTRACT(status->nlink); - size = ntohl(*bp++); - data_version = ntohl(*bp++); - EXTRACT(status->author); - owner = make_kuid(&init_user_ns, ntohl(*bp++)); - changed |= !uid_eq(owner, status->owner); - status->owner = owner; - EXTRACT(status->caller_access); /* call ticket dependent */ - EXTRACT(status->anon_access); - EXTRACT(status->mode); - bp++; /* parent.vnode */ - bp++; /* parent.unique */ - bp++; /* seg size */ - status->mtime_client = ntohl(*bp++); - status->mtime_server = ntohl(*bp++); - group = make_kgid(&init_user_ns, ntohl(*bp++)); - changed |= !gid_eq(group, status->group); - status->group = group; - bp++; /* sync counter */ - data_version |= (u64) ntohl(*bp++) << 32; - EXTRACT(status->lock_count); - size |= (u64) ntohl(*bp++) << 32; - bp++; /* spare 4 */ - *_bp = bp; + if (xdr->if_version != htonl(AFS_FSTATUS_VERSION)) { + pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version)); + goto bad; + } - if (size != status->size) { - status->size = size; - changed |= true; + type = ntohl(xdr->type); + abort_code = ntohl(xdr->abort_code); + switch (type) { + case AFS_FTYPE_FILE: + case AFS_FTYPE_DIR: + case AFS_FTYPE_SYMLINK: + if (type != status->type && + vnode && + !test_bit(AFS_VNODE_UNSET, &vnode->flags)) { + pr_warning("Vnode %x:%x:%x changed type %u to %u\n", + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + status->type, type); + goto bad; + } + status->type = type; + break; + case AFS_FTYPE_INVALID: + if (abort_code != 0) { + status->abort_code = abort_code; + ret = 0; + goto out; + } + /* Fall through */ + default: + goto bad; } - status->mode &= S_IALLUGO; - _debug("vnode time %lx, %lx", - status->mtime_client, status->mtime_server); +#define EXTRACT_M(FIELD) \ + do { \ + u32 x = ntohl(xdr->FIELD); \ + if (status->FIELD != x) { \ + flags |= AFS_VNODE_META_CHANGED; \ + status->FIELD = x; \ + } \ + } while (0) - if (vnode) { - if (changed && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) { - _debug("vnode changed"); - i_size_write(&vnode->vfs_inode, size); - vnode->vfs_inode.i_uid = status->owner; - vnode->vfs_inode.i_gid = status->group; - vnode->vfs_inode.i_generation = vnode->fid.unique; - set_nlink(&vnode->vfs_inode, status->nlink); - - mode = vnode->vfs_inode.i_mode; - mode &= ~S_IALLUGO; - mode |= status->mode; - barrier(); - vnode->vfs_inode.i_mode = mode; - } + EXTRACT_M(nlink); + EXTRACT_M(author); + EXTRACT_M(owner); + EXTRACT_M(caller_access); /* call ticket dependent */ + EXTRACT_M(anon_access); + EXTRACT_M(mode); + EXTRACT_M(group); + + status->mtime_client = ntohl(xdr->mtime_client); + status->mtime_server = ntohl(xdr->mtime_server); + status->lock_count = ntohl(xdr->lock_count); + + size = (u64)ntohl(xdr->size_lo); + size |= (u64)ntohl(xdr->size_hi) << 32; + status->size = size; + + data_version = (u64)ntohl(xdr->data_version_lo); + data_version |= (u64)ntohl(xdr->data_version_hi) << 32; + if (data_version != status->data_version) { + status->data_version = data_version; + flags |= AFS_VNODE_DATA_CHANGED; + } - vnode->vfs_inode.i_ctime.tv_sec = status->mtime_client; - vnode->vfs_inode.i_mtime = vnode->vfs_inode.i_ctime; - vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime; - inode_set_iversion_raw(&vnode->vfs_inode, data_version); + if (read_req) { + read_req->data_version = data_version; + read_req->file_size = size; } - expected_version = status->data_version; - if (store_version) - expected_version = *store_version; + *_bp = (const void *)*_bp + sizeof(*xdr); - if (expected_version != data_version) { - status->data_version = data_version; - if (vnode && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) { - _debug("vnode modified %llx on {%x:%u}", - (unsigned long long) data_version, - vnode->fid.vid, vnode->fid.vnode); - set_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags); - set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags); - } - } else if (store_version) { - status->data_version = data_version; + if (vnode) { + if (test_bit(AFS_VNODE_UNSET, &vnode->flags)) + flags |= AFS_VNODE_NOT_YET_SET; + afs_update_inode_from_status(vnode, status, expected_version, + flags); } + ret = 0; + +out: if (vnode) write_sequnlock(&vnode->cb_lock); + return ret; + +bad: + xdr_dump_bad(*_bp); + ret = afs_protocol_error(call, -EBADMSG); + goto out; } /* @@ -274,7 +360,7 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp, /* * deliver reply data to an FS.FetchStatus */ -static int afs_deliver_fs_fetch_status(struct afs_call *call) +static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call) { struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; @@ -288,7 +374,9 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); + if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL) < 0) + return afs_protocol_error(call, -EBADMSG); xdr_decode_AFSCallBack(call, vnode, &bp); if (call->reply[1]) xdr_decode_AFSVolSync(&bp, call->reply[1]); @@ -300,17 +388,18 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call) /* * FS.FetchStatus operation type */ -static const struct afs_call_type afs_RXFSFetchStatus = { - .name = "FS.FetchStatus", +static const struct afs_call_type afs_RXFSFetchStatus_vnode = { + .name = "FS.FetchStatus(vnode)", .op = afs_FS_FetchStatus, - .deliver = afs_deliver_fs_fetch_status, + .deliver = afs_deliver_fs_fetch_status_vnode, .destructor = afs_flat_call_destructor, }; /* * fetch the status information for a file */ -int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsync) +int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsync, + bool new_inode) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -320,7 +409,8 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy _enter(",%x,{%x:%u},,", key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); - call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4); + call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus_vnode, + 16, (21 + 3 + 6) * 4); if (!call) { fc->ac.error = -ENOMEM; return -ENOMEM; @@ -329,6 +419,7 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy call->key = fc->key; call->reply[0] = vnode; call->reply[1] = volsync; + call->expected_version = new_inode ? 1 : vnode->status.data_version; /* marshall the parameters */ bp = call->request; @@ -464,7 +555,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) return ret; bp = call->buffer; - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); + if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, + &vnode->status.data_version, req) < 0) + return afs_protocol_error(call, -EBADMSG); xdr_decode_AFSCallBack(call, vnode, &bp); if (call->reply[1]) xdr_decode_AFSVolSync(&bp, call->reply[1]); @@ -534,6 +627,7 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req) call->reply[0] = vnode; call->reply[1] = NULL; /* volsync */ call->reply[2] = req; + call->expected_version = vnode->status.data_version; /* marshall the parameters */ bp = call->request; @@ -546,7 +640,7 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req) bp[6] = 0; bp[7] = htonl(lower_32_bits(req->len)); - atomic_inc(&req->usage); + refcount_inc(&req->usage); call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); @@ -578,6 +672,7 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) call->reply[0] = vnode; call->reply[1] = NULL; /* volsync */ call->reply[2] = req; + call->expected_version = vnode->status.data_version; /* marshall the parameters */ bp = call->request; @@ -588,7 +683,7 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) bp[4] = htonl(lower_32_bits(req->pos)); bp[5] = htonl(lower_32_bits(req->len)); - atomic_inc(&req->usage); + refcount_inc(&req->usage); call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); @@ -613,8 +708,10 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_AFSFid(&bp, call->reply[1]); - xdr_decode_AFSFetchStatus(&bp, call->reply[2], NULL, NULL); - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); + if (xdr_decode_AFSFetchStatus(call, &bp, call->reply[2], NULL, NULL, NULL) < 0 || + xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL) < 0) + return afs_protocol_error(call, -EBADMSG); xdr_decode_AFSCallBack_raw(&bp, call->reply[3]); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ @@ -645,6 +742,7 @@ static const struct afs_call_type afs_RXFSMakeDir = { int afs_fs_create(struct afs_fs_cursor *fc, const char *name, umode_t mode, + u64 current_data_version, struct afs_fid *newfid, struct afs_file_status *newstatus, struct afs_callback *newcb) @@ -672,6 +770,7 @@ int afs_fs_create(struct afs_fs_cursor *fc, call->reply[1] = newfid; call->reply[2] = newstatus; call->reply[3] = newcb; + call->expected_version = current_data_version + 1; /* marshall the parameters */ bp = call->request; @@ -715,7 +814,9 @@ static int afs_deliver_fs_remove(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); + if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL) < 0) + return afs_protocol_error(call, -EBADMSG); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); @@ -742,7 +843,8 @@ static const struct afs_call_type afs_RXFSRemoveDir = { /* * remove a file or directory */ -int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir) +int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir, + u64 current_data_version) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -764,6 +866,7 @@ int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir) call->key = fc->key; call->reply[0] = vnode; + call->expected_version = current_data_version + 1; /* marshall the parameters */ bp = call->request; @@ -801,8 +904,10 @@ static int afs_deliver_fs_link(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); - xdr_decode_AFSFetchStatus(&bp, &dvnode->status, dvnode, NULL); + if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, NULL, NULL) < 0 || + xdr_decode_AFSFetchStatus(call, &bp, &dvnode->status, dvnode, + &call->expected_version, NULL) < 0) + return afs_protocol_error(call, -EBADMSG); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); @@ -823,7 +928,7 @@ static const struct afs_call_type afs_RXFSLink = { * make a hard link */ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, - const char *name) + const char *name, u64 current_data_version) { struct afs_vnode *dvnode = fc->vnode; struct afs_call *call; @@ -844,6 +949,7 @@ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, call->key = fc->key; call->reply[0] = dvnode; call->reply[1] = vnode; + call->expected_version = current_data_version + 1; /* marshall the parameters */ bp = call->request; @@ -885,8 +991,10 @@ static int afs_deliver_fs_symlink(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_AFSFid(&bp, call->reply[1]); - xdr_decode_AFSFetchStatus(&bp, call->reply[2], NULL, NULL); - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); + if (xdr_decode_AFSFetchStatus(call, &bp, call->reply[2], NULL, NULL, NULL) || + xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL) < 0) + return afs_protocol_error(call, -EBADMSG); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); @@ -909,6 +1017,7 @@ static const struct afs_call_type afs_RXFSSymlink = { int afs_fs_symlink(struct afs_fs_cursor *fc, const char *name, const char *contents, + u64 current_data_version, struct afs_fid *newfid, struct afs_file_status *newstatus) { @@ -937,6 +1046,7 @@ int afs_fs_symlink(struct afs_fs_cursor *fc, call->reply[0] = vnode; call->reply[1] = newfid; call->reply[2] = newstatus; + call->expected_version = current_data_version + 1; /* marshall the parameters */ bp = call->request; @@ -987,10 +1097,13 @@ static int afs_deliver_fs_rename(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFetchStatus(&bp, &orig_dvnode->status, orig_dvnode, NULL); - if (new_dvnode != orig_dvnode) - xdr_decode_AFSFetchStatus(&bp, &new_dvnode->status, new_dvnode, - NULL); + if (xdr_decode_AFSFetchStatus(call, &bp, &orig_dvnode->status, orig_dvnode, + &call->expected_version, NULL) < 0) + return afs_protocol_error(call, -EBADMSG); + if (new_dvnode != orig_dvnode && + xdr_decode_AFSFetchStatus(call, &bp, &new_dvnode->status, new_dvnode, + &call->expected_version_2, NULL) < 0) + return afs_protocol_error(call, -EBADMSG); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); @@ -1013,7 +1126,9 @@ static const struct afs_call_type afs_RXFSRename = { int afs_fs_rename(struct afs_fs_cursor *fc, const char *orig_name, struct afs_vnode *new_dvnode, - const char *new_name) + const char *new_name, + u64 current_orig_data_version, + u64 current_new_data_version) { struct afs_vnode *orig_dvnode = fc->vnode; struct afs_call *call; @@ -1041,6 +1156,8 @@ int afs_fs_rename(struct afs_fs_cursor *fc, call->key = fc->key; call->reply[0] = orig_dvnode; call->reply[1] = new_dvnode; + call->expected_version = current_orig_data_version + 1; + call->expected_version_2 = current_new_data_version + 1; /* marshall the parameters */ bp = call->request; @@ -1089,8 +1206,9 @@ static int afs_deliver_fs_store_data(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, - &call->store_version); + if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL) < 0) + return afs_protocol_error(call, -EBADMSG); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ afs_pages_written_back(vnode, call); @@ -1147,7 +1265,7 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc, call->first_offset = offset; call->last_to = to; call->send_pages = true; - call->store_version = vnode->status.data_version + 1; + call->expected_version = vnode->status.data_version + 1; /* marshall the parameters */ bp = call->request; @@ -1222,7 +1340,7 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, call->first_offset = offset; call->last_to = to; call->send_pages = true; - call->store_version = vnode->status.data_version + 1; + call->expected_version = vnode->status.data_version + 1; /* marshall the parameters */ bp = call->request; @@ -1252,7 +1370,6 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, */ static int afs_deliver_fs_store_status(struct afs_call *call) { - afs_dataversion_t *store_version; struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; int ret; @@ -1264,12 +1381,10 @@ static int afs_deliver_fs_store_status(struct afs_call *call) return ret; /* unmarshall the reply once we've received all of it */ - store_version = NULL; - if (call->operation_ID == FSSTOREDATA) - store_version = &call->store_version; - bp = call->buffer; - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, store_version); + if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL) < 0) + return afs_protocol_error(call, -EBADMSG); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); @@ -1324,7 +1439,7 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr) call->key = fc->key; call->reply[0] = vnode; - call->store_version = vnode->status.data_version + 1; + call->expected_version = vnode->status.data_version + 1; /* marshall the parameters */ bp = call->request; @@ -1373,7 +1488,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) call->key = fc->key; call->reply[0] = vnode; - call->store_version = vnode->status.data_version + 1; + call->expected_version = vnode->status.data_version + 1; /* marshall the parameters */ bp = call->request; @@ -1418,6 +1533,7 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr) call->key = fc->key; call->reply[0] = vnode; + call->expected_version = vnode->status.data_version; /* marshall the parameters */ bp = call->request; @@ -1471,7 +1587,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) call->count = ntohl(call->tmp); _debug("volname length: %u", call->count); if (call->count >= AFSNAMEMAX) - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); call->offset = 0; call->unmarshall++; @@ -1518,7 +1634,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) call->count = ntohl(call->tmp); _debug("offline msg length: %u", call->count); if (call->count >= AFSNAMEMAX) - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); call->offset = 0; call->unmarshall++; @@ -1565,7 +1681,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) call->count = ntohl(call->tmp); _debug("motd length: %u", call->count); if (call->count >= AFSNAMEMAX) - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); call->offset = 0; call->unmarshall++; @@ -1947,3 +2063,265 @@ int afs_fs_get_capabilities(struct afs_net *net, trace_afs_make_fs_call(call, NULL); return afs_make_call(ac, call, GFP_NOFS, false); } + +/* + * Deliver reply data to an FS.FetchStatus with no vnode. + */ +static int afs_deliver_fs_fetch_status(struct afs_call *call) +{ + struct afs_file_status *status = call->reply[1]; + struct afs_callback *callback = call->reply[2]; + struct afs_volsync *volsync = call->reply[3]; + struct afs_vnode *vnode = call->reply[0]; + const __be32 *bp; + int ret; + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + xdr_decode_AFSFetchStatus(call, &bp, status, vnode, + &call->expected_version, NULL); + callback[call->count].version = ntohl(bp[0]); + callback[call->count].expiry = ntohl(bp[1]); + callback[call->count].type = ntohl(bp[2]); + if (vnode) + xdr_decode_AFSCallBack(call, vnode, &bp); + else + bp += 3; + if (volsync) + xdr_decode_AFSVolSync(&bp, volsync); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.FetchStatus operation type + */ +static const struct afs_call_type afs_RXFSFetchStatus = { + .name = "FS.FetchStatus", + .op = afs_FS_FetchStatus, + .deliver = afs_deliver_fs_fetch_status, + .destructor = afs_flat_call_destructor, +}; + +/* + * Fetch the status information for a fid without needing a vnode handle. + */ +int afs_fs_fetch_status(struct afs_fs_cursor *fc, + struct afs_net *net, + struct afs_fid *fid, + struct afs_file_status *status, + struct afs_callback *callback, + struct afs_volsync *volsync) +{ + struct afs_call *call; + __be32 *bp; + + _enter(",%x,{%x:%u},,", + key_serial(fc->key), fid->vid, fid->vnode); + + call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4); + if (!call) { + fc->ac.error = -ENOMEM; + return -ENOMEM; + } + + call->key = fc->key; + call->reply[0] = NULL; /* vnode for fid[0] */ + call->reply[1] = status; + call->reply[2] = callback; + call->reply[3] = volsync; + call->expected_version = 1; /* vnode->status.data_version */ + + /* marshall the parameters */ + bp = call->request; + bp[0] = htonl(FSFETCHSTATUS); + bp[1] = htonl(fid->vid); + bp[2] = htonl(fid->vnode); + bp[3] = htonl(fid->unique); + + call->cb_break = fc->cb_break; + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to an FS.InlineBulkStatus call + */ +static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) +{ + struct afs_file_status *statuses; + struct afs_callback *callbacks; + struct afs_vnode *vnode = call->reply[0]; + const __be32 *bp; + u32 tmp; + int ret; + + _enter("{%u}", call->unmarshall); + + switch (call->unmarshall) { + case 0: + call->offset = 0; + call->unmarshall++; + + /* Extract the file status count and array in two steps */ + case 1: + _debug("extract status count"); + ret = afs_extract_data(call, &call->tmp, 4, true); + if (ret < 0) + return ret; + + tmp = ntohl(call->tmp); + _debug("status count: %u/%u", tmp, call->count2); + if (tmp != call->count2) + return afs_protocol_error(call, -EBADMSG); + + call->count = 0; + call->unmarshall++; + more_counts: + call->offset = 0; + + case 2: + _debug("extract status array %u", call->count); + ret = afs_extract_data(call, call->buffer, 21 * 4, true); + if (ret < 0) + return ret; + + bp = call->buffer; + statuses = call->reply[1]; + if (xdr_decode_AFSFetchStatus(call, &bp, &statuses[call->count], + call->count == 0 ? vnode : NULL, + NULL, NULL) < 0) + return afs_protocol_error(call, -EBADMSG); + + call->count++; + if (call->count < call->count2) + goto more_counts; + + call->count = 0; + call->unmarshall++; + call->offset = 0; + + /* Extract the callback count and array in two steps */ + case 3: + _debug("extract CB count"); + ret = afs_extract_data(call, &call->tmp, 4, true); + if (ret < 0) + return ret; + + tmp = ntohl(call->tmp); + _debug("CB count: %u", tmp); + if (tmp != call->count2) + return afs_protocol_error(call, -EBADMSG); + call->count = 0; + call->unmarshall++; + more_cbs: + call->offset = 0; + + case 4: + _debug("extract CB array"); + ret = afs_extract_data(call, call->buffer, 3 * 4, true); + if (ret < 0) + return ret; + + _debug("unmarshall CB array"); + bp = call->buffer; + callbacks = call->reply[2]; + callbacks[call->count].version = ntohl(bp[0]); + callbacks[call->count].expiry = ntohl(bp[1]); + callbacks[call->count].type = ntohl(bp[2]); + statuses = call->reply[1]; + if (call->count == 0 && vnode && statuses[0].abort_code == 0) + xdr_decode_AFSCallBack(call, vnode, &bp); + call->count++; + if (call->count < call->count2) + goto more_cbs; + + call->offset = 0; + call->unmarshall++; + + case 5: + ret = afs_extract_data(call, call->buffer, 6 * 4, false); + if (ret < 0) + return ret; + + bp = call->buffer; + if (call->reply[3]) + xdr_decode_AFSVolSync(&bp, call->reply[3]); + + call->offset = 0; + call->unmarshall++; + + case 6: + break; + } + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.InlineBulkStatus operation type + */ +static const struct afs_call_type afs_RXFSInlineBulkStatus = { + .name = "FS.InlineBulkStatus", + .op = afs_FS_InlineBulkStatus, + .deliver = afs_deliver_fs_inline_bulk_status, + .destructor = afs_flat_call_destructor, +}; + +/* + * Fetch the status information for up to 50 files + */ +int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc, + struct afs_net *net, + struct afs_fid *fids, + struct afs_file_status *statuses, + struct afs_callback *callbacks, + unsigned int nr_fids, + struct afs_volsync *volsync) +{ + struct afs_call *call; + __be32 *bp; + int i; + + _enter(",%x,{%x:%u},%u", + key_serial(fc->key), fids[0].vid, fids[1].vnode, nr_fids); + + call = afs_alloc_flat_call(net, &afs_RXFSInlineBulkStatus, + (2 + nr_fids * 3) * 4, + 21 * 4); + if (!call) { + fc->ac.error = -ENOMEM; + return -ENOMEM; + } + + call->key = fc->key; + call->reply[0] = NULL; /* vnode for fid[0] */ + call->reply[1] = statuses; + call->reply[2] = callbacks; + call->reply[3] = volsync; + call->count2 = nr_fids; + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSINLINEBULKSTATUS); + *bp++ = htonl(nr_fids); + for (i = 0; i < nr_fids; i++) { + *bp++ = htonl(fids[i].vid); + *bp++ = htonl(fids[i].vnode); + *bp++ = htonl(fids[i].unique); + } + + call->cb_break = fc->cb_break; + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &fids[0]); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 65c5b1edd338..06194cfe9724 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -30,12 +30,11 @@ static const struct inode_operations afs_symlink_inode_operations = { }; /* - * map the AFS file status to the inode member variables + * Initialise an inode from the vnode status. */ -static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) +static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key) { struct inode *inode = AFS_VNODE_TO_I(vnode); - bool changed; _debug("FS: ft=%d lk=%d sz=%llu ver=%Lu mod=%hu", vnode->status.type, @@ -46,16 +45,21 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) read_seqlock_excl(&vnode->cb_lock); + afs_update_inode_from_status(vnode, &vnode->status, NULL, + AFS_VNODE_NOT_YET_SET); + switch (vnode->status.type) { case AFS_FTYPE_FILE: inode->i_mode = S_IFREG | vnode->status.mode; inode->i_op = &afs_file_inode_operations; inode->i_fop = &afs_file_operations; + inode->i_mapping->a_ops = &afs_fs_aops; break; case AFS_FTYPE_DIR: inode->i_mode = S_IFDIR | vnode->status.mode; inode->i_op = &afs_dir_inode_operations; inode->i_fop = &afs_dir_file_operations; + inode->i_mapping->a_ops = &afs_dir_aops; break; case AFS_FTYPE_SYMLINK: /* Symlinks with a mode of 0644 are actually mountpoints. */ @@ -67,45 +71,31 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) inode->i_mode = S_IFDIR | 0555; inode->i_op = &afs_mntpt_inode_operations; inode->i_fop = &afs_mntpt_file_operations; + inode->i_mapping->a_ops = &afs_fs_aops; } else { inode->i_mode = S_IFLNK | vnode->status.mode; inode->i_op = &afs_symlink_inode_operations; + inode->i_mapping->a_ops = &afs_fs_aops; } inode_nohighmem(inode); break; default: printk("kAFS: AFS vnode with undefined type\n"); read_sequnlock_excl(&vnode->cb_lock); - return -EBADMSG; + return afs_protocol_error(NULL, -EBADMSG); } - changed = (vnode->status.size != inode->i_size); - - set_nlink(inode, vnode->status.nlink); - inode->i_uid = vnode->status.owner; - inode->i_gid = vnode->status.group; - inode->i_size = vnode->status.size; - inode->i_ctime.tv_sec = vnode->status.mtime_client; - inode->i_ctime.tv_nsec = 0; - inode->i_atime = inode->i_mtime = inode->i_ctime; inode->i_blocks = 0; - inode->i_generation = vnode->fid.unique; - inode_set_iversion_raw(inode, vnode->status.data_version); - inode->i_mapping->a_ops = &afs_fs_aops; + vnode->invalid_before = vnode->status.data_version; read_sequnlock_excl(&vnode->cb_lock); - -#ifdef CONFIG_AFS_FSCACHE - if (changed) - fscache_attr_changed(vnode->cache); -#endif return 0; } /* * Fetch file status from the volume. */ -int afs_fetch_status(struct afs_vnode *vnode, struct key *key) +int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode) { struct afs_fs_cursor fc; int ret; @@ -119,7 +109,7 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key) if (afs_begin_vnode_operation(&fc, vnode, key)) { while (afs_select_fileserver(&fc)) { fc.cb_break = vnode->cb_break + vnode->cb_s_break; - afs_fs_fetch_file_status(&fc, NULL); + afs_fs_fetch_file_status(&fc, NULL, new_inode); } afs_check_for_remote_deletion(&fc, fc.vnode); @@ -255,6 +245,11 @@ static void afs_get_inode_cache(struct afs_vnode *vnode) } __packed key; struct afs_vnode_cache_aux aux; + if (vnode->status.type == AFS_FTYPE_DIR) { + vnode->cache = NULL; + return; + } + key.vnode_id = vnode->fid.vnode; key.unique = vnode->fid.unique; key.vnode_id_ext[0] = 0; @@ -307,7 +302,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, if (!status) { /* it's a remotely extant inode */ - ret = afs_fetch_status(vnode, key); + ret = afs_fetch_status(vnode, key, true); if (ret < 0) goto bad_inode; } else { @@ -331,15 +326,12 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, vnode->cb_expires_at += ktime_get_real_seconds(); } - /* set up caching before mapping the status, as map-status reads the - * first page of symlinks to see if they're really mountpoints */ - inode->i_size = vnode->status.size; - afs_get_inode_cache(vnode); - - ret = afs_inode_map_status(vnode, key); + ret = afs_inode_init_from_status(vnode, key); if (ret < 0) goto bad_inode; + afs_get_inode_cache(vnode); + /* success */ clear_bit(AFS_VNODE_UNSET, &vnode->flags); inode->i_flags |= S_NOATIME; @@ -349,10 +341,6 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, /* failure */ bad_inode: -#ifdef CONFIG_AFS_FSCACHE - fscache_relinquish_cookie(vnode->cache, NULL, ret == -ENOENT); - vnode->cache = NULL; -#endif iget_failed(inode); _leave(" = %d [bad]", ret); return ERR_PTR(ret); @@ -407,8 +395,11 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break) { vnode->cb_s_break = vnode->cb_interest->server->cb_s_break; - } else if (!test_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags) && - !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) && + } else if (vnode->status.type == AFS_FTYPE_DIR && + test_bit(AFS_VNODE_DIR_VALID, &vnode->flags) && + vnode->cb_expires_at - 10 > now) { + valid = true; + } else if (!test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) && vnode->cb_expires_at - 10 > now) { valid = true; } @@ -432,7 +423,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) * access */ if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { _debug("not promised"); - ret = afs_fetch_status(vnode, key); + ret = afs_fetch_status(vnode, key, false); if (ret < 0) { if (ret == -ENOENT) { set_bit(AFS_VNODE_DELETED, &vnode->flags); @@ -453,8 +444,6 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) * different */ if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) afs_zap_data(vnode); - - clear_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags); mutex_unlock(&vnode->validate_lock); valid: _leave(" = 0"); @@ -544,7 +533,7 @@ void afs_evict_inode(struct inode *inode) } #endif - afs_put_permits(vnode->permit_cache); + afs_put_permits(rcu_access_pointer(vnode->permit_cache)); _leave(""); } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index a6a1d75eee41..f8086ec95e24 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -122,7 +122,8 @@ struct afs_call { u32 operation_ID; /* operation ID for an incoming call */ u32 count; /* count for use in unmarshalling */ __be32 tmp; /* place to extract temporary data */ - afs_dataversion_t store_version; /* updated version expected from store */ + afs_dataversion_t expected_version; /* Updated version expected from store */ + afs_dataversion_t expected_version_2; /* 2nd updated version expected from store */ }; struct afs_call_type { @@ -173,11 +174,14 @@ struct afs_read { loff_t len; /* How much we're asking for */ loff_t actual_len; /* How much we're actually getting */ loff_t remain; /* Amount remaining */ - atomic_t usage; + loff_t file_size; /* File size returned by server */ + afs_dataversion_t data_version; /* Version number returned by server */ + refcount_t usage; unsigned int index; /* Which page we're reading into */ unsigned int nr_pages; void (*page_done)(struct afs_call *, struct afs_read *); - struct page *pages[]; + struct page **pages; + struct page *array[]; }; /* @@ -199,6 +203,18 @@ static inline struct afs_super_info *AFS_FS_S(struct super_block *sb) extern struct file_system_type afs_fs_type; /* + * Set of substitutes for @sys. + */ +struct afs_sysnames { +#define AFS_NR_SYSNAME 16 + char *subs[AFS_NR_SYSNAME]; + refcount_t usage; + unsigned short nr; + short error; + char blank[1]; +}; + +/* * AFS network namespace record. */ struct afs_net { @@ -245,9 +261,25 @@ struct afs_net { struct mutex lock_manager_mutex; /* Misc */ - struct proc_dir_entry *proc_afs; /* /proc/net/afs directory */ + struct proc_dir_entry *proc_afs; /* /proc/net/afs directory */ + struct afs_sysnames *sysnames; + rwlock_t sysnames_lock; + + /* Statistics counters */ + atomic_t n_lookup; /* Number of lookups done */ + atomic_t n_reval; /* Number of dentries needing revalidation */ + atomic_t n_inval; /* Number of invalidations by the server */ + atomic_t n_relpg; /* Number of invalidations by releasepage */ + atomic_t n_read_dir; /* Number of directory pages read */ + atomic_t n_dir_cr; /* Number of directory entry creation edits */ + atomic_t n_dir_rm; /* Number of directory entry removal edits */ + atomic_t n_stores; /* Number of store ops */ + atomic_long_t n_store_bytes; /* Number of bytes stored */ + atomic_long_t n_fetch_bytes; /* Number of bytes fetched */ + atomic_t n_fetches; /* Number of data fetch ops */ }; +extern const char afs_init_sysname[]; extern struct afs_net __afs_net;// Dummy AFS network namespace; TODO: replace with real netns enum afs_cell_state { @@ -363,6 +395,7 @@ struct afs_server { #define AFS_SERVER_FL_UPDATING 4 #define AFS_SERVER_FL_PROBED 5 /* The fileserver has been probed */ #define AFS_SERVER_FL_PROBING 6 /* Fileserver is being probed */ +#define AFS_SERVER_FL_NO_IBULK 7 /* Fileserver doesn't support FS.InlineBulkStatus */ atomic_t usage; u32 addr_version; /* Address list version */ @@ -455,10 +488,11 @@ struct afs_vnode { struct afs_volume *volume; /* volume on which vnode resides */ struct afs_fid fid; /* the file identifier for this inode */ struct afs_file_status status; /* AFS status info for this file */ + afs_dataversion_t invalid_before; /* Child dentries are invalid before this */ #ifdef CONFIG_AFS_FSCACHE struct fscache_cookie *cache; /* caching cookie */ #endif - struct afs_permits *permit_cache; /* cache of permits so far obtained */ + struct afs_permits __rcu *permit_cache; /* cache of permits so far obtained */ struct mutex io_lock; /* Lock for serialising I/O on this mutex */ struct mutex validate_lock; /* lock for validating this vnode */ spinlock_t wb_lock; /* lock for wb_keys */ @@ -466,12 +500,13 @@ struct afs_vnode { unsigned long flags; #define AFS_VNODE_CB_PROMISED 0 /* Set if vnode has a callback promise */ #define AFS_VNODE_UNSET 1 /* set if vnode attributes not yet set */ -#define AFS_VNODE_DIR_MODIFIED 2 /* set if dir vnode's data modified */ +#define AFS_VNODE_DIR_VALID 2 /* Set if dir contents are valid */ #define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */ #define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */ #define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */ #define AFS_VNODE_AUTOCELL 6 /* set if Vnode is an auto mount point */ #define AFS_VNODE_PSEUDODIR 7 /* set if Vnode is a pseudo directory */ +#define AFS_VNODE_NEW_CONTENT 8 /* Set if file has new content (create/trunc-0) */ struct list_head wb_keys; /* List of keys available for writeback */ struct list_head pending_locks; /* locks waiting to be granted */ @@ -611,7 +646,7 @@ extern struct fscache_cookie_def afs_vnode_cache_index_def; */ extern void afs_init_callback_state(struct afs_server *); extern void afs_break_callback(struct afs_vnode *); -extern void afs_break_callbacks(struct afs_server *, size_t,struct afs_callback[]); +extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break*); extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_server_entry *); extern void afs_put_cb_interest(struct afs_net *, struct afs_cb_interest *); @@ -646,11 +681,26 @@ extern bool afs_cm_incoming_call(struct afs_call *); */ extern const struct file_operations afs_dir_file_operations; extern const struct inode_operations afs_dir_inode_operations; +extern const struct address_space_operations afs_dir_aops; +extern const struct dentry_operations afs_fs_dentry_operations; + +extern void afs_d_release(struct dentry *); + +/* + * dir_edit.c + */ +extern void afs_edit_dir_add(struct afs_vnode *, struct qstr *, struct afs_fid *, + enum afs_edit_dir_reason); +extern void afs_edit_dir_remove(struct afs_vnode *, struct qstr *, enum afs_edit_dir_reason); + +/* + * dynroot.c + */ extern const struct file_operations afs_dynroot_file_operations; extern const struct inode_operations afs_dynroot_inode_operations; -extern const struct dentry_operations afs_fs_dentry_operations; +extern const struct dentry_operations afs_dynroot_dentry_operations; -extern bool afs_dir_check_page(struct inode *, struct page *); +extern struct inode *afs_try_auto_mntpt(struct dentry *, struct inode *); /* * file.c @@ -680,17 +730,23 @@ extern int afs_flock(struct file *, int, struct file_lock *); /* * fsclient.c */ -extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_volsync *); +#define AFS_VNODE_NOT_YET_SET 0x01 +#define AFS_VNODE_META_CHANGED 0x02 +#define AFS_VNODE_DATA_CHANGED 0x04 +extern void afs_update_inode_from_status(struct afs_vnode *, struct afs_file_status *, + const afs_dataversion_t *, u8); + +extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_volsync *, bool); extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *); extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *); -extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t, +extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t, u64, struct afs_fid *, struct afs_file_status *, struct afs_callback *); -extern int afs_fs_remove(struct afs_fs_cursor *, const char *, bool); -extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *); -extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, +extern int afs_fs_remove(struct afs_fs_cursor *, const char *, bool, u64); +extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64); +extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, u64, struct afs_fid *, struct afs_file_status *); extern int afs_fs_rename(struct afs_fs_cursor *, const char *, - struct afs_vnode *, const char *); + struct afs_vnode *, const char *, u64, u64); extern int afs_fs_store_data(struct afs_fs_cursor *, struct address_space *, pgoff_t, pgoff_t, unsigned, unsigned); extern int afs_fs_setattr(struct afs_fs_cursor *, struct iattr *); @@ -702,11 +758,18 @@ extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *, struct afs_addr_cursor *, struct key *); extern int afs_fs_get_capabilities(struct afs_net *, struct afs_server *, struct afs_addr_cursor *, struct key *); +extern int afs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *, + struct afs_fid *, struct afs_file_status *, + struct afs_callback *, unsigned int, + struct afs_volsync *); +extern int afs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *, + struct afs_fid *, struct afs_file_status *, + struct afs_callback *, struct afs_volsync *); /* * inode.c */ -extern int afs_fetch_status(struct afs_vnode *, struct key *); +extern int afs_fetch_status(struct afs_vnode *, struct key *, bool); extern int afs_iget5_test(struct inode *, void *); extern struct inode *afs_iget_pseudo_dir(struct super_block *, bool); extern struct inode *afs_iget(struct super_block *, struct key *, @@ -754,6 +817,13 @@ static inline void afs_put_net(struct afs_net *net) { } +static inline void __afs_stat(atomic_t *s) +{ + atomic_inc(s); +} + +#define afs_stat_v(vnode, n) __afs_stat(&afs_v2net(vnode)->n) + /* * misc.c */ @@ -781,6 +851,7 @@ extern int __net_init afs_proc_init(struct afs_net *); extern void __net_exit afs_proc_cleanup(struct afs_net *); extern int afs_proc_cell_setup(struct afs_net *, struct afs_cell *); extern void afs_proc_cell_remove(struct afs_net *, struct afs_cell *); +extern void afs_put_sysnames(struct afs_sysnames *); /* * rotate.c @@ -809,6 +880,7 @@ extern void afs_flat_call_destructor(struct afs_call *); extern void afs_send_empty_reply(struct afs_call *); extern void afs_send_simple_reply(struct afs_call *, const void *, size_t); extern int afs_extract_data(struct afs_call *, void *, size_t, bool); +extern int afs_protocol_error(struct afs_call *, int); static inline int afs_transfer_reply(struct afs_call *call) { @@ -955,7 +1027,6 @@ extern int afs_writepage(struct page *, struct writeback_control *); extern int afs_writepages(struct address_space *, struct writeback_control *); extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *); -extern int afs_flush(struct file *, fl_owner_t); extern int afs_fsync(struct file *, loff_t, loff_t, int); extern int afs_page_mkwrite(struct vm_fault *); extern void afs_prune_wb_keys(struct afs_vnode *); diff --git a/fs/afs/main.c b/fs/afs/main.c index 15a02a05ff40..d7560168b3bf 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -34,11 +34,42 @@ MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list"); struct workqueue_struct *afs_wq; struct afs_net __afs_net; +#if defined(CONFIG_ALPHA) +const char afs_init_sysname[] = "alpha_linux26"; +#elif defined(CONFIG_X86_64) +const char afs_init_sysname[] = "amd64_linux26"; +#elif defined(CONFIG_ARM) +const char afs_init_sysname[] = "arm_linux26"; +#elif defined(CONFIG_ARM64) +const char afs_init_sysname[] = "aarch64_linux26"; +#elif defined(CONFIG_X86_32) +const char afs_init_sysname[] = "i386_linux26"; +#elif defined(CONFIG_IA64) +const char afs_init_sysname[] = "ia64_linux26"; +#elif defined(CONFIG_PPC64) +const char afs_init_sysname[] = "ppc64_linux26"; +#elif defined(CONFIG_PPC32) +const char afs_init_sysname[] = "ppc_linux26"; +#elif defined(CONFIG_S390) +#ifdef CONFIG_64BIT +const char afs_init_sysname[] = "s390x_linux26"; +#else +const char afs_init_sysname[] = "s390_linux26"; +#endif +#elif defined(CONFIG_SPARC64) +const char afs_init_sysname[] = "sparc64_linux26"; +#elif defined(CONFIG_SPARC32) +const char afs_init_sysname[] = "sparc_linux26"; +#else +const char afs_init_sysname[] = "unknown_linux26"; +#endif + /* * Initialise an AFS network namespace record. */ static int __net_init afs_net_init(struct afs_net *net) { + struct afs_sysnames *sysnames; int ret; net->live = true; @@ -67,6 +98,16 @@ static int __net_init afs_net_init(struct afs_net *net) INIT_WORK(&net->fs_manager, afs_manage_servers); timer_setup(&net->fs_timer, afs_servers_timer, 0); + ret = -ENOMEM; + sysnames = kzalloc(sizeof(*sysnames), GFP_KERNEL); + if (!sysnames) + goto error_sysnames; + sysnames->subs[0] = (char *)&afs_init_sysname; + sysnames->nr = 1; + refcount_set(&sysnames->usage, 1); + net->sysnames = sysnames; + rwlock_init(&net->sysnames_lock); + /* Register the /proc stuff */ ret = afs_proc_init(net); if (ret < 0) @@ -92,6 +133,8 @@ error_cell_init: net->live = false; afs_proc_cleanup(net); error_proc: + afs_put_sysnames(net->sysnames); +error_sysnames: net->live = false; return ret; } @@ -106,6 +149,7 @@ static void __net_exit afs_net_exit(struct afs_net *net) afs_purge_servers(net); afs_close_socket(net); afs_proc_cleanup(net); + afs_put_sysnames(net->sysnames); } /* diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 4508dd54f789..839a22280606 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -126,6 +126,34 @@ static const struct file_operations afs_proc_servers_fops = { .release = seq_release, }; +static int afs_proc_sysname_open(struct inode *inode, struct file *file); +static int afs_proc_sysname_release(struct inode *inode, struct file *file); +static void *afs_proc_sysname_start(struct seq_file *p, loff_t *pos); +static void *afs_proc_sysname_next(struct seq_file *p, void *v, + loff_t *pos); +static void afs_proc_sysname_stop(struct seq_file *p, void *v); +static int afs_proc_sysname_show(struct seq_file *m, void *v); +static ssize_t afs_proc_sysname_write(struct file *file, + const char __user *buf, + size_t size, loff_t *_pos); + +static const struct seq_operations afs_proc_sysname_ops = { + .start = afs_proc_sysname_start, + .next = afs_proc_sysname_next, + .stop = afs_proc_sysname_stop, + .show = afs_proc_sysname_show, +}; + +static const struct file_operations afs_proc_sysname_fops = { + .open = afs_proc_sysname_open, + .read = seq_read, + .llseek = seq_lseek, + .release = afs_proc_sysname_release, + .write = afs_proc_sysname_write, +}; + +static const struct file_operations afs_proc_stats_fops; + /* * initialise the /proc/fs/afs/ directory */ @@ -139,7 +167,9 @@ int afs_proc_init(struct afs_net *net) if (!proc_create("cells", 0644, net->proc_afs, &afs_proc_cells_fops) || !proc_create("rootcell", 0644, net->proc_afs, &afs_proc_rootcell_fops) || - !proc_create("servers", 0644, net->proc_afs, &afs_proc_servers_fops)) + !proc_create("servers", 0644, net->proc_afs, &afs_proc_servers_fops) || + !proc_create("stats", 0644, net->proc_afs, &afs_proc_stats_fops) || + !proc_create("sysname", 0644, net->proc_afs, &afs_proc_sysname_fops)) goto error_tree; _leave(" = 0"); @@ -183,6 +213,7 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file) * first item */ static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos) + __acquires(rcu) { struct afs_net *net = afs_seq2net(m); @@ -204,6 +235,7 @@ static void *afs_proc_cells_next(struct seq_file *m, void *v, loff_t *pos) * clean up after reading from the cells list */ static void afs_proc_cells_stop(struct seq_file *m, void *v) + __releases(rcu) { rcu_read_unlock(); } @@ -282,7 +314,8 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, goto done; } - set_bit(AFS_CELL_FL_NO_GC, &cell->flags); + if (test_and_set_bit(AFS_CELL_FL_NO_GC, &cell->flags)) + afs_put_cell(net, cell); printk("kAFS: Added new cell '%s'\n", name); } else { goto inval; @@ -304,7 +337,40 @@ inval: static ssize_t afs_proc_rootcell_read(struct file *file, char __user *buf, size_t size, loff_t *_pos) { - return 0; + struct afs_cell *cell; + struct afs_net *net = afs_proc2net(file); + unsigned int seq = 0; + char name[AFS_MAXCELLNAME + 1]; + int len; + + if (*_pos > 0) + return 0; + if (!net->ws_cell) + return 0; + + rcu_read_lock(); + do { + read_seqbegin_or_lock(&net->cells_lock, &seq); + len = 0; + cell = rcu_dereference_raw(net->ws_cell); + if (cell) { + len = cell->name_len; + memcpy(name, cell->name, len); + } + } while (need_seqretry(&net->cells_lock, seq)); + done_seqretry(&net->cells_lock, seq); + rcu_read_unlock(); + + if (!len) + return 0; + + name[len++] = '\n'; + if (len > size) + len = size; + if (copy_to_user(buf, name, len) != 0) + return -EFAULT; + *_pos = 1; + return len; } /* @@ -327,6 +393,12 @@ static ssize_t afs_proc_rootcell_write(struct file *file, if (IS_ERR(kbuf)) return PTR_ERR(kbuf); + ret = -EINVAL; + if (kbuf[0] == '.') + goto out; + if (memchr(kbuf, '/', size)) + goto out; + /* trim to first NL */ s = memchr(kbuf, '\n', size); if (s) @@ -339,6 +411,7 @@ static ssize_t afs_proc_rootcell_write(struct file *file, if (ret >= 0) ret = size; /* consume everything, always */ +out: kfree(kbuf); _leave(" = %d", ret); return ret; @@ -413,6 +486,7 @@ static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file) * first item */ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos) + __acquires(cell->proc_lock) { struct afs_cell *cell = m->private; @@ -438,6 +512,7 @@ static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, * clean up after reading from the cells list */ static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v) + __releases(cell->proc_lock) { struct afs_cell *cell = p->private; @@ -500,6 +575,7 @@ static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file) * first item */ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos) + __acquires(rcu) { struct afs_addr_list *alist; struct afs_cell *cell = m->private; @@ -544,6 +620,7 @@ static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v, * clean up after reading from the cells list */ static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v) + __releases(rcu) { rcu_read_unlock(); } @@ -580,6 +657,7 @@ static int afs_proc_servers_open(struct inode *inode, struct file *file) * first item. */ static void *afs_proc_servers_start(struct seq_file *m, loff_t *_pos) + __acquires(rcu) { struct afs_net *net = afs_seq2net(m); @@ -601,6 +679,7 @@ static void *afs_proc_servers_next(struct seq_file *m, void *v, loff_t *_pos) * clean up after reading from the cells list */ static void afs_proc_servers_stop(struct seq_file *p, void *v) + __releases(rcu) { rcu_read_unlock(); } @@ -626,3 +705,244 @@ static int afs_proc_servers_show(struct seq_file *m, void *v) &alist->addrs[alist->index].transport); return 0; } + +void afs_put_sysnames(struct afs_sysnames *sysnames) +{ + int i; + + if (sysnames && refcount_dec_and_test(&sysnames->usage)) { + for (i = 0; i < sysnames->nr; i++) + if (sysnames->subs[i] != afs_init_sysname && + sysnames->subs[i] != sysnames->blank) + kfree(sysnames->subs[i]); + } +} + +/* + * Handle opening of /proc/fs/afs/sysname. If it is opened for writing, we + * assume the caller wants to change the substitution list and we allocate a + * buffer to hold the list. + */ +static int afs_proc_sysname_open(struct inode *inode, struct file *file) +{ + struct afs_sysnames *sysnames; + struct seq_file *m; + int ret; + + ret = seq_open(file, &afs_proc_sysname_ops); + if (ret < 0) + return ret; + + if (file->f_mode & FMODE_WRITE) { + sysnames = kzalloc(sizeof(*sysnames), GFP_KERNEL); + if (!sysnames) { + seq_release(inode, file); + return -ENOMEM; + } + + refcount_set(&sysnames->usage, 1); + m = file->private_data; + m->private = sysnames; + } + + return 0; +} + +/* + * Handle writes to /proc/fs/afs/sysname to set the @sys substitution. + */ +static ssize_t afs_proc_sysname_write(struct file *file, + const char __user *buf, + size_t size, loff_t *_pos) +{ + struct afs_sysnames *sysnames; + struct seq_file *m = file->private_data; + char *kbuf = NULL, *s, *p, *sub; + int ret, len; + + sysnames = m->private; + if (!sysnames) + return -EINVAL; + if (sysnames->error) + return sysnames->error; + + if (size >= PAGE_SIZE - 1) { + sysnames->error = -EINVAL; + return -EINVAL; + } + if (size == 0) + return 0; + + kbuf = memdup_user_nul(buf, size); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + inode_lock(file_inode(file)); + + p = kbuf; + while ((s = strsep(&p, " \t\n"))) { + len = strlen(s); + if (len == 0) + continue; + ret = -ENAMETOOLONG; + if (len >= AFSNAMEMAX) + goto error; + + if (len >= 4 && + s[len - 4] == '@' && + s[len - 3] == 's' && + s[len - 2] == 'y' && + s[len - 1] == 's') + /* Protect against recursion */ + goto invalid; + + if (s[0] == '.' && + (len < 2 || (len == 2 && s[1] == '.'))) + goto invalid; + + if (memchr(s, '/', len)) + goto invalid; + + ret = -EFBIG; + if (sysnames->nr >= AFS_NR_SYSNAME) + goto out; + + if (strcmp(s, afs_init_sysname) == 0) { + sub = (char *)afs_init_sysname; + } else { + ret = -ENOMEM; + sub = kmemdup(s, len + 1, GFP_KERNEL); + if (!sub) + goto out; + } + + sysnames->subs[sysnames->nr] = sub; + sysnames->nr++; + } + + ret = size; /* consume everything, always */ +out: + inode_unlock(file_inode(file)); + kfree(kbuf); + return ret; + +invalid: + ret = -EINVAL; +error: + sysnames->error = ret; + goto out; +} + +static int afs_proc_sysname_release(struct inode *inode, struct file *file) +{ + struct afs_sysnames *sysnames, *kill = NULL; + struct seq_file *m = file->private_data; + struct afs_net *net = afs_seq2net(m); + + sysnames = m->private; + if (sysnames) { + if (!sysnames->error) { + kill = sysnames; + if (sysnames->nr == 0) { + sysnames->subs[0] = sysnames->blank; + sysnames->nr++; + } + write_lock(&net->sysnames_lock); + kill = net->sysnames; + net->sysnames = sysnames; + write_unlock(&net->sysnames_lock); + } + afs_put_sysnames(kill); + } + + return seq_release(inode, file); +} + +static void *afs_proc_sysname_start(struct seq_file *m, loff_t *pos) + __acquires(&net->sysnames_lock) +{ + struct afs_net *net = afs_seq2net(m); + struct afs_sysnames *names = net->sysnames; + + read_lock(&net->sysnames_lock); + + if (*pos >= names->nr) + return NULL; + return (void *)(unsigned long)(*pos + 1); +} + +static void *afs_proc_sysname_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct afs_net *net = afs_seq2net(m); + struct afs_sysnames *names = net->sysnames; + + *pos += 1; + if (*pos >= names->nr) + return NULL; + return (void *)(unsigned long)(*pos + 1); +} + +static void afs_proc_sysname_stop(struct seq_file *m, void *v) + __releases(&net->sysnames_lock) +{ + struct afs_net *net = afs_seq2net(m); + + read_unlock(&net->sysnames_lock); +} + +static int afs_proc_sysname_show(struct seq_file *m, void *v) +{ + struct afs_net *net = afs_seq2net(m); + struct afs_sysnames *sysnames = net->sysnames; + unsigned int i = (unsigned long)v - 1; + + if (i < sysnames->nr) + seq_printf(m, "%s\n", sysnames->subs[i]); + return 0; +} + +/* + * Display general per-net namespace statistics + */ +static int afs_proc_stats_show(struct seq_file *m, void *v) +{ + struct afs_net *net = afs_seq2net(m); + + seq_puts(m, "kAFS statistics\n"); + + seq_printf(m, "dir-mgmt: look=%u reval=%u inval=%u relpg=%u\n", + atomic_read(&net->n_lookup), + atomic_read(&net->n_reval), + atomic_read(&net->n_inval), + atomic_read(&net->n_relpg)); + + seq_printf(m, "dir-data: rdpg=%u\n", + atomic_read(&net->n_read_dir)); + + seq_printf(m, "dir-edit: cr=%u rm=%u\n", + atomic_read(&net->n_dir_cr), + atomic_read(&net->n_dir_rm)); + + seq_printf(m, "file-rd : n=%u nb=%lu\n", + atomic_read(&net->n_fetches), + atomic_long_read(&net->n_fetch_bytes)); + seq_printf(m, "file-wr : n=%u nb=%lu\n", + atomic_read(&net->n_stores), + atomic_long_read(&net->n_store_bytes)); + return 0; +} + +/* + * Open "/proc/fs/afs/stats" to allow reading of the stat counters. + */ +static int afs_proc_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, afs_proc_stats_show, NULL); +} + +static const struct file_operations afs_proc_stats_fops = { + .open = afs_proc_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index ad1328d85526..ac0feac9d746 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -21,7 +21,7 @@ /* * Initialise a filesystem server cursor for iterating over FS servers. */ -void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode) +static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode) { memset(fc, 0, sizeof(*fc)); } diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index f7ae54b6a393..5c6263972ec9 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -926,3 +926,12 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count, afs_set_call_complete(call, ret, remote_abort); return ret; } + +/* + * Log protocol error production. + */ +noinline int afs_protocol_error(struct afs_call *call, int error) +{ + trace_afs_protocol_error(call, error, __builtin_return_address(0)); + return error; +} diff --git a/fs/afs/security.c b/fs/afs/security.c index b88b7d45fdaa..cea2fff313dc 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -178,18 +178,14 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, } } - if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break)) { - rcu_read_unlock(); + if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break)) goto someone_else_changed_it; - } /* We need a ref on any permits list we want to copy as we'll have to * drop the lock to do memory allocation. */ - if (permits && !refcount_inc_not_zero(&permits->usage)) { - rcu_read_unlock(); + if (permits && !refcount_inc_not_zero(&permits->usage)) goto someone_else_changed_it; - } rcu_read_unlock(); @@ -278,6 +274,7 @@ someone_else_changed_it: /* Someone else changed the cache under us - don't recheck at this * time. */ + rcu_read_unlock(); return; } @@ -296,8 +293,6 @@ int afs_check_permit(struct afs_vnode *vnode, struct key *key, _enter("{%x:%u},%x", vnode->fid.vid, vnode->fid.vnode, key_serial(key)); - permits = vnode->permit_cache; - /* check the permits to see if we've got one yet */ if (key == vnode->volume->cell->anonymous_key) { _debug("anon"); @@ -327,7 +322,7 @@ int afs_check_permit(struct afs_vnode *vnode, struct key *key, */ _debug("no valid permit"); - ret = afs_fetch_status(vnode, key); + ret = afs_fetch_status(vnode, key, false); if (ret < 0) { *_access = 0; _leave(" = %d", ret); diff --git a/fs/afs/server.c b/fs/afs/server.c index a43ef77dabae..e23be63998a8 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -59,7 +59,8 @@ struct afs_server *afs_find_server(struct afs_net *net, alist = rcu_dereference(server->addresses); for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) { b = &alist->addrs[i].transport.sin6; - diff = (u16)a->sin6_port - (u16)b->sin6_port; + diff = ((u16 __force)a->sin6_port - + (u16 __force)b->sin6_port); if (diff == 0) diff = memcmp(&a->sin6_addr, &b->sin6_addr, @@ -79,10 +80,11 @@ struct afs_server *afs_find_server(struct afs_net *net, alist = rcu_dereference(server->addresses); for (i = 0; i < alist->nr_ipv4; i++) { b = &alist->addrs[i].transport.sin6; - diff = (u16)a->sin6_port - (u16)b->sin6_port; + diff = ((u16 __force)a->sin6_port - + (u16 __force)b->sin6_port); if (diff == 0) - diff = ((u32)a->sin6_addr.s6_addr32[3] - - (u32)b->sin6_addr.s6_addr32[3]); + diff = ((u32 __force)a->sin6_addr.s6_addr32[3] - + (u32 __force)b->sin6_addr.s6_addr32[3]); if (diff == 0) goto found; if (diff < 0) { @@ -381,7 +383,7 @@ static void afs_server_rcu(struct rcu_head *rcu) { struct afs_server *server = container_of(rcu, struct afs_server, rcu); - afs_put_addrlist(server->addresses); + afs_put_addrlist(rcu_access_pointer(server->addresses)); kfree(server); } @@ -390,7 +392,7 @@ static void afs_server_rcu(struct rcu_head *rcu) */ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) { - struct afs_addr_list *alist = server->addresses; + struct afs_addr_list *alist = rcu_access_pointer(server->addresses); struct afs_addr_cursor ac = { .alist = alist, .addr = &alist->addrs[0], diff --git a/fs/afs/super.c b/fs/afs/super.c index 3623c952b6ff..65081ec3c36e 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -154,7 +154,7 @@ static int afs_show_devname(struct seq_file *m, struct dentry *root) seq_puts(m, "none"); return 0; } - + switch (volume->type) { case AFSVL_RWVOL: break; @@ -269,7 +269,7 @@ static int afs_parse_device_name(struct afs_mount_params *params, int cellnamesz; _enter(",%s", name); - + if (!name) { printk(KERN_ERR "kAFS: no volume name specified\n"); return -EINVAL; @@ -418,7 +418,10 @@ static int afs_fill_super(struct super_block *sb, if (!sb->s_root) goto error; - sb->s_d_op = &afs_fs_dentry_operations; + if (params->dyn_root) + sb->s_d_op = &afs_dynroot_dentry_operations; + else + sb->s_d_op = &afs_fs_dentry_operations; _leave(" = 0"); return 0; @@ -676,7 +679,7 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bfree = 0; return 0; } - + key = afs_request_key(vnode->volume->cell); if (IS_ERR(key)) return PTR_ERR(key); diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index 5d8562f1ad4a..1ed7e2fd2f35 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -303,7 +303,7 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net, r->uuid.clock_seq_hi_and_reserved = htonl(u->clock_seq_hi_and_reserved); r->uuid.clock_seq_low = htonl(u->clock_seq_low); for (i = 0; i < 6; i++) - r->uuid.node[i] = ntohl(u->node[i]); + r->uuid.node[i] = htonl(u->node[i]); trace_afs_make_vl_call(call); return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false); @@ -450,7 +450,7 @@ again: call->count2 = ntohl(*bp); /* Type or next count */ if (call->count > YFS_MAXENDPOINTS) - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); alist = afs_alloc_addrlist(call->count, FS_SERVICE, AFS_FS_PORT); if (!alist) @@ -474,7 +474,7 @@ again: size = sizeof(__be32) * (1 + 4 + 1); break; default: - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); } size += sizeof(__be32); @@ -487,24 +487,24 @@ again: switch (call->count2) { case YFS_ENDPOINT_IPV4: if (ntohl(bp[0]) != sizeof(__be32) * 2) - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); afs_merge_fs_addr4(alist, bp[1], ntohl(bp[2])); bp += 3; break; case YFS_ENDPOINT_IPV6: if (ntohl(bp[0]) != sizeof(__be32) * 5) - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); afs_merge_fs_addr6(alist, bp + 1, ntohl(bp[5])); bp += 6; break; default: - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); } /* Got either the type of the next entry or the count of * volEndpoints if no more fsEndpoints. */ - call->count2 = htonl(*bp++); + call->count2 = ntohl(*bp++); call->offset = 0; call->count--; @@ -517,7 +517,7 @@ again: if (!call->count) goto end; if (call->count > YFS_MAXENDPOINTS) - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); call->unmarshall = 3; @@ -531,7 +531,7 @@ again: return ret; bp = call->buffer; - call->count2 = htonl(*bp++); + call->count2 = ntohl(*bp++); call->offset = 0; call->unmarshall = 4; @@ -545,7 +545,7 @@ again: size = sizeof(__be32) * (1 + 4 + 1); break; default: - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); } if (call->count > 1) @@ -558,16 +558,16 @@ again: switch (call->count2) { case YFS_ENDPOINT_IPV4: if (ntohl(bp[0]) != sizeof(__be32) * 2) - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); bp += 3; break; case YFS_ENDPOINT_IPV6: if (ntohl(bp[0]) != sizeof(__be32) * 5) - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); bp += 6; break; default: - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); } /* Got either the type of the next entry or the count of @@ -576,7 +576,7 @@ again: call->offset = 0; call->count--; if (call->count > 0) { - call->count2 = htonl(*bp++); + call->count2 = ntohl(*bp++); goto again; } diff --git a/fs/afs/write.c b/fs/afs/write.c index dbc3c0b0142d..c164698dc304 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -42,10 +42,11 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, if (!req) return -ENOMEM; - atomic_set(&req->usage, 1); + refcount_set(&req->usage, 1); req->pos = pos; req->len = len; req->nr_pages = 1; + req->pages = req->array; req->pages[0] = page; get_page(page); @@ -124,7 +125,12 @@ try_again: page->index, priv); goto flush_conflicting_write; } - if (to < f || from > t) + /* If the file is being filled locally, allow inter-write + * spaces to be merged into writes. If it's not, only write + * back what the user gives us. + */ + if (!test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags) && + (to < f || from > t)) goto flush_conflicting_write; if (from < f) f = from; @@ -355,6 +361,12 @@ found_key: } switch (ret) { + case 0: + afs_stat_v(vnode, n_stores); + atomic_long_add((last * PAGE_SIZE + to) - + (first * PAGE_SIZE + offset), + &afs_v2net(vnode)->n_store_bytes); + break; case -EACCES: case -EPERM: case -ENOKEY: @@ -412,7 +424,8 @@ static int afs_write_back_from_locked_page(struct address_space *mapping, trace_afs_page_dirty(vnode, tracepoint_string("WARN"), primary_page->index, priv); - if (start >= final_page || to < PAGE_SIZE) + if (start >= final_page || + (to < PAGE_SIZE && !test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags))) goto no_more; start++; @@ -433,9 +446,10 @@ static int afs_write_back_from_locked_page(struct address_space *mapping, } for (loop = 0; loop < n; loop++) { - if (to != PAGE_SIZE) - break; page = pages[loop]; + if (to != PAGE_SIZE && + !test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags)) + break; if (page->index > final_page) break; if (!trylock_page(page)) @@ -448,7 +462,8 @@ static int afs_write_back_from_locked_page(struct address_space *mapping, priv = page_private(page); f = priv & AFS_PRIV_MAX; t = priv >> AFS_PRIV_SHIFT; - if (f != 0) { + if (f != 0 && + !test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags)) { unlock_page(page); break; } @@ -735,20 +750,6 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) } /* - * Flush out all outstanding writes on a file opened for writing when it is - * closed. - */ -int afs_flush(struct file *file, fl_owner_t id) -{ - _enter(""); - - if ((file->f_mode & FMODE_WRITE) == 0) - return 0; - - return vfs_fsync(file, 0); -} - -/* * notification that a previously read-only page is about to become writable * - if it returns an error, the caller will deliver a bus error signal */ diff --git a/fs/afs/xdr_fs.h b/fs/afs/xdr_fs.h new file mode 100644 index 000000000000..aa21f3068d52 --- /dev/null +++ b/fs/afs/xdr_fs.h @@ -0,0 +1,103 @@ +/* AFS fileserver XDR types + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef XDR_FS_H +#define XDR_FS_H + +struct afs_xdr_AFSFetchStatus { + __be32 if_version; +#define AFS_FSTATUS_VERSION 1 + __be32 type; + __be32 nlink; + __be32 size_lo; + __be32 data_version_lo; + __be32 author; + __be32 owner; + __be32 caller_access; + __be32 anon_access; + __be32 mode; + __be32 parent_vnode; + __be32 parent_unique; + __be32 seg_size; + __be32 mtime_client; + __be32 mtime_server; + __be32 group; + __be32 sync_counter; + __be32 data_version_hi; + __be32 lock_count; + __be32 size_hi; + __be32 abort_code; +} __packed; + +#define AFS_DIR_HASHTBL_SIZE 128 +#define AFS_DIR_DIRENT_SIZE 32 +#define AFS_DIR_SLOTS_PER_BLOCK 64 +#define AFS_DIR_BLOCK_SIZE 2048 +#define AFS_DIR_BLOCKS_PER_PAGE (PAGE_SIZE / AFS_DIR_BLOCK_SIZE) +#define AFS_DIR_MAX_SLOTS 65536 +#define AFS_DIR_BLOCKS_WITH_CTR 128 +#define AFS_DIR_MAX_BLOCKS 1023 +#define AFS_DIR_RESV_BLOCKS 1 +#define AFS_DIR_RESV_BLOCKS0 13 + +/* + * Directory entry structure. + */ +union afs_xdr_dirent { + struct { + u8 valid; + u8 unused[1]; + __be16 hash_next; + __be32 vnode; + __be32 unique; + u8 name[16]; + u8 overflow[4]; /* if any char of the name (inc + * NUL) reaches here, consume + * the next dirent too */ + } u; + u8 extended_name[32]; +} __packed; + +/* + * Directory block header (one at the beginning of every 2048-byte block). + */ +struct afs_xdr_dir_hdr { + __be16 npages; + __be16 magic; +#define AFS_DIR_MAGIC htons(1234) + u8 reserved; + u8 bitmap[8]; + u8 pad[19]; +} __packed; + +/* + * Directory block layout + */ +union afs_xdr_dir_block { + struct afs_xdr_dir_hdr hdr; + + struct { + struct afs_xdr_dir_hdr hdr; + u8 alloc_ctrs[AFS_DIR_MAX_BLOCKS]; + __be16 hashtable[AFS_DIR_HASHTBL_SIZE]; + } meta; + + union afs_xdr_dirent dirents[AFS_DIR_SLOTS_PER_BLOCK]; +} __packed; + +/* + * Directory layout on a linux VM page. + */ +struct afs_xdr_dir_page { + union afs_xdr_dir_block blocks[AFS_DIR_BLOCKS_PER_PAGE]; +}; + +#endif /* XDR_FS_H */ diff --git a/fs/buffer.c b/fs/buffer.c index f3491074b035..249b83fafe48 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -494,35 +494,12 @@ repeat: return err; } -static void do_thaw_one(struct super_block *sb, void *unused) +void emergency_thaw_bdev(struct super_block *sb) { while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb)) printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev); } -static void do_thaw_all(struct work_struct *work) -{ - iterate_supers(do_thaw_one, NULL); - kfree(work); - printk(KERN_WARNING "Emergency Thaw complete\n"); -} - -/** - * emergency_thaw_all -- forcibly thaw every frozen filesystem - * - * Used for emergency unfreeze of all filesystems via SysRq - */ -void emergency_thaw_all(void) -{ - struct work_struct *work; - - work = kmalloc(sizeof(*work), GFP_ATOMIC); - if (work) { - INIT_WORK(work, do_thaw_all); - schedule_work(work); - } -} - /** * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers * @mapping: the mapping which wants those buffers written diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 123c069429a7..a813979b5be0 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -535,35 +535,10 @@ static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char return 0; } -#define CB_SUPPORTED_ATTR0 (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) -#define CB_SUPPORTED_ATTR1 (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) -static __be32 encode_attr_bitmap(struct xdr_stream *xdr, const uint32_t *bitmap, __be32 **savep) +static __be32 encode_attr_bitmap(struct xdr_stream *xdr, const uint32_t *bitmap, size_t sz) { - __be32 bm[2]; - __be32 *p; - - bm[0] = htonl(bitmap[0] & CB_SUPPORTED_ATTR0); - bm[1] = htonl(bitmap[1] & CB_SUPPORTED_ATTR1); - if (bm[1] != 0) { - p = xdr_reserve_space(xdr, 16); - if (unlikely(p == NULL)) - return htonl(NFS4ERR_RESOURCE); - *p++ = htonl(2); - *p++ = bm[0]; - *p++ = bm[1]; - } else if (bm[0] != 0) { - p = xdr_reserve_space(xdr, 12); - if (unlikely(p == NULL)) - return htonl(NFS4ERR_RESOURCE); - *p++ = htonl(1); - *p++ = bm[0]; - } else { - p = xdr_reserve_space(xdr, 8); - if (unlikely(p == NULL)) - return htonl(NFS4ERR_RESOURCE); - *p++ = htonl(0); - } - *savep = p; + if (xdr_stream_encode_uint32_array(xdr, bitmap, sz) < 0) + return cpu_to_be32(NFS4ERR_RESOURCE); return 0; } @@ -656,9 +631,13 @@ static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, if (unlikely(status != 0)) goto out; - status = encode_attr_bitmap(xdr, res->bitmap, &savep); + status = encode_attr_bitmap(xdr, res->bitmap, ARRAY_SIZE(res->bitmap)); if (unlikely(status != 0)) goto out; + status = cpu_to_be32(NFS4ERR_RESOURCE); + savep = xdr_reserve_space(xdr, sizeof(*savep)); + if (unlikely(!savep)) + goto out; status = encode_attr_change(xdr, res->bitmap, res->change_attr); if (unlikely(status != 0)) goto out; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index d8b47624fee2..1819d0d0ba4b 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -19,6 +19,7 @@ #include <linux/nfs_xdr.h> #include "nfs4_fs.h" +#include "nfs4session.h" #include "delegation.h" #include "internal.h" #include "nfs4trace.h" @@ -171,11 +172,15 @@ again: * nfs_inode_reclaim_delegation - process a delegation reclaim request * @inode: inode to process * @cred: credential to use for request - * @res: new delegation state from server + * @type: delegation type + * @stateid: delegation stateid + * @pagemod_limit: write delegation "space_limit" * */ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, - struct nfs_openres *res) + fmode_t type, + const nfs4_stateid *stateid, + unsigned long pagemod_limit) { struct nfs_delegation *delegation; struct rpc_cred *oldcred = NULL; @@ -185,9 +190,9 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, if (delegation != NULL) { spin_lock(&delegation->lock); if (delegation->inode != NULL) { - nfs4_stateid_copy(&delegation->stateid, &res->delegation); - delegation->type = res->delegation_type; - delegation->pagemod_limit = res->pagemod_limit; + nfs4_stateid_copy(&delegation->stateid, stateid); + delegation->type = type; + delegation->pagemod_limit = pagemod_limit; oldcred = delegation->cred; delegation->cred = get_rpccred(cred); clear_bit(NFS_DELEGATION_NEED_RECLAIM, @@ -195,14 +200,14 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, spin_unlock(&delegation->lock); rcu_read_unlock(); put_rpccred(oldcred); - trace_nfs4_reclaim_delegation(inode, res->delegation_type); + trace_nfs4_reclaim_delegation(inode, type); return; } /* We appear to have raced with a delegation return. */ spin_unlock(&delegation->lock); } rcu_read_unlock(); - nfs_inode_set_delegation(inode, cred, res); + nfs_inode_set_delegation(inode, cred, type, stateid, pagemod_limit); } static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) @@ -329,11 +334,16 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation, * nfs_inode_set_delegation - set up a delegation on an inode * @inode: inode to which delegation applies * @cred: cred to use for subsequent delegation processing - * @res: new delegation state from server + * @type: delegation type + * @stateid: delegation stateid + * @pagemod_limit: write delegation "space_limit" * * Returns zero on success, or a negative errno value. */ -int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) +int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, + fmode_t type, + const nfs4_stateid *stateid, + unsigned long pagemod_limit) { struct nfs_server *server = NFS_SERVER(inode); struct nfs_client *clp = server->nfs_client; @@ -345,9 +355,9 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct delegation = kmalloc(sizeof(*delegation), GFP_NOFS); if (delegation == NULL) return -ENOMEM; - nfs4_stateid_copy(&delegation->stateid, &res->delegation); - delegation->type = res->delegation_type; - delegation->pagemod_limit = res->pagemod_limit; + nfs4_stateid_copy(&delegation->stateid, stateid); + delegation->type = type; + delegation->pagemod_limit = pagemod_limit; delegation->change_attr = inode_peek_iversion_raw(inode); delegation->cred = get_rpccred(cred); delegation->inode = inode; @@ -392,7 +402,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct rcu_assign_pointer(nfsi->delegation, delegation); delegation = NULL; - trace_nfs4_set_delegation(inode, res->delegation_type); + trace_nfs4_set_delegation(inode, type); out: spin_unlock(&clp->cl_lock); @@ -547,6 +557,22 @@ int nfs4_inode_return_delegation(struct inode *inode) return err; } +/** + * nfs4_inode_make_writeable + * @inode: pointer to inode + * + * Make the inode writeable by returning the delegation if necessary + * + * Returns zero on success, or a negative errno value. + */ +int nfs4_inode_make_writeable(struct inode *inode) +{ + if (!nfs4_has_session(NFS_SERVER(inode)->nfs_client) || + !nfs4_check_delegation(inode, FMODE_WRITE)) + return nfs4_inode_return_delegation(inode); + return 0; +} + static void nfs_mark_return_if_closed_delegation(struct nfs_server *server, struct nfs_delegation *delegation) { diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 185a09f37a89..bb1ef8c37af4 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -36,8 +36,10 @@ enum { NFS_DELEGATION_TEST_EXPIRED, }; -int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); -void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); +int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, + fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit); +void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, + fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit); int nfs4_inode_return_delegation(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); void nfs_inode_return_delegation_noreclaim(struct inode *inode); @@ -70,6 +72,7 @@ int nfs4_check_delegation(struct inode *inode, fmode_t flags); bool nfs4_delegation_flush_on_close(const struct inode *inode); void nfs_inode_find_delegation_state_and_recover(struct inode *inode, const nfs4_stateid *stateid); +int nfs4_inode_make_writeable(struct inode *inode); #endif diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 2f3f86726f5b..73f8b43d988c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1272,7 +1272,9 @@ static void nfs_drop_nlink(struct inode *inode) /* drop the inode if we're reasonably sure this is the last link */ if (inode->i_nlink == 1) clear_nlink(inode); - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR; + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_CTIME + | NFS_INO_INVALID_OTHER; spin_unlock(&inode->i_lock); } @@ -1798,12 +1800,11 @@ static int nfs_safe_remove(struct dentry *dentry) trace_nfs_remove_enter(dir, dentry); if (inode != NULL) { - NFS_PROTO(inode)->return_delegation(inode); - error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); + error = NFS_PROTO(dir)->remove(dir, dentry); if (error == 0) nfs_drop_nlink(inode); } else - error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); + error = NFS_PROTO(dir)->remove(dir, dentry); if (error == -ENOENT) nfs_dentry_handle_enoent(dentry); trace_nfs_remove_exit(dir, dentry, error); @@ -1932,8 +1933,6 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) old_dentry, dentry); trace_nfs_link_enter(inode, dir, dentry); - NFS_PROTO(inode)->return_delegation(inode); - d_drop(dentry); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); if (error == 0) { @@ -2023,10 +2022,6 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, } } - NFS_PROTO(old_inode)->return_delegation(old_inode); - if (new_inode != NULL) - NFS_PROTO(new_inode)->return_delegation(new_inode); - task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL); if (IS_ERR(task)) { error = PTR_ERR(task); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d17a90c4fa37..bd15d0b57626 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -195,7 +195,10 @@ bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags) static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) { struct nfs_inode *nfsi = NFS_I(inode); + bool have_delegation = nfs_have_delegated_attributes(inode); + if (have_delegation) + flags &= ~(NFS_INO_INVALID_CHANGE|NFS_INO_REVAL_PAGECACHE); if (inode->i_mapping->nrpages == 0) flags &= ~NFS_INO_INVALID_DATA; nfsi->cache_validity |= flags; @@ -447,7 +450,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st inode->i_mode = fattr->mode; if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 && nfs_server_capable(inode, NFS_CAP_MODE)) - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. */ @@ -493,37 +496,35 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st if (fattr->valid & NFS_ATTR_FATTR_ATIME) inode->i_atime = fattr->atime; else if (nfs_server_capable(inode, NFS_CAP_ATIME)) - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); if (fattr->valid & NFS_ATTR_FATTR_MTIME) inode->i_mtime = fattr->mtime; else if (nfs_server_capable(inode, NFS_CAP_MTIME)) - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) inode->i_ctime = fattr->ctime; else if (nfs_server_capable(inode, NFS_CAP_CTIME)) - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME); if (fattr->valid & NFS_ATTR_FATTR_CHANGE) inode_set_iversion_raw(inode, fattr->change_attr); else - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR - | NFS_INO_REVAL_PAGECACHE); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE); if (fattr->valid & NFS_ATTR_FATTR_SIZE) inode->i_size = nfs_size_to_loff_t(fattr->size); else - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR - | NFS_INO_REVAL_PAGECACHE); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_SIZE); if (fattr->valid & NFS_ATTR_FATTR_NLINK) set_nlink(inode, fattr->nlink); else if (nfs_server_capable(inode, NFS_CAP_NLINK)) - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); if (fattr->valid & NFS_ATTR_FATTR_OWNER) inode->i_uid = fattr->uid; else if (nfs_server_capable(inode, NFS_CAP_OWNER)) - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); if (fattr->valid & NFS_ATTR_FATTR_GROUP) inode->i_gid = fattr->gid; else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { @@ -608,11 +609,6 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) goto out; } - /* - * Return any delegations if we're going to change ACLs - */ - if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) - NFS_PROTO(inode)->return_delegation(inode); error = NFS_PROTO(inode)->setattr(dentry, fattr, attr); if (error == 0) error = nfs_refresh_inode(inode, fattr); @@ -645,6 +641,7 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset) /* Optimisation */ if (offset == 0) NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA; + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE; spin_unlock(&inode->i_lock); truncate_pagecache(inode, offset); @@ -657,6 +654,7 @@ out: * nfs_setattr_update_inode - Update inode metadata after a setattr call. * @inode: pointer to struct inode * @attr: pointer to struct iattr + * @fattr: pointer to struct nfs_fattr * * Note: we do this in the *proc.c in order to ensure that * it works for things like exclusive creates too. @@ -669,6 +667,8 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, spin_lock(&inode->i_lock); NFS_I(inode)->attr_gencount = fattr->gencount; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_CTIME); if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { if ((attr->ia_valid & ATTR_MODE) != 0) { int mode = attr->ia_mode & S_IALLUGO; @@ -683,13 +683,12 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, | NFS_INO_INVALID_ACL); } if ((attr->ia_valid & ATTR_SIZE) != 0) { + nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); nfs_vmtruncate(inode, attr->ia_size); } if (fattr->valid) nfs_update_inode(inode, fattr); - else - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR; spin_unlock(&inode->i_lock); } EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); @@ -1303,24 +1302,20 @@ static bool nfs_file_has_buffered_writers(struct nfs_inode *nfsi) return nfs_file_has_writers(nfsi) && nfs_file_io_is_buffered(nfsi); } -static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) +static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) { - unsigned long ret = 0; - if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) && (fattr->valid & NFS_ATTR_FATTR_CHANGE) && inode_eq_iversion_raw(inode, fattr->pre_change_attr)) { inode_set_iversion_raw(inode, fattr->change_attr); if (S_ISDIR(inode->i_mode)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); - ret |= NFS_INO_INVALID_ATTR; } /* If we have atomic WCC data, we may update some attributes */ if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME) && (fattr->valid & NFS_ATTR_FATTR_CTIME) && timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) { memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); - ret |= NFS_INO_INVALID_ATTR; } if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME) @@ -1329,17 +1324,13 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); if (S_ISDIR(inode->i_mode)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); - ret |= NFS_INO_INVALID_ATTR; } if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) && (fattr->valid & NFS_ATTR_FATTR_SIZE) && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) && !nfs_have_writebacks(inode)) { i_size_write(inode, nfs_size_to_loff_t(fattr->size)); - ret |= NFS_INO_INVALID_ATTR; } - - return ret; } /** @@ -1369,33 +1360,41 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (!nfs_file_has_buffered_writers(nfsi)) { /* Verify a few of the more important attributes */ if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && !inode_eq_iversion_raw(inode, fattr->change_attr)) - invalid |= NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE; + invalid |= NFS_INO_INVALID_CHANGE + | NFS_INO_REVAL_PAGECACHE; if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime)) - invalid |= NFS_INO_INVALID_ATTR; + invalid |= NFS_INO_INVALID_MTIME; if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec_equal(&inode->i_ctime, &fattr->ctime)) - invalid |= NFS_INO_INVALID_ATTR; + invalid |= NFS_INO_INVALID_CTIME; if (fattr->valid & NFS_ATTR_FATTR_SIZE) { cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); if (cur_size != new_isize) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + invalid |= NFS_INO_INVALID_SIZE + | NFS_INO_REVAL_PAGECACHE; } } /* Have any file permissions changed? */ if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) - invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; + invalid |= NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_OTHER; if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, fattr->uid)) - invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; + invalid |= NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_OTHER; if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, fattr->gid)) - invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; + invalid |= NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_OTHER; /* Has the link count changed? */ if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink) - invalid |= NFS_INO_INVALID_ATTR; + invalid |= NFS_INO_INVALID_OTHER; if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec_equal(&inode->i_atime, &fattr->atime)) invalid |= NFS_INO_INVALID_ATIME; @@ -1597,10 +1596,9 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) } EXPORT_SYMBOL_GPL(nfs_refresh_inode); -static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr) +static int nfs_post_op_update_inode_locked(struct inode *inode, + struct nfs_fattr *fattr, unsigned int invalid) { - unsigned long invalid = NFS_INO_INVALID_ATTR; - if (S_ISDIR(inode->i_mode)) invalid |= NFS_INO_INVALID_DATA; nfs_set_cache_invalid(inode, invalid); @@ -1629,7 +1627,9 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) spin_lock(&inode->i_lock); nfs_fattr_set_barrier(fattr); - status = nfs_post_op_update_inode_locked(inode, fattr); + status = nfs_post_op_update_inode_locked(inode, fattr, + NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_CTIME); spin_unlock(&inode->i_lock); return status; @@ -1681,7 +1681,10 @@ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fa fattr->valid |= NFS_ATTR_FATTR_PRESIZE; } out_noforce: - status = nfs_post_op_update_inode_locked(inode, fattr); + status = nfs_post_op_update_inode_locked(inode, fattr, + NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_CTIME + | NFS_INO_INVALID_MTIME); return status; } @@ -1789,7 +1792,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | NFS_INO_REVAL_PAGECACHE); /* Do atomic weak cache consistency updates */ - invalid |= nfs_wcc_update_inode(inode, fattr); + nfs_wcc_update_inode(inode, fattr); if (pnfs_layoutcommit_outstanding(inode)) { nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_ATTR; @@ -1803,17 +1806,25 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_sb->s_id, inode->i_ino); /* Could it be a race with writeback? */ if (!have_writers) { - invalid |= NFS_INO_INVALID_ATTR + invalid |= NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_DATA | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; + /* Force revalidate of all attributes */ + save_cache_validity |= NFS_INO_INVALID_CTIME + | NFS_INO_INVALID_MTIME + | NFS_INO_INVALID_SIZE + | NFS_INO_INVALID_OTHER; if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); } inode_set_iversion_raw(inode, fattr->change_attr); } } else { - nfsi->cache_validity |= save_cache_validity; + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_CHANGE + | NFS_INO_REVAL_PAGECACHE + | NFS_INO_REVAL_FORCED); cache_revalidated = false; } @@ -1821,7 +1832,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); } else if (server->caps & NFS_CAP_MTIME) { nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_ATTR + (NFS_INO_INVALID_MTIME | NFS_INO_REVAL_FORCED); cache_revalidated = false; } @@ -1830,7 +1841,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); } else if (server->caps & NFS_CAP_CTIME) { nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_ATTR + (NFS_INO_INVALID_CTIME | NFS_INO_REVAL_FORCED); cache_revalidated = false; } @@ -1845,7 +1856,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (!nfs_have_writebacks(inode) || new_isize > cur_isize) { i_size_write(inode, new_isize); if (!have_writers) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + invalid |= NFS_INO_INVALID_DATA; } dprintk("NFS: isize change on server for file %s/%ld " "(%Ld to %Ld)\n", @@ -1856,7 +1867,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) } } else { nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_ATTR + (NFS_INO_INVALID_SIZE | NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED); cache_revalidated = false; @@ -1877,55 +1888,61 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) umode_t newmode = inode->i_mode & S_IFMT; newmode |= fattr->mode & S_IALLUGO; inode->i_mode = newmode; - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + invalid |= NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_OTHER; } } else if (server->caps & NFS_CAP_MODE) { nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS + (NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_OTHER | NFS_INO_REVAL_FORCED); cache_revalidated = false; } if (fattr->valid & NFS_ATTR_FATTR_OWNER) { if (!uid_eq(inode->i_uid, fattr->uid)) { - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + invalid |= NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_OTHER; inode->i_uid = fattr->uid; } } else if (server->caps & NFS_CAP_OWNER) { nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS + (NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_OTHER | NFS_INO_REVAL_FORCED); cache_revalidated = false; } if (fattr->valid & NFS_ATTR_FATTR_GROUP) { if (!gid_eq(inode->i_gid, fattr->gid)) { - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + invalid |= NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_OTHER; inode->i_gid = fattr->gid; } } else if (server->caps & NFS_CAP_OWNER_GROUP) { nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS + (NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_OTHER | NFS_INO_REVAL_FORCED); cache_revalidated = false; } if (fattr->valid & NFS_ATTR_FATTR_NLINK) { if (inode->i_nlink != fattr->nlink) { - invalid |= NFS_INO_INVALID_ATTR; + invalid |= NFS_INO_INVALID_OTHER; if (S_ISDIR(inode->i_mode)) invalid |= NFS_INO_INVALID_DATA; set_nlink(inode, fattr->nlink); } } else if (server->caps & NFS_CAP_NLINK) { nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_ATTR + (NFS_INO_INVALID_OTHER | NFS_INO_REVAL_FORCED); cache_revalidated = false; } @@ -1942,6 +1959,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* Update attrtimeo value if we're out of the unstable period */ if (invalid & NFS_INO_INVALID_ATTR) { + invalid &= ~NFS_INO_INVALID_ATTR; nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; @@ -1962,10 +1980,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->attr_gencount = fattr->gencount; } - /* Don't declare attrcache up to date if there were no attrs! */ - if (cache_revalidated) - invalid &= ~NFS_INO_INVALID_ATTR; - /* Don't invalidate the data if we were to blame */ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 7327930ad970..eadf1ab31d16 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -138,8 +138,11 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, msg.rpc_cred = nfs_file_cred(sattr->ia_file); nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); - if (status == 0) + if (status == 0) { + if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) + nfs_zap_acl_cache(inode); nfs_setattr_update_inode(inode, sattr, fattr); + } dprintk("NFS reply setattr: %d\n", status); return status; } @@ -383,11 +386,11 @@ out: } static int -nfs3_proc_remove(struct inode *dir, const struct qstr *name) +nfs3_proc_remove(struct inode *dir, struct dentry *dentry) { struct nfs_removeargs arg = { .fh = NFS_FH(dir), - .name = *name, + .name = dentry->d_name, }; struct nfs_removeres res; struct rpc_message msg = { @@ -397,7 +400,7 @@ nfs3_proc_remove(struct inode *dir, const struct qstr *name) }; int status = -ENOMEM; - dprintk("NFS call remove %s\n", name->name); + dprintk("NFS call remove %pd2\n", dentry); res.dir_attr = nfs_alloc_fattr(); if (res.dir_attr == NULL) goto out; @@ -411,7 +414,7 @@ out: } static void -nfs3_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) +nfs3_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentry) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE]; } @@ -433,7 +436,9 @@ nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir) } static void -nfs3_proc_rename_setup(struct rpc_message *msg, struct inode *dir) +nfs3_proc_rename_setup(struct rpc_message *msg, + struct dentry *old_dentry, + struct dentry *new_dentry) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME]; } @@ -908,12 +913,6 @@ static int nfs3_have_delegation(struct inode *inode, fmode_t flags) return 0; } -static int nfs3_return_delegation(struct inode *inode) -{ - nfs_wb_all(inode); - return 0; -} - static const struct inode_operations nfs3_dir_inode_operations = { .create = nfs_create, .lookup = nfs_lookup, @@ -990,7 +989,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .clear_acl_cache = forget_all_cached_acls, .close_context = nfs_close_context, .have_delegation = nfs3_have_delegation, - .return_delegation = nfs3_return_delegation, .alloc_client = nfs_alloc_client, .init_client = nfs_init_client, .free_client = nfs_free_client, diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 6cd33bd5da87..09ee36dd8426 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1997,6 +1997,7 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_entry old = *entry; __be32 *p; int error; + u64 new_cookie; p = xdr_inline_decode(xdr, 4); if (unlikely(p == NULL)) @@ -2019,8 +2020,7 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, if (unlikely(error)) return error; - entry->prev_cookie = entry->cookie; - error = decode_cookie3(xdr, &entry->cookie); + error = decode_cookie3(xdr, &new_cookie); if (unlikely(error)) return error; @@ -2054,6 +2054,9 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, zero_nfs_fh3(entry->fh); } + entry->prev_cookie = entry->cookie; + entry->cookie = new_cookie; + return 0; out_overflow: diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 47f3c273245e..b71757e85066 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1045,7 +1045,9 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo, struct nfs_inode *nfsi = NFS_I(dir); spin_lock(&dir->i_lock); - nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + nfsi->cache_validity |= NFS_INO_INVALID_CTIME + | NFS_INO_INVALID_MTIME + | NFS_INO_INVALID_DATA; if (cinfo->atomic && cinfo->before == inode_peek_iversion_raw(dir)) { nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; nfsi->attrtimeo_timestamp = jiffies; @@ -1669,6 +1671,7 @@ static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmo { struct nfs_delegation *delegation; + fmode &= FMODE_READ|FMODE_WRITE; rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); if (delegation == NULL || (delegation->type & fmode) == fmode) { @@ -1751,12 +1754,16 @@ nfs4_opendata_check_deleg(struct nfs4_opendata *data, struct nfs4_state *state) } if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0) nfs_inode_set_delegation(state->inode, - data->owner->so_cred, - &data->o_res); + data->owner->so_cred, + data->o_res.delegation_type, + &data->o_res.delegation, + data->o_res.pagemod_limit); else nfs_inode_reclaim_delegation(state->inode, - data->owner->so_cred, - &data->o_res); + data->owner->so_cred, + data->o_res.delegation_type, + &data->o_res.delegation, + data->o_res.pagemod_limit); } /* @@ -2743,27 +2750,40 @@ static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st * fields corresponding to attributes that were used to store the verifier. * Make sure we clobber those fields in the later setattr call */ -static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, +static unsigned nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct iattr *sattr, struct nfs4_label **label) { - const u32 *attrset = opendata->o_res.attrset; + const __u32 *bitmask = opendata->o_arg.server->exclcreat_bitmask; + __u32 attrset[3]; + unsigned ret; + unsigned i; - if ((attrset[1] & FATTR4_WORD1_TIME_ACCESS) && - !(sattr->ia_valid & ATTR_ATIME_SET)) - sattr->ia_valid |= ATTR_ATIME; + for (i = 0; i < ARRAY_SIZE(attrset); i++) { + attrset[i] = opendata->o_res.attrset[i]; + if (opendata->o_arg.createmode == NFS4_CREATE_EXCLUSIVE4_1) + attrset[i] &= ~bitmask[i]; + } + + ret = (opendata->o_arg.createmode == NFS4_CREATE_EXCLUSIVE) ? + sattr->ia_valid : 0; - if ((attrset[1] & FATTR4_WORD1_TIME_MODIFY) && - !(sattr->ia_valid & ATTR_MTIME_SET)) - sattr->ia_valid |= ATTR_MTIME; + if ((attrset[1] & (FATTR4_WORD1_TIME_ACCESS|FATTR4_WORD1_TIME_ACCESS_SET))) { + if (sattr->ia_valid & ATTR_ATIME_SET) + ret |= ATTR_ATIME_SET; + else + ret |= ATTR_ATIME; + } - /* Except MODE, it seems harmless of setting twice. */ - if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE && - (attrset[1] & FATTR4_WORD1_MODE || - attrset[2] & FATTR4_WORD2_MODE_UMASK)) - sattr->ia_valid &= ~ATTR_MODE; + if ((attrset[1] & (FATTR4_WORD1_TIME_MODIFY|FATTR4_WORD1_TIME_MODIFY_SET))) { + if (sattr->ia_valid & ATTR_MTIME_SET) + ret |= ATTR_MTIME_SET; + else + ret |= ATTR_MTIME; + } - if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL) + if (!(attrset[2] & FATTR4_WORD2_SECURITY_LABEL)) *label = NULL; + return ret; } static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, @@ -2892,12 +2912,15 @@ static int _nfs4_do_open(struct inode *dir, if ((opendata->o_arg.open_flags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL) && (opendata->o_arg.createmode != NFS4_CREATE_GUARDED)) { - nfs4_exclusive_attrset(opendata, sattr, &label); + unsigned attrs = nfs4_exclusive_attrset(opendata, sattr, &label); /* * send create attributes which was not set by open * with an extra setattr. */ - if (sattr->ia_valid & NFS4_VALID_ATTRS) { + if (attrs || label) { + unsigned ia_old = sattr->ia_valid; + + sattr->ia_valid = attrs; nfs_fattr_init(opendata->o_res.f_attr); status = nfs4_do_setattr(state->inode, cred, opendata->o_res.f_attr, sattr, @@ -2907,6 +2930,7 @@ static int _nfs4_do_open(struct inode *dir, opendata->o_res.f_attr); nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel); } + sattr->ia_valid = ia_old; } } if (opened && opendata->file_created) @@ -3874,6 +3898,10 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, if (IS_ERR(label)) return PTR_ERR(label); + /* Return any delegations if we're going to change ACLs */ + if ((sattr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) + nfs4_inode_make_writeable(inode); + status = nfs4_do_setattr(inode, cred, fattr, sattr, ctx, NULL, label); if (status == 0) { nfs_setattr_update_inode(inode, sattr, fattr); @@ -4048,7 +4076,6 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry struct nfs_server *server = NFS_SERVER(inode); struct nfs4_accessargs args = { .fh = NFS_FH(inode), - .bitmask = server->cache_consistency_bitmask, .access = entry->mask, }; struct nfs4_accessres res = { @@ -4062,14 +4089,18 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry }; int status = 0; - res.fattr = nfs_alloc_fattr(); - if (res.fattr == NULL) - return -ENOMEM; + if (!nfs_have_delegated_attributes(inode)) { + res.fattr = nfs_alloc_fattr(); + if (res.fattr == NULL) + return -ENOMEM; + args.bitmask = server->cache_consistency_bitmask; + } status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); if (!status) { nfs_access_set_mask(entry, res.access); - nfs_refresh_inode(inode, res.fattr); + if (res.fattr) + nfs_refresh_inode(inode, res.fattr); } nfs_free_fattr(res.fattr); return status; @@ -4199,10 +4230,32 @@ static int _nfs4_proc_remove(struct inode *dir, const struct qstr *name) return status; } -static int nfs4_proc_remove(struct inode *dir, const struct qstr *name) +static int nfs4_proc_remove(struct inode *dir, struct dentry *dentry) +{ + struct nfs4_exception exception = { }; + struct inode *inode = d_inode(dentry); + int err; + + if (inode) { + if (inode->i_nlink == 1) + nfs4_inode_return_delegation(inode); + else + nfs4_inode_make_writeable(inode); + } + do { + err = _nfs4_proc_remove(dir, &dentry->d_name); + trace_nfs4_remove(dir, &dentry->d_name, err); + err = nfs4_handle_exception(NFS_SERVER(dir), err, + &exception); + } while (exception.retry); + return err; +} + +static int nfs4_proc_rmdir(struct inode *dir, const struct qstr *name) { struct nfs4_exception exception = { }; int err; + do { err = _nfs4_proc_remove(dir, name); trace_nfs4_remove(dir, name, err); @@ -4212,17 +4265,20 @@ static int nfs4_proc_remove(struct inode *dir, const struct qstr *name) return err; } -static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) +static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentry) { - struct nfs_server *server = NFS_SERVER(dir); struct nfs_removeargs *args = msg->rpc_argp; struct nfs_removeres *res = msg->rpc_resp; + struct inode *inode = d_inode(dentry); - res->server = server; + res->server = NFS_SB(dentry->d_sb); msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; nfs4_init_sequence(&args->seq_args, &res->seq_res, 1); nfs_fattr_init(res->dir_attr); + + if (inode) + nfs4_inode_return_delegation(inode); } static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) @@ -4248,14 +4304,21 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) return 1; } -static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir) +static void nfs4_proc_rename_setup(struct rpc_message *msg, + struct dentry *old_dentry, + struct dentry *new_dentry) { - struct nfs_server *server = NFS_SERVER(dir); struct nfs_renameargs *arg = msg->rpc_argp; struct nfs_renameres *res = msg->rpc_resp; + struct inode *old_inode = d_inode(old_dentry); + struct inode *new_inode = d_inode(new_dentry); + if (old_inode) + nfs4_inode_make_writeable(old_inode); + if (new_inode) + nfs4_inode_return_delegation(new_inode); msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; - res->server = server; + res->server = NFS_SB(old_dentry->d_sb); nfs4_init_sequence(&arg->seq_args, &res->seq_res, 1); } @@ -4317,6 +4380,8 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct } arg.bitmask = nfs4_bitmask(server, res.label); + nfs4_inode_make_writeable(inode); + status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (!status) { update_changeattr(dir, &res.cinfo, res.fattr->time_start); @@ -5310,7 +5375,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl i = buf_to_pages_noslab(buf, buflen, arg.acl_pages); if (i < 0) return i; - nfs4_inode_return_delegation(inode); + nfs4_inode_make_writeable(inode); ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); /* @@ -5325,7 +5390,8 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl * so mark the attribute cache invalid. */ spin_lock(&inode->i_lock); - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR; + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_CTIME; spin_unlock(&inode->i_lock); nfs_access_zap_cache(inode); nfs_zap_acl_cache(inode); @@ -6621,22 +6687,24 @@ static int nfs4_wake_lock_waiter(wait_queue_entry_t *wait, unsigned int mode, int flags, void *key) { int ret; - struct cb_notify_lock_args *cbnl = key; struct nfs4_lock_waiter *waiter = wait->private; - struct nfs_lowner *lowner = &cbnl->cbnl_owner, - *wowner = waiter->owner; - /* Only wake if the callback was for the same owner */ - if (lowner->clientid != wowner->clientid || - lowner->id != wowner->id || - lowner->s_dev != wowner->s_dev) - return 0; + /* NULL key means to wake up everyone */ + if (key) { + struct cb_notify_lock_args *cbnl = key; + struct nfs_lowner *lowner = &cbnl->cbnl_owner, + *wowner = waiter->owner; - /* Make sure it's for the right inode */ - if (nfs_compare_fh(NFS_FH(waiter->inode), &cbnl->cbnl_fh)) - return 0; + /* Only wake if the callback was for the same owner. */ + if (lowner->id != wowner->id || lowner->s_dev != wowner->s_dev) + return 0; - waiter->notified = true; + /* Make sure it's for the right inode */ + if (nfs_compare_fh(NFS_FH(waiter->inode), &cbnl->cbnl_fh)) + return 0; + + waiter->notified = true; + } /* override "private" so we can use default_wake_function */ wait->private = waiter->task; @@ -6673,6 +6741,7 @@ nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) add_wait_queue(q, &wait); while(!signalled()) { + waiter.notified = false; status = nfs4_proc_setlk(state, cmd, request); if ((status != -EAGAIN) || IS_SETLK(cmd)) break; @@ -8414,6 +8483,8 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf { switch(task->tk_status) { case 0: + wake_up_all(&clp->cl_lock_waitq); + /* Fallthrough */ case -NFS4ERR_COMPLETE_ALREADY: case -NFS4ERR_WRONG_CRED: /* What to do here? */ break; @@ -9593,7 +9664,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .link = nfs4_proc_link, .symlink = nfs4_proc_symlink, .mkdir = nfs4_proc_mkdir, - .rmdir = nfs4_proc_remove, + .rmdir = nfs4_proc_rmdir, .readdir = nfs4_proc_readdir, .mknod = nfs4_proc_mknod, .statfs = nfs4_proc_statfs, @@ -9614,7 +9685,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .close_context = nfs4_close_context, .open_context = nfs4_atomic_open, .have_delegation = nfs4_have_delegation, - .return_delegation = nfs4_inode_return_delegation, .alloc_client = nfs4_alloc_client, .init_client = nfs4_init_client, .free_client = nfs4_free_client, diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 91a4d4eeb235..c10a422efe6f 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -428,7 +428,6 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new) struct rb_node **p = &server->state_owners.rb_node, *parent = NULL; struct nfs4_state_owner *sp; - int err; while (*p != NULL) { parent = *p; @@ -445,9 +444,6 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new) return sp; } } - err = ida_get_new(&server->openowner_id, &new->so_seqid.owner_id); - if (err) - return ERR_PTR(err); rb_link_node(&new->so_server_node, parent, p); rb_insert_color(&new->so_server_node, &server->state_owners); return new; @@ -460,7 +456,6 @@ nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp) if (!RB_EMPTY_NODE(&sp->so_server_node)) rb_erase(&sp->so_server_node, &server->state_owners); - ida_remove(&server->openowner_id, sp->so_seqid.owner_id); } static void @@ -495,6 +490,12 @@ nfs4_alloc_state_owner(struct nfs_server *server, sp = kzalloc(sizeof(*sp), gfp_flags); if (!sp) return NULL; + sp->so_seqid.owner_id = ida_simple_get(&server->openowner_id, 0, 0, + gfp_flags); + if (sp->so_seqid.owner_id < 0) { + kfree(sp); + return NULL; + } sp->so_server = server; sp->so_cred = get_rpccred(cred); spin_lock_init(&sp->so_lock); @@ -526,6 +527,7 @@ static void nfs4_free_state_owner(struct nfs4_state_owner *sp) { nfs4_destroy_seqid_counter(&sp->so_seqid); put_rpccred(sp->so_cred); + ida_simple_remove(&sp->so_server->openowner_id, sp->so_seqid.owner_id); kfree(sp); } @@ -576,13 +578,9 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, new = nfs4_alloc_state_owner(server, cred, gfp_flags); if (new == NULL) goto out; - do { - if (ida_pre_get(&server->openowner_id, gfp_flags) == 0) - break; - spin_lock(&clp->cl_lock); - sp = nfs4_insert_state_owner_locked(new); - spin_unlock(&clp->cl_lock); - } while (sp == ERR_PTR(-EAGAIN)); + spin_lock(&clp->cl_lock); + sp = nfs4_insert_state_owner_locked(new); + spin_unlock(&clp->cl_lock); if (sp != new) nfs4_free_state_owner(new); out: diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index b993ad282de2..9b7392032321 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -98,6 +98,7 @@ static int nfs4_stat_to_errno(int); ((3+NFS4_FHSIZE) >> 2)) #define nfs4_fattr_bitmap_maxsz 4 #define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) +#define nfstime4_maxsz (3) #define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2)) #define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) #define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) @@ -112,7 +113,8 @@ static int nfs4_stat_to_errno(int); #define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8) /* This is based on getfattr, which uses the most attributes: */ #define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ - 3 + 3 + 3 + nfs4_owner_maxsz + \ + 3*nfstime4_maxsz + \ + nfs4_owner_maxsz + \ nfs4_group_maxsz + nfs4_label_maxsz + \ decode_mdsthreshold_maxsz)) #define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ @@ -123,7 +125,8 @@ static int nfs4_stat_to_errno(int); nfs4_owner_maxsz + \ nfs4_group_maxsz + \ nfs4_label_maxsz + \ - 4 + 4) + 1 + nfstime4_maxsz + \ + 1 + nfstime4_maxsz) #define encode_savefh_maxsz (op_encode_hdr_maxsz) #define decode_savefh_maxsz (op_decode_hdr_maxsz) #define encode_restorefh_maxsz (op_encode_hdr_maxsz) @@ -957,6 +960,35 @@ static void encode_uint64(struct xdr_stream *xdr, u64 n) WARN_ON_ONCE(xdr_stream_encode_u64(xdr, n) < 0); } +static ssize_t xdr_encode_bitmap4(struct xdr_stream *xdr, + const __u32 *bitmap, size_t len) +{ + ssize_t ret; + + /* Trim empty words */ + while (len > 0 && bitmap[len-1] == 0) + len--; + ret = xdr_stream_encode_uint32_array(xdr, bitmap, len); + if (WARN_ON_ONCE(ret < 0)) + return ret; + return len; +} + +static size_t mask_bitmap4(const __u32 *bitmap, const __u32 *mask, + __u32 *res, size_t len) +{ + size_t i; + __u32 tmp; + + while (len > 0 && (bitmap[len-1] == 0 || mask[len-1] == 0)) + len--; + for (i = len; i-- > 0;) { + tmp = bitmap[i] & mask[i]; + res[i] = tmp; + } + return len; +} + static void encode_nfs4_seqid(struct xdr_stream *xdr, const struct nfs_seqid *seqid) { @@ -1011,6 +1043,14 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve encode_opaque_fixed(xdr, verf->data, NFS4_VERIFIER_SIZE); } +static __be32 * +xdr_encode_nfstime4(__be32 *p, const struct timespec *t) +{ + p = xdr_encode_hyper(p, (__s64)t->tv_sec); + *p++ = cpu_to_be32(t->tv_nsec); + return p; +} + static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs4_label *label, const umode_t *umask, @@ -1022,9 +1062,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, int owner_namelen = 0; int owner_grouplen = 0; __be32 *p; - unsigned i; uint32_t len = 0; - uint32_t bmval_len; uint32_t bmval[3] = { 0 }; /* @@ -1072,7 +1110,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, if (attrmask[1] & FATTR4_WORD1_TIME_ACCESS_SET) { if (iap->ia_valid & ATTR_ATIME_SET) { bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET; - len += 16; + len += 4 + (nfstime4_maxsz << 2); } else if (iap->ia_valid & ATTR_ATIME) { bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET; len += 4; @@ -1081,7 +1119,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, if (attrmask[1] & FATTR4_WORD1_TIME_MODIFY_SET) { if (iap->ia_valid & ATTR_MTIME_SET) { bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET; - len += 16; + len += 4 + (nfstime4_maxsz << 2); } else if (iap->ia_valid & ATTR_MTIME) { bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET; len += 4; @@ -1093,19 +1131,8 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, bmval[2] |= FATTR4_WORD2_SECURITY_LABEL; } - if (bmval[2] != 0) - bmval_len = 3; - else if (bmval[1] != 0) - bmval_len = 2; - else - bmval_len = 1; - - p = reserve_space(xdr, 4 + (bmval_len << 2) + 4 + len); - - *p++ = cpu_to_be32(bmval_len); - for (i = 0; i < bmval_len; i++) - *p++ = cpu_to_be32(bmval[i]); - *p++ = cpu_to_be32(len); + xdr_encode_bitmap4(xdr, bmval, ARRAY_SIZE(bmval)); + xdr_stream_encode_opaque_inline(xdr, (void **)&p, len); if (bmval[0] & FATTR4_WORD0_SIZE) p = xdr_encode_hyper(p, iap->ia_size); @@ -1118,16 +1145,14 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) { if (iap->ia_valid & ATTR_ATIME_SET) { *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); - p = xdr_encode_hyper(p, (s64)iap->ia_atime.tv_sec); - *p++ = cpu_to_be32(iap->ia_atime.tv_nsec); + p = xdr_encode_nfstime4(p, &iap->ia_atime); } else *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); } if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) { if (iap->ia_valid & ATTR_MTIME_SET) { *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); - p = xdr_encode_hyper(p, (s64)iap->ia_mtime.tv_sec); - *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec); + p = xdr_encode_nfstime4(p, &iap->ia_mtime); } else *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); } @@ -1199,85 +1224,45 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg * create->server, create->server->attr_bitmask); } -static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr) -{ - __be32 *p; - - encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr); - p = reserve_space(xdr, 8); - *p++ = cpu_to_be32(1); - *p = cpu_to_be32(bitmap); -} - -static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr) -{ - __be32 *p; - - encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr); - p = reserve_space(xdr, 12); - *p++ = cpu_to_be32(2); - *p++ = cpu_to_be32(bm0); - *p = cpu_to_be32(bm1); -} - -static void -encode_getattr_three(struct xdr_stream *xdr, - uint32_t bm0, uint32_t bm1, uint32_t bm2, - struct compound_hdr *hdr) +static void encode_getattr(struct xdr_stream *xdr, + const __u32 *bitmap, const __u32 *mask, size_t len, + struct compound_hdr *hdr) { - __be32 *p; + __u32 masked_bitmap[nfs4_fattr_bitmap_maxsz]; encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr); - if (bm2) { - p = reserve_space(xdr, 16); - *p++ = cpu_to_be32(3); - *p++ = cpu_to_be32(bm0); - *p++ = cpu_to_be32(bm1); - *p = cpu_to_be32(bm2); - } else if (bm1) { - p = reserve_space(xdr, 12); - *p++ = cpu_to_be32(2); - *p++ = cpu_to_be32(bm0); - *p = cpu_to_be32(bm1); - } else { - p = reserve_space(xdr, 8); - *p++ = cpu_to_be32(1); - *p = cpu_to_be32(bm0); + if (mask) { + if (WARN_ON_ONCE(len > ARRAY_SIZE(masked_bitmap))) + len = ARRAY_SIZE(masked_bitmap); + len = mask_bitmap4(bitmap, mask, masked_bitmap, len); + bitmap = masked_bitmap; } + xdr_encode_bitmap4(xdr, bitmap, len); } static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) { - encode_getattr_three(xdr, bitmask[0] & nfs4_fattr_bitmap[0], - bitmask[1] & nfs4_fattr_bitmap[1], - bitmask[2] & nfs4_fattr_bitmap[2], - hdr); + encode_getattr(xdr, nfs4_fattr_bitmap, bitmask, + ARRAY_SIZE(nfs4_fattr_bitmap), hdr); } static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask, const u32 *open_bitmap, struct compound_hdr *hdr) { - encode_getattr_three(xdr, - bitmask[0] & open_bitmap[0], - bitmask[1] & open_bitmap[1], - bitmask[2] & open_bitmap[2], - hdr); + encode_getattr(xdr, open_bitmap, bitmask, 3, hdr); } static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) { - encode_getattr_three(xdr, - bitmask[0] & nfs4_fsinfo_bitmap[0], - bitmask[1] & nfs4_fsinfo_bitmap[1], - bitmask[2] & nfs4_fsinfo_bitmap[2], - hdr); + encode_getattr(xdr, nfs4_fsinfo_bitmap, bitmask, + ARRAY_SIZE(nfs4_fsinfo_bitmap), hdr); } static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) { - encode_getattr_two(xdr, bitmask[0] & nfs4_fs_locations_bitmap[0], - bitmask[1] & nfs4_fs_locations_bitmap[1], hdr); + encode_getattr(xdr, nfs4_fs_locations_bitmap, bitmask, + ARRAY_SIZE(nfs4_fs_locations_bitmap), hdr); } static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr) @@ -2116,7 +2101,8 @@ static void nfs4_xdr_enc_access(struct rpc_rqst *req, struct xdr_stream *xdr, encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); encode_access(xdr, args->access, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); + if (args->bitmask) + encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); } @@ -2558,13 +2544,17 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; + const __u32 nfs4_acl_bitmap[1] = { + [0] = FATTR4_WORD0_ACL, + }; uint32_t replen; encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); replen = hdr.replen + op_decode_hdr_maxsz; - encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); + encode_getattr(xdr, nfs4_acl_bitmap, NULL, + ARRAY_SIZE(nfs4_acl_bitmap), &hdr); xdr_inline_pages(&req->rq_rcv_buf, replen << 2, args->acl_pages, 0, args->acl_len); @@ -2643,8 +2633,8 @@ static void nfs4_xdr_enc_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); - encode_getattr_one(xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0], - &hdr); + encode_getattr(xdr, nfs4_pathconf_bitmap, args->bitmask, + ARRAY_SIZE(nfs4_pathconf_bitmap), &hdr); encode_nops(&hdr); } @@ -2662,8 +2652,8 @@ static void nfs4_xdr_enc_statfs(struct rpc_rqst *req, struct xdr_stream *xdr, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); - encode_getattr_two(xdr, args->bitmask[0] & nfs4_statfs_bitmap[0], - args->bitmask[1] & nfs4_statfs_bitmap[1], &hdr); + encode_getattr(xdr, nfs4_statfs_bitmap, args->bitmask, + ARRAY_SIZE(nfs4_statfs_bitmap), &hdr); encode_nops(&hdr); } @@ -2683,7 +2673,7 @@ static void nfs4_xdr_enc_server_caps(struct rpc_rqst *req, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fhandle, &hdr); - encode_getattr_three(xdr, bitmask[0], bitmask[1], bitmask[2], &hdr); + encode_getattr(xdr, bitmask, NULL, 3, &hdr); encode_nops(&hdr); } @@ -3217,34 +3207,27 @@ static int decode_ace(struct xdr_stream *xdr, void *ace) return -EIO; } -static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) +static ssize_t +decode_bitmap4(struct xdr_stream *xdr, uint32_t *bitmap, size_t sz) { - uint32_t bmlen; - __be32 *p; - - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - goto out_overflow; - bmlen = be32_to_cpup(p); + ssize_t ret; - bitmap[0] = bitmap[1] = bitmap[2] = 0; - p = xdr_inline_decode(xdr, (bmlen << 2)); - if (unlikely(!p)) - goto out_overflow; - if (bmlen > 0) { - bitmap[0] = be32_to_cpup(p++); - if (bmlen > 1) { - bitmap[1] = be32_to_cpup(p++); - if (bmlen > 2) - bitmap[2] = be32_to_cpup(p); - } - } - return 0; -out_overflow: + ret = xdr_stream_decode_uint32_array(xdr, bitmap, sz); + if (likely(ret >= 0)) + return ret; + if (ret == -EMSGSIZE) + return sz; print_overflow_msg(__func__, xdr); return -EIO; } +static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) +{ + ssize_t ret; + ret = decode_bitmap4(xdr, bitmap, 3); + return ret < 0 ? ret : 0; +} + static int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, unsigned int *savep) { __be32 *p; @@ -3980,7 +3963,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, bitmap[1] &= ~FATTR4_WORD1_OWNER; if (owner_name != NULL) { - len = decode_nfs4_string(xdr, owner_name, GFP_NOWAIT); + len = decode_nfs4_string(xdr, owner_name, GFP_NOIO); if (len <= 0) goto out; dprintk("%s: name=%s\n", __func__, owner_name->data); @@ -4015,7 +3998,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP; if (group_name != NULL) { - len = decode_nfs4_string(xdr, group_name, GFP_NOWAIT); + len = decode_nfs4_string(xdr, group_name, GFP_NOIO); if (len <= 0) goto out; dprintk("%s: name=%s\n", __func__, group_name->data); @@ -4155,19 +4138,25 @@ out_overflow: return -EIO; } +static __be32 * +xdr_decode_nfstime4(__be32 *p, struct timespec *t) +{ + __u64 sec; + + p = xdr_decode_hyper(p, &sec); + t-> tv_sec = (time_t)sec; + t->tv_nsec = be32_to_cpup(p++); + return p; +} + static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time) { __be32 *p; - uint64_t sec; - uint32_t nsec; - p = xdr_inline_decode(xdr, 12); + p = xdr_inline_decode(xdr, nfstime4_maxsz << 2); if (unlikely(!p)) goto out_overflow; - p = xdr_decode_hyper(p, &sec); - nsec = be32_to_cpup(p); - time->tv_sec = (time_t)sec; - time->tv_nsec = (long)nsec; + xdr_decode_nfstime4(p, time); return 0; out_overflow: print_overflow_msg(__func__, xdr); @@ -5470,21 +5459,13 @@ decode_savefh(struct xdr_stream *xdr) static int decode_setattr(struct xdr_stream *xdr) { - __be32 *p; - uint32_t bmlen; int status; status = decode_op_hdr(xdr, OP_SETATTR); if (status) return status; - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - goto out_overflow; - bmlen = be32_to_cpup(p); - p = xdr_inline_decode(xdr, bmlen << 2); - if (likely(p)) + if (decode_bitmap4(xdr, NULL, 0) >= 0) return 0; -out_overflow: print_overflow_msg(__func__, xdr); return -EIO; } @@ -6255,7 +6236,8 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_access(xdr, &res->supported, &res->access); if (status != 0) goto out; - decode_getfattr(xdr, res->fattr, res->server); + if (res->fattr) + decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -7535,6 +7517,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, unsigned int savep; uint32_t bitmap[3] = {0}; uint32_t len; + uint64_t new_cookie; __be32 *p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) goto out_overflow; @@ -7551,8 +7534,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, p = xdr_inline_decode(xdr, 12); if (unlikely(!p)) goto out_overflow; - entry->prev_cookie = entry->cookie; - p = xdr_decode_hyper(p, &entry->cookie); + p = xdr_decode_hyper(p, &new_cookie); entry->len = be32_to_cpup(p); p = xdr_inline_decode(xdr, entry->len); @@ -7586,6 +7568,9 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, if (entry->fattr->valid & NFS_ATTR_FATTR_TYPE) entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); + entry->prev_cookie = entry->cookie; + entry->cookie = new_cookie; + return 0; out_overflow: diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index f7fd9192d4bc..4e93d6308733 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -300,11 +300,11 @@ out: } static int -nfs_proc_remove(struct inode *dir, const struct qstr *name) +nfs_proc_remove(struct inode *dir, struct dentry *dentry) { struct nfs_removeargs arg = { .fh = NFS_FH(dir), - .name = *name, + .name = dentry->d_name, }; struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_REMOVE], @@ -312,7 +312,7 @@ nfs_proc_remove(struct inode *dir, const struct qstr *name) }; int status; - dprintk("NFS call remove %s\n", name->name); + dprintk("NFS call remove %pd2\n",dentry); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); @@ -321,7 +321,7 @@ nfs_proc_remove(struct inode *dir, const struct qstr *name) } static void -nfs_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) +nfs_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentry) { msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE]; } @@ -338,7 +338,9 @@ static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir) } static void -nfs_proc_rename_setup(struct rpc_message *msg, struct inode *dir) +nfs_proc_rename_setup(struct rpc_message *msg, + struct dentry *old_dentry, + struct dentry *new_dentry) { msg->rpc_proc = &nfs_procedures[NFSPROC_RENAME]; } @@ -671,12 +673,6 @@ static int nfs_have_delegation(struct inode *inode, fmode_t flags) return 0; } -static int nfs_return_delegation(struct inode *inode) -{ - nfs_wb_all(inode); - return 0; -} - static const struct inode_operations nfs_dir_inode_operations = { .create = nfs_create, .lookup = nfs_lookup, @@ -741,7 +737,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .lock_check_bounds = nfs_lock_check_bounds, .close_context = nfs_close_context, .have_delegation = nfs_have_delegation, - .return_delegation = nfs_return_delegation, .alloc_client = nfs_alloc_client, .init_client = nfs_init_client, .free_client = nfs_free_client, diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 630b4a3c1a93..bf54fc9ae135 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -105,7 +105,7 @@ static void nfs_do_call_unlink(struct nfs_unlinkdata *data) data->args.fh = NFS_FH(dir); nfs_fattr_init(data->res.dir_attr); - NFS_PROTO(dir)->unlink_setup(&msg, dir); + NFS_PROTO(dir)->unlink_setup(&msg, data->dentry); task_setup_data.rpc_client = NFS_CLIENT(dir); task = rpc_run_task(&task_setup_data); @@ -386,7 +386,7 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, nfs_sb_active(old_dir->i_sb); - NFS_PROTO(data->old_dir)->rename_setup(&msg, old_dir); + NFS_PROTO(data->old_dir)->rename_setup(&msg, old_dentry, new_dentry); return rpc_run_task(&task_setup_data); } @@ -463,9 +463,6 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) fileid = NFS_FILEID(d_inode(dentry)); - /* Return delegation in anticipation of the rename */ - NFS_PROTO(d_inode(dentry))->return_delegation(d_inode(dentry)); - sdentry = NULL; do { int slen; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 6579f3b367bd..0193053bc139 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -231,6 +231,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c if (i_size >= end) goto out; i_size_write(inode, end); + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE; nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); out: spin_unlock(&inode->i_lock); @@ -1562,8 +1563,11 @@ static int nfs_writeback_done(struct rpc_task *task, } /* Deal with the suid/sgid bit corner case */ - if (nfs_should_remove_suid(inode)) - nfs_mark_for_revalidate(inode); + if (nfs_should_remove_suid(inode)) { + spin_lock(&inode->i_lock); + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_OTHER; + spin_unlock(&inode->i_lock); + } return 0; } diff --git a/fs/super.c b/fs/super.c index 672538ca9831..5fa9a8d8d865 100644 --- a/fs/super.c +++ b/fs/super.c @@ -37,6 +37,7 @@ #include <linux/user_namespace.h> #include "internal.h" +static int thaw_super_locked(struct super_block *sb); static LIST_HEAD(super_blocks); static DEFINE_SPINLOCK(sb_lock); @@ -574,6 +575,28 @@ void drop_super_exclusive(struct super_block *sb) } EXPORT_SYMBOL(drop_super_exclusive); +static void __iterate_supers(void (*f)(struct super_block *)) +{ + struct super_block *sb, *p = NULL; + + spin_lock(&sb_lock); + list_for_each_entry(sb, &super_blocks, s_list) { + if (hlist_unhashed(&sb->s_instances)) + continue; + sb->s_count++; + spin_unlock(&sb_lock); + + f(sb); + + spin_lock(&sb_lock); + if (p) + __put_super(p); + p = sb; + } + if (p) + __put_super(p); + spin_unlock(&sb_lock); +} /** * iterate_supers - call function for all active superblocks * @f: function to call @@ -881,33 +904,22 @@ cancel_readonly: return retval; } -static void do_emergency_remount(struct work_struct *work) +static void do_emergency_remount_callback(struct super_block *sb) { - struct super_block *sb, *p = NULL; - - spin_lock(&sb_lock); - list_for_each_entry(sb, &super_blocks, s_list) { - if (hlist_unhashed(&sb->s_instances)) - continue; - sb->s_count++; - spin_unlock(&sb_lock); - down_write(&sb->s_umount); - if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) && - !sb_rdonly(sb)) { - /* - * What lock protects sb->s_flags?? - */ - do_remount_sb(sb, SB_RDONLY, NULL, 1); - } - up_write(&sb->s_umount); - spin_lock(&sb_lock); - if (p) - __put_super(p); - p = sb; + down_write(&sb->s_umount); + if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) && + !sb_rdonly(sb)) { + /* + * What lock protects sb->s_flags?? + */ + do_remount_sb(sb, SB_RDONLY, NULL, 1); } - if (p) - __put_super(p); - spin_unlock(&sb_lock); + up_write(&sb->s_umount); +} + +static void do_emergency_remount(struct work_struct *work) +{ + __iterate_supers(do_emergency_remount_callback); kfree(work); printk("Emergency Remount complete\n"); } @@ -923,6 +935,40 @@ void emergency_remount(void) } } +static void do_thaw_all_callback(struct super_block *sb) +{ + down_write(&sb->s_umount); + if (sb->s_root && sb->s_flags & MS_BORN) { + emergency_thaw_bdev(sb); + thaw_super_locked(sb); + } else { + up_write(&sb->s_umount); + } +} + +static void do_thaw_all(struct work_struct *work) +{ + __iterate_supers(do_thaw_all_callback); + kfree(work); + printk(KERN_WARNING "Emergency Thaw complete\n"); +} + +/** + * emergency_thaw_all -- forcibly thaw every frozen filesystem + * + * Used for emergency unfreeze of all filesystems via SysRq + */ +void emergency_thaw_all(void) +{ + struct work_struct *work; + + work = kmalloc(sizeof(*work), GFP_ATOMIC); + if (work) { + INIT_WORK(work, do_thaw_all); + schedule_work(work); + } +} + /* * Unnamed block devices are dummy devices used by virtual * filesystems which don't use real block-devices. -- jrs @@ -1492,11 +1538,10 @@ EXPORT_SYMBOL(freeze_super); * * Unlocks the filesystem and marks it writeable again after freeze_super(). */ -int thaw_super(struct super_block *sb) +static int thaw_super_locked(struct super_block *sb) { int error; - down_write(&sb->s_umount); if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) { up_write(&sb->s_umount); return -EINVAL; @@ -1527,4 +1572,10 @@ out: deactivate_locked_super(sb); return 0; } + +int thaw_super(struct super_block *sb) +{ + down_write(&sb->s_umount); + return thaw_super_locked(sb); +} EXPORT_SYMBOL(thaw_super); diff --git a/include/acpi/processor.h b/include/acpi/processor.h index d591bb77f592..40a916efd7c0 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -254,6 +254,8 @@ int acpi_processor_pstate_control(void); /* note: this locks both the calling module and the processor module if a _PPC object exists, rmmod is disallowed then */ int acpi_processor_notify_smm(struct module *calling_module); +int acpi_processor_get_psd(acpi_handle handle, + struct acpi_psd_package *pdomain); /* parsing the _P* objects. */ extern int acpi_processor_get_performance_info(struct acpi_processor *pr); diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 04c4cc6fd820..66d1d45fa2e1 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -25,6 +25,50 @@ #define mmiowb() do {} while (0) #endif +#ifndef __io_br +#define __io_br() barrier() +#endif + +/* prevent prefetching of coherent DMA data ahead of a dma-complete */ +#ifndef __io_ar +#ifdef rmb +#define __io_ar() rmb() +#else +#define __io_ar() barrier() +#endif +#endif + +/* flush writes to coherent DMA data before possibly triggering a DMA read */ +#ifndef __io_bw +#ifdef wmb +#define __io_bw() wmb() +#else +#define __io_bw() barrier() +#endif +#endif + +/* serialize device access against a spin_unlock, usually handled there. */ +#ifndef __io_aw +#define __io_aw() barrier() +#endif + +#ifndef __io_pbw +#define __io_pbw() __io_bw() +#endif + +#ifndef __io_paw +#define __io_paw() __io_aw() +#endif + +#ifndef __io_pbr +#define __io_pbr() __io_br() +#endif + +#ifndef __io_par +#define __io_par() __io_ar() +#endif + + /* * __raw_{read,write}{b,w,l,q}() access memory in native endianness. * @@ -110,7 +154,12 @@ static inline void __raw_writeq(u64 value, volatile void __iomem *addr) #define readb readb static inline u8 readb(const volatile void __iomem *addr) { - return __raw_readb(addr); + u8 val; + + __io_br(); + val = __raw_readb(addr); + __io_ar(); + return val; } #endif @@ -118,7 +167,12 @@ static inline u8 readb(const volatile void __iomem *addr) #define readw readw static inline u16 readw(const volatile void __iomem *addr) { - return __le16_to_cpu(__raw_readw(addr)); + u16 val; + + __io_br(); + val = __le16_to_cpu(__raw_readw(addr)); + __io_ar(); + return val; } #endif @@ -126,7 +180,12 @@ static inline u16 readw(const volatile void __iomem *addr) #define readl readl static inline u32 readl(const volatile void __iomem *addr) { - return __le32_to_cpu(__raw_readl(addr)); + u32 val; + + __io_br(); + val = __le32_to_cpu(__raw_readl(addr)); + __io_ar(); + return val; } #endif @@ -135,7 +194,12 @@ static inline u32 readl(const volatile void __iomem *addr) #define readq readq static inline u64 readq(const volatile void __iomem *addr) { - return __le64_to_cpu(__raw_readq(addr)); + u64 val; + + __io_br(); + val = __le64_to_cpu(__raw_readq(addr)); + __io_ar(); + return val; } #endif #endif /* CONFIG_64BIT */ @@ -144,7 +208,9 @@ static inline u64 readq(const volatile void __iomem *addr) #define writeb writeb static inline void writeb(u8 value, volatile void __iomem *addr) { + __io_bw(); __raw_writeb(value, addr); + __io_aw(); } #endif @@ -152,7 +218,9 @@ static inline void writeb(u8 value, volatile void __iomem *addr) #define writew writew static inline void writew(u16 value, volatile void __iomem *addr) { + __io_bw(); __raw_writew(cpu_to_le16(value), addr); + __io_aw(); } #endif @@ -160,7 +228,9 @@ static inline void writew(u16 value, volatile void __iomem *addr) #define writel writel static inline void writel(u32 value, volatile void __iomem *addr) { + __io_bw(); __raw_writel(__cpu_to_le32(value), addr); + __io_aw(); } #endif @@ -169,7 +239,9 @@ static inline void writel(u32 value, volatile void __iomem *addr) #define writeq writeq static inline void writeq(u64 value, volatile void __iomem *addr) { + __io_bw(); __raw_writeq(__cpu_to_le64(value), addr); + __io_aw(); } #endif #endif /* CONFIG_64BIT */ @@ -180,35 +252,67 @@ static inline void writeq(u64 value, volatile void __iomem *addr) * accesses. */ #ifndef readb_relaxed -#define readb_relaxed readb +#define readb_relaxed readb_relaxed +static inline u8 readb_relaxed(const volatile void __iomem *addr) +{ + return __raw_readb(addr); +} #endif #ifndef readw_relaxed -#define readw_relaxed readw +#define readw_relaxed readw_relaxed +static inline u16 readw_relaxed(const volatile void __iomem *addr) +{ + return __le16_to_cpu(__raw_readw(addr)); +} #endif #ifndef readl_relaxed -#define readl_relaxed readl +#define readl_relaxed readl_relaxed +static inline u32 readl_relaxed(const volatile void __iomem *addr) +{ + return __le32_to_cpu(__raw_readl(addr)); +} #endif #if defined(readq) && !defined(readq_relaxed) -#define readq_relaxed readq +#define readq_relaxed readq_relaxed +static inline u64 readq_relaxed(const volatile void __iomem *addr) +{ + return __le64_to_cpu(__raw_readq(addr)); +} #endif #ifndef writeb_relaxed -#define writeb_relaxed writeb +#define writeb_relaxed writeb_relaxed +static inline void writeb_relaxed(u8 value, volatile void __iomem *addr) +{ + __raw_writeb(value, addr); +} #endif #ifndef writew_relaxed -#define writew_relaxed writew +#define writew_relaxed writew_relaxed +static inline void writew_relaxed(u16 value, volatile void __iomem *addr) +{ + __raw_writew(cpu_to_le16(value), addr); +} #endif #ifndef writel_relaxed -#define writel_relaxed writel +#define writel_relaxed writel_relaxed +static inline void writel_relaxed(u32 value, volatile void __iomem *addr) +{ + __raw_writel(__cpu_to_le32(value), addr); +} #endif #if defined(writeq) && !defined(writeq_relaxed) -#define writeq_relaxed writeq +#define writeq_relaxed writeq_relaxed +static inline void writeq_relaxed(u64 value, volatile void __iomem *addr) +{ + __raw_writeq(__cpu_to_le64(value), addr); +} #endif /* @@ -363,7 +467,12 @@ static inline void writesq(volatile void __iomem *addr, const void *buffer, #define inb inb static inline u8 inb(unsigned long addr) { - return readb(PCI_IOBASE + addr); + u8 val; + + __io_pbr(); + val = __raw_readb(PCI_IOBASE + addr); + __io_par(); + return val; } #endif @@ -371,7 +480,12 @@ static inline u8 inb(unsigned long addr) #define inw inw static inline u16 inw(unsigned long addr) { - return readw(PCI_IOBASE + addr); + u16 val; + + __io_pbr(); + val = __le16_to_cpu(__raw_readw(PCI_IOBASE + addr)); + __io_par(); + return val; } #endif @@ -379,7 +493,12 @@ static inline u16 inw(unsigned long addr) #define inl inl static inline u32 inl(unsigned long addr) { - return readl(PCI_IOBASE + addr); + u32 val; + + __io_pbr(); + val = __le32_to_cpu(__raw_readl(PCI_IOBASE + addr)); + __io_par(); + return val; } #endif @@ -387,7 +506,9 @@ static inline u32 inl(unsigned long addr) #define outb outb static inline void outb(u8 value, unsigned long addr) { - writeb(value, PCI_IOBASE + addr); + __io_pbw(); + __raw_writeb(value, PCI_IOBASE + addr); + __io_paw(); } #endif @@ -395,7 +516,9 @@ static inline void outb(u8 value, unsigned long addr) #define outw outw static inline void outw(u16 value, unsigned long addr) { - writew(value, PCI_IOBASE + addr); + __io_pbw(); + __raw_writew(cpu_to_le16(value), PCI_IOBASE + addr); + __io_paw(); } #endif @@ -403,7 +526,9 @@ static inline void outw(u16 value, unsigned long addr) #define outl outl static inline void outl(u32 value, unsigned long addr) { - writel(value, PCI_IOBASE + addr); + __io_pbw(); + __raw_writel(cpu_to_le32(value), PCI_IOBASE + addr); + __io_paw(); } #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index 92efaf1f8977..760d8da1b6c7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1667,7 +1667,7 @@ typedef int (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64, unsigned); struct dir_context { - const filldir_t actor; + filldir_t actor; loff_t pos; }; @@ -2445,6 +2445,7 @@ extern int sync_blockdev(struct block_device *bdev); extern void kill_bdev(struct block_device *); extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); +extern void emergency_thaw_bdev(struct super_block *sb); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int fsync_bdev(struct block_device *); @@ -2470,6 +2471,11 @@ static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) return 0; } +static inline int emergency_thaw_bdev(struct super_block *sb) +{ + return 0; +} + static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void *arg) { } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 38187c68063d..2f129bbfaae8 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -198,14 +198,24 @@ struct nfs_inode { /* * Cache validity bit flags */ -#define NFS_INO_INVALID_ATTR 0x0001 /* cached attrs are invalid */ -#define NFS_INO_INVALID_DATA 0x0002 /* cached data is invalid */ -#define NFS_INO_INVALID_ATIME 0x0004 /* cached atime is invalid */ -#define NFS_INO_INVALID_ACCESS 0x0008 /* cached access cred invalid */ -#define NFS_INO_INVALID_ACL 0x0010 /* cached acls are invalid */ -#define NFS_INO_REVAL_PAGECACHE 0x0020 /* must revalidate pagecache */ -#define NFS_INO_REVAL_FORCED 0x0040 /* force revalidation ignoring a delegation */ -#define NFS_INO_INVALID_LABEL 0x0080 /* cached label is invalid */ +#define NFS_INO_INVALID_DATA BIT(1) /* cached data is invalid */ +#define NFS_INO_INVALID_ATIME BIT(2) /* cached atime is invalid */ +#define NFS_INO_INVALID_ACCESS BIT(3) /* cached access cred invalid */ +#define NFS_INO_INVALID_ACL BIT(4) /* cached acls are invalid */ +#define NFS_INO_REVAL_PAGECACHE BIT(5) /* must revalidate pagecache */ +#define NFS_INO_REVAL_FORCED BIT(6) /* force revalidation ignoring a delegation */ +#define NFS_INO_INVALID_LABEL BIT(7) /* cached label is invalid */ +#define NFS_INO_INVALID_CHANGE BIT(8) /* cached change is invalid */ +#define NFS_INO_INVALID_CTIME BIT(9) /* cached ctime is invalid */ +#define NFS_INO_INVALID_MTIME BIT(10) /* cached mtime is invalid */ +#define NFS_INO_INVALID_SIZE BIT(11) /* cached size is invalid */ +#define NFS_INO_INVALID_OTHER BIT(12) /* other attrs are invalid */ + +#define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \ + | NFS_INO_INVALID_CTIME \ + | NFS_INO_INVALID_MTIME \ + | NFS_INO_INVALID_SIZE \ + | NFS_INO_INVALID_OTHER) /* inode metadata is invalid */ /* * Bit offsets in flags field @@ -292,10 +302,11 @@ static inline void nfs_mark_for_revalidate(struct inode *inode) struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&inode->i_lock); - nfsi->cache_validity |= NFS_INO_INVALID_ATTR | - NFS_INO_REVAL_PAGECACHE | - NFS_INO_INVALID_ACCESS | - NFS_INO_INVALID_ACL; + nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_CTIME; if (S_ISDIR(inode->i_mode)) nfsi->cache_validity |= NFS_INO_INVALID_DATA; spin_unlock(&inode->i_lock); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6959968dc36a..34d28564ecf3 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1590,11 +1590,13 @@ struct nfs_rpc_ops { unsigned int); int (*create) (struct inode *, struct dentry *, struct iattr *, int); - int (*remove) (struct inode *, const struct qstr *); - void (*unlink_setup) (struct rpc_message *, struct inode *dir); + int (*remove) (struct inode *, struct dentry *); + void (*unlink_setup) (struct rpc_message *, struct dentry *); void (*unlink_rpc_prepare) (struct rpc_task *, struct nfs_unlinkdata *); int (*unlink_done) (struct rpc_task *, struct inode *); - void (*rename_setup) (struct rpc_message *msg, struct inode *dir); + void (*rename_setup) (struct rpc_message *msg, + struct dentry *old_dentry, + struct dentry *new_dentry); void (*rename_rpc_prepare)(struct rpc_task *task, struct nfs_renamedata *); int (*rename_done) (struct rpc_task *task, struct inode *old_dir, struct inode *new_dir); int (*link) (struct inode *, struct inode *, const struct qstr *); @@ -1633,7 +1635,6 @@ struct nfs_rpc_ops { struct iattr *iattr, int *); int (*have_delegation)(struct inode *, fmode_t); - int (*return_delegation)(struct inode *); struct nfs_client *(*alloc_client) (const struct nfs_client_initdata *); struct nfs_client *(*init_client) (struct nfs_client *, const struct nfs_client_initdata *); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index ed761f751ecb..9b11b6a0978c 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -217,5 +217,12 @@ void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *); bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, const struct sockaddr *sap); void rpc_cleanup_clids(void); + +static inline int rpc_reply_expected(struct rpc_task *task) +{ + return (task->tk_msg.rpc_proc != NULL) && + (task->tk_msg.rpc_proc->p_decode != NULL); +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index d950223c64b1..2bd68177a442 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -253,6 +253,12 @@ xdr_stream_remaining(const struct xdr_stream *xdr) return xdr->nwords << 2; } +ssize_t xdr_stream_decode_opaque(struct xdr_stream *xdr, void *ptr, + size_t size); +ssize_t xdr_stream_decode_opaque_dup(struct xdr_stream *xdr, void **ptr, + size_t maxlen, gfp_t gfp_flags); +ssize_t xdr_stream_decode_string(struct xdr_stream *xdr, char *str, + size_t size); ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str, size_t maxlen, gfp_t gfp_flags); /** @@ -313,6 +319,31 @@ xdr_stream_encode_u64(struct xdr_stream *xdr, __u64 n) } /** + * xdr_stream_encode_opaque_inline - Encode opaque xdr data + * @xdr: pointer to xdr_stream + * @ptr: pointer to void pointer + * @len: size of object + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_encode_opaque_inline(struct xdr_stream *xdr, void **ptr, size_t len) +{ + size_t count = sizeof(__u32) + xdr_align_size(len); + __be32 *p = xdr_reserve_space(xdr, count); + + if (unlikely(!p)) { + *ptr = NULL; + return -EMSGSIZE; + } + xdr_encode_opaque(p, NULL, len); + *ptr = ++p; + return count; +} + +/** * xdr_stream_encode_opaque_fixed - Encode fixed length opaque xdr data * @xdr: pointer to xdr_stream * @ptr: pointer to opaque data object @@ -356,6 +387,31 @@ xdr_stream_encode_opaque(struct xdr_stream *xdr, const void *ptr, size_t len) } /** + * xdr_stream_encode_uint32_array - Encode variable length array of integers + * @xdr: pointer to xdr_stream + * @array: array of integers + * @array_size: number of elements in @array + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_encode_uint32_array(struct xdr_stream *xdr, + const __u32 *array, size_t array_size) +{ + ssize_t ret = (array_size+1) * sizeof(__u32); + __be32 *p = xdr_reserve_space(xdr, ret); + + if (unlikely(!p)) + return -EMSGSIZE; + *p++ = cpu_to_be32(array_size); + for (; array_size > 0; p++, array++, array_size--) + *p = cpu_to_be32p(array); + return ret; +} + +/** * xdr_stream_decode_u32 - Decode a 32-bit integer * @xdr: pointer to xdr_stream * @ptr: location to store integer @@ -432,6 +488,44 @@ xdr_stream_decode_opaque_inline(struct xdr_stream *xdr, void **ptr, size_t maxle } return len; } + +/** + * xdr_stream_decode_uint32_array - Decode variable length array of integers + * @xdr: pointer to xdr_stream + * @array: location to store the integer array or NULL + * @array_size: number of elements to store + * + * Return values: + * On success, returns number of elements stored in @array + * %-EBADMSG on XDR buffer overflow + * %-EMSGSIZE if the size of the array exceeds @array_size + */ +static inline ssize_t +xdr_stream_decode_uint32_array(struct xdr_stream *xdr, + __u32 *array, size_t array_size) +{ + __be32 *p; + __u32 len; + ssize_t retval; + + if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) + return -EBADMSG; + p = xdr_inline_decode(xdr, len * sizeof(*p)); + if (unlikely(!p)) + return -EBADMSG; + if (array == NULL) + return len; + if (len <= array_size) { + if (len < array_size) + memset(array+len, 0, (array_size-len)*sizeof(*array)); + array_size = len; + retval = len; + } else + retval = -EMSGSIZE; + for (; array_size > 0; p++, array++, array_size--) + *array = be32_to_cpup(p); + return retval; +} #endif /* __KERNEL__ */ #endif /* _SUNRPC_XDR_H_ */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 7fad83881ce1..5fea0fb420df 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -197,7 +197,7 @@ struct rpc_xprt { struct list_head free; /* free slots */ unsigned int max_reqs; /* max number of slots */ unsigned int min_reqs; /* min number of slots */ - atomic_t num_reqs; /* total slots */ + unsigned int num_reqs; /* total slots */ unsigned long state; /* transport state */ unsigned char resvport : 1; /* use a reserved port */ atomic_t swapper; /* we're swapping over this @@ -373,6 +373,7 @@ void xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action); void xprt_write_space(struct rpc_xprt *xprt); void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result); struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid); +void xprt_update_rtt(struct rpc_task *task); void xprt_complete_rqst(struct rpc_task *task, int copied); void xprt_pin_rqst(struct rpc_rqst *req); void xprt_unpin_rqst(struct rpc_rqst *req); diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 82c219dfd3bb..9737fbec7019 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -31,6 +31,7 @@ struct timespec64 get_monotonic_coarse64(void); extern void getrawmonotonic64(struct timespec64 *ts); extern void ktime_get_ts64(struct timespec64 *ts); extern time64_t ktime_get_seconds(void); +extern time64_t __ktime_get_real_seconds(void); extern time64_t ktime_get_real_seconds(void); extern void ktime_get_active_ts64(struct timespec64 *ts); diff --git a/include/net/slhc_vj.h b/include/net/slhc_vj.h index 8716d5942b65..8fcf8908a694 100644 --- a/include/net/slhc_vj.h +++ b/include/net/slhc_vj.h @@ -127,6 +127,7 @@ typedef __u32 int32; */ struct cstate { byte_t cs_this; /* connection id number (xmit) */ + bool initialized; /* true if initialized */ struct cstate *next; /* next in ring (xmit) */ struct iphdr cs_ip; /* ip/tcp hdr from most recent packet */ struct tcphdr cs_tcp; diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 63815f66b274..f0820554caa9 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -49,6 +49,7 @@ enum afs_fs_operation { afs_FS_ExtendLock = 157, /* AFS Extend a file lock */ afs_FS_ReleaseLock = 158, /* AFS Release a file lock */ afs_FS_Lookup = 161, /* AFS lookup file in directory */ + afs_FS_InlineBulkStatus = 65536, /* AFS Fetch multiple file statuses with errors */ afs_FS_FetchData64 = 65537, /* AFS Fetch file data */ afs_FS_StoreData64 = 65538, /* AFS Store file data */ afs_FS_GiveUpAllCallBacks = 65539, /* AFS Give up all our callbacks on a server */ @@ -62,6 +63,27 @@ enum afs_vl_operation { afs_VL_GetCapabilities = 65537, /* AFS Get VL server capabilities */ }; +enum afs_edit_dir_op { + afs_edit_dir_create, + afs_edit_dir_create_error, + afs_edit_dir_create_inval, + afs_edit_dir_create_nospc, + afs_edit_dir_delete, + afs_edit_dir_delete_error, + afs_edit_dir_delete_inval, + afs_edit_dir_delete_noent, +}; + +enum afs_edit_dir_reason { + afs_edit_dir_for_create, + afs_edit_dir_for_link, + afs_edit_dir_for_mkdir, + afs_edit_dir_for_rename, + afs_edit_dir_for_rmdir, + afs_edit_dir_for_symlink, + afs_edit_dir_for_unlink, +}; + #endif /* end __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY */ /* @@ -93,6 +115,7 @@ enum afs_vl_operation { EM(afs_FS_ExtendLock, "FS.ExtendLock") \ EM(afs_FS_ReleaseLock, "FS.ReleaseLock") \ EM(afs_FS_Lookup, "FS.Lookup") \ + EM(afs_FS_InlineBulkStatus, "FS.InlineBulkStatus") \ EM(afs_FS_FetchData64, "FS.FetchData64") \ EM(afs_FS_StoreData64, "FS.StoreData64") \ EM(afs_FS_GiveUpAllCallBacks, "FS.GiveUpAllCallBacks") \ @@ -104,6 +127,25 @@ enum afs_vl_operation { EM(afs_YFSVL_GetEndpoints, "YFSVL.GetEndpoints") \ E_(afs_VL_GetCapabilities, "VL.GetCapabilities") +#define afs_edit_dir_ops \ + EM(afs_edit_dir_create, "create") \ + EM(afs_edit_dir_create_error, "c_fail") \ + EM(afs_edit_dir_create_inval, "c_invl") \ + EM(afs_edit_dir_create_nospc, "c_nspc") \ + EM(afs_edit_dir_delete, "delete") \ + EM(afs_edit_dir_delete_error, "d_err ") \ + EM(afs_edit_dir_delete_inval, "d_invl") \ + E_(afs_edit_dir_delete_noent, "d_nent") + +#define afs_edit_dir_reasons \ + EM(afs_edit_dir_for_create, "Create") \ + EM(afs_edit_dir_for_link, "Link ") \ + EM(afs_edit_dir_for_mkdir, "MkDir ") \ + EM(afs_edit_dir_for_rename, "Rename") \ + EM(afs_edit_dir_for_rmdir, "RmDir ") \ + EM(afs_edit_dir_for_symlink, "Symlnk") \ + E_(afs_edit_dir_for_unlink, "Unlink") + /* * Export enum symbols via userspace. @@ -116,6 +158,8 @@ enum afs_vl_operation { afs_call_traces; afs_fs_operations; afs_vl_operations; +afs_edit_dir_ops; +afs_edit_dir_reasons; /* * Now redefine the EM() and E_() macros to map the enums to the strings that @@ -462,6 +506,75 @@ TRACE_EVENT(afs_call_state, __entry->ret, __entry->abort) ); +TRACE_EVENT(afs_edit_dir, + TP_PROTO(struct afs_vnode *dvnode, + enum afs_edit_dir_reason why, + enum afs_edit_dir_op op, + unsigned int block, + unsigned int slot, + unsigned int f_vnode, + unsigned int f_unique, + const char *name), + + TP_ARGS(dvnode, why, op, block, slot, f_vnode, f_unique, name), + + TP_STRUCT__entry( + __field(unsigned int, vnode ) + __field(unsigned int, unique ) + __field(enum afs_edit_dir_reason, why ) + __field(enum afs_edit_dir_op, op ) + __field(unsigned int, block ) + __field(unsigned short, slot ) + __field(unsigned int, f_vnode ) + __field(unsigned int, f_unique ) + __array(char, name, 18 ) + ), + + TP_fast_assign( + int __len = strlen(name); + __len = min(__len, 17); + __entry->vnode = dvnode->fid.vnode; + __entry->unique = dvnode->fid.unique; + __entry->why = why; + __entry->op = op; + __entry->block = block; + __entry->slot = slot; + __entry->f_vnode = f_vnode; + __entry->f_unique = f_unique; + memcpy(__entry->name, name, __len); + __entry->name[__len] = 0; + ), + + TP_printk("d=%x:%x %s %s %u[%u] f=%x:%x %s", + __entry->vnode, __entry->unique, + __print_symbolic(__entry->why, afs_edit_dir_reasons), + __print_symbolic(__entry->op, afs_edit_dir_ops), + __entry->block, __entry->slot, + __entry->f_vnode, __entry->f_unique, + __entry->name) + ); + +TRACE_EVENT(afs_protocol_error, + TP_PROTO(struct afs_call *call, int error, const void *where), + + TP_ARGS(call, error, where), + + TP_STRUCT__entry( + __field(unsigned int, call ) + __field(int, error ) + __field(const void *, where ) + ), + + TP_fast_assign( + __entry->call = call ? call->debug_id : 0; + __entry->error = error; + __entry->where = where; + ), + + TP_printk("c=%08x r=%d sp=%pSR", + __entry->call, __entry->error, __entry->where) + ); + #endif /* _TRACE_AFS_H */ /* This part must be outside protection */ diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 922cb8968fb2..335d87242439 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -50,9 +50,9 @@ DEFINE_EVENT(rpc_task_status, rpc_bind_status, ); TRACE_EVENT(rpc_connect_status, - TP_PROTO(struct rpc_task *task, int status), + TP_PROTO(const struct rpc_task *task), - TP_ARGS(task, status), + TP_ARGS(task), TP_STRUCT__entry( __field(unsigned int, task_id) @@ -63,7 +63,7 @@ TRACE_EVENT(rpc_connect_status, TP_fast_assign( __entry->task_id = task->tk_pid; __entry->client_id = task->tk_client->cl_clid; - __entry->status = status; + __entry->status = task->tk_status; ), TP_printk("task:%u@%u status=%d", @@ -103,9 +103,9 @@ TRACE_EVENT(rpc_request, DECLARE_EVENT_CLASS(rpc_task_running, - TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action), + TP_PROTO(const struct rpc_task *task, const void *action), - TP_ARGS(clnt, task, action), + TP_ARGS(task, action), TP_STRUCT__entry( __field(unsigned int, task_id) @@ -117,7 +117,8 @@ DECLARE_EVENT_CLASS(rpc_task_running, ), TP_fast_assign( - __entry->client_id = clnt ? clnt->cl_clid : -1; + __entry->client_id = task->tk_client ? + task->tk_client->cl_clid : -1; __entry->task_id = task->tk_pid; __entry->action = action; __entry->runstate = task->tk_runstate; @@ -136,33 +137,33 @@ DECLARE_EVENT_CLASS(rpc_task_running, DEFINE_EVENT(rpc_task_running, rpc_task_begin, - TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action), + TP_PROTO(const struct rpc_task *task, const void *action), - TP_ARGS(clnt, task, action) + TP_ARGS(task, action) ); DEFINE_EVENT(rpc_task_running, rpc_task_run_action, - TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action), + TP_PROTO(const struct rpc_task *task, const void *action), - TP_ARGS(clnt, task, action) + TP_ARGS(task, action) ); DEFINE_EVENT(rpc_task_running, rpc_task_complete, - TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action), + TP_PROTO(const struct rpc_task *task, const void *action), - TP_ARGS(clnt, task, action) + TP_ARGS(task, action) ); DECLARE_EVENT_CLASS(rpc_task_queued, - TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const struct rpc_wait_queue *q), + TP_PROTO(const struct rpc_task *task, const struct rpc_wait_queue *q), - TP_ARGS(clnt, task, q), + TP_ARGS(task, q), TP_STRUCT__entry( __field(unsigned int, task_id) @@ -175,7 +176,8 @@ DECLARE_EVENT_CLASS(rpc_task_queued, ), TP_fast_assign( - __entry->client_id = clnt ? clnt->cl_clid : -1; + __entry->client_id = task->tk_client ? + task->tk_client->cl_clid : -1; __entry->task_id = task->tk_pid; __entry->timeout = task->tk_timeout; __entry->runstate = task->tk_runstate; @@ -196,18 +198,63 @@ DECLARE_EVENT_CLASS(rpc_task_queued, DEFINE_EVENT(rpc_task_queued, rpc_task_sleep, - TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const struct rpc_wait_queue *q), + TP_PROTO(const struct rpc_task *task, const struct rpc_wait_queue *q), - TP_ARGS(clnt, task, q) + TP_ARGS(task, q) ); DEFINE_EVENT(rpc_task_queued, rpc_task_wakeup, - TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const struct rpc_wait_queue *q), + TP_PROTO(const struct rpc_task *task, const struct rpc_wait_queue *q), + + TP_ARGS(task, q) + +); + +TRACE_EVENT(rpc_stats_latency, + + TP_PROTO( + const struct rpc_task *task, + ktime_t backlog, + ktime_t rtt, + ktime_t execute + ), - TP_ARGS(clnt, task, q) + TP_ARGS(task, backlog, rtt, execute), + TP_STRUCT__entry( + __field(u32, xid) + __field(int, version) + __string(progname, task->tk_client->cl_program->name) + __string(procname, rpc_proc_name(task)) + __field(unsigned long, backlog) + __field(unsigned long, rtt) + __field(unsigned long, execute) + __string(addr, + task->tk_xprt->address_strings[RPC_DISPLAY_ADDR]) + __string(port, + task->tk_xprt->address_strings[RPC_DISPLAY_PORT]) + ), + + TP_fast_assign( + __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid); + __entry->version = task->tk_client->cl_vers; + __assign_str(progname, task->tk_client->cl_program->name) + __assign_str(procname, rpc_proc_name(task)) + __entry->backlog = ktime_to_us(backlog); + __entry->rtt = ktime_to_us(rtt); + __entry->execute = ktime_to_us(execute); + __assign_str(addr, + task->tk_xprt->address_strings[RPC_DISPLAY_ADDR]); + __assign_str(port, + task->tk_xprt->address_strings[RPC_DISPLAY_PORT]); + ), + + TP_printk("peer=[%s]:%s xid=0x%08x %sv%d %s backlog=%lu rtt=%lu execute=%lu", + __get_str(addr), __get_str(port), __entry->xid, + __get_str(progname), __entry->version, __get_str(procname), + __entry->backlog, __entry->rtt, __entry->execute) ); /* @@ -406,6 +453,27 @@ DEFINE_EVENT(rpc_xprt_event, xprt_complete_rqst, TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status), TP_ARGS(xprt, xid, status)); +TRACE_EVENT(xprt_ping, + TP_PROTO(const struct rpc_xprt *xprt, int status), + + TP_ARGS(xprt, status), + + TP_STRUCT__entry( + __field(int, status) + __string(addr, xprt->address_strings[RPC_DISPLAY_ADDR]) + __string(port, xprt->address_strings[RPC_DISPLAY_PORT]) + ), + + TP_fast_assign( + __entry->status = status; + __assign_str(addr, xprt->address_strings[RPC_DISPLAY_ADDR]); + __assign_str(port, xprt->address_strings[RPC_DISPLAY_PORT]); + ), + + TP_printk("peer=[%s]:%s status=%d", + __get_str(addr), __get_str(port), __entry->status) +); + TRACE_EVENT(xs_tcp_data_ready, TP_PROTO(struct rpc_xprt *xprt, int err, unsigned int total), diff --git a/include/xen/interface/features.h b/include/xen/interface/features.h index 9b0eb574f0d1..6d1384abfbdf 100644 --- a/include/xen/interface/features.h +++ b/include/xen/interface/features.h @@ -42,6 +42,9 @@ /* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */ #define XENFEAT_mmu_pt_update_preserve_ad 5 +/* x86: Does this Xen host support the MMU_{CLEAR,COPY}_PAGE hypercall? */ +#define XENFEAT_highmem_assist 6 + /* * If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel * available pte bits. @@ -60,6 +63,26 @@ /* operation as Dom0 is supported */ #define XENFEAT_dom0 11 +/* Xen also maps grant references at pfn = mfn. + * This feature flag is deprecated and should not be used. +#define XENFEAT_grant_map_identity 12 + */ + +/* Guest can use XENMEMF_vnode to specify virtual node for memory op. */ +#define XENFEAT_memory_op_vnode_supported 13 + +/* arm: Hypervisor supports ARM SMC calling convention. */ +#define XENFEAT_ARM_SMCCC_supported 14 + +/* + * x86/PVH: If set, ACPI RSDP can be placed at any address. Otherwise RSDP + * must be located in lower 1MB, as required by ACPI Specification for IA-PC + * systems. + * This feature flag is only consulted if XEN_ELFNOTE_GUEST_OS contains + * the "linux" string. + */ +#define XENFEAT_linux_rsdp_unrestricted 15 + #define XENFEAT_NR_SUBMAPS 1 #endif /* __XEN_PUBLIC_FEATURES_H__ */ diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c index 90ff129c88a2..62c301ad0773 100644 --- a/kernel/debug/kdb/kdb_bp.c +++ b/kernel/debug/kdb/kdb_bp.c @@ -242,11 +242,11 @@ static void kdb_printbp(kdb_bp_t *bp, int i) kdb_symbol_print(bp->bp_addr, NULL, KDB_SP_DEFAULT); if (bp->bp_enabled) - kdb_printf("\n is enabled"); + kdb_printf("\n is enabled "); else kdb_printf("\n is disabled"); - kdb_printf("\taddr at %016lx, hardtype=%d installed=%d\n", + kdb_printf(" addr at %016lx, hardtype=%d installed=%d\n", bp->bp_addr, bp->bp_type, bp->bp_installed); kdb_printf("\n"); diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index dbb0781a0533..e405677ee08d 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1150,6 +1150,16 @@ void kdb_set_current_task(struct task_struct *p) kdb_current_regs = NULL; } +static void drop_newline(char *buf) +{ + size_t len = strlen(buf); + + if (len == 0) + return; + if (*(buf + len - 1) == '\n') + *(buf + len - 1) = '\0'; +} + /* * kdb_local - The main code for kdb. This routine is invoked on a * specific processor, it is not global. The main kdb() routine @@ -1327,6 +1337,7 @@ do_full_getstr: cmdptr = cmd_head; diag = kdb_parse(cmdbuf); if (diag == KDB_NOTFOUND) { + drop_newline(cmdbuf); kdb_printf("Unknown kdb command: '%s'\n", cmdbuf); diag = 0; } @@ -1566,6 +1577,7 @@ static int kdb_md(int argc, const char **argv) int symbolic = 0; int valid = 0; int phys = 0; + int raw = 0; kdbgetintenv("MDCOUNT", &mdcount); kdbgetintenv("RADIX", &radix); @@ -1575,9 +1587,10 @@ static int kdb_md(int argc, const char **argv) repeat = mdcount * 16 / bytesperword; if (strcmp(argv[0], "mdr") == 0) { - if (argc != 2) + if (argc == 2 || (argc == 0 && last_addr != 0)) + valid = raw = 1; + else return KDB_ARGCOUNT; - valid = 1; } else if (isdigit(argv[0][2])) { bytesperword = (int)(argv[0][2] - '0'); if (bytesperword == 0) { @@ -1613,7 +1626,10 @@ static int kdb_md(int argc, const char **argv) radix = last_radix; bytesperword = last_bytesperword; repeat = last_repeat; - mdcount = ((repeat * bytesperword) + 15) / 16; + if (raw) + mdcount = repeat; + else + mdcount = ((repeat * bytesperword) + 15) / 16; } if (argc) { @@ -1630,7 +1646,10 @@ static int kdb_md(int argc, const char **argv) diag = kdbgetularg(argv[nextarg], &val); if (!diag) { mdcount = (int) val; - repeat = mdcount * 16 / bytesperword; + if (raw) + repeat = mdcount; + else + repeat = mdcount * 16 / bytesperword; } } if (argc >= nextarg+1) { @@ -1640,8 +1659,15 @@ static int kdb_md(int argc, const char **argv) } } - if (strcmp(argv[0], "mdr") == 0) - return kdb_mdr(addr, mdcount); + if (strcmp(argv[0], "mdr") == 0) { + int ret; + last_addr = addr; + ret = kdb_mdr(addr, mdcount); + last_addr += mdcount; + last_repeat = mdcount; + last_bytesperword = bytesperword; // to make REPEAT happy + return ret; + } switch (radix) { case 10: @@ -2473,41 +2499,6 @@ static int kdb_kill(int argc, const char **argv) return 0; } -struct kdb_tm { - int tm_sec; /* seconds */ - int tm_min; /* minutes */ - int tm_hour; /* hours */ - int tm_mday; /* day of the month */ - int tm_mon; /* month */ - int tm_year; /* year */ -}; - -static void kdb_gmtime(struct timespec *tv, struct kdb_tm *tm) -{ - /* This will work from 1970-2099, 2100 is not a leap year */ - static int mon_day[] = { 31, 29, 31, 30, 31, 30, 31, - 31, 30, 31, 30, 31 }; - memset(tm, 0, sizeof(*tm)); - tm->tm_sec = tv->tv_sec % (24 * 60 * 60); - tm->tm_mday = tv->tv_sec / (24 * 60 * 60) + - (2 * 365 + 1); /* shift base from 1970 to 1968 */ - tm->tm_min = tm->tm_sec / 60 % 60; - tm->tm_hour = tm->tm_sec / 60 / 60; - tm->tm_sec = tm->tm_sec % 60; - tm->tm_year = 68 + 4*(tm->tm_mday / (4*365+1)); - tm->tm_mday %= (4*365+1); - mon_day[1] = 29; - while (tm->tm_mday >= mon_day[tm->tm_mon]) { - tm->tm_mday -= mon_day[tm->tm_mon]; - if (++tm->tm_mon == 12) { - tm->tm_mon = 0; - ++tm->tm_year; - mon_day[1] = 28; - } - } - ++tm->tm_mday; -} - /* * Most of this code has been lifted from kernel/timer.c::sys_sysinfo(). * I cannot call that code directly from kdb, it has an unconditional @@ -2515,10 +2506,10 @@ static void kdb_gmtime(struct timespec *tv, struct kdb_tm *tm) */ static void kdb_sysinfo(struct sysinfo *val) { - struct timespec uptime; - ktime_get_ts(&uptime); + u64 uptime = ktime_get_mono_fast_ns(); + memset(val, 0, sizeof(*val)); - val->uptime = uptime.tv_sec; + val->uptime = div_u64(uptime, NSEC_PER_SEC); val->loads[0] = avenrun[0]; val->loads[1] = avenrun[1]; val->loads[2] = avenrun[2]; @@ -2533,8 +2524,8 @@ static void kdb_sysinfo(struct sysinfo *val) */ static int kdb_summary(int argc, const char **argv) { - struct timespec now; - struct kdb_tm tm; + time64_t now; + struct tm tm; struct sysinfo val; if (argc) @@ -2548,9 +2539,9 @@ static int kdb_summary(int argc, const char **argv) kdb_printf("domainname %s\n", init_uts_ns.name.domainname); kdb_printf("ccversion %s\n", __stringify(CCVERSION)); - now = __current_kernel_time(); - kdb_gmtime(&now, &tm); - kdb_printf("date %04d-%02d-%02d %02d:%02d:%02d " + now = __ktime_get_real_seconds(); + time64_to_tm(now, 0, &tm); + kdb_printf("date %04ld-%02d-%02d %02d:%02d:%02d " "tz_minuteswest %d\n", 1900+tm.tm_year, tm.tm_mon+1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index d35cc2d3a4cc..990b3cc526c8 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -129,13 +129,13 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab) } if (i >= ARRAY_SIZE(kdb_name_table)) { debug_kfree(kdb_name_table[0]); - memcpy(kdb_name_table, kdb_name_table+1, + memmove(kdb_name_table, kdb_name_table+1, sizeof(kdb_name_table[0]) * (ARRAY_SIZE(kdb_name_table)-1)); } else { debug_kfree(knt1); knt1 = kdb_name_table[i]; - memcpy(kdb_name_table+i, kdb_name_table+i+1, + memmove(kdb_name_table+i, kdb_name_table+i+1, sizeof(kdb_name_table[0]) * (ARRAY_SIZE(kdb_name_table)-i-1)); } diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h index fdbeeb02dde9..cf5c0828ee31 100644 --- a/kernel/time/timekeeping_internal.h +++ b/kernel/time/timekeeping_internal.h @@ -31,6 +31,4 @@ static inline u64 clocksource_delta(u64 now, u64 last, u64 mask) } #endif -extern time64_t __ktime_get_real_seconds(void); - #endif /* _TIMEKEEPING_INTERNAL_H */ diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 47aeb04c1997..de7cc540450f 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -719,7 +719,7 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle, goto out_warn; *dma_handle = __phys_to_dma(dev, phys_addr); - if (dma_coherent_ok(dev, *dma_handle, size)) + if (!dma_coherent_ok(dev, *dma_handle, size)) goto out_unmap; memset(phys_to_virt(phys_addr), 0, size); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index a8772a978224..9c169bb2444d 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -781,8 +781,14 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu) tunnel->encap.type == TUNNEL_ENCAP_NONE) { dev->features |= NETIF_F_GSO_SOFTWARE; dev->hw_features |= NETIF_F_GSO_SOFTWARE; + } else { + dev->features &= ~NETIF_F_GSO_SOFTWARE; + dev->hw_features &= ~NETIF_F_GSO_SOFTWARE; } dev->features |= NETIF_F_LLTX; + } else { + dev->hw_features &= ~NETIF_F_GSO_SOFTWARE; + dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE); } } diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 14b67dfacc4b..0fbd3ee26165 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -335,26 +335,6 @@ err_tlock: } EXPORT_SYMBOL_GPL(l2tp_session_register); -/* Lookup a tunnel by id - */ -struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id) -{ - struct l2tp_tunnel *tunnel; - struct l2tp_net *pn = l2tp_pernet(net); - - rcu_read_lock_bh(); - list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { - if (tunnel->tunnel_id == tunnel_id) { - rcu_read_unlock_bh(); - return tunnel; - } - } - rcu_read_unlock_bh(); - - return NULL; -} -EXPORT_SYMBOL_GPL(l2tp_tunnel_find); - struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth) { struct l2tp_net *pn = l2tp_pernet(net); @@ -1436,74 +1416,11 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 { struct l2tp_tunnel *tunnel = NULL; int err; - struct socket *sock = NULL; - struct sock *sk = NULL; - struct l2tp_net *pn; enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP; - /* Get the tunnel socket from the fd, which was opened by - * the userspace L2TP daemon. If not specified, create a - * kernel socket. - */ - if (fd < 0) { - err = l2tp_tunnel_sock_create(net, tunnel_id, peer_tunnel_id, - cfg, &sock); - if (err < 0) - goto err; - } else { - sock = sockfd_lookup(fd, &err); - if (!sock) { - pr_err("tunl %u: sockfd_lookup(fd=%d) returned %d\n", - tunnel_id, fd, err); - err = -EBADF; - goto err; - } - - /* Reject namespace mismatches */ - if (!net_eq(sock_net(sock->sk), net)) { - pr_err("tunl %u: netns mismatch\n", tunnel_id); - err = -EINVAL; - goto err; - } - } - - sk = sock->sk; - if (cfg != NULL) encap = cfg->encap; - /* Quick sanity checks */ - err = -EPROTONOSUPPORT; - if (sk->sk_type != SOCK_DGRAM) { - pr_debug("tunl %hu: fd %d wrong socket type\n", - tunnel_id, fd); - goto err; - } - switch (encap) { - case L2TP_ENCAPTYPE_UDP: - if (sk->sk_protocol != IPPROTO_UDP) { - pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n", - tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP); - goto err; - } - break; - case L2TP_ENCAPTYPE_IP: - if (sk->sk_protocol != IPPROTO_L2TP) { - pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n", - tunnel_id, fd, sk->sk_protocol, IPPROTO_L2TP); - goto err; - } - break; - } - - /* Check if this socket has already been prepped */ - tunnel = l2tp_tunnel(sk); - if (tunnel != NULL) { - /* This socket has already been prepped */ - err = -EBUSY; - goto err; - } - tunnel = kzalloc(sizeof(struct l2tp_tunnel), GFP_KERNEL); if (tunnel == NULL) { err = -ENOMEM; @@ -1520,72 +1437,126 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 rwlock_init(&tunnel->hlist_lock); tunnel->acpt_newsess = true; - /* The net we belong to */ - tunnel->l2tp_net = net; - pn = l2tp_pernet(net); - if (cfg != NULL) tunnel->debug = cfg->debug; - /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ tunnel->encap = encap; - if (encap == L2TP_ENCAPTYPE_UDP) { - struct udp_tunnel_sock_cfg udp_cfg = { }; - - udp_cfg.sk_user_data = tunnel; - udp_cfg.encap_type = UDP_ENCAP_L2TPINUDP; - udp_cfg.encap_rcv = l2tp_udp_encap_recv; - udp_cfg.encap_destroy = l2tp_udp_encap_destroy; - - setup_udp_tunnel_sock(net, sock, &udp_cfg); - } else { - sk->sk_user_data = tunnel; - } - /* Bump the reference count. The tunnel context is deleted - * only when this drops to zero. A reference is also held on - * the tunnel socket to ensure that it is not released while - * the tunnel is extant. Must be done before sk_destruct is - * set. - */ refcount_set(&tunnel->ref_count, 1); - sock_hold(sk); - tunnel->sock = sk; tunnel->fd = fd; - /* Hook on the tunnel socket destructor so that we can cleanup - * if the tunnel socket goes away. - */ - tunnel->old_sk_destruct = sk->sk_destruct; - sk->sk_destruct = &l2tp_tunnel_destruct; - lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class, "l2tp_sock"); - - sk->sk_allocation = GFP_ATOMIC; - /* Init delete workqueue struct */ INIT_WORK(&tunnel->del_work, l2tp_tunnel_del_work); - /* Add tunnel to our list */ INIT_LIST_HEAD(&tunnel->list); - spin_lock_bh(&pn->l2tp_tunnel_list_lock); - list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list); - spin_unlock_bh(&pn->l2tp_tunnel_list_lock); err = 0; err: if (tunnelp) *tunnelp = tunnel; - /* If tunnel's socket was created by the kernel, it doesn't - * have a file. - */ - if (sock && sock->file) - sockfd_put(sock); - return err; } EXPORT_SYMBOL_GPL(l2tp_tunnel_create); +static int l2tp_validate_socket(const struct sock *sk, const struct net *net, + enum l2tp_encap_type encap) +{ + if (!net_eq(sock_net(sk), net)) + return -EINVAL; + + if (sk->sk_type != SOCK_DGRAM) + return -EPROTONOSUPPORT; + + if ((encap == L2TP_ENCAPTYPE_UDP && sk->sk_protocol != IPPROTO_UDP) || + (encap == L2TP_ENCAPTYPE_IP && sk->sk_protocol != IPPROTO_L2TP)) + return -EPROTONOSUPPORT; + + if (sk->sk_user_data) + return -EBUSY; + + return 0; +} + +int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, + struct l2tp_tunnel_cfg *cfg) +{ + struct l2tp_tunnel *tunnel_walk; + struct l2tp_net *pn; + struct socket *sock; + struct sock *sk; + int ret; + + if (tunnel->fd < 0) { + ret = l2tp_tunnel_sock_create(net, tunnel->tunnel_id, + tunnel->peer_tunnel_id, cfg, + &sock); + if (ret < 0) + goto err; + } else { + sock = sockfd_lookup(tunnel->fd, &ret); + if (!sock) + goto err; + + ret = l2tp_validate_socket(sock->sk, net, tunnel->encap); + if (ret < 0) + goto err_sock; + } + + sk = sock->sk; + + sock_hold(sk); + tunnel->sock = sk; + tunnel->l2tp_net = net; + + pn = l2tp_pernet(net); + + spin_lock_bh(&pn->l2tp_tunnel_list_lock); + list_for_each_entry(tunnel_walk, &pn->l2tp_tunnel_list, list) { + if (tunnel_walk->tunnel_id == tunnel->tunnel_id) { + spin_unlock_bh(&pn->l2tp_tunnel_list_lock); + + ret = -EEXIST; + goto err_sock; + } + } + list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list); + spin_unlock_bh(&pn->l2tp_tunnel_list_lock); + + if (tunnel->encap == L2TP_ENCAPTYPE_UDP) { + struct udp_tunnel_sock_cfg udp_cfg = { + .sk_user_data = tunnel, + .encap_type = UDP_ENCAP_L2TPINUDP, + .encap_rcv = l2tp_udp_encap_recv, + .encap_destroy = l2tp_udp_encap_destroy, + }; + + setup_udp_tunnel_sock(net, sock, &udp_cfg); + } else { + sk->sk_user_data = tunnel; + } + + tunnel->old_sk_destruct = sk->sk_destruct; + sk->sk_destruct = &l2tp_tunnel_destruct; + lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class, + "l2tp_sock"); + sk->sk_allocation = GFP_ATOMIC; + + if (tunnel->fd >= 0) + sockfd_put(sock); + + return 0; + +err_sock: + if (tunnel->fd < 0) + sock_release(sock); + else + sockfd_put(sock); +err: + return ret; +} +EXPORT_SYMBOL_GPL(l2tp_tunnel_register); + /* This function is used by the netlink TUNNEL_DELETE command. */ void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 2718d0b284d0..ba33cbec71eb 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -220,12 +220,14 @@ struct l2tp_session *l2tp_session_get(const struct net *net, struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth); struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, const char *ifname); -struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id); struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth); int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp); +int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, + struct l2tp_tunnel_cfg *cfg); + void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); struct l2tp_session *l2tp_session_create(int priv_size, diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index e7ea9c4b89ff..b05dbd9ffcb2 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -236,12 +236,6 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info if (info->attrs[L2TP_ATTR_DEBUG]) cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]); - tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel != NULL) { - ret = -EEXIST; - goto out; - } - ret = -EINVAL; switch (cfg.encap) { case L2TP_ENCAPTYPE_UDP: @@ -251,9 +245,19 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info break; } - if (ret >= 0) - ret = l2tp_tunnel_notify(&l2tp_nl_family, info, - tunnel, L2TP_CMD_TUNNEL_CREATE); + if (ret < 0) + goto out; + + l2tp_tunnel_inc_refcount(tunnel); + ret = l2tp_tunnel_register(tunnel, net, &cfg); + if (ret < 0) { + kfree(tunnel); + goto out; + } + ret = l2tp_tunnel_notify(&l2tp_nl_family, info, tunnel, + L2TP_CMD_TUNNEL_CREATE); + l2tp_tunnel_dec_refcount(tunnel); + out: return ret; } diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index d6deca11da19..896bbca9bdaa 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -698,6 +698,15 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, &tcfg, &tunnel); if (error < 0) goto end; + + l2tp_tunnel_inc_refcount(tunnel); + error = l2tp_tunnel_register(tunnel, sock_net(sk), + &tcfg); + if (error < 0) { + kfree(tunnel); + goto end; + } + drop_tunnel = true; } } else { /* Error if we can't find the tunnel */ diff --git a/net/rds/send.c b/net/rds/send.c index acad04243b41..94c7f74909be 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Oracle. All rights reserved. + * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -1017,10 +1017,15 @@ static int rds_send_mprds_hash(struct rds_sock *rs, struct rds_connection *conn) if (conn->c_npaths == 0 && hash != 0) { rds_send_ping(conn, 0); - if (conn->c_npaths == 0) { - wait_event_interruptible(conn->c_hs_waitq, - (conn->c_npaths != 0)); - } + /* The underlying connection is not up yet. Need to wait + * until it is up to be sure that the non-zero c_path can be + * used. But if we are interrupted, we have to use the zero + * c_path in case the connection ends up being non-MP capable. + */ + if (conn->c_npaths == 0) + if (wait_event_interruptible(conn->c_hs_waitq, + conn->c_npaths != 0)) + hash = 0; if (conn->c_npaths == 1) hash = 0; } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 806395687bb6..c2266f387213 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1887,7 +1887,7 @@ call_connect_status(struct rpc_task *task) dprint_status(task); - trace_rpc_connect_status(task, status); + trace_rpc_connect_status(task); task->tk_status = 0; switch (status) { case -ECONNREFUSED: @@ -2014,6 +2014,9 @@ call_transmit_status(struct rpc_task *task) case -EPERM: if (RPC_IS_SOFTCONN(task)) { xprt_end_transmit(task); + if (!task->tk_msg.rpc_proc->p_proc) + trace_xprt_ping(task->tk_xprt, + task->tk_status); rpc_exit(task, task->tk_status); break; } @@ -2112,6 +2115,9 @@ call_status(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; int status; + if (!task->tk_msg.rpc_proc->p_proc) + trace_xprt_ping(task->tk_xprt, task->tk_status); + if (req->rq_reply_bytes_recvd > 0 && !req->rq_bytes_sent) task->tk_status = req->rq_reply_bytes_recvd; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index d9db2eab3a8d..3fe5d60ab0e2 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -276,7 +276,7 @@ static void rpc_set_active(struct rpc_task *task) { rpc_task_set_debuginfo(task); set_bit(RPC_TASK_ACTIVE, &task->tk_runstate); - trace_rpc_task_begin(task->tk_client, task, NULL); + trace_rpc_task_begin(task, NULL); } /* @@ -291,7 +291,7 @@ static int rpc_complete_task(struct rpc_task *task) unsigned long flags; int ret; - trace_rpc_task_complete(task->tk_client, task, NULL); + trace_rpc_task_complete(task, NULL); spin_lock_irqsave(&wq->lock, flags); clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate); @@ -358,7 +358,7 @@ static void __rpc_sleep_on_priority(struct rpc_wait_queue *q, dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", task->tk_pid, rpc_qname(q), jiffies); - trace_rpc_task_sleep(task->tk_client, task, q); + trace_rpc_task_sleep(task, q); __rpc_add_wait_queue(q, task, queue_priority); @@ -428,7 +428,7 @@ static void __rpc_do_wake_up_task_on_wq(struct workqueue_struct *wq, return; } - trace_rpc_task_wakeup(task->tk_client, task, queue); + trace_rpc_task_wakeup(task, queue); __rpc_remove_wait_queue(queue, task); @@ -780,7 +780,7 @@ static void __rpc_execute(struct rpc_task *task) } if (!do_action) break; - trace_rpc_task_run_action(task->tk_client, task, do_action); + trace_rpc_task_run_action(task, do_action); do_action(task); /* diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 1e671333c3d5..f68aa46c9dd7 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -24,6 +24,8 @@ #include <linux/sunrpc/metrics.h> #include <linux/rcupdate.h> +#include <trace/events/sunrpc.h> + #include "netns.h" #define RPCDBG_FACILITY RPCDBG_MISC @@ -148,7 +150,7 @@ void rpc_count_iostats_metrics(const struct rpc_task *task, struct rpc_iostats *op_metrics) { struct rpc_rqst *req = task->tk_rqstp; - ktime_t delta, now; + ktime_t backlog, execute, now; if (!op_metrics || !req) return; @@ -164,16 +166,20 @@ void rpc_count_iostats_metrics(const struct rpc_task *task, op_metrics->om_bytes_sent += req->rq_xmit_bytes_sent; op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd; + backlog = 0; if (ktime_to_ns(req->rq_xtime)) { - delta = ktime_sub(req->rq_xtime, task->tk_start); - op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta); + backlog = ktime_sub(req->rq_xtime, task->tk_start); + op_metrics->om_queue = ktime_add(op_metrics->om_queue, backlog); } + op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt); - delta = ktime_sub(now, task->tk_start); - op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta); + execute = ktime_sub(now, task->tk_start); + op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute); spin_unlock(&op_metrics->om_lock); + + trace_rpc_stats_latency(req->rq_task, backlog, req->rq_rtt, execute); } EXPORT_SYMBOL_GPL(rpc_count_iostats_metrics); diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h index f2b7cb540e61..09a0315ea77b 100644 --- a/net/sunrpc/sunrpc.h +++ b/net/sunrpc/sunrpc.h @@ -37,12 +37,6 @@ struct rpc_buffer { char data[]; }; -static inline int rpc_reply_expected(struct rpc_task *task) -{ - return (task->tk_msg.rpc_proc != NULL) && - (task->tk_msg.rpc_proc->p_decode != NULL); -} - static inline int sock_is_loopback(struct sock *sk) { struct dst_entry *dst; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index e34f4ee7f2b6..30afbd236656 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1519,6 +1519,88 @@ out: EXPORT_SYMBOL_GPL(xdr_process_buf); /** + * xdr_stream_decode_opaque - Decode variable length opaque + * @xdr: pointer to xdr_stream + * @ptr: location to store opaque data + * @size: size of storage buffer @ptr + * + * Return values: + * On success, returns size of object stored in *@ptr + * %-EBADMSG on XDR buffer overflow + * %-EMSGSIZE on overflow of storage buffer @ptr + */ +ssize_t xdr_stream_decode_opaque(struct xdr_stream *xdr, void *ptr, size_t size) +{ + ssize_t ret; + void *p; + + ret = xdr_stream_decode_opaque_inline(xdr, &p, size); + if (ret <= 0) + return ret; + memcpy(ptr, p, ret); + return ret; +} +EXPORT_SYMBOL_GPL(xdr_stream_decode_opaque); + +/** + * xdr_stream_decode_opaque_dup - Decode and duplicate variable length opaque + * @xdr: pointer to xdr_stream + * @ptr: location to store pointer to opaque data + * @maxlen: maximum acceptable object size + * @gfp_flags: GFP mask to use + * + * Return values: + * On success, returns size of object stored in *@ptr + * %-EBADMSG on XDR buffer overflow + * %-EMSGSIZE if the size of the object would exceed @maxlen + * %-ENOMEM on memory allocation failure + */ +ssize_t xdr_stream_decode_opaque_dup(struct xdr_stream *xdr, void **ptr, + size_t maxlen, gfp_t gfp_flags) +{ + ssize_t ret; + void *p; + + ret = xdr_stream_decode_opaque_inline(xdr, &p, maxlen); + if (ret > 0) { + *ptr = kmemdup(p, ret, gfp_flags); + if (*ptr != NULL) + return ret; + ret = -ENOMEM; + } + *ptr = NULL; + return ret; +} +EXPORT_SYMBOL_GPL(xdr_stream_decode_opaque_dup); + +/** + * xdr_stream_decode_string - Decode variable length string + * @xdr: pointer to xdr_stream + * @str: location to store string + * @size: size of storage buffer @str + * + * Return values: + * On success, returns length of NUL-terminated string stored in *@str + * %-EBADMSG on XDR buffer overflow + * %-EMSGSIZE on overflow of storage buffer @str + */ +ssize_t xdr_stream_decode_string(struct xdr_stream *xdr, char *str, size_t size) +{ + ssize_t ret; + void *p; + + ret = xdr_stream_decode_opaque_inline(xdr, &p, size); + if (ret > 0) { + memcpy(str, p, ret); + str[ret] = '\0'; + return strlen(str); + } + *str = '\0'; + return ret; +} +EXPORT_SYMBOL_GPL(xdr_stream_decode_string); + +/** * xdr_stream_decode_string_dup - Decode and duplicate variable length string * @xdr: pointer to xdr_stream * @str: location to store pointer to string diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 8f0ad4f268da..70f005044f06 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -826,6 +826,7 @@ static void xprt_connect_status(struct rpc_task *task) * @xprt: transport on which the original request was transmitted * @xid: RPC XID of incoming reply * + * Caller holds xprt->recv_lock. */ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) { @@ -834,6 +835,7 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) list_for_each_entry(entry, &xprt->recv, rq_list) if (entry->rq_xid == xid) { trace_xprt_lookup_rqst(xprt, xid, 0); + entry->rq_rtt = ktime_sub(ktime_get(), entry->rq_xtime); return entry; } @@ -889,7 +891,13 @@ __must_hold(&req->rq_xprt->recv_lock) } } -static void xprt_update_rtt(struct rpc_task *task) +/** + * xprt_update_rtt - Update RPC RTT statistics + * @task: RPC request that recently completed + * + * Caller holds xprt->recv_lock. + */ +void xprt_update_rtt(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_rtt *rtt = task->tk_client->cl_rtt; @@ -902,13 +910,14 @@ static void xprt_update_rtt(struct rpc_task *task) rpc_set_timeo(rtt, timer, req->rq_ntrans - 1); } } +EXPORT_SYMBOL_GPL(xprt_update_rtt); /** * xprt_complete_rqst - called when reply processing is complete * @task: RPC request that recently completed * @copied: actual number of bytes received from the transport * - * Caller holds transport lock. + * Caller holds xprt->recv_lock. */ void xprt_complete_rqst(struct rpc_task *task, int copied) { @@ -920,9 +929,6 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) trace_xprt_complete_rqst(xprt, req->rq_xid, copied); xprt->stat.recvs++; - req->rq_rtt = ktime_sub(ktime_get(), req->rq_xtime); - if (xprt->ops->timer != NULL) - xprt_update_rtt(task); list_del_init(&req->rq_list); req->rq_private_buf.len = copied; @@ -1003,7 +1009,7 @@ void xprt_transmit(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; unsigned int connect_cookie; - int status, numreqs; + int status; dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); @@ -1027,7 +1033,6 @@ void xprt_transmit(struct rpc_task *task) return; connect_cookie = xprt->connect_cookie; - req->rq_xtime = ktime_get(); status = xprt->ops->send_request(task); trace_xprt_transmit(xprt, req->rq_xid, status); if (status != 0) { @@ -1042,9 +1047,6 @@ void xprt_transmit(struct rpc_task *task) xprt->ops->set_retrans_timeout(task); - numreqs = atomic_read(&xprt->num_reqs); - if (numreqs > xprt->stat.max_slots) - xprt->stat.max_slots = numreqs; xprt->stat.sends++; xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs; xprt->stat.bklog_u += xprt->backlog.qlen; @@ -1106,14 +1108,15 @@ static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt) { struct rpc_rqst *req = ERR_PTR(-EAGAIN); - if (!atomic_add_unless(&xprt->num_reqs, 1, xprt->max_reqs)) + if (xprt->num_reqs >= xprt->max_reqs) goto out; + ++xprt->num_reqs; spin_unlock(&xprt->reserve_lock); req = kzalloc(sizeof(struct rpc_rqst), GFP_NOFS); spin_lock(&xprt->reserve_lock); if (req != NULL) goto out; - atomic_dec(&xprt->num_reqs); + --xprt->num_reqs; req = ERR_PTR(-ENOMEM); out: return req; @@ -1121,7 +1124,8 @@ out: static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) { - if (atomic_add_unless(&xprt->num_reqs, -1, xprt->min_reqs)) { + if (xprt->num_reqs > xprt->min_reqs) { + --xprt->num_reqs; kfree(req); return true; } @@ -1157,6 +1161,8 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) spin_unlock(&xprt->reserve_lock); return; out_init_req: + xprt->stat.max_slots = max_t(unsigned int, xprt->stat.max_slots, + xprt->num_reqs); task->tk_status = 0; task->tk_rqstp = req; xprt_request_init(task, xprt); @@ -1224,7 +1230,7 @@ struct rpc_xprt *xprt_alloc(struct net *net, size_t size, else xprt->max_reqs = num_prealloc; xprt->min_reqs = num_prealloc; - atomic_set(&xprt->num_reqs, num_prealloc); + xprt->num_reqs = num_prealloc; return xprt; diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index ed1a4a3065ee..47ebac949769 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -44,13 +44,6 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, if (IS_ERR(req)) return PTR_ERR(req); - rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, - DMA_TO_DEVICE, GFP_KERNEL); - if (IS_ERR(rb)) - goto out_fail; - req->rl_rdmabuf = rb; - xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb)); - size = r_xprt->rx_data.inline_rsize; rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL); if (IS_ERR(rb)) diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index d5f95bb39300..5cc68a824f45 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -191,7 +191,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, mr = rpcrdma_mr_get(r_xprt); if (!mr) - return ERR_PTR(-ENOBUFS); + return ERR_PTR(-EAGAIN); pageoff = offset_in_page(seg1->mr_offset); seg1->mr_offset -= pageoff; /* start of page */ @@ -251,6 +251,16 @@ out_maperr: return ERR_PTR(-EIO); } +/* Post Send WR containing the RPC Call message. + */ +static int +fmr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req) +{ + struct ib_send_wr *bad_wr; + + return ib_post_send(ia->ri_id->qp, &req->rl_sendctx->sc_wr, &bad_wr); +} + /* Invalidate all memory regions that were registered for "req". * * Sleeps until it is safe for the host CPU to access the @@ -305,6 +315,7 @@ out_reset: const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { .ro_map = fmr_op_map, + .ro_send = fmr_op_send, .ro_unmap_sync = fmr_op_unmap_sync, .ro_recover_mr = fmr_op_recover_mr, .ro_open = fmr_op_open, diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 90f688f19783..c5743a0960be 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -357,8 +357,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, struct rpcrdma_mr *mr; struct ib_mr *ibmr; struct ib_reg_wr *reg_wr; - struct ib_send_wr *bad_wr; - int rc, i, n; + int i, n; u8 key; mr = NULL; @@ -367,7 +366,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, rpcrdma_mr_defer_recovery(mr); mr = rpcrdma_mr_get(r_xprt); if (!mr) - return ERR_PTR(-ENOBUFS); + return ERR_PTR(-EAGAIN); } while (mr->frwr.fr_state != FRWR_IS_INVALID); frwr = &mr->frwr; frwr->fr_state = FRWR_IS_VALID; @@ -407,22 +406,12 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ib_update_fast_reg_key(ibmr, ++key); reg_wr = &frwr->fr_regwr; - reg_wr->wr.next = NULL; - reg_wr->wr.opcode = IB_WR_REG_MR; - frwr->fr_cqe.done = frwr_wc_fastreg; - reg_wr->wr.wr_cqe = &frwr->fr_cqe; - reg_wr->wr.num_sge = 0; - reg_wr->wr.send_flags = 0; reg_wr->mr = ibmr; reg_wr->key = ibmr->rkey; reg_wr->access = writing ? IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_READ; - rc = ib_post_send(ia->ri_id->qp, ®_wr->wr, &bad_wr); - if (rc) - goto out_senderr; - mr->mr_handle = ibmr->rkey; mr->mr_length = ibmr->length; mr->mr_offset = ibmr->iova; @@ -442,11 +431,40 @@ out_mapmr_err: frwr->fr_mr, n, mr->mr_nents); rpcrdma_mr_defer_recovery(mr); return ERR_PTR(-EIO); +} -out_senderr: - pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc); - rpcrdma_mr_defer_recovery(mr); - return ERR_PTR(-ENOTCONN); +/* Post Send WR containing the RPC Call message. + * + * For FRMR, chain any FastReg WRs to the Send WR. Only a + * single ib_post_send call is needed to register memory + * and then post the Send WR. + */ +static int +frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req) +{ + struct ib_send_wr *post_wr, *bad_wr; + struct rpcrdma_mr *mr; + + post_wr = &req->rl_sendctx->sc_wr; + list_for_each_entry(mr, &req->rl_registered, mr_list) { + struct rpcrdma_frwr *frwr; + + frwr = &mr->frwr; + + frwr->fr_cqe.done = frwr_wc_fastreg; + frwr->fr_regwr.wr.next = post_wr; + frwr->fr_regwr.wr.wr_cqe = &frwr->fr_cqe; + frwr->fr_regwr.wr.num_sge = 0; + frwr->fr_regwr.wr.opcode = IB_WR_REG_MR; + frwr->fr_regwr.wr.send_flags = 0; + + post_wr = &frwr->fr_regwr.wr; + } + + /* If ib_post_send fails, the next ->send_request for + * @req will queue these MWs for recovery. + */ + return ib_post_send(ia->ri_id->qp, post_wr, &bad_wr); } /* Handle a remotely invalidated mr on the @mrs list @@ -561,6 +579,7 @@ reset_mrs: const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { .ro_map = frwr_op_map, + .ro_send = frwr_op_send, .ro_reminv = frwr_op_reminv, .ro_unmap_sync = frwr_op_unmap_sync, .ro_recover_mr = frwr_op_recover_mr, diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index f0855a959a27..e8adad33d0bb 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -365,7 +365,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, false, &mr); if (IS_ERR(seg)) - return PTR_ERR(seg); + goto out_maperr; rpcrdma_mr_push(mr, &req->rl_registered); if (encode_read_segment(xdr, mr, pos) < 0) @@ -377,6 +377,11 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, } while (nsegs); return 0; + +out_maperr: + if (PTR_ERR(seg) == -EAGAIN) + xprt_wait_for_buffer_space(rqst->rq_task, NULL); + return PTR_ERR(seg); } /* Register and XDR encode the Write list. Supports encoding a list @@ -423,7 +428,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, true, &mr); if (IS_ERR(seg)) - return PTR_ERR(seg); + goto out_maperr; rpcrdma_mr_push(mr, &req->rl_registered); if (encode_rdma_segment(xdr, mr) < 0) @@ -440,6 +445,11 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, *segcount = cpu_to_be32(nchunks); return 0; + +out_maperr: + if (PTR_ERR(seg) == -EAGAIN) + xprt_wait_for_buffer_space(rqst->rq_task, NULL); + return PTR_ERR(seg); } /* Register and XDR encode the Reply chunk. Supports encoding an array @@ -481,7 +491,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, true, &mr); if (IS_ERR(seg)) - return PTR_ERR(seg); + goto out_maperr; rpcrdma_mr_push(mr, &req->rl_registered); if (encode_rdma_segment(xdr, mr) < 0) @@ -498,6 +508,11 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, *segcount = cpu_to_be32(nchunks); return 0; + +out_maperr: + if (PTR_ERR(seg) == -EAGAIN) + xprt_wait_for_buffer_space(rqst->rq_task, NULL); + return PTR_ERR(seg); } /** @@ -724,8 +739,8 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, * Returns: * %0 if the RPC was sent successfully, * %-ENOTCONN if the connection was lost, - * %-EAGAIN if not enough pages are available for on-demand reply buffer, - * %-ENOBUFS if no MRs are available to register chunks, + * %-EAGAIN if the caller should call again with the same arguments, + * %-ENOBUFS if the caller should call again after a delay, * %-EMSGSIZE if the transport header is too small, * %-EIO if a permanent problem occurred while marshaling. */ @@ -868,10 +883,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) return 0; out_err: - if (ret != -ENOBUFS) { - pr_err("rpcrdma: header marshaling failed (%d)\n", ret); - r_xprt->rx_stats.failed_marshal_count++; - } + r_xprt->rx_stats.failed_marshal_count++; return ret; } @@ -1366,7 +1378,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) trace_xprtrdma_reply(rqst->rq_task, rep, req, credits); - queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work); + queue_work(rpcrdma_receive_wq, &rep->rr_work); return; out_badstatus: diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 4b1ecfe979cf..cc1aad325496 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -52,7 +52,6 @@ #include <linux/slab.h> #include <linux/seq_file.h> #include <linux/sunrpc/addr.h> -#include <linux/smp.h> #include "xprt_rdma.h" @@ -237,8 +236,6 @@ rpcrdma_connect_worker(struct work_struct *work) struct rpc_xprt *xprt = &r_xprt->rx_xprt; spin_lock_bh(&xprt->transport_lock); - if (++xprt->connect_cookie == 0) /* maintain a reserved value */ - ++xprt->connect_cookie; if (ep->rep_connected > 0) { if (!xprt_test_and_set_connected(xprt)) xprt_wake_pending_tasks(xprt, 0); @@ -540,29 +537,6 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) } } -/* Allocate a fixed-size buffer in which to construct and send the - * RPC-over-RDMA header for this request. - */ -static bool -rpcrdma_get_rdmabuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, - gfp_t flags) -{ - size_t size = RPCRDMA_HDRBUF_SIZE; - struct rpcrdma_regbuf *rb; - - if (req->rl_rdmabuf) - return true; - - rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags); - if (IS_ERR(rb)) - return false; - - r_xprt->rx_stats.hardway_register_count += size; - req->rl_rdmabuf = rb; - xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb)); - return true; -} - static bool rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, size_t size, gfp_t flags) @@ -644,15 +618,11 @@ xprt_rdma_allocate(struct rpc_task *task) if (RPC_IS_SWAPPER(task)) flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; - if (!rpcrdma_get_rdmabuf(r_xprt, req, flags)) - goto out_fail; if (!rpcrdma_get_sendbuf(r_xprt, req, rqst->rq_callsize, flags)) goto out_fail; if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) goto out_fail; - req->rl_cpu = smp_processor_id(); - req->rl_connect_cookie = 0; /* our reserved value */ rpcrdma_set_xprtdata(rqst, req); rqst->rq_buffer = req->rl_sendbuf->rg_base; rqst->rq_rbuffer = req->rl_recvbuf->rg_base; @@ -694,7 +664,8 @@ xprt_rdma_free(struct rpc_task *task) * Returns: * %0 if the RPC message has been sent * %-ENOTCONN if the caller should reconnect and call again - * %-ENOBUFS if the caller should call again later + * %-EAGAIN if the caller should call again + * %-ENOBUFS if the caller should call again after a delay * %-EIO if a permanent error occurred and the request was not * sent. Do not try to send this message again. */ @@ -723,9 +694,9 @@ xprt_rdma_send_request(struct rpc_task *task) rpcrdma_recv_buffer_get(req); /* Must suppress retransmit to maintain credits */ - if (req->rl_connect_cookie == xprt->connect_cookie) + if (rqst->rq_connect_cookie == xprt->connect_cookie) goto drop_connection; - req->rl_connect_cookie = xprt->connect_cookie; + rqst->rq_xtime = ktime_get(); __set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) @@ -733,6 +704,12 @@ xprt_rdma_send_request(struct rpc_task *task) rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len; rqst->rq_bytes_sent = 0; + + /* An RPC with no reply will throw off credit accounting, + * so drop the connection to reset the credit grant. + */ + if (!rpc_reply_expected(task)) + goto drop_connection; return 0; failed_marshal: diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index e6f84a6434a0..fe5eaca2d197 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -250,11 +250,11 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) wait_for_completion(&ia->ri_remove_done); ia->ri_id = NULL; - ia->ri_pd = NULL; ia->ri_device = NULL; /* Return 1 to ensure the core destroys the id. */ return 1; case RDMA_CM_EVENT_ESTABLISHED: + ++xprt->rx_xprt.connect_cookie; connstate = 1; rpcrdma_update_connect_private(xprt, &event->param.conn); goto connected; @@ -273,6 +273,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) connstate = -EAGAIN; goto connected; case RDMA_CM_EVENT_DISCONNECTED: + ++xprt->rx_xprt.connect_cookie; connstate = -ECONNABORTED; connected: xprt->rx_buf.rb_credits = 1; @@ -445,7 +446,9 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) ia->ri_id->qp = NULL; } ib_free_cq(ep->rep_attr.recv_cq); + ep->rep_attr.recv_cq = NULL; ib_free_cq(ep->rep_attr.send_cq); + ep->rep_attr.send_cq = NULL; /* The ULP is responsible for ensuring all DMA * mappings and MRs are gone. @@ -458,6 +461,8 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) rpcrdma_dma_unmap_regbuf(req->rl_recvbuf); } rpcrdma_mrs_destroy(buf); + ib_dealloc_pd(ia->ri_pd); + ia->ri_pd = NULL; /* Allow waiters to continue */ complete(&ia->ri_remove_done); @@ -589,11 +594,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, /* Client offers RDMA Read but does not initiate */ ep->rep_remote_cma.initiator_depth = 0; - if (ia->ri_device->attrs.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ - ep->rep_remote_cma.responder_resources = 32; - else - ep->rep_remote_cma.responder_resources = - ia->ri_device->attrs.max_qp_rd_atom; + ep->rep_remote_cma.responder_resources = + min_t(int, U8_MAX, ia->ri_device->attrs.max_qp_rd_atom); /* Limit transport retries so client can detect server * GID changes quickly. RPC layer handles re-establishing @@ -628,14 +630,16 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) { cancel_delayed_work_sync(&ep->rep_connect_worker); - if (ia->ri_id->qp) { + if (ia->ri_id && ia->ri_id->qp) { rpcrdma_ep_disconnect(ep, ia); rdma_destroy_qp(ia->ri_id); ia->ri_id->qp = NULL; } - ib_free_cq(ep->rep_attr.recv_cq); - ib_free_cq(ep->rep_attr.send_cq); + if (ep->rep_attr.recv_cq) + ib_free_cq(ep->rep_attr.recv_cq); + if (ep->rep_attr.send_cq) + ib_free_cq(ep->rep_attr.send_cq); } /* Re-establish a connection after a device removal event. @@ -1024,7 +1028,7 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) LIST_HEAD(free); LIST_HEAD(all); - for (count = 0; count < 32; count++) { + for (count = 0; count < 3; count++) { struct rpcrdma_mr *mr; int rc; @@ -1049,8 +1053,9 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) list_splice(&all, &buf->rb_all); r_xprt->rx_stats.mrs_allocated += count; spin_unlock(&buf->rb_mrlock); - trace_xprtrdma_createmrs(r_xprt, count); + + xprt_write_space(&r_xprt->rx_xprt); } static void @@ -1068,17 +1073,27 @@ struct rpcrdma_req * rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; + struct rpcrdma_regbuf *rb; struct rpcrdma_req *req; req = kzalloc(sizeof(*req), GFP_KERNEL); if (req == NULL) return ERR_PTR(-ENOMEM); + rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, + DMA_TO_DEVICE, GFP_KERNEL); + if (IS_ERR(rb)) { + kfree(req); + return ERR_PTR(-ENOMEM); + } + req->rl_rdmabuf = rb; + xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb)); + req->rl_buffer = buffer; + INIT_LIST_HEAD(&req->rl_registered); + spin_lock(&buffer->rb_reqslock); list_add(&req->rl_all, &buffer->rb_allreqs); spin_unlock(&buffer->rb_reqslock); - req->rl_buffer = &r_xprt->rx_buf; - INIT_LIST_HEAD(&req->rl_registered); return req; } @@ -1535,7 +1550,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, struct rpcrdma_req *req) { struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr; - struct ib_send_wr *send_wr_fail; int rc; if (req->rl_reply) { @@ -1554,7 +1568,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, --ep->rep_send_count; } - rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail); + rc = ia->ri_ops->ro_send(ia, req); trace_xprtrdma_post_send(req, rc); if (rc) return -ENOTCONN; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 69883a960a3f..3d3b423fa9c1 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -334,8 +334,6 @@ enum { struct rpcrdma_buffer; struct rpcrdma_req { struct list_head rl_list; - int rl_cpu; - unsigned int rl_connect_cookie; struct rpcrdma_buffer *rl_buffer; struct rpcrdma_rep *rl_reply; struct xdr_stream rl_stream; @@ -474,6 +472,8 @@ struct rpcrdma_memreg_ops { (*ro_map)(struct rpcrdma_xprt *, struct rpcrdma_mr_seg *, int, bool, struct rpcrdma_mr **); + int (*ro_send)(struct rpcrdma_ia *ia, + struct rpcrdma_req *req); void (*ro_reminv)(struct rpcrdma_rep *rep, struct list_head *mrs); void (*ro_unmap_sync)(struct rpcrdma_xprt *, diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 956e29c1438d..c8902f11efdd 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -527,6 +527,7 @@ static int xs_local_send_request(struct rpc_task *task) xs_pktdump("packet data:", req->rq_svec->iov_base, req->rq_svec->iov_len); + req->rq_xtime = ktime_get(); status = xs_sendpages(transport->sock, NULL, 0, xdr, req->rq_bytes_sent, true, &sent); dprintk("RPC: %s(%u) = %d\n", @@ -589,6 +590,7 @@ static int xs_udp_send_request(struct rpc_task *task) if (!xprt_bound(xprt)) return -ENOTCONN; + req->rq_xtime = ktime_get(); status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen, xdr, req->rq_bytes_sent, true, &sent); @@ -678,6 +680,7 @@ static int xs_tcp_send_request(struct rpc_task *task) /* Continue transmitting the packet/record. We must be careful * to cope with writespace callbacks arriving _after_ we have * called sendmsg(). */ + req->rq_xtime = ktime_get(); while (1) { sent = 0; status = xs_sendpages(transport->sock, NULL, 0, xdr, @@ -1060,6 +1063,7 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt, if (!rovr) goto out_unlock; xprt_pin_rqst(rovr); + xprt_update_rtt(rovr->rq_task); spin_unlock(&xprt->recv_lock); task = rovr->rq_task; diff --git a/scripts/dtc/include-prefixes/cris b/scripts/dtc/include-prefixes/cris deleted file mode 120000 index 736d998ba506..000000000000 --- a/scripts/dtc/include-prefixes/cris +++ /dev/null @@ -1 +0,0 @@ -../../../arch/cris/boot/dts
\ No newline at end of file diff --git a/scripts/dtc/include-prefixes/metag b/scripts/dtc/include-prefixes/metag deleted file mode 120000 index 87a3c847db8f..000000000000 --- a/scripts/dtc/include-prefixes/metag +++ /dev/null @@ -1 +0,0 @@ -../../../arch/metag/boot/dts
\ No newline at end of file |